1 1.1 mrg /* Find near-matches for strings. 2 1.1 mrg Copyright (C) 2015-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This file is part of GCC. 5 1.1 mrg 6 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 7 1.1 mrg the terms of the GNU General Public License as published by the Free 8 1.1 mrg Software Foundation; either version 3, or (at your option) any later 9 1.1 mrg version. 10 1.1 mrg 11 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 1.1 mrg for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU General Public License 17 1.1 mrg along with GCC; see the file COPYING3. If not see 18 1.1 mrg <http://www.gnu.org/licenses/>. */ 19 1.1 mrg 20 1.1 mrg #include "config.h" 21 1.1 mrg #include "system.h" 22 1.1 mrg #include "coretypes.h" 23 1.1 mrg #include "tm.h" 24 1.1 mrg #include "tree.h" 25 1.1 mrg #include "spellcheck.h" 26 1.1 mrg #include "selftest.h" 27 1.1 mrg 28 1.1 mrg /* Cost of a case transformation. */ 29 1.1 mrg #define CASE_COST 1 30 1.1 mrg 31 1.1 mrg /* Cost of another kind of edit. */ 32 1.1 mrg #define BASE_COST 2 33 1.1 mrg 34 1.1 mrg /* Get the edit distance between the two strings: the minimal 35 1.1 mrg number of edits that are needed to change one string into another, 36 1.1 mrg where edits can be one-character insertions, removals, or substitutions, 37 1.1 mrg or transpositions of two adjacent characters (counting as one "edit"). 38 1.1 mrg 39 1.1 mrg This implementation uses a modified variant of the Wagner-Fischer 40 1.1 mrg algorithm for the Damerau-Levenshtein distance; specifically, the 41 1.1 mrg "optimal string alignment distance" or "restricted edit distance" 42 1.1 mrg variant. This implementation has been further modified to take 43 1.1 mrg case into account. */ 44 1.1 mrg 45 1.1 mrg edit_distance_t 46 1.1 mrg get_edit_distance (const char *s, int len_s, 47 1.1 mrg const char *t, int len_t) 48 1.1 mrg { 49 1.1 mrg const bool debug = false; 50 1.1 mrg 51 1.1 mrg if (debug) 52 1.1 mrg { 53 1.1 mrg printf ("s: \"%s\" (len_s=%i)\n", s, len_s); 54 1.1 mrg printf ("t: \"%s\" (len_t=%i)\n", t, len_t); 55 1.1 mrg } 56 1.1 mrg 57 1.1 mrg if (len_s == 0) 58 1.1 mrg return BASE_COST * len_t; 59 1.1 mrg if (len_t == 0) 60 1.1 mrg return BASE_COST * len_s; 61 1.1 mrg 62 1.1 mrg /* We effectively build a matrix where each (i, j) contains the 63 1.1 mrg distance between the prefix strings s[0:j] and t[0:i]. 64 1.1 mrg Rather than actually build an (len_t + 1) * (len_s + 1) matrix, 65 1.1 mrg we simply keep track of the last two rows, v_one_ago and v_two_ago, 66 1.1 mrg and a new row, v_next, which avoids an (len_t + 1) * (len_s + 1) 67 1.1 mrg allocation and memory accesses in favor of three (len_s + 1) 68 1.1 mrg allocations. These could potentially be 69 1.1 mrg statically-allocated if we impose a maximum length on the 70 1.1 mrg strings of interest. */ 71 1.1 mrg edit_distance_t *v_two_ago = new edit_distance_t[len_s + 1]; 72 1.1 mrg edit_distance_t *v_one_ago = new edit_distance_t[len_s + 1]; 73 1.1 mrg edit_distance_t *v_next = new edit_distance_t[len_s + 1]; 74 1.1 mrg 75 1.1 mrg /* The first row is for the case of an empty target string, which 76 1.1 mrg we can reach by deleting every character in the source string. */ 77 1.1 mrg for (int i = 0; i < len_s + 1; i++) 78 1.1 mrg v_one_ago[i] = i * BASE_COST; 79 1.1 mrg 80 1.1 mrg /* Build successive rows. */ 81 1.1 mrg for (int i = 0; i < len_t; i++) 82 1.1 mrg { 83 1.1 mrg if (debug) 84 1.1 mrg { 85 1.1 mrg printf ("i:%i v_one_ago = ", i); 86 1.1 mrg for (int j = 0; j < len_s + 1; j++) 87 1.1 mrg printf ("%i ", v_one_ago[j]); 88 1.1 mrg printf ("\n"); 89 1.1 mrg } 90 1.1 mrg 91 1.1 mrg /* The initial column is for the case of an empty source string; we 92 1.1 mrg can reach prefixes of the target string of length i 93 1.1 mrg by inserting i characters. */ 94 1.1 mrg v_next[0] = (i + 1) * BASE_COST; 95 1.1 mrg 96 1.1 mrg /* Build the rest of the row by considering neighbors to 97 1.1 mrg the north, west and northwest. */ 98 1.1 mrg for (int j = 0; j < len_s; j++) 99 1.1 mrg { 100 1.1 mrg edit_distance_t cost; 101 1.1 mrg 102 1.1 mrg if (s[j] == t[i]) 103 1.1 mrg cost = 0; 104 1.1 mrg else if (TOLOWER (s[j]) == TOLOWER (t[i])) 105 1.1 mrg cost = CASE_COST; 106 1.1 mrg else 107 1.1 mrg cost = BASE_COST; 108 1.1 mrg edit_distance_t deletion = v_next[j] + BASE_COST; 109 1.1 mrg edit_distance_t insertion = v_one_ago[j + 1] + BASE_COST; 110 1.1 mrg edit_distance_t substitution = v_one_ago[j] + cost; 111 1.1 mrg edit_distance_t cheapest = MIN (deletion, insertion); 112 1.1 mrg cheapest = MIN (cheapest, substitution); 113 1.1 mrg if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i]) 114 1.1 mrg { 115 1.1 mrg edit_distance_t transposition = v_two_ago[j - 1] + BASE_COST; 116 1.1 mrg cheapest = MIN (cheapest, transposition); 117 1.1 mrg } 118 1.1 mrg v_next[j + 1] = cheapest; 119 1.1 mrg } 120 1.1 mrg 121 1.1 mrg /* Prepare to move on to next row. */ 122 1.1 mrg for (int j = 0; j < len_s + 1; j++) 123 1.1 mrg { 124 1.1 mrg v_two_ago[j] = v_one_ago[j]; 125 1.1 mrg v_one_ago[j] = v_next[j]; 126 1.1 mrg } 127 1.1 mrg } 128 1.1 mrg 129 1.1 mrg if (debug) 130 1.1 mrg { 131 1.1 mrg printf ("final v_next = "); 132 1.1 mrg for (int j = 0; j < len_s + 1; j++) 133 1.1 mrg printf ("%i ", v_next[j]); 134 1.1 mrg printf ("\n"); 135 1.1 mrg } 136 1.1 mrg 137 1.1 mrg edit_distance_t result = v_next[len_s]; 138 1.1 mrg delete[] v_two_ago; 139 1.1 mrg delete[] v_one_ago; 140 1.1 mrg delete[] v_next; 141 1.1 mrg return result; 142 1.1 mrg } 143 1.1 mrg 144 1.1 mrg /* Get the edit distance between two nil-terminated strings. */ 145 1.1 mrg 146 1.1 mrg edit_distance_t 147 1.1 mrg get_edit_distance (const char *s, const char *t) 148 1.1 mrg { 149 1.1 mrg return get_edit_distance (s, strlen (s), t, strlen (t)); 150 1.1 mrg } 151 1.1 mrg 152 1.1 mrg /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to 153 1.1 mrg an autovec of non-NULL strings, determine which element within 154 1.1 mrg CANDIDATES has the lowest edit distance to TARGET. If there are 155 1.1 mrg multiple elements with the same minimal distance, the first in the 156 1.1 mrg vector wins. 157 1.1 mrg 158 1.1 mrg If more than half of the letters were misspelled, the suggestion is 159 1.1 mrg likely to be meaningless, so return NULL for this case. */ 160 1.1 mrg 161 1.1 mrg const char * 162 1.1 mrg find_closest_string (const char *target, 163 1.1 mrg const auto_vec<const char *> *candidates) 164 1.1 mrg { 165 1.1 mrg gcc_assert (target); 166 1.1 mrg gcc_assert (candidates); 167 1.1 mrg 168 1.1 mrg int i; 169 1.1 mrg const char *candidate; 170 1.1 mrg best_match<const char *, const char *> bm (target); 171 1.1 mrg FOR_EACH_VEC_ELT (*candidates, i, candidate) 172 1.1 mrg { 173 1.1 mrg gcc_assert (candidate); 174 1.1 mrg bm.consider (candidate); 175 1.1 mrg } 176 1.1 mrg 177 1.1 mrg return bm.get_best_meaningful_candidate (); 178 1.1 mrg } 179 1.1 mrg 180 1.1 mrg /* Generate the maximum edit distance for which we consider a suggestion 181 1.1 mrg to be meaningful, given a goal of length GOAL_LEN and a candidate of 182 1.1 mrg length CANDIDATE_LEN. 183 1.1 mrg 184 1.1 mrg This is a third of the length of the candidate or of the goal, 185 1.1 mrg whichever is bigger. */ 186 1.1 mrg 187 1.1 mrg edit_distance_t 188 1.1 mrg get_edit_distance_cutoff (size_t goal_len, size_t candidate_len) 189 1.1 mrg { 190 1.1 mrg size_t max_length = MAX (goal_len, candidate_len); 191 1.1 mrg size_t min_length = MIN (goal_len, candidate_len); 192 1.1 mrg 193 1.1 mrg gcc_assert (max_length >= min_length); 194 1.1 mrg 195 1.1 mrg /* Special case: don't offer suggestions for a pair of 196 1.1 mrg length == 1 strings (or empty strings). */ 197 1.1 mrg if (max_length <= 1) 198 1.1 mrg return 0; 199 1.1 mrg 200 1.1 mrg /* If the lengths are close, then round down. */ 201 1.1 mrg if (max_length - min_length <= 1) 202 1.1 mrg /* ...but allow an edit distance of at least 1. */ 203 1.1 mrg return BASE_COST * MAX (max_length / 3, 1); 204 1.1 mrg 205 1.1 mrg /* Otherwise, round up (thus giving a little extra leeway to some cases 206 1.1 mrg involving insertions/deletions). */ 207 1.1 mrg return BASE_COST * (max_length + 2) / 3; 208 1.1 mrg } 209 1.1 mrg 210 1.1 mrg #if CHECKING_P 211 1.1 mrg 212 1.1 mrg namespace selftest { 213 1.1 mrg 214 1.1 mrg /* Selftests. */ 215 1.1 mrg 216 1.1 mrg /* Verify that get_edit_distance (A, B) equals the expected value. */ 217 1.1 mrg 218 1.1 mrg static void 219 1.1 mrg test_get_edit_distance_one_way (const char *a, const char *b, 220 1.1 mrg edit_distance_t expected) 221 1.1 mrg { 222 1.1 mrg edit_distance_t actual = get_edit_distance (a, b); 223 1.1 mrg ASSERT_EQ (actual, expected); 224 1.1 mrg } 225 1.1 mrg 226 1.1 mrg /* Verify that both 227 1.1 mrg get_edit_distance (A, B) 228 1.1 mrg and 229 1.1 mrg get_edit_distance (B, A) 230 1.1 mrg equal the expected value, to ensure that the function is symmetric. */ 231 1.1 mrg 232 1.1 mrg static void 233 1.1 mrg test_get_edit_distance_both_ways (const char *a, const char *b, 234 1.1 mrg edit_distance_t expected) 235 1.1 mrg { 236 1.1 mrg test_get_edit_distance_one_way (a, b, expected); 237 1.1 mrg test_get_edit_distance_one_way (b, a, expected); 238 1.1 mrg } 239 1.1 mrg 240 1.1 mrg /* Verify get_edit_distance for a variety of pairs of pre-canned 241 1.1 mrg inputs, comparing against known-good values. */ 242 1.1 mrg 243 1.1 mrg static void 244 1.1 mrg test_edit_distances () 245 1.1 mrg { 246 1.1 mrg test_get_edit_distance_both_ways ("", "nonempty", 247 1.1 mrg BASE_COST * strlen ("nonempty")); 248 1.1 mrg test_get_edit_distance_both_ways ("saturday", "sunday", 249 1.1 mrg BASE_COST * 3); 250 1.1 mrg test_get_edit_distance_both_ways ("foo", "m_foo", BASE_COST * 2); 251 1.1 mrg test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 4); 252 1.1 mrg test_get_edit_distance_both_ways 253 1.1 mrg ("the quick brown fox jumps over the lazy dog", "dog", BASE_COST * 40); 254 1.1 mrg test_get_edit_distance_both_ways 255 1.1 mrg ("the quick brown fox jumps over the lazy dog", 256 1.1 mrg "the quick brown dog jumps over the lazy fox", 257 1.1 mrg BASE_COST * 4); 258 1.1 mrg test_get_edit_distance_both_ways 259 1.1 mrg ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", 260 1.1 mrg "All your base are belong to us", 261 1.1 mrg BASE_COST * 44); 262 1.1 mrg test_get_edit_distance_both_ways ("foo", "FOO", 3); 263 1.1 mrg test_get_edit_distance_both_ways ("fee", "deed", BASE_COST * 2); 264 1.1 mrg test_get_edit_distance_both_ways ("coorzd1", "coordx1", BASE_COST * 2); 265 1.1 mrg test_get_edit_distance_both_ways ("assert", "sqrt", BASE_COST * 3); 266 1.1 mrg test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", BASE_COST * 3); 267 1.1 mrg test_get_edit_distance_both_ways ("time", "nice", BASE_COST * 2); 268 1.1 mrg test_get_edit_distance_both_ways ("bar", "carg", BASE_COST * 2); 269 1.1 mrg test_get_edit_distance_both_ways ("gtk_widget_show_all", 270 1.1 mrg "GtkWidgetShowAll", 10); 271 1.1 mrg test_get_edit_distance_both_ways ("m_bar", "bar", BASE_COST * 2); 272 1.1 mrg test_get_edit_distance_both_ways ("MACRO", "MACRAME", BASE_COST * 3); 273 1.1 mrg test_get_edit_distance_both_ways ("ab", "ac", BASE_COST * 1); 274 1.1 mrg test_get_edit_distance_both_ways ("ab", "a", BASE_COST * 1); 275 1.1 mrg test_get_edit_distance_both_ways ("a", "b", BASE_COST * 1); 276 1.1 mrg test_get_edit_distance_both_ways ("nanl", "name", BASE_COST * 2); 277 1.1 mrg test_get_edit_distance_both_ways ("char", "bar", BASE_COST * 2); 278 1.1 mrg test_get_edit_distance_both_ways ("-optimize", "fsanitize", BASE_COST * 5); 279 1.1 mrg test_get_edit_distance_both_ways ("__DATE__", "__i386__", BASE_COST * 4); 280 1.1 mrg 281 1.1 mrg /* Examples where transposition helps. */ 282 1.1 mrg test_get_edit_distance_both_ways ("ab", "ba", BASE_COST * 1); 283 1.1 mrg test_get_edit_distance_both_ways ("ba", "abc", BASE_COST * 2); 284 1.1 mrg test_get_edit_distance_both_ways ("coorzd1", "coordz1", BASE_COST * 1); 285 1.1 mrg test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz", 286 1.1 mrg "bacdefghijklmnopqrstuvwxzy", 287 1.1 mrg BASE_COST * 2); 288 1.1 mrg test_get_edit_distance_both_ways ("saturday", "sundya", BASE_COST * 4); 289 1.1 mrg test_get_edit_distance_both_ways ("signed", "singed", BASE_COST * 1); 290 1.1 mrg } 291 1.1 mrg 292 1.1 mrg /* Subroutine of test_get_edit_distance_cutoff, for emulating the 293 1.1 mrg spellchecking cutoff in up to GCC 8. */ 294 1.1 mrg 295 1.1 mrg static edit_distance_t 296 1.1 mrg get_old_cutoff (size_t goal_len, size_t candidate_len) 297 1.1 mrg { 298 1.1 mrg return BASE_COST * MAX (goal_len, candidate_len) / 2; 299 1.1 mrg } 300 1.1 mrg 301 1.1 mrg /* Verify that the cutoff for "meaningfulness" of suggestions is at least as 302 1.1 mrg conservative as in older GCC releases. 303 1.1 mrg 304 1.1 mrg This should ensure that we don't offer additional meaningless 305 1.1 mrg suggestions (apart from those for which transposition has helped). */ 306 1.1 mrg 307 1.1 mrg static void 308 1.1 mrg test_get_edit_distance_cutoff () 309 1.1 mrg { 310 1.1 mrg for (size_t goal_len = 0; goal_len < 30; goal_len++) 311 1.1 mrg for (size_t candidate_len = 0; candidate_len < 30; candidate_len++) 312 1.1 mrg ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len) 313 1.1 mrg <= get_old_cutoff (goal_len, candidate_len)); 314 1.1 mrg } 315 1.1 mrg 316 1.1 mrg /* Assert that CANDIDATE is offered as a suggestion for TARGET. */ 317 1.1 mrg 318 1.1 mrg static void 319 1.1 mrg assert_suggested_for (const location &loc, const char *candidate, 320 1.1 mrg const char *target) 321 1.1 mrg { 322 1.1 mrg auto_vec<const char *> candidates; 323 1.1 mrg candidates.safe_push (candidate); 324 1.1 mrg ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates)); 325 1.1 mrg } 326 1.1 mrg 327 1.1 mrg /* Assert that CANDIDATE is offered as a suggestion for TARGET. */ 328 1.1 mrg 329 1.1 mrg #define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET) \ 330 1.1 mrg SELFTEST_BEGIN_STMT \ 331 1.1 mrg assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ 332 1.1 mrg SELFTEST_END_STMT 333 1.1 mrg 334 1.1 mrg /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ 335 1.1 mrg 336 1.1 mrg static void 337 1.1 mrg assert_not_suggested_for (const location &loc, const char *candidate, 338 1.1 mrg const char *target) 339 1.1 mrg { 340 1.1 mrg auto_vec<const char *> candidates; 341 1.1 mrg candidates.safe_push (candidate); 342 1.1 mrg ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates)); 343 1.1 mrg } 344 1.1 mrg 345 1.1 mrg /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ 346 1.1 mrg 347 1.1 mrg #define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET) \ 348 1.1 mrg SELFTEST_BEGIN_STMT \ 349 1.1 mrg assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ 350 1.1 mrg SELFTEST_END_STMT 351 1.1 mrg 352 1.1 mrg /* Verify that we offer varous suggestions that are meaningful, 353 1.1 mrg and that we don't offer various other ones that aren't (PR c/82967). */ 354 1.1 mrg 355 1.1 mrg static void 356 1.1 mrg test_suggestions () 357 1.1 mrg { 358 1.1 mrg /* Good suggestions. */ 359 1.1 mrg 360 1.1 mrg ASSERT_SUGGESTED_FOR ("m_bar", "bar"); 361 1.1 mrg // dist == 2, max_length == 5, min_length == 3 362 1.1 mrg 363 1.1 mrg ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME"); 364 1.1 mrg // dist == 3, max_length == 7, min_length == 5 365 1.1 mrg 366 1.1 mrg ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll"); 367 1.1 mrg // dist == 7, max_length == 16, min_length = 19 368 1.1 mrg 369 1.1 mrg ASSERT_SUGGESTED_FOR ("ab", "ac"); 370 1.1 mrg // dist == 1, max_length == min_length = 2 371 1.1 mrg 372 1.1 mrg ASSERT_SUGGESTED_FOR ("ab", "a"); 373 1.1 mrg // dist == 1, max_length == 2, min_length = 1 374 1.1 mrg 375 1.1 mrg /* Bad suggestions. */ 376 1.1 mrg 377 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("a", "b"); 378 1.1 mrg // dist == 1, max_length == min_length = 1 379 1.1 mrg 380 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert"); 381 1.1 mrg // dist == 3, max_length 6, min_length == 4 382 1.1 mrg 383 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX"); 384 1.1 mrg // dist == 3, max_length == min_length == 8 385 1.1 mrg 386 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("nice", "time"); 387 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("nanl", "name"); 388 1.1 mrg // dist == 2, max_length == min_length == 4 389 1.1 mrg 390 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("carg", "bar"); 391 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("char", "bar"); 392 1.1 mrg // dist == 2, max_length == 4, min_length == 3 393 1.1 mrg 394 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize"); 395 1.1 mrg // dist == 5, max_length == min_length == 9 396 1.1 mrg 397 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__"); 398 1.1 mrg // dist == 4, max_length == min_length == 8 399 1.1 mrg 400 1.1 mrg ASSERT_NOT_SUGGESTED_FOR ("start_input_device", "InputDevice"); 401 1.1 mrg // dist == 9, max_length == 18, min_length == 11 402 1.1 mrg } 403 1.1 mrg 404 1.1 mrg /* Verify that find_closest_string is sane. */ 405 1.1 mrg 406 1.1 mrg static void 407 1.1 mrg test_find_closest_string () 408 1.1 mrg { 409 1.1 mrg auto_vec<const char *> candidates; 410 1.1 mrg 411 1.1 mrg /* Verify that it can handle an empty vec. */ 412 1.1 mrg ASSERT_EQ (NULL, find_closest_string ("", &candidates)); 413 1.1 mrg 414 1.1 mrg /* Verify that it works sanely for non-empty vecs. */ 415 1.1 mrg candidates.safe_push ("apple"); 416 1.1 mrg candidates.safe_push ("banana"); 417 1.1 mrg candidates.safe_push ("cherry"); 418 1.1 mrg 419 1.1 mrg ASSERT_STREQ ("apple", find_closest_string ("app", &candidates)); 420 1.1 mrg ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates)); 421 1.1 mrg ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates)); 422 1.1 mrg ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates)); 423 1.1 mrg 424 1.1 mrg /* The order of the vec can matter, but it should not matter for these 425 1.1 mrg inputs. */ 426 1.1 mrg candidates.truncate (0); 427 1.1 mrg candidates.safe_push ("cherry"); 428 1.1 mrg candidates.safe_push ("banana"); 429 1.1 mrg candidates.safe_push ("apple"); 430 1.1 mrg ASSERT_STREQ ("apple", find_closest_string ("app", &candidates)); 431 1.1 mrg ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates)); 432 1.1 mrg ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates)); 433 1.1 mrg ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates)); 434 1.1 mrg 435 1.1 mrg /* If the goal string somehow makes it into the candidate list, offering 436 1.1 mrg it as a suggestion will be nonsensical. Verify that we don't offer such 437 1.1 mrg suggestions. */ 438 1.1 mrg ASSERT_EQ (NULL, find_closest_string ("banana", &candidates)); 439 1.1 mrg 440 1.1 mrg /* Example from PR 69968 where transposition helps. */ 441 1.1 mrg candidates.truncate (0); 442 1.1 mrg candidates.safe_push("coordx"); 443 1.1 mrg candidates.safe_push("coordy"); 444 1.1 mrg candidates.safe_push("coordz"); 445 1.1 mrg candidates.safe_push("coordx1"); 446 1.1 mrg candidates.safe_push("coordy1"); 447 1.1 mrg candidates.safe_push("coordz1"); 448 1.1 mrg ASSERT_STREQ ("coordz1", find_closest_string ("coorzd1", &candidates)); 449 1.1 mrg 450 1.1 mrg candidates.truncate (0); 451 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_GDB"); 452 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_ALL"); 453 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_MINIMAL"); 454 1.1 mrg ASSERT_STREQ ("DWARF_GNAT_ENCODINGS_ALL", 455 1.1 mrg find_closest_string ("DWARF_GNAT_ENCODINGS_all", 456 1.1 mrg &candidates)); 457 1.1 mrg 458 1.1 mrg /* The same as the previous test, but with a different order of 459 1.1 mrg candidates. */ 460 1.1 mrg candidates.truncate (0); 461 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_ALL"); 462 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_GDB"); 463 1.1 mrg candidates.safe_push ("DWARF_GNAT_ENCODINGS_MINIMAL"); 464 1.1 mrg ASSERT_STREQ ("DWARF_GNAT_ENCODINGS_ALL", 465 1.1 mrg find_closest_string ("DWARF_GNAT_ENCODINGS_all", 466 1.1 mrg &candidates)); 467 1.1 mrg } 468 1.1 mrg 469 1.1 mrg /* Test data for test_metric_conditions. */ 470 1.1 mrg 471 1.1 mrg static const char * const test_data[] = { 472 1.1 mrg "", 473 1.1 mrg "foo", 474 1.1 mrg "food", 475 1.1 mrg "boo", 476 1.1 mrg "1234567890123456789012345678901234567890123456789012345678901234567890", 477 1.1 mrg "abc", 478 1.1 mrg "ac", 479 1.1 mrg "ca", 480 1.1 mrg }; 481 1.1 mrg 482 1.1 mrg /* Verify that get_edit_distance appears to be a sane distance function, 483 1.1 mrg even though it doesn't satisfy the conditions for being a metric. (This 484 1.1 mrg is because the triangle inequality fails to hold: the distance between 485 1.1 mrg "ca" and "ac" is 1, and so is the distance between "abc" and "ac", but 486 1.1 mrg the distance between "abc" and "ca" is 3. Algorithms that calculate the 487 1.1 mrg true Levenshtein-Damerau metric are much more expensive.) */ 488 1.1 mrg 489 1.1 mrg static void 490 1.1 mrg test_metric_conditions () 491 1.1 mrg { 492 1.1 mrg const int num_test_cases = sizeof (test_data) / sizeof (test_data[0]); 493 1.1 mrg 494 1.1 mrg for (int i = 0; i < num_test_cases; i++) 495 1.1 mrg { 496 1.1 mrg for (int j = 0; j < num_test_cases; j++) 497 1.1 mrg { 498 1.1 mrg edit_distance_t dist_ij 499 1.1 mrg = get_edit_distance (test_data[i], test_data[j]); 500 1.1 mrg 501 1.1 mrg /* Identity of indiscernibles: d(i, j) > 0 iff i == j. */ 502 1.1 mrg if (i == j) 503 1.1 mrg ASSERT_EQ (dist_ij, 0); 504 1.1 mrg else 505 1.1 mrg ASSERT_TRUE (dist_ij > 0); 506 1.1 mrg 507 1.1 mrg /* Symmetry: d(i, j) == d(j, i). */ 508 1.1 mrg edit_distance_t dist_ji 509 1.1 mrg = get_edit_distance (test_data[j], test_data[i]); 510 1.1 mrg ASSERT_EQ (dist_ij, dist_ji); 511 1.1 mrg } 512 1.1 mrg } 513 1.1 mrg } 514 1.1 mrg 515 1.1 mrg /* Run all of the selftests within this file. */ 516 1.1 mrg 517 1.1 mrg void 518 1.1 mrg spellcheck_cc_tests () 519 1.1 mrg { 520 1.1 mrg test_edit_distances (); 521 1.1 mrg test_get_edit_distance_cutoff (); 522 1.1 mrg test_suggestions (); 523 1.1 mrg test_find_closest_string (); 524 1.1 mrg test_metric_conditions (); 525 1.1 mrg } 526 1.1 mrg 527 1.1 mrg } // namespace selftest 528 1.1 mrg 529 1.1 mrg #endif /* #if CHECKING_P */ 530