Home | History | Annotate | Line # | Download | only in Sema
      1 //===- TypoCorrection.h - Class for typo correction results -----*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines the TypoCorrection class, which stores the results of
     10 // Sema's typo correction (Sema::CorrectTypo).
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H
     15 #define LLVM_CLANG_SEMA_TYPOCORRECTION_H
     16 
     17 #include "clang/AST/Decl.h"
     18 #include "clang/AST/DeclarationName.h"
     19 #include "clang/Basic/LLVM.h"
     20 #include "clang/Basic/PartialDiagnostic.h"
     21 #include "clang/Basic/SourceLocation.h"
     22 #include "clang/Sema/DeclSpec.h"
     23 #include "llvm/ADT/ArrayRef.h"
     24 #include "llvm/ADT/SmallVector.h"
     25 #include "llvm/Support/Casting.h"
     26 #include <cstddef>
     27 #include <limits>
     28 #include <string>
     29 #include <utility>
     30 #include <vector>
     31 
     32 namespace clang {
     33 
     34 class DeclContext;
     35 class IdentifierInfo;
     36 class LangOptions;
     37 class MemberExpr;
     38 class NestedNameSpecifier;
     39 class Sema;
     40 
     41 /// Simple class containing the result of Sema::CorrectTypo
     42 class TypoCorrection {
     43 public:
     44   // "Distance" for unusable corrections
     45   static const unsigned InvalidDistance = std::numeric_limits<unsigned>::max();
     46 
     47   // The largest distance still considered valid (larger edit distances are
     48   // mapped to InvalidDistance by getEditDistance).
     49   static const unsigned MaximumDistance = 10000U;
     50 
     51   // Relative weightings of the "edit distance" components. The higher the
     52   // weight, the more of a penalty to fitness the component will give (higher
     53   // weights mean greater contribution to the total edit distance, with the
     54   // best correction candidates having the lowest edit distance).
     55   static const unsigned CharDistanceWeight = 100U;
     56   static const unsigned QualifierDistanceWeight = 110U;
     57   static const unsigned CallbackDistanceWeight = 150U;
     58 
     59   TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
     60                  NestedNameSpecifier *NNS = nullptr, unsigned CharDistance = 0,
     61                  unsigned QualifierDistance = 0)
     62       : CorrectionName(Name), CorrectionNameSpec(NNS),
     63         CharDistance(CharDistance), QualifierDistance(QualifierDistance) {
     64     if (NameDecl)
     65       CorrectionDecls.push_back(NameDecl);
     66   }
     67 
     68   TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS = nullptr,
     69                  unsigned CharDistance = 0)
     70       : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
     71         CharDistance(CharDistance) {
     72     if (Name)
     73       CorrectionDecls.push_back(Name);
     74   }
     75 
     76   TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS = nullptr,
     77                  unsigned CharDistance = 0)
     78       : CorrectionName(Name), CorrectionNameSpec(NNS),
     79         CharDistance(CharDistance) {}
     80 
     81   TypoCorrection() = default;
     82 
     83   /// Gets the DeclarationName of the typo correction
     84   DeclarationName getCorrection() const { return CorrectionName; }
     85 
     86   IdentifierInfo *getCorrectionAsIdentifierInfo() const {
     87     return CorrectionName.getAsIdentifierInfo();
     88   }
     89 
     90   /// Gets the NestedNameSpecifier needed to use the typo correction
     91   NestedNameSpecifier *getCorrectionSpecifier() const {
     92     return CorrectionNameSpec;
     93   }
     94 
     95   void setCorrectionSpecifier(NestedNameSpecifier *NNS) {
     96     CorrectionNameSpec = NNS;
     97     ForceSpecifierReplacement = (NNS != nullptr);
     98   }
     99 
    100   void WillReplaceSpecifier(bool ForceReplacement) {
    101     ForceSpecifierReplacement = ForceReplacement;
    102   }
    103 
    104   bool WillReplaceSpecifier() const {
    105     return ForceSpecifierReplacement;
    106   }
    107 
    108   void setQualifierDistance(unsigned ED) {
    109     QualifierDistance = ED;
    110   }
    111 
    112   void setCallbackDistance(unsigned ED) {
    113     CallbackDistance = ED;
    114   }
    115 
    116   // Convert the given weighted edit distance to a roughly equivalent number of
    117   // single-character edits (typically for comparison to the length of the
    118   // string being edited).
    119   static unsigned NormalizeEditDistance(unsigned ED) {
    120     if (ED > MaximumDistance)
    121       return InvalidDistance;
    122     return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
    123   }
    124 
    125   /// Gets the "edit distance" of the typo correction from the typo.
    126   /// If Normalized is true, scale the distance down by the CharDistanceWeight
    127   /// to return the edit distance in terms of single-character edits.
    128   unsigned getEditDistance(bool Normalized = true) const {
    129     if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
    130         CallbackDistance > MaximumDistance)
    131       return InvalidDistance;
    132     unsigned ED =
    133         CharDistance * CharDistanceWeight +
    134         QualifierDistance * QualifierDistanceWeight +
    135         CallbackDistance * CallbackDistanceWeight;
    136     if (ED > MaximumDistance)
    137       return InvalidDistance;
    138     // Half the CharDistanceWeight is added to ED to simulate rounding since
    139     // integer division truncates the value (i.e. round-to-nearest-int instead
    140     // of round-to-zero).
    141     return Normalized ? NormalizeEditDistance(ED) : ED;
    142   }
    143 
    144   /// Get the correction declaration found by name lookup (before we
    145   /// looked through using shadow declarations and the like).
    146   NamedDecl *getFoundDecl() const {
    147     return hasCorrectionDecl() ? *(CorrectionDecls.begin()) : nullptr;
    148   }
    149 
    150   /// Gets the pointer to the declaration of the typo correction
    151   NamedDecl *getCorrectionDecl() const {
    152     auto *D = getFoundDecl();
    153     return D ? D->getUnderlyingDecl() : nullptr;
    154   }
    155   template <class DeclClass>
    156   DeclClass *getCorrectionDeclAs() const {
    157     return dyn_cast_or_null<DeclClass>(getCorrectionDecl());
    158   }
    159 
    160   /// Clears the list of NamedDecls.
    161   void ClearCorrectionDecls() {
    162     CorrectionDecls.clear();
    163   }
    164 
    165   /// Clears the list of NamedDecls before adding the new one.
    166   void setCorrectionDecl(NamedDecl *CDecl) {
    167     CorrectionDecls.clear();
    168     addCorrectionDecl(CDecl);
    169   }
    170 
    171   /// Clears the list of NamedDecls and adds the given set.
    172   void setCorrectionDecls(ArrayRef<NamedDecl*> Decls) {
    173     CorrectionDecls.clear();
    174     CorrectionDecls.insert(CorrectionDecls.begin(), Decls.begin(), Decls.end());
    175   }
    176 
    177   /// Add the given NamedDecl to the list of NamedDecls that are the
    178   /// declarations associated with the DeclarationName of this TypoCorrection
    179   void addCorrectionDecl(NamedDecl *CDecl);
    180 
    181   std::string getAsString(const LangOptions &LO) const;
    182 
    183   std::string getQuoted(const LangOptions &LO) const {
    184     return "'" + getAsString(LO) + "'";
    185   }
    186 
    187   /// Returns whether this TypoCorrection has a non-empty DeclarationName
    188   explicit operator bool() const { return bool(CorrectionName); }
    189 
    190   /// Mark this TypoCorrection as being a keyword.
    191   /// Since addCorrectionDeclsand setCorrectionDecl don't allow NULL to be
    192   /// added to the list of the correction's NamedDecl pointers, NULL is added
    193   /// as the only element in the list to mark this TypoCorrection as a keyword.
    194   void makeKeyword() {
    195     CorrectionDecls.clear();
    196     CorrectionDecls.push_back(nullptr);
    197     ForceSpecifierReplacement = true;
    198   }
    199 
    200   // Check if this TypoCorrection is a keyword by checking if the first
    201   // item in CorrectionDecls is NULL.
    202   bool isKeyword() const {
    203     return !CorrectionDecls.empty() && CorrectionDecls.front() == nullptr;
    204   }
    205 
    206   // Check if this TypoCorrection is the given keyword.
    207   template<std::size_t StrLen>
    208   bool isKeyword(const char (&Str)[StrLen]) const {
    209     return isKeyword() && getCorrectionAsIdentifierInfo()->isStr(Str);
    210   }
    211 
    212   // Returns true if the correction either is a keyword or has a known decl.
    213   bool isResolved() const { return !CorrectionDecls.empty(); }
    214 
    215   bool isOverloaded() const {
    216     return CorrectionDecls.size() > 1;
    217   }
    218 
    219   void setCorrectionRange(CXXScopeSpec *SS,
    220                           const DeclarationNameInfo &TypoName) {
    221     CorrectionRange = TypoName.getSourceRange();
    222     if (ForceSpecifierReplacement && SS && !SS->isEmpty())
    223       CorrectionRange.setBegin(SS->getBeginLoc());
    224   }
    225 
    226   SourceRange getCorrectionRange() const {
    227     return CorrectionRange;
    228   }
    229 
    230   using decl_iterator = SmallVectorImpl<NamedDecl *>::iterator;
    231 
    232   decl_iterator begin() {
    233     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    234   }
    235 
    236   decl_iterator end() { return CorrectionDecls.end(); }
    237 
    238   using const_decl_iterator = SmallVectorImpl<NamedDecl *>::const_iterator;
    239 
    240   const_decl_iterator begin() const {
    241     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    242   }
    243 
    244   const_decl_iterator end() const { return CorrectionDecls.end(); }
    245 
    246   /// Returns whether this typo correction is correcting to a
    247   /// declaration that was declared in a module that has not been imported.
    248   bool requiresImport() const { return RequiresImport; }
    249   void setRequiresImport(bool Req) { RequiresImport = Req; }
    250 
    251   /// Extra diagnostics are printed after the first diagnostic for the typo.
    252   /// This can be used to attach external notes to the diag.
    253   void addExtraDiagnostic(PartialDiagnostic PD) {
    254     ExtraDiagnostics.push_back(std::move(PD));
    255   }
    256   ArrayRef<PartialDiagnostic> getExtraDiagnostics() const {
    257     return ExtraDiagnostics;
    258   }
    259 
    260 private:
    261   bool hasCorrectionDecl() const {
    262     return (!isKeyword() && !CorrectionDecls.empty());
    263   }
    264 
    265   // Results.
    266   DeclarationName CorrectionName;
    267   NestedNameSpecifier *CorrectionNameSpec = nullptr;
    268   SmallVector<NamedDecl *, 1> CorrectionDecls;
    269   unsigned CharDistance = 0;
    270   unsigned QualifierDistance = 0;
    271   unsigned CallbackDistance = 0;
    272   SourceRange CorrectionRange;
    273   bool ForceSpecifierReplacement = false;
    274   bool RequiresImport = false;
    275 
    276   std::vector<PartialDiagnostic> ExtraDiagnostics;
    277 };
    278 
    279 /// Base class for callback objects used by Sema::CorrectTypo to check
    280 /// the validity of a potential typo correction.
    281 class CorrectionCandidateCallback {
    282 public:
    283   static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
    284 
    285   explicit CorrectionCandidateCallback(IdentifierInfo *Typo = nullptr,
    286                                        NestedNameSpecifier *TypoNNS = nullptr)
    287       : Typo(Typo), TypoNNS(TypoNNS) {}
    288 
    289   virtual ~CorrectionCandidateCallback() = default;
    290 
    291   /// Simple predicate used by the default RankCandidate to
    292   /// determine whether to return an edit distance of 0 or InvalidDistance.
    293   /// This can be overridden by validators that only need to determine if a
    294   /// candidate is viable, without ranking potentially viable candidates.
    295   /// Only ValidateCandidate or RankCandidate need to be overridden by a
    296   /// callback wishing to check the viability of correction candidates.
    297   /// The default predicate always returns true if the candidate is not a type
    298   /// name or keyword, true for types if WantTypeSpecifiers is true, and true
    299   /// for keywords if WantTypeSpecifiers, WantExpressionKeywords,
    300   /// WantCXXNamedCasts, WantRemainingKeywords, or WantObjCSuper is true.
    301   virtual bool ValidateCandidate(const TypoCorrection &candidate);
    302 
    303   /// Method used by Sema::CorrectTypo to assign an "edit distance" rank
    304   /// to a candidate (where a lower value represents a better candidate), or
    305   /// returning InvalidDistance if the candidate is not at all viable. For
    306   /// validation callbacks that only need to determine if a candidate is viable,
    307   /// the default RankCandidate returns either 0 or InvalidDistance depending
    308   /// whether ValidateCandidate returns true or false.
    309   virtual unsigned RankCandidate(const TypoCorrection &candidate) {
    310     return (!MatchesTypo(candidate) && ValidateCandidate(candidate))
    311                ? 0
    312                : InvalidDistance;
    313   }
    314 
    315   /// Clone this CorrectionCandidateCallback. CorrectionCandidateCallbacks are
    316   /// initially stack-allocated. However in case where delayed typo-correction
    317   /// is done we need to move the callback to storage with a longer lifetime.
    318   /// Every class deriving from CorrectionCandidateCallback must implement
    319   /// this method.
    320   virtual std::unique_ptr<CorrectionCandidateCallback> clone() = 0;
    321 
    322   void setTypoName(IdentifierInfo *II) { Typo = II; }
    323   void setTypoNNS(NestedNameSpecifier *NNS) { TypoNNS = NNS; }
    324 
    325   // Flags for context-dependent keywords. WantFunctionLikeCasts is only
    326   // used/meaningful when WantCXXNamedCasts is false.
    327   // TODO: Expand these to apply to non-keywords or possibly remove them.
    328   bool WantTypeSpecifiers = true;
    329   bool WantExpressionKeywords = true;
    330   bool WantCXXNamedCasts = true;
    331   bool WantFunctionLikeCasts = true;
    332   bool WantRemainingKeywords = true;
    333   bool WantObjCSuper = false;
    334   // Temporary hack for the one case where a CorrectTypoContext enum is used
    335   // when looking up results.
    336   bool IsObjCIvarLookup = false;
    337   bool IsAddressOfOperand = false;
    338 
    339 protected:
    340   bool MatchesTypo(const TypoCorrection &candidate) {
    341     return Typo && candidate.isResolved() && !candidate.requiresImport() &&
    342            candidate.getCorrectionAsIdentifierInfo() == Typo &&
    343            // FIXME: This probably does not return true when both
    344            // NestedNameSpecifiers have the same textual representation.
    345            candidate.getCorrectionSpecifier() == TypoNNS;
    346   }
    347 
    348   IdentifierInfo *Typo;
    349   NestedNameSpecifier *TypoNNS;
    350 };
    351 
    352 class DefaultFilterCCC final : public CorrectionCandidateCallback {
    353 public:
    354   explicit DefaultFilterCCC(IdentifierInfo *Typo = nullptr,
    355                             NestedNameSpecifier *TypoNNS = nullptr)
    356       : CorrectionCandidateCallback(Typo, TypoNNS) {}
    357 
    358   std::unique_ptr<CorrectionCandidateCallback> clone() override {
    359     return std::make_unique<DefaultFilterCCC>(*this);
    360   }
    361 };
    362 
    363 /// Simple template class for restricting typo correction candidates
    364 /// to ones having a single Decl* of the given type.
    365 template <class C>
    366 class DeclFilterCCC final : public CorrectionCandidateCallback {
    367 public:
    368   bool ValidateCandidate(const TypoCorrection &candidate) override {
    369     return candidate.getCorrectionDeclAs<C>();
    370   }
    371   std::unique_ptr<CorrectionCandidateCallback> clone() override {
    372     return std::make_unique<DeclFilterCCC>(*this);
    373   }
    374 };
    375 
    376 // Callback class to limit the allowed keywords and to only accept typo
    377 // corrections that are keywords or whose decls refer to functions (or template
    378 // functions) that accept the given number of arguments.
    379 class FunctionCallFilterCCC : public CorrectionCandidateCallback {
    380 public:
    381   FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs,
    382                         bool HasExplicitTemplateArgs,
    383                         MemberExpr *ME = nullptr);
    384 
    385   bool ValidateCandidate(const TypoCorrection &candidate) override;
    386   std::unique_ptr<CorrectionCandidateCallback> clone() override {
    387     return std::make_unique<FunctionCallFilterCCC>(*this);
    388   }
    389 
    390 private:
    391   unsigned NumArgs;
    392   bool HasExplicitTemplateArgs;
    393   DeclContext *CurContext;
    394   MemberExpr *MemberFn;
    395 };
    396 
    397 // Callback class that effectively disabled typo correction
    398 class NoTypoCorrectionCCC final : public CorrectionCandidateCallback {
    399 public:
    400   NoTypoCorrectionCCC() {
    401     WantTypeSpecifiers = false;
    402     WantExpressionKeywords = false;
    403     WantCXXNamedCasts = false;
    404     WantFunctionLikeCasts = false;
    405     WantRemainingKeywords = false;
    406   }
    407 
    408   bool ValidateCandidate(const TypoCorrection &candidate) override {
    409     return false;
    410   }
    411   std::unique_ptr<CorrectionCandidateCallback> clone() override {
    412     return std::make_unique<NoTypoCorrectionCCC>(*this);
    413   }
    414 };
    415 
    416 } // namespace clang
    417 
    418 #endif // LLVM_CLANG_SEMA_TYPOCORRECTION_H
    419