Home | History | Annotate | Line # | Download | only in MCParser
      1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     11 
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <cstdint>
#include <memory>
     23 
     24 namespace llvm {
     25 
// Forward declarations: these types are only named by pointer, reference or
// as template arguments in this header.
class MCInst;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;

/// List of parsed operands; each element is an owning pointer to an
/// MCParsedAsmOperand.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
     32 
/// Identifies the kind of an AsmRewrite. The enumerator values are also used
/// as indices into AsmRewritePrecedence, so the two must be kept in sync.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value for each AsmRewriteKind, indexed by the enumerator's
/// numeric value. Entries are listed in enum declaration order.
// NOTE(review): how the values are compared (higher vs. lower wins) is decided
// by the consumer of this table, which is not visible in this header.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Adding an AsmRewriteKind without extending the table would make lookups for
// the new kind read past the end of the array; catch that at compile time.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  AOK_IntelExpr + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
     60 
     61 // Represnt the various parts which makes up an intel expression,
     62 // used for emitting compound intel expressions
     63 struct IntelExpr {
     64   bool NeedBracs;
     65   int64_t Imm;
     66   StringRef BaseReg;
     67   StringRef IndexReg;
     68   StringRef OffsetName;
     69   unsigned Scale;
     70 
     71   IntelExpr()
     72       : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
     73         OffsetName(StringRef()), Scale(1) {}
     74   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
     75   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
     76             StringRef offsetName, int64_t imm, bool needBracs)
     77       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
     78         OffsetName(offsetName), Scale(1) {
     79     if (scale)
     80       Scale = scale;
     81   }
     82   bool hasBaseReg() const { return !BaseReg.empty(); }
     83   bool hasIndexReg() const { return !IndexReg.empty(); }
     84   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
     85   bool hasOffset() const { return !OffsetName.empty(); }
     86   // Normally we won't emit immediates unconditionally,
     87   // unless we've got no other components
     88   bool emitImm() const { return !(hasRegs() || hasOffset()); }
     89   bool isValid() const {
     90     return (Scale == 1) ||
     91            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
     92   }
     93 };
     94 
     95 struct AsmRewrite {
     96   AsmRewriteKind Kind;
     97   SMLoc Loc;
     98   unsigned Len;
     99   bool Done;
    100   int64_t Val;
    101   StringRef Label;
    102   IntelExpr IntelExp;
    103 
    104 public:
    105   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
    106     : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
    107   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
    108     : AsmRewrite(kind, loc, len) { Label = label; }
    109   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
    110     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
    111 };
    112 
    113 struct ParseInstructionInfo {
    114   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
    115 
    116   ParseInstructionInfo() = default;
    117   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
    118     : AsmRewrites(rewrites) {}
    119 };
    120 
/// Outcome of attempting to parse/match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success,
  /// Operand did not match.
  MatchOperand_NoMatch,
  /// Operand matched but had errors.
  MatchOperand_ParseFail
};
    126 
/// The three outcomes a DiagnosticPredicate can report.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// When an operand is parsed, the assembler will try to iterate through a set of
// possible operand classes that the operand might match and call the
// corresponding PredicateMethod to determine that.
//
// If there are two AsmOperands that would give a specific diagnostic if there
// is no match, there is currently no mechanism to distinguish which operand is
// a closer match. The DiagnosticPredicate distinguishes between 'completely
// no match' and 'near match', so the assembler can decide whether to give a
// specific diagnostic, or use 'InvalidOperand' and continue to find a
// 'better matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
// little sense to give a diagnostic that the operand should be an immediate
// in range [-8, 7].
//
// This is a light-weight alternative to the 'NearMissInfo' approach
// below which collects *all* possible diagnostics. This alternative
// is optional and fully backward compatible with existing
// PredicateMethods that return a 'bool' (match or no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  // A plain bool maps 'true' to Match and 'false' to NearMatch (never NoMatch).
  explicit DiagnosticPredicate(bool Match)
      : Type(DiagnosticPredicateTy::NearMatch) {
    if (Match)
      Type = DiagnosticPredicateTy::Match;
  }
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate &operator=(const DiagnosticPredicate &) = default;

  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }

  // Converts to true only for a full match.
  operator bool() const { return isMatch(); }
};
    174 
    175 // When matching of an assembly instruction fails, there may be multiple
    176 // encodings that are close to being a match. It's often ambiguous which one
    177 // the programmer intended to use, so we want to report an error which mentions
    178 // each of these "near-miss" encodings. This struct contains information about
    179 // one such encoding, and why it did not match the parsed instruction.
    180 class NearMissInfo {
    181 public:
    182   enum NearMissKind {
    183     NoNearMiss,
    184     NearMissOperand,
    185     NearMissFeature,
    186     NearMissPredicate,
    187     NearMissTooFewOperands,
    188   };
    189 
    190   // The encoding is valid for the parsed assembly string. This is only used
    191   // internally to the table-generated assembly matcher.
    192   static NearMissInfo getSuccess() { return NearMissInfo(); }
    193 
    194   // The instruction encoding is not valid because it requires some target
    195   // features that are not currently enabled. MissingFeatures has a bit set for
    196   // each feature that the encoding needs but which is not enabled.
    197   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
    198     NearMissInfo Result;
    199     Result.Kind = NearMissFeature;
    200     Result.Features = MissingFeatures;
    201     return Result;
    202   }
    203 
    204   // The instruction encoding is not valid because the target-specific
    205   // predicate function returned an error code. FailureCode is the
    206   // target-specific error code returned by the predicate.
    207   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    208     NearMissInfo Result;
    209     Result.Kind = NearMissPredicate;
    210     Result.PredicateError = FailureCode;
    211     return Result;
    212   }
    213 
    214   // The instruction encoding is not valid because one (and only one) parsed
    215   // operand is not of the correct type. OperandError is the error code
    216   // relating to the operand class expected by the encoding. OperandClass is
    217   // the type of the expected operand. Opcode is the opcode of the encoding.
    218   // OperandIndex is the index into the parsed operand list.
    219   static NearMissInfo getMissedOperand(unsigned OperandError,
    220                                        unsigned OperandClass, unsigned Opcode,
    221                                        unsigned OperandIndex) {
    222     NearMissInfo Result;
    223     Result.Kind = NearMissOperand;
    224     Result.MissedOperand.Error = OperandError;
    225     Result.MissedOperand.Class = OperandClass;
    226     Result.MissedOperand.Opcode = Opcode;
    227     Result.MissedOperand.Index = OperandIndex;
    228     return Result;
    229   }
    230 
    231   // The instruction encoding is not valid because it expects more operands
    232   // than were parsed. OperandClass is the class of the expected operand that
    233   // was not provided. Opcode is the instruction encoding.
    234   static NearMissInfo getTooFewOperands(unsigned OperandClass,
    235                                         unsigned Opcode) {
    236     NearMissInfo Result;
    237     Result.Kind = NearMissTooFewOperands;
    238     Result.TooFewOperands.Class = OperandClass;
    239     Result.TooFewOperands.Opcode = Opcode;
    240     return Result;
    241   }
    242 
    243   operator bool() const { return Kind != NoNearMiss; }
    244 
    245   NearMissKind getKind() const { return Kind; }
    246 
    247   // Feature flags required by the instruction, that the current target does
    248   // not have.
    249   const FeatureBitset& getFeatures() const {
    250     assert(Kind == NearMissFeature);
    251     return Features;
    252   }
    253   // Error code returned by the target predicate when validating this
    254   // instruction encoding.
    255   unsigned getPredicateError() const {
    256     assert(Kind == NearMissPredicate);
    257     return PredicateError;
    258   }
    259   // MatchClassKind of the operand that we expected to see.
    260   unsigned getOperandClass() const {
    261     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    262     return MissedOperand.Class;
    263   }
    264   // Opcode of the encoding we were trying to match.
    265   unsigned getOpcode() const {
    266     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    267     return MissedOperand.Opcode;
    268   }
    269   // Error code returned when validating the operand.
    270   unsigned getOperandError() const {
    271     assert(Kind == NearMissOperand);
    272     return MissedOperand.Error;
    273   }
    274   // Index of the actual operand we were trying to match in the list of parsed
    275   // operands.
    276   unsigned getOperandIndex() const {
    277     assert(Kind == NearMissOperand);
    278     return MissedOperand.Index;
    279   }
    280 
    281 private:
    282   NearMissKind Kind;
    283 
    284   // These two structs share a common prefix, so we can safely rely on the fact
    285   // that they overlap in the union.
    286   struct MissedOpInfo {
    287     unsigned Class;
    288     unsigned Opcode;
    289     unsigned Error;
    290     unsigned Index;
    291   };
    292 
    293   struct TooFewOperandsInfo {
    294     unsigned Class;
    295     unsigned Opcode;
    296   };
    297 
    298   union {
    299     FeatureBitset Features;
    300     unsigned PredicateError;
    301     MissedOpInfo MissedOperand;
    302     TooFewOperandsInfo TooFewOperands;
    303   };
    304 
    305   NearMissInfo() : Kind(NoNearMiss) {}
    306 };
    307 
    308 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
    309 class MCTargetAsmParser : public MCAsmParserExtension {
    310 public:
    311   enum MatchResultTy {
    312     Match_InvalidOperand,
    313     Match_InvalidTiedOperand,
    314     Match_MissingFeature,
    315     Match_MnemonicFail,
    316     Match_Success,
    317     Match_NearMisses,
    318     FIRST_TARGET_MATCH_RESULT_TY
    319   };
    320 
    321 protected: // Can only create subclasses.
    322   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
    323                     const MCInstrInfo &MII);
    324 
    325   /// Create a copy of STI and return a non-const reference to it.
    326   MCSubtargetInfo &copySTI();
    327 
    328   /// AvailableFeatures - The current set of available features.
    329   FeatureBitset AvailableFeatures;
    330 
    331   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
    332   bool ParsingMSInlineAsm = false;
    333 
    334   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
    335   /// ms-style inline assembly.
    336   MCAsmParserSemaCallback *SemaCallback = nullptr;
    337 
    338   /// Set of options which affects instrumentation of inline assembly.
    339   MCTargetOptions MCOptions;
    340 
    341   /// Current STI.
    342   const MCSubtargetInfo *STI;
    343 
    344   const MCInstrInfo &MII;
    345 
    346 public:
    347   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
    348   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
    349 
    350   ~MCTargetAsmParser() override;
    351 
    352   const MCSubtargetInfo &getSTI() const;
    353 
    354   const FeatureBitset& getAvailableFeatures() const {
    355     return AvailableFeatures;
    356   }
    357   void setAvailableFeatures(const FeatureBitset& Value) {
    358     AvailableFeatures = Value;
    359   }
    360 
    361   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
    362   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
    363 
    364   MCTargetOptions getTargetOptions() const { return MCOptions; }
    365 
    366   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
    367     SemaCallback = Callback;
    368   }
    369 
    370   // Target-specific parsing of expression.
    371   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
    372     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
    373   }
    374 
    375   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
    376                              SMLoc &EndLoc) = 0;
    377 
    378   /// tryParseRegister - parse one register if possible
    379   ///
    380   /// Check whether a register specification can be parsed at the current
    381   /// location, without failing the entire parse if it can't. Must not consume
    382   /// tokens if the parse fails.
    383   virtual OperandMatchResultTy
    384   tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
    385 
    386   /// ParseInstruction - Parse one assembly instruction.
    387   ///
    388   /// The parser is positioned following the instruction name. The target
    389   /// specific instruction parser should parse the entire instruction and
    390   /// construct the appropriate MCInst, or emit an error. On success, the entire
    391   /// line should be parsed up to and including the end-of-statement token. On
    392   /// failure, the parser is not required to read to the end of the line.
    393   //
    394   /// \param Name - The instruction name.
    395   /// \param NameLoc - The source location of the name.
    396   /// \param Operands [out] - The list of parsed operands, this returns
    397   ///        ownership of them to the caller.
    398   /// \return True on failure.
    399   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    400                                 SMLoc NameLoc, OperandVector &Operands) = 0;
    401   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    402                                 AsmToken Token, OperandVector &Operands) {
    403     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
    404   }
    405 
    406   /// ParseDirective - Parse a target specific assembler directive
    407   ///
    408   /// The parser is positioned following the directive name.  The target
    409   /// specific directive parser should parse the entire directive doing or
    410   /// recording any target specific work, or return true and do nothing if the
    411   /// directive is not target specific. If the directive is specific for
    412   /// the target, the entire line is parsed up to and including the
    413   /// end-of-statement token and false is returned.
    414   ///
    415   /// \param DirectiveID - the identifier token of the directive.
    416   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
    417 
    418   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
    419   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
    420   /// This returns false on success and returns true on failure to match.
    421   ///
    422   /// On failure, the target parser is responsible for emitting a diagnostic
    423   /// explaining the match failure.
    424   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
    425                                        OperandVector &Operands, MCStreamer &Out,
    426                                        uint64_t &ErrorInfo,
    427                                        bool MatchingInlineAsm) = 0;
    428 
    429   /// Allows targets to let registers opt out of clobber lists.
    430   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
    431 
    432   /// Allow a target to add special case operand matching for things that
    433   /// tblgen doesn't/can't handle effectively. For example, literal
    434   /// immediates on ARM. TableGen expects a token operand, but the parser
    435   /// will recognize them as immediates.
    436   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
    437                                               unsigned Kind) {
    438     return Match_InvalidOperand;
    439   }
    440 
    441   /// Validate the instruction match against any complex target predicates
    442   /// before rendering any operands to it.
    443   virtual unsigned
    444   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
    445     return Match_Success;
    446   }
    447 
    448   /// checkTargetMatchPredicate - Validate the instruction match against
    449   /// any complex target predicates not expressible via match classes.
    450   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
    451     return Match_Success;
    452   }
    453 
    454   virtual void convertToMapAndConstraints(unsigned Kind,
    455                                           const OperandVector &Operands) = 0;
    456 
    457   /// Returns whether two registers are equal and is used by the tied-operands
    458   /// checks in the AsmMatcher. This method can be overridden allow e.g. a
    459   /// sub- or super-register as the tied operand.
    460   virtual bool regsEqual(const MCParsedAsmOperand &Op1,
    461                          const MCParsedAsmOperand &Op2) const {
    462     assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
    463     return Op1.getReg() == Op2.getReg();
    464   }
    465 
    466   // Return whether this parser uses assignment statements with equals tokens
    467   virtual bool equalIsAsmAssignment() { return true; };
    468   // Return whether this start of statement identifier is a label
    469   virtual bool isLabel(AsmToken &Token) { return true; };
    470   // Return whether this parser accept star as start of statement
    471   virtual bool starIsStartOfStatement() { return false; };
    472 
    473   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
    474                                             MCSymbolRefExpr::VariantKind,
    475                                             MCContext &Ctx) {
    476     return nullptr;
    477   }
    478 
    479   // For actions that have to be performed before a label is emitted
    480   virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
    481 
    482   virtual void onLabelParsed(MCSymbol *Symbol) {}
    483 
    484   /// Ensure that all previously parsed instructions have been emitted to the
    485   /// output streamer, if the target does not emit them immediately.
    486   virtual void flushPendingInstructions(MCStreamer &Out) {}
    487 
    488   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
    489                                               AsmToken::TokenKind OperatorToken,
    490                                               MCContext &Ctx) {
    491     return nullptr;
    492   }
    493 
    494   // For any initialization at the beginning of parsing.
    495   virtual void onBeginOfFile() {}
    496 
    497   // For any checks or cleanups at the end of parsing.
    498   virtual void onEndOfFile() {}
    499 };
    500 
    501 } // end namespace llvm
    502 
    503 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
    504