Home | History | Annotate | Line # | Download | only in MCParser
      1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
     10 #define LLVM_MC_MCPARSER_MCASMLEXER_H
     11 
     12 #include "llvm/ADT/ArrayRef.h"
     13 #include "llvm/ADT/SmallVector.h"
     14 #include "llvm/MC/MCAsmMacro.h"
     15 #include <algorithm>
     16 #include <cassert>
     17 #include <cstddef>
     18 #include <cstdint>
     19 #include <string>
     20 
     21 namespace llvm {
     22 
     23 /// A callback class which is notified of each comment in an assembly file as
     24 /// it is lexed.
     25 class AsmCommentConsumer {
     26 public:
     27   virtual ~AsmCommentConsumer() = default;
     28 
     29   /// Callback function for when a comment is lexed. Loc is the start of the
     30   /// comment text (excluding the comment-start marker). CommentText is the text
     31   /// of the comment, excluding the comment start and end markers, and the
     32   /// newline for single-line comments.
     33   virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
     34 };
     35 
     36 
     37 /// Generic assembler lexer interface, for use by target specific assembly
     38 /// lexers.
     39 class MCAsmLexer {
     40   /// The current token, stored in the base class for faster access.
     41   SmallVector<AsmToken, 1> CurTok;
     42 
     43   /// The location and description of the current error
     44   SMLoc ErrLoc;
     45   std::string Err;
     46 
     47 protected: // Can only create subclasses.
     48   const char *TokStart = nullptr;
     49   bool SkipSpace = true;
     50   bool AllowAtInIdentifier;
     51   bool AllowHashInIdentifier = false;
     52   bool IsAtStartOfStatement = true;
     53   bool LexMasmHexFloats = false;
     54   bool LexMasmIntegers = false;
     55   bool LexMasmStrings = false;
     56   bool LexMotorolaIntegers = false;
     57   bool UseMasmDefaultRadix = false;
     58   unsigned DefaultRadix = 10;
     59   bool LexHLASMIntegers = false;
     60   bool LexHLASMStrings = false;
     61   AsmCommentConsumer *CommentConsumer = nullptr;
     62 
     63   MCAsmLexer();
     64 
     65   virtual AsmToken LexToken() = 0;
     66 
     67   void SetError(SMLoc errLoc, const std::string &err) {
     68     ErrLoc = errLoc;
     69     Err = err;
     70   }
     71 
     72 public:
     73   MCAsmLexer(const MCAsmLexer &) = delete;
     74   MCAsmLexer &operator=(const MCAsmLexer &) = delete;
     75   virtual ~MCAsmLexer();
     76 
     77   /// Consume the next token from the input stream and return it.
     78   ///
     79   /// The lexer will continuously return the end-of-file token once the end of
     80   /// the main input file has been reached.
     81   const AsmToken &Lex() {
     82     assert(!CurTok.empty());
     83     // Mark if we parsing out a EndOfStatement.
     84     IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
     85     CurTok.erase(CurTok.begin());
     86     // LexToken may generate multiple tokens via UnLex but will always return
     87     // the first one. Place returned value at head of CurTok vector.
     88     if (CurTok.empty()) {
     89       AsmToken T = LexToken();
     90       CurTok.insert(CurTok.begin(), T);
     91     }
     92     return CurTok.front();
     93   }
     94 
     95   void UnLex(AsmToken const &Token) {
     96     IsAtStartOfStatement = false;
     97     CurTok.insert(CurTok.begin(), Token);
     98   }
     99 
    100   bool isAtStartOfStatement() { return IsAtStartOfStatement; }
    101 
    102   virtual StringRef LexUntilEndOfStatement() = 0;
    103 
    104   /// Get the current source location.
    105   SMLoc getLoc() const;
    106 
    107   /// Get the current (last) lexed token.
    108   const AsmToken &getTok() const {
    109     return CurTok[0];
    110   }
    111 
    112   /// Look ahead at the next token to be lexed.
    113   const AsmToken peekTok(bool ShouldSkipSpace = true) {
    114     AsmToken Tok;
    115 
    116     MutableArrayRef<AsmToken> Buf(Tok);
    117     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
    118 
    119     assert(ReadCount == 1);
    120     (void)ReadCount;
    121 
    122     return Tok;
    123   }
    124 
    125   /// Look ahead an arbitrary number of tokens.
    126   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
    127                             bool ShouldSkipSpace = true) = 0;
    128 
    129   /// Get the current error location
    130   SMLoc getErrLoc() {
    131     return ErrLoc;
    132   }
    133 
    134   /// Get the current error string
    135   const std::string &getErr() {
    136     return Err;
    137   }
    138 
    139   /// Get the kind of current token.
    140   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
    141 
    142   /// Check if the current token has kind \p K.
    143   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
    144 
    145   /// Check if the current token has kind \p K.
    146   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
    147 
    148   /// Set whether spaces should be ignored by the lexer
    149   void setSkipSpace(bool val) { SkipSpace = val; }
    150 
    151   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
    152   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
    153 
    154   void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
    155 
    156   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
    157     this->CommentConsumer = CommentConsumer;
    158   }
    159 
    160   /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
    161   /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
    162   void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
    163 
    164   /// Set whether to use masm-style default-radix integer literals. If disabled,
    165   /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
    166   void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
    167 
    168   unsigned getMasmDefaultRadix() const { return DefaultRadix; }
    169   void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
    170 
    171   /// Set whether to lex masm-style hex float literals, such as 3f800000r.
    172   void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
    173 
    174   /// Set whether to lex masm-style string literals, such as 'Can''t find file'
    175   /// and "This ""value"" not found".
    176   void setLexMasmStrings(bool V) { LexMasmStrings = V; }
    177 
    178   /// Set whether to lex Motorola-style integer literals, such as $deadbeef or
    179   /// %01010110.
    180   void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
    181 
    182   /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
    183   void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
    184 
    185   /// Set whether to "lex" HLASM-flavour character and string literals. For now,
    186   /// setting this option to true, will disable lexing for character and string
    187   /// literals.
    188   void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
    189 };
    190 
    191 } // end namespace llvm
    192 
    193 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H
    194