Home | History | Annotate | Line # | Download | only in MCDisassembler
      1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
     10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
     11 
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <tuple>
#include <vector>
     19 
     20 namespace llvm {
     21 
     22 struct XCOFFSymbolInfo {
     23   Optional<XCOFF::StorageMappingClass> StorageMappingClass;
     24   Optional<uint32_t> Index;
     25   bool IsLabel;
     26   XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc,
     27                   Optional<uint32_t> Idx, bool Label)
     28       : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {}
     29 
     30   bool operator<(const XCOFFSymbolInfo &SymInfo) const;
     31 };
     32 
     33 struct SymbolInfoTy {
     34   uint64_t Addr;
     35   StringRef Name;
     36   union {
     37     uint8_t Type;
     38     XCOFFSymbolInfo XCOFFSymInfo;
     39   };
     40 
     41 private:
     42   bool IsXCOFF;
     43 
     44 public:
     45   SymbolInfoTy(uint64_t Addr, StringRef Name,
     46                Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx,
     47                bool Label)
     48       : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
     49   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
     50       : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
     51   bool isXCOFF() const { return IsXCOFF; }
     52 
     53 private:
     54   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
     55     assert(P1.IsXCOFF == P2.IsXCOFF &&
     56            "P1.IsXCOFF should be equal to P2.IsXCOFF.");
     57     if (P1.IsXCOFF)
     58       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
     59              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
     60 
     61     return std::tie(P1.Addr, P1.Name, P1.Type) <
     62              std::tie(P2.Addr, P2.Name, P2.Type);
     63   }
     64 };
     65 
     66 using SectionSymbolsTy = std::vector<SymbolInfoTy>;
     67 
     68 template <typename T> class ArrayRef;
     69 class MCContext;
     70 class MCInst;
     71 class MCSubtargetInfo;
     72 class raw_ostream;
     73 
     74 /// Superclass for all disassemblers. Consumes a memory region and provides an
     75 /// array of assembly instructions.
     76 class MCDisassembler {
     77 public:
     78   /// Ternary decode status. Most backends will just use Fail and
     79   /// Success, however some have a concept of an instruction with
     80   /// understandable semantics but which is architecturally
     81   /// incorrect. An example of this is ARM UNPREDICTABLE instructions
     82   /// which are disassemblable but cause undefined behaviour.
     83   ///
     84   /// Because it makes sense to disassemble these instructions, there
     85   /// is a "soft fail" failure mode that indicates the MCInst& is
     86   /// valid but architecturally incorrect.
     87   ///
     88   /// The enum numbers are deliberately chosen such that reduction
     89   /// from Success->SoftFail ->Fail can be done with a simple
     90   /// bitwise-AND:
     91   ///
     92   ///   LEFT & TOP =  | Success       Unpredictable   Fail
     93   ///   --------------+-----------------------------------
     94   ///   Success       | Success       Unpredictable   Fail
     95   ///   Unpredictable | Unpredictable Unpredictable   Fail
     96   ///   Fail          | Fail          Fail            Fail
     97   ///
     98   /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
     99   /// Success, SoftFail, Fail respectively.
    100   enum DecodeStatus {
    101     Fail = 0,
    102     SoftFail = 1,
    103     Success = 3
    104   };
    105 
    106   MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
    107     : Ctx(Ctx), STI(STI) {}
    108 
    109   virtual ~MCDisassembler();
    110 
    111   /// Returns the disassembly of a single instruction.
    112   ///
    113   /// \param Instr    - An MCInst to populate with the contents of the
    114   ///                   instruction.
    115   /// \param Size     - A value to populate with the size of the instruction, or
    116   ///                   the number of bytes consumed while attempting to decode
    117   ///                   an invalid instruction.
    118   /// \param Address  - The address, in the memory space of region, of the first
    119   ///                   byte of the instruction.
    120   /// \param Bytes    - A reference to the actual bytes of the instruction.
    121   /// \param CStream  - The stream to print comments and annotations on.
    122   /// \return         - MCDisassembler::Success if the instruction is valid,
    123   ///                   MCDisassembler::SoftFail if the instruction was
    124   ///                                            disassemblable but invalid,
    125   ///                   MCDisassembler::Fail if the instruction was invalid.
    126   virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
    127                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
    128                                       raw_ostream &CStream) const = 0;
    129 
    130   /// Used to perform separate target specific disassembly for a particular
    131   /// symbol. May parse any prelude that precedes instructions after the
    132   /// start of a symbol, or the entire symbol.
    133   /// This is used for example by WebAssembly to decode preludes.
    134   ///
    135   /// Base implementation returns None. So all targets by default ignore to
    136   /// treat symbols separately.
    137   ///
    138   /// \param Symbol   - The symbol.
    139   /// \param Size     - The number of bytes consumed.
    140   /// \param Address  - The address, in the memory space of region, of the first
    141   ///                   byte of the symbol.
    142   /// \param Bytes    - A reference to the actual bytes at the symbol location.
    143   /// \param CStream  - The stream to print comments and annotations on.
    144   /// \return         - MCDisassembler::Success if bytes are decoded
    145   ///                   successfully. Size must hold the number of bytes that
    146   ///                   were decoded.
    147   ///                 - MCDisassembler::Fail if the bytes are invalid. Size
    148   ///                   must hold the number of bytes that were decoded before
    149   ///                   failing. The target must print nothing. This can be
    150   ///                   done by buffering the output if needed.
    151   ///                 - None if the target doesn't want to handle the symbol
    152   ///                   separately. Value of Size is ignored in this case.
    153   virtual Optional<DecodeStatus>
    154   onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
    155                 uint64_t Address, raw_ostream &CStream) const;
    156   // TODO:
    157   // Implement similar hooks that can be used at other points during
    158   // disassembly. Something along the following lines:
    159   // - onBeforeInstructionDecode()
    160   // - onAfterInstructionDecode()
    161   // - onSymbolEnd()
    162   // It should help move much of the target specific code from llvm-objdump to
    163   // respective target disassemblers.
    164 
    165 private:
    166   MCContext &Ctx;
    167 
    168 protected:
    169   // Subtarget information, for instruction decoding predicates if required.
    170   const MCSubtargetInfo &STI;
    171   std::unique_ptr<MCSymbolizer> Symbolizer;
    172 
    173 public:
    174   // Helpers around MCSymbolizer
    175   bool tryAddingSymbolicOperand(MCInst &Inst,
    176                                 int64_t Value,
    177                                 uint64_t Address, bool IsBranch,
    178                                 uint64_t Offset, uint64_t InstSize) const;
    179 
    180   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
    181 
    182   /// Set \p Symzer as the current symbolizer.
    183   /// This takes ownership of \p Symzer, and deletes the previously set one.
    184   void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
    185 
    186   MCContext& getContext() const { return Ctx; }
    187 
    188   const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
    189 
    190   // Marked mutable because we cache it inside the disassembler, rather than
    191   // having to pass it around as an argument through all the autogenerated code.
    192   mutable raw_ostream *CommentStream = nullptr;
    193 };
    194 
    195 } // end namespace llvm
    196 
    197 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
    198