Home | History | Annotate | Line # | Download | only in lib
      1 //===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
     10 #define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
     11 
     12 #include "llvm/ADT/DenseMap.h"
     13 #include "llvm/ADT/SmallSet.h"
     14 #include "llvm/BinaryFormat/ELF.h"
     15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
     16 #include "llvm/MC/MCAsmInfo.h"
     17 #include "llvm/MC/MCContext.h"
     18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
     19 #include "llvm/MC/MCInst.h"
     20 #include "llvm/MC/MCInstPrinter.h"
     21 #include "llvm/MC/MCInstrAnalysis.h"
     22 #include "llvm/MC/MCInstrDesc.h"
     23 #include "llvm/MC/MCInstrInfo.h"
     24 #include "llvm/MC/MCObjectFileInfo.h"
     25 #include "llvm/MC/MCRegisterInfo.h"
     26 #include "llvm/MC/MCSubtargetInfo.h"
     27 #include "llvm/Object/Binary.h"
     28 #include "llvm/Object/COFF.h"
     29 #include "llvm/Object/ELFObjectFile.h"
     30 #include "llvm/Object/ObjectFile.h"
     31 #include "llvm/Support/Casting.h"
     32 #include "llvm/Support/CommandLine.h"
     33 #include "llvm/Support/Error.h"
     34 #include "llvm/Support/MemoryBuffer.h"
     35 #include "llvm/Support/TargetRegistry.h"
     36 #include "llvm/Support/TargetSelect.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 
     39 #include <functional>
     40 #include <set>
     41 #include <string>
     42 #include <unordered_map>
     43 
     44 namespace llvm {
     45 namespace cfi_verify {
     46 
     47 struct GraphResult; // Forward declaration; used below only by const reference.
     48 
     49 extern bool IgnoreDWARFFlag; // Declaration only; defined outside this header.
     50 
     51 enum class CFIProtectionStatus { // Result of FileAnalysis::validateCFIProtection.
     52   // The indirect control flow instruction is protected by CFI.
     53   PROTECTED,
     54   // The instruction is not an indirect control flow instruction, and thus
     55   // shouldn't be protected.
     56   FAIL_NOT_INDIRECT_CF,
     57   // There is a path to the instruction that was unexpected.
     58   FAIL_ORPHANS,
     59   // There is a path to the instruction from a conditional branch that does not
     60   // properly check the destination for this vcall/icall.
     61   FAIL_BAD_CONDITIONAL_BRANCH,
     62   // One of the operands of the indirect CF instruction is modified between the
     63   // CFI-check and execution.
     64   FAIL_REGISTER_CLOBBERED,
     65   // The instruction referenced does not exist. This normally indicates an
     66   // error in the program, where you try to validate a graph that was created
     67   // in a different FileAnalysis object.
     68   FAIL_INVALID_INSTRUCTION,
     69 };
     70 
     71 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status); // Presumably the textual name of Status; confirm in the definition.
     72 
     73 // Disassembler and analysis tool for machine code files. Keeps track of non-
     74 // sequential control flows, including indirect control flow instructions.
     75 class FileAnalysis {
     76 public:
     77   // Metadata describing a single instruction.
     78   struct Instr {
     79     uint64_t VMAddress;       // Virtual memory address of this instruction.
     80     MCInst Instruction;       // The MCInst itself; undefined if !Valid.
     81     uint64_t InstructionSize; // Size of this instruction.
     82     bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
     83                 // undefined.
     84   };
     85 
     86   // Construct a FileAnalysis from a file path; errors come back in the Expected.
     87   static Expected<FileAnalysis> Create(StringRef Filename);
     88 
     89   // Construct and take ownership of the supplied object. Do not use this
     90   // constructor directly; prefer FileAnalysis::Create instead.
     91   FileAnalysis(object::OwningBinary<object::Binary> Binary);
     92   FileAnalysis() = delete; // No default construction.
     93   FileAnalysis(const FileAnalysis &) = delete; // Not copy-constructible.
     94   FileAnalysis(FileAnalysis &&Other) = default; // Move-constructible.
     95 
     96   // Returns the instruction at the provided address. Returns nullptr if there
     97   // is no instruction at the provided address.
     98   const Instr *getInstruction(uint64_t Address) const;
     99 
    100   // Returns the instruction at the provided address, dying if the instruction
    101   // is not found.
    102   const Instr &getInstructionOrDie(uint64_t Address) const;
    103 
    104   // Returns a pointer to the previous/next instruction in sequence,
    105   // respectively. Returns nullptr if the next/prev instruction doesn't exist,
    106   // or if the provided instruction doesn't exist.
    107   const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
    108   const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
    109 
    110   // Returns whether this instruction is used by CFI to trap the program.
    111   bool isCFITrap(const Instr &InstrMeta) const;
    112 
    113   // Returns whether this instruction is a call to a function that will trap on
    114   // CFI violations (i.e., it serves as a trap in this instance).
    115   bool willTrapOnCFIViolation(const Instr &InstrMeta) const;
    116 
    117   // Returns whether this instruction can fall through to the next instruction.
    118   // Undefined (and bad) instructions cannot fall through, and instructions
    119   // that modify the control flow can only fall through if they are
    120   // conditional branches or calls.
    121   bool canFallThrough(const Instr &InstrMeta) const;
    122 
    123   // Returns the definitive next instruction. This is different from the next
    124   // instruction sequentially as it will follow unconditional branches (assuming
    125   // they can be resolved at compile time, i.e. not indirect). This method
    126   // returns nullptr if the provided instruction does not transfer control flow
    127   // to exactly one instruction that is known deterministically at compile time.
    128   // Also returns nullptr if the deterministic target does not exist in this
    129   // file.
    130   const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
    131 
    132   // Get a list of deterministic control flows that lead to the provided
    133   // instruction. This list includes all static control flow cross-references as
    134   // well as the previous instruction if it can fall through.
    135   std::set<const Instr *>
    136   getDirectControlFlowXRefs(const Instr &InstrMeta) const;
    137 
    138   // Returns whether this instruction uses a register operand.
    139   bool usesRegisterOperand(const Instr &InstrMeta) const;
    140 
    141   // Returns the set of addresses of indirect control flow instructions.
    142   const std::set<object::SectionedAddress> &getIndirectInstructions() const;
    143 
    144   const MCRegisterInfo *getRegisterInfo() const;
    145   const MCInstrInfo *getMCInstrInfo() const;
    146   const MCInstrAnalysis *getMCInstrAnalysis() const;
    147 
    148   // Returns the inlining information for the provided address.
    149   Expected<DIInliningInfo>
    150   symbolizeInlinedCode(object::SectionedAddress Address);
    151 
    152   // Returns whether the provided Graph represents a protected indirect control
    153   // flow instruction in this file.
    154   CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
    155 
    156   // Returns the first place the operand register is clobbered between the CFI-
    157   // check and the indirect CF instruction execution. We do this by walking
    158   // backwards from the indirect CF and ensuring there is at most one load
    159   // involving the operand register (which is the indirect CF itself on x86).
    160   // If the register is not modified, returns the address of the indirect CF
    161   // instruction. The result is undefined if the provided graph does not fall
    162   // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
    163   // CFIProtectionStatus).
    164   uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;
    165 
    166   // Prints an instruction to the provided stream using this object's pretty-
    167   // printers.
    168   void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
    169 
    170 protected:
    171   // Construct a blank object with the provided triple and features. Used in
    172   // testing, where a subclass will dependency inject protected methods to
    173   // allow analysis of raw binary, without requiring a fully valid ELF file.
    174   FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
    175 
    176   // Add an instruction to this object.
    177   void addInstruction(const Instr &Instruction);
    178 
    179   // Disassemble and parse the provided bytes into this object. Instruction
    180   // address calculation is done relative to the provided Address.
    181   void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
    182                             object::SectionedAddress Address);
    183 
    184   // Constructs and initialises members required for disassembly.
    185   Error initialiseDisassemblyMembers();
    186 
    187   // Parses code sections from the internal object file. Saves them into the
    188   // internal members. Should only be called once by Create().
    189   Error parseCodeSections();
    190 
    191   // Parses the symbol table to look for the addresses of functions that will
    192   // trap on CFI violations.
    193   Error parseSymbolTable();
    194 
    195 private:
    196   // Members that describe the input file.
    197   object::OwningBinary<object::Binary> Binary;
    198   const object::ObjectFile *Object = nullptr;
    199   Triple ObjectTriple;
    200   std::string ArchName;
    201   std::string MCPU;
    202   const Target *ObjectTarget = nullptr;
    203   SubtargetFeatures Features;
    204 
    205   // Members required for disassembly.
    206   std::unique_ptr<const MCRegisterInfo> RegisterInfo;
    207   std::unique_ptr<const MCAsmInfo> AsmInfo;
    208   std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
    209   std::unique_ptr<const MCInstrInfo> MII;
    210   std::unique_ptr<MCContext> Context;
    211   std::unique_ptr<const MCDisassembler> Disassembler;
    212   std::unique_ptr<const MCInstrAnalysis> MIA;
    213   std::unique_ptr<MCInstPrinter> Printer;
    214 
    215   // Symbolizer used for debug information parsing.
    216   std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
    217 
    218   // A mapping between the virtual memory address to the instruction metadata
    219   // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
    220   // insertion allocation.
    221   std::map<uint64_t, Instr> Instructions;
    222 
    223   // Contains a mapping between a specific address, and a list of instructions
    224   // that use this address as a branch target (including call instructions).
    225   DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
    226 
    227   // The set of addresses of indirect control flow instructions.
    228   std::set<object::SectionedAddress> IndirectInstructions;
    229 
    230   // The addresses of functions that will trap on CFI violations.
    231   SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses;
    232 };
    233 // ErrorInfo subclass carrying a message; presumably raised when disassembly is unsupported (confirm at creation sites).
    234 class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
    235 public:
    236   static char ID; // Type identifier used by the ErrorInfo<> machinery.
    237   std::string Text; // Message text; presumably set from the constructor argument.
    238 
    239   UnsupportedDisassembly(StringRef Text);
    240 
    241   void log(raw_ostream &OS) const override; // Writes this error's description to OS.
    242   std::error_code convertToErrorCode() const override; // std::error_code form of this error.
    243 };
    244 
    245 } // namespace cfi_verify
    246 } // namespace llvm
    247 
    248 #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H
    249