Home | History | Annotate | Line # | Download | only in Instrumentation
      1 //===- PoisonChecking.cpp - -----------------------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // Implements a transform pass which instruments IR such that poison semantics
     10 // are made explicit.  That is, it provides a (possibly partial) executable
     11 // semantics for every instruction w.r.t. poison as specified in the LLVM
     12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
     13 // is focused purely on the semantics of LLVM IR, not any particular source
     14 // language.   If you're looking for something to see if your C/C++ contains
     15 // UB, this is not it.
     16 //
     17 // The rewritten semantics of each instruction will include the following
     18 // components:
     19 //
     20 // 1) The original instruction, unmodified.
     21 // 2) A propagation rule which translates dynamic information about the poison
     22 //    state of each input to whether the dynamic output of the instruction
     23 //    produces poison.
     24 // 3) A creation rule which validates any poison producing flags on the
     25 //    instruction itself (e.g. checks for overflow on nsw).
     26 // 4) A check rule which traps (to a handler function) if this instruction must
     27 //    execute undefined behavior given the poison state of it's inputs.
     28 //
     29 // This is a must analysis based transform; that is, the resulting code may
     30 // produce a false negative result (not report UB when actually exists
     31 // according to the LangRef spec), but should never produce a false positive
     32 // (report UB where it doesn't exist).
     33 //
     34 // Use cases for this pass include:
     35 // - Understanding (and testing!) the implications of the definition of poison
     36 //   from the LangRef.
     37 // - Validating the output of a IR fuzzer to ensure that all programs produced
     38 //   are well defined on the specific input used.
     39 // - Finding/confirming poison specific miscompiles by checking the poison
     40 //   status of an input/IR pair is the same before and after an optimization
     41 //   transform.
     42 // - Checking that a bugpoint reduction does not introduce UB which didn't
     43 //   exist in the original program being reduced.
     44 //
     45 // The major sources of inaccuracy are currently:
     46 // - Most validation rules not yet implemented for instructions with poison
     47 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
     48 // - UB which is control dependent on a branch on poison is not yet
     49 //   reported. Currently, only data flow dependence is modeled.
     50 // - Poison which is propagated through memory is not modeled.  As such,
     51 //   storing poison to memory and then reloading it will cause a false negative
     52 //   as we consider the reloaded value to not be poisoned.
     53 // - Poison propagation across function boundaries is not modeled.  At the
     54 //   moment, all arguments and return values are assumed not to be poison.
     55 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
     56 //   concrete values for undef bits so as to maximize potential for producing
     57 //   poison is not modeled.
     58 //
     59 //===----------------------------------------------------------------------===//
     60 
     61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
     62 #include "llvm/ADT/DenseMap.h"
     63 #include "llvm/ADT/Statistic.h"
     64 #include "llvm/Analysis/MemoryBuiltins.h"
     65 #include "llvm/Analysis/ValueTracking.h"
     66 #include "llvm/IR/IRBuilder.h"
     67 #include "llvm/IR/InstVisitor.h"
     68 #include "llvm/IR/IntrinsicInst.h"
     69 #include "llvm/IR/PatternMatch.h"
     70 #include "llvm/Support/CommandLine.h"
     71 #include "llvm/Support/Debug.h"
     72 
     73 using namespace llvm;
     74 
     75 #define DEBUG_TYPE "poison-checking"
     76 
     77 static cl::opt<bool>
     78 LocalCheck("poison-checking-function-local",
     79            cl::init(false),
     80            cl::desc("Check that returns are non-poison (for testing)"));
     81 
     82 
     83 static bool isConstantFalse(Value* V) {
     84   assert(V->getType()->isIntegerTy(1));
     85   if (auto *CI = dyn_cast<ConstantInt>(V))
     86     return CI->isZero();
     87   return false;
     88 }
     89 
     90 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
     91   if (Ops.size() == 0)
     92     return B.getFalse();
     93   unsigned i = 0;
     94   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
     95   if (i == Ops.size())
     96     return B.getFalse();
     97   Value *Accum = Ops[i++];
     98   for (; i < Ops.size(); i++)
     99     if (!isConstantFalse(Ops[i]))
    100       Accum = B.CreateOr(Accum, Ops[i]);
    101   return Accum;
    102 }
    103 
    104 static void generateCreationChecksForBinOp(Instruction &I,
    105                                            SmallVectorImpl<Value*> &Checks) {
    106   assert(isa<BinaryOperator>(I));
    107 
    108   IRBuilder<> B(&I);
    109   Value *LHS = I.getOperand(0);
    110   Value *RHS = I.getOperand(1);
    111   switch (I.getOpcode()) {
    112   default:
    113     return;
    114   case Instruction::Add: {
    115     if (I.hasNoSignedWrap()) {
    116       auto *OverflowOp =
    117         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
    118       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    119     }
    120     if (I.hasNoUnsignedWrap()) {
    121       auto *OverflowOp =
    122         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
    123       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    124     }
    125     break;
    126   }
    127   case Instruction::Sub: {
    128     if (I.hasNoSignedWrap()) {
    129       auto *OverflowOp =
    130         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
    131       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    132     }
    133     if (I.hasNoUnsignedWrap()) {
    134       auto *OverflowOp =
    135         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
    136       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    137     }
    138     break;
    139   }
    140   case Instruction::Mul: {
    141     if (I.hasNoSignedWrap()) {
    142       auto *OverflowOp =
    143         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
    144       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    145     }
    146     if (I.hasNoUnsignedWrap()) {
    147       auto *OverflowOp =
    148         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
    149       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    150     }
    151     break;
    152   }
    153   case Instruction::UDiv: {
    154     if (I.isExact()) {
    155       auto *Check =
    156         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
    157                      ConstantInt::get(LHS->getType(), 0));
    158       Checks.push_back(Check);
    159     }
    160     break;
    161   }
    162   case Instruction::SDiv: {
    163     if (I.isExact()) {
    164       auto *Check =
    165         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
    166                      ConstantInt::get(LHS->getType(), 0));
    167       Checks.push_back(Check);
    168     }
    169     break;
    170   }
    171   case Instruction::AShr:
    172   case Instruction::LShr:
    173   case Instruction::Shl: {
    174     Value *ShiftCheck =
    175       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
    176                    ConstantInt::get(RHS->getType(),
    177                                     LHS->getType()->getScalarSizeInBits()));
    178     Checks.push_back(ShiftCheck);
    179     break;
    180   }
    181   };
    182 }
    183 
    184 /// Given an instruction which can produce poison on non-poison inputs
    185 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
    186 /// boolean indicators of when poison would result.
    187 static void generateCreationChecks(Instruction &I,
    188                                    SmallVectorImpl<Value*> &Checks) {
    189   IRBuilder<> B(&I);
    190   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
    191     generateCreationChecksForBinOp(I, Checks);
    192 
    193   // Handle non-binops separately
    194   switch (I.getOpcode()) {
    195   default:
    196     // Note there are a couple of missing cases here, once implemented, this
    197     // should become an llvm_unreachable.
    198     break;
    199   case Instruction::ExtractElement: {
    200     Value *Vec = I.getOperand(0);
    201     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
    202     if (!VecVTy)
    203       break;
    204     Value *Idx = I.getOperand(1);
    205     unsigned NumElts = VecVTy->getNumElements();
    206     Value *Check =
    207       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
    208                    ConstantInt::get(Idx->getType(), NumElts));
    209     Checks.push_back(Check);
    210     break;
    211   }
    212   case Instruction::InsertElement: {
    213     Value *Vec = I.getOperand(0);
    214     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
    215     if (!VecVTy)
    216       break;
    217     Value *Idx = I.getOperand(2);
    218     unsigned NumElts = VecVTy->getNumElements();
    219     Value *Check =
    220       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
    221                    ConstantInt::get(Idx->getType(), NumElts));
    222     Checks.push_back(Check);
    223     break;
    224   }
    225   };
    226 }
    227 
    228 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
    229   auto Itr = ValToPoison.find(V);
    230   if (Itr != ValToPoison.end())
    231     return Itr->second;
    232   if (isa<Constant>(V)) {
    233     return ConstantInt::getFalse(V->getContext());
    234   }
    235   // Return false for unknwon values - this implements a non-strict mode where
    236   // unhandled IR constructs are simply considered to never produce poison.  At
    237   // some point in the future, we probably want a "strict mode" for testing if
    238   // nothing else.
    239   return ConstantInt::getFalse(V->getContext());
    240 }
    241 
    242 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
    243   assert(Cond->getType()->isIntegerTy(1));
    244   if (auto *CI = dyn_cast<ConstantInt>(Cond))
    245     if (CI->isAllOnesValue())
    246       return;
    247 
    248   Module *M = B.GetInsertBlock()->getModule();
    249   M->getOrInsertFunction("__poison_checker_assert",
    250                          Type::getVoidTy(M->getContext()),
    251                          Type::getInt1Ty(M->getContext()));
    252   Function *TrapFunc = M->getFunction("__poison_checker_assert");
    253   B.CreateCall(TrapFunc, Cond);
    254 }
    255 
    256 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
    257   assert(Cond->getType()->isIntegerTy(1));
    258   CreateAssert(B, B.CreateNot(Cond));
    259 }
    260 
    261 static bool rewrite(Function &F) {
    262   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
    263 
    264   DenseMap<Value *, Value *> ValToPoison;
    265 
    266   for (BasicBlock &BB : F)
    267     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
    268       auto *OldPHI = cast<PHINode>(&*I);
    269       auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
    270       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
    271         NewPHI->addIncoming(UndefValue::get(Int1Ty),
    272                             OldPHI->getIncomingBlock(i));
    273       NewPHI->insertBefore(OldPHI);
    274       ValToPoison[OldPHI] = NewPHI;
    275     }
    276 
    277   for (BasicBlock &BB : F)
    278     for (Instruction &I : BB) {
    279       if (isa<PHINode>(I)) continue;
    280 
    281       IRBuilder<> B(cast<Instruction>(&I));
    282 
    283       // Note: There are many more sources of documented UB, but this pass only
    284       // attempts to find UB triggered by propagation of poison.
    285       SmallPtrSet<const Value *, 4> NonPoisonOps;
    286       getGuaranteedNonPoisonOps(&I, NonPoisonOps);
    287       for (const Value *Op : NonPoisonOps)
    288         CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
    289 
    290       if (LocalCheck)
    291         if (auto *RI = dyn_cast<ReturnInst>(&I))
    292           if (RI->getNumOperands() != 0) {
    293             Value *Op = RI->getOperand(0);
    294             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
    295           }
    296 
    297       SmallVector<Value*, 4> Checks;
    298       if (propagatesPoison(cast<Operator>(&I)))
    299         for (Value *V : I.operands())
    300           Checks.push_back(getPoisonFor(ValToPoison, V));
    301 
    302       if (canCreatePoison(cast<Operator>(&I)))
    303         generateCreationChecks(I, Checks);
    304       ValToPoison[&I] = buildOrChain(B, Checks);
    305     }
    306 
    307   for (BasicBlock &BB : F)
    308     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
    309       auto *OldPHI = cast<PHINode>(&*I);
    310       if (!ValToPoison.count(OldPHI))
    311         continue; // skip the newly inserted phis
    312       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
    313       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
    314         auto *OldVal = OldPHI->getIncomingValue(i);
    315         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
    316       }
    317     }
    318   return true;
    319 }
    320 
    321 
    322 PreservedAnalyses PoisonCheckingPass::run(Module &M,
    323                                           ModuleAnalysisManager &AM) {
    324   bool Changed = false;
    325   for (auto &F : M)
    326     Changed |= rewrite(F);
    327 
    328   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
    329 }
    330 
    331 PreservedAnalyses PoisonCheckingPass::run(Function &F,
    332                                           FunctionAnalysisManager &AM) {
    333   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
    334 }
    335 
    336 /* Major TODO Items:
    337    - Control dependent poison UB
    338    - Strict mode - (i.e. must analyze every operand)
    339      - Poison through memory
    340      - Function ABIs
    341      - Full coverage of intrinsics, etc.. (ouch)
    342 
    343    Instructions w/Unclear Semantics:
    344    - shufflevector - It would seem reasonable for an out of bounds mask element
    345      to produce poison, but the LangRef does not state.
    346    - all binary ops w/vector operands - The likely interpretation would be that
    347      any element overflowing should produce poison for the entire result, but
    348      the LangRef does not state.
    349    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
    350      strange that only certian flags should be documented as producing poison.
    351 
    352    Cases of clear poison semantics not yet implemented:
    353    - Exact flags on ashr/lshr produce poison
    354    - NSW/NUW flags on shl produce poison
    355    - Inbounds flag on getelementptr produce poison
    356    - fptosi/fptoui (out of bounds input) produce poison
    357    - Scalable vector types for insertelement/extractelement
    358    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
    359  */
    360