Home | History | Annotate | Line # | Download | only in SystemZ
      1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass looks for instructions that can be replaced by a Test Data Class
     10 // instruction, and replaces them when profitable.
     11 //
     12 // Roughly, the following rules are recognized:
     13 //
     14 // 1: fcmp pred X, 0 -> tdc X, mask
     15 // 2: fcmp pred X, +-inf -> tdc X, mask
     16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
     17 // 4: tdc (fabs X), mask -> tdc X, newmask
     18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
     19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
     20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
     21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
     22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
     23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
     24 //
     25 // The pass works in 4 steps:
     26 //
     27 // 1. All fcmp and icmp instructions in a function are checked for a match
     28 //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
     29 //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
     30 //    a fabs, it's also folded according to rule 4.
     31 // 2. All and/or/xor i1 instructions whose both operands have been already
     32 //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
     33 //    as a queue of instructions to check.
     34 // 3. All mapped instructions that are considered worthy of conversion (ie.
     35 //    replacing them will actually simplify the final code) are replaced
     36 //    with a call to the s390.tdc intrinsic.
     37 // 4. All intermediate results of replaced instructions are removed if unused.
     38 //
     39 // Instructions that match rules 1-3 are considered unworthy of conversion
     40 // on their own (since a comparison instruction is superior), but are mapped
     41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
     42 // the original comparison in the process).
     43 //
     44 //===----------------------------------------------------------------------===//
     45 
     46 #include "SystemZ.h"
     47 #include "SystemZSubtarget.h"
     48 #include "llvm/ADT/MapVector.h"
     49 #include "llvm/CodeGen/TargetPassConfig.h"
     50 #include "llvm/IR/Constants.h"
     51 #include "llvm/IR/IRBuilder.h"
     52 #include "llvm/IR/InstIterator.h"
     53 #include "llvm/IR/Instructions.h"
     54 #include "llvm/IR/IntrinsicInst.h"
     55 #include "llvm/IR/IntrinsicsS390.h"
     56 #include "llvm/IR/LegacyPassManager.h"
     57 #include "llvm/IR/Module.h"
     58 #include "llvm/Target/TargetMachine.h"
     59 #include <deque>
     60 #include <set>
     61 
     62 using namespace llvm;
     63 
     64 namespace llvm {
     65   void initializeSystemZTDCPassPass(PassRegistry&);
     66 }
     67 
     68 namespace {
     69 
     70 class SystemZTDCPass : public FunctionPass {
     71 public:
     72   static char ID;
     73   SystemZTDCPass() : FunctionPass(ID) {
     74     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
     75   }
     76 
     77   bool runOnFunction(Function &F) override;
     78 
     79   void getAnalysisUsage(AnalysisUsage &AU) const override {
     80     AU.addRequired<TargetPassConfig>();
     81  }
     82 
     83 private:
     84   // Maps seen instructions that can be mapped to a TDC, values are
     85   // (TDC operand, TDC mask, worthy flag) triples.
     86   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
     87   // The queue of and/or/xor i1 instructions to be potentially folded.
     88   std::vector<BinaryOperator *> LogicOpsWorklist;
     89   // Instructions matched while folding, to be removed at the end if unused.
     90   std::set<Instruction *> PossibleJunk;
     91 
     92   // Tries to convert a fcmp instruction.
     93   void convertFCmp(CmpInst &I);
     94 
     95   // Tries to convert an icmp instruction.
     96   void convertICmp(CmpInst &I);
     97 
     98   // Tries to convert an i1 and/or/xor instruction, whose both operands
     99   // have been already converted.
    100   void convertLogicOp(BinaryOperator &I);
    101 
    102   // Marks an instruction as converted - adds it to ConvertedInsts and adds
    103   // any and/or/xor i1 users to the queue.
    104   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
    105     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
    106     auto &M = *I->getFunction()->getParent();
    107     auto &Ctx = M.getContext();
    108     for (auto *U : I->users()) {
    109       auto *LI = dyn_cast<BinaryOperator>(U);
    110       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
    111           (LI->getOpcode() == Instruction::And ||
    112            LI->getOpcode() == Instruction::Or ||
    113            LI->getOpcode() == Instruction::Xor)) {
    114         LogicOpsWorklist.push_back(LI);
    115       }
    116     }
    117   }
    118 };
    119 
    120 } // end anonymous namespace
    121 
    122 char SystemZTDCPass::ID = 0;
    123 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
    124                 "SystemZ Test Data Class optimization", false, false)
    125 
    126 FunctionPass *llvm::createSystemZTDCPass() {
    127   return new SystemZTDCPass();
    128 }
    129 
    130 void SystemZTDCPass::convertFCmp(CmpInst &I) {
    131   Value *Op0 = I.getOperand(0);
    132   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
    133   auto Pred = I.getPredicate();
    134   // Only comparisons with consts are interesting.
    135   if (!Const)
    136     return;
    137   // Compute the smallest normal number (and its negation).
    138   auto &Sem = Op0->getType()->getFltSemantics();
    139   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
    140   APFloat NegSmallest = Smallest;
    141   NegSmallest.changeSign();
    142   // Check if Const is one of our recognized consts.
    143   int WhichConst;
    144   if (Const->isZero()) {
    145     // All comparisons with 0 can be converted.
    146     WhichConst = 0;
    147   } else if (Const->isInfinity()) {
    148     // Likewise for infinities.
    149     WhichConst = Const->isNegative() ? 2 : 1;
    150   } else if (Const->isExactlyValue(Smallest)) {
    151     // For Smallest, we cannot do EQ separately from GT.
    152     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
    153         (Pred & CmpInst::FCMP_OGE) != 0)
    154       return;
    155     WhichConst = 3;
    156   } else if (Const->isExactlyValue(NegSmallest)) {
    157     // Likewise for NegSmallest, we cannot do EQ separately from LT.
    158     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
    159         (Pred & CmpInst::FCMP_OLE) != 0)
    160       return;
    161     WhichConst = 4;
    162   } else {
    163     // Not one of our special constants.
    164     return;
    165   }
    166   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
    167   static const int Masks[][4] = {
    168     { // 0
    169       SystemZ::TDCMASK_ZERO,              // eq
    170       SystemZ::TDCMASK_POSITIVE,          // gt
    171       SystemZ::TDCMASK_NEGATIVE,          // lt
    172       SystemZ::TDCMASK_NAN,               // un
    173     },
    174     { // inf
    175       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
    176       0,                                  // gt
    177       (SystemZ::TDCMASK_ZERO |
    178        SystemZ::TDCMASK_NEGATIVE |
    179        SystemZ::TDCMASK_NORMAL_PLUS |
    180        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
    181       SystemZ::TDCMASK_NAN,               // un
    182     },
    183     { // -inf
    184       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
    185       (SystemZ::TDCMASK_ZERO |
    186        SystemZ::TDCMASK_POSITIVE |
    187        SystemZ::TDCMASK_NORMAL_MINUS |
    188        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
    189       0,                                  // lt
    190       SystemZ::TDCMASK_NAN,               // un
    191     },
    192     { // minnorm
    193       0,                                  // eq (unsupported)
    194       (SystemZ::TDCMASK_NORMAL_PLUS |
    195        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
    196       (SystemZ::TDCMASK_ZERO |
    197        SystemZ::TDCMASK_NEGATIVE |
    198        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
    199       SystemZ::TDCMASK_NAN,               // un
    200     },
    201     { // -minnorm
    202       0,                                  // eq (unsupported)
    203       (SystemZ::TDCMASK_ZERO |
    204        SystemZ::TDCMASK_POSITIVE |
    205        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
    206       (SystemZ::TDCMASK_NORMAL_MINUS |
    207        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
    208       SystemZ::TDCMASK_NAN,               // un
    209     }
    210   };
    211   // Construct the mask as a combination of the partial masks.
    212   int Mask = 0;
    213   if (Pred & CmpInst::FCMP_OEQ)
    214     Mask |= Masks[WhichConst][0];
    215   if (Pred & CmpInst::FCMP_OGT)
    216     Mask |= Masks[WhichConst][1];
    217   if (Pred & CmpInst::FCMP_OLT)
    218     Mask |= Masks[WhichConst][2];
    219   if (Pred & CmpInst::FCMP_UNO)
    220     Mask |= Masks[WhichConst][3];
    221   // A lone fcmp is unworthy of tdc conversion on its own, but may become
    222   // worthy if combined with fabs.
    223   bool Worthy = false;
    224   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
    225     Function *F = CI->getCalledFunction();
    226     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
    227       // Fold with fabs - adjust the mask appropriately.
    228       Mask &= SystemZ::TDCMASK_PLUS;
    229       Mask |= Mask >> 1;
    230       Op0 = CI->getArgOperand(0);
    231       // A combination of fcmp with fabs is a win, unless the constant
    232       // involved is 0 (which is handled by later passes).
    233       Worthy = WhichConst != 0;
    234       PossibleJunk.insert(CI);
    235     }
    236   }
    237   converted(&I, Op0, Mask, Worthy);
    238 }
    239 
    240 void SystemZTDCPass::convertICmp(CmpInst &I) {
    241   Value *Op0 = I.getOperand(0);
    242   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
    243   auto Pred = I.getPredicate();
    244   // All our icmp rules involve comparisons with consts.
    245   if (!Const)
    246     return;
    247   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
    248     // Check for icmp+bitcast used for signbit.
    249     if (!Cast->getSrcTy()->isFloatTy() &&
    250         !Cast->getSrcTy()->isDoubleTy() &&
    251         !Cast->getSrcTy()->isFP128Ty())
    252       return;
    253     Value *V = Cast->getOperand(0);
    254     int Mask;
    255     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
    256       // icmp slt (bitcast X), 0 - set if sign bit true
    257       Mask = SystemZ::TDCMASK_MINUS;
    258     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
    259       // icmp sgt (bitcast X), -1 - set if sign bit false
    260       Mask = SystemZ::TDCMASK_PLUS;
    261     } else {
    262       // Not a sign bit check.
    263       return;
    264     }
    265     PossibleJunk.insert(Cast);
    266     converted(&I, V, Mask, true);
    267   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
    268     // Check if this is a pre-existing call of our tdc intrinsic.
    269     Function *F = CI->getCalledFunction();
    270     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
    271       return;
    272     if (!Const->isZero())
    273       return;
    274     Value *V = CI->getArgOperand(0);
    275     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
    276     // Bail if the mask is not a constant.
    277     if (!MaskC)
    278       return;
    279     int Mask = MaskC->getZExtValue();
    280     Mask &= SystemZ::TDCMASK_ALL;
    281     if (Pred == CmpInst::ICMP_NE) {
    282       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
    283     } else if (Pred == CmpInst::ICMP_EQ) {
    284       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
    285       Mask ^= SystemZ::TDCMASK_ALL;
    286     } else {
    287       // An unknown comparison - ignore.
    288       return;
    289     }
    290     PossibleJunk.insert(CI);
    291     converted(&I, V, Mask, false);
    292   }
    293 }
    294 
    295 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
    296   Value *Op0, *Op1;
    297   int Mask0, Mask1;
    298   bool Worthy0, Worthy1;
    299   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
    300   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
    301   if (Op0 != Op1)
    302     return;
    303   int Mask;
    304   switch (I.getOpcode()) {
    305     case Instruction::And:
    306       Mask = Mask0 & Mask1;
    307       break;
    308     case Instruction::Or:
    309       Mask = Mask0 | Mask1;
    310       break;
    311     case Instruction::Xor:
    312       Mask = Mask0 ^ Mask1;
    313       break;
    314     default:
    315       llvm_unreachable("Unknown op in convertLogicOp");
    316   }
    317   converted(&I, Op0, Mask, true);
    318 }
    319 
    320 bool SystemZTDCPass::runOnFunction(Function &F) {
    321   auto &TPC = getAnalysis<TargetPassConfig>();
    322   if (TPC.getTM<TargetMachine>()
    323           .getSubtarget<SystemZSubtarget>(F)
    324           .hasSoftFloat())
    325     return false;
    326 
    327   ConvertedInsts.clear();
    328   LogicOpsWorklist.clear();
    329   PossibleJunk.clear();
    330 
    331   // Look for icmp+fcmp instructions.
    332   for (auto &I : instructions(F)) {
    333     if (I.getOpcode() == Instruction::FCmp)
    334       convertFCmp(cast<CmpInst>(I));
    335     else if (I.getOpcode() == Instruction::ICmp)
    336       convertICmp(cast<CmpInst>(I));
    337   }
    338 
    339   // If none found, bail already.
    340   if (ConvertedInsts.empty())
    341     return false;
    342 
    343   // Process the queue of logic instructions.
    344   while (!LogicOpsWorklist.empty()) {
    345     BinaryOperator *Op = LogicOpsWorklist.back();
    346     LogicOpsWorklist.pop_back();
    347     // If both operands mapped, and the instruction itself not yet mapped,
    348     // convert it.
    349     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
    350         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
    351         !ConvertedInsts.count(Op))
    352       convertLogicOp(*Op);
    353   }
    354 
    355   // Time to actually replace the instructions.  Do it in the reverse order
    356   // of finding them, since there's a good chance the earlier ones will be
    357   // unused (due to being folded into later ones).
    358   Module &M = *F.getParent();
    359   auto &Ctx = M.getContext();
    360   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    361   bool MadeChange = false;
    362   for (auto &It : reverse(ConvertedInsts)) {
    363     Instruction *I = It.first;
    364     Value *V;
    365     int Mask;
    366     bool Worthy;
    367     std::tie(V, Mask, Worthy) = It.second;
    368     if (!I->user_empty()) {
    369       // If used and unworthy of conversion, skip it.
    370       if (!Worthy)
    371         continue;
    372       // Call the intrinsic, compare result with 0.
    373       Function *TDCFunc =
    374           Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
    375       IRBuilder<> IRB(I);
    376       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
    377       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
    378       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
    379       I->replaceAllUsesWith(ICmp);
    380     }
    381     // If unused, or used and converted, remove it.
    382     I->eraseFromParent();
    383     MadeChange = true;
    384   }
    385 
    386   if (!MadeChange)
    387     return false;
    388 
    389   // We've actually done something - now clear misc accumulated junk (fabs,
    390   // bitcast).
    391   for (auto *I : PossibleJunk)
    392     if (I->user_empty())
    393       I->eraseFromParent();
    394 
    395   return true;
    396 }
    397