Home | History | Annotate | Line # | Download | only in X86
      1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // Define several functions to decode x86 specific shuffle semantics using
     10 // constants from the constant pool.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "X86ShuffleDecodeConstantPool.h"
     15 #include "MCTargetDesc/X86ShuffleDecode.h"
     16 #include "llvm/ADT/APInt.h"
     17 #include "llvm/ADT/SmallVector.h"
     18 #include "llvm/IR/Constants.h"
     19 
     20 //===----------------------------------------------------------------------===//
     21 //  Vector Mask Decoding
     22 //===----------------------------------------------------------------------===//
     23 
     24 namespace llvm {
     25 
     26 static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
     27                                 APInt &UndefElts,
     28                                 SmallVectorImpl<uint64_t> &RawMask) {
     29   // It is not an error for shuffle masks to not be a vector of
     30   // MaskEltSizeInBits because the constant pool uniques constants by their
     31   // bit representation.
     32   // e.g. the following take up the same space in the constant pool:
     33   //   i128 -170141183420855150465331762880109871104
     34   //
     35   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
     36   //
     37   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
     38   //              i32 -2147483648, i32 -2147483648>
     39   auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
     40   if (!CstTy)
     41     return false;
     42 
     43   Type *CstEltTy = CstTy->getElementType();
     44   if (!CstEltTy->isIntegerTy())
     45     return false;
     46 
     47   unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
     48   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
     49   unsigned NumCstElts = CstTy->getNumElements();
     50 
     51   assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
     52          "Unaligned shuffle mask size");
     53 
     54   unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
     55   UndefElts = APInt(NumMaskElts, 0);
     56   RawMask.resize(NumMaskElts, 0);
     57 
     58   // Fast path - if the constants match the mask size then copy direct.
     59   if (MaskEltSizeInBits == CstEltSizeInBits) {
     60     assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
     61     for (unsigned i = 0; i != NumMaskElts; ++i) {
     62       Constant *COp = C->getAggregateElement(i);
     63       if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
     64         return false;
     65 
     66       if (isa<UndefValue>(COp)) {
     67         UndefElts.setBit(i);
     68         RawMask[i] = 0;
     69         continue;
     70       }
     71 
     72       auto *Elt = cast<ConstantInt>(COp);
     73       RawMask[i] = Elt->getValue().getZExtValue();
     74     }
     75     return true;
     76   }
     77 
     78   // Extract all the undef/constant element data and pack into single bitsets.
     79   APInt UndefBits(CstSizeInBits, 0);
     80   APInt MaskBits(CstSizeInBits, 0);
     81   for (unsigned i = 0; i != NumCstElts; ++i) {
     82     Constant *COp = C->getAggregateElement(i);
     83     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
     84       return false;
     85 
     86     unsigned BitOffset = i * CstEltSizeInBits;
     87 
     88     if (isa<UndefValue>(COp)) {
     89       UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
     90       continue;
     91     }
     92 
     93     MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
     94   }
     95 
     96   // Now extract the undef/constant bit data into the raw shuffle masks.
     97   for (unsigned i = 0; i != NumMaskElts; ++i) {
     98     unsigned BitOffset = i * MaskEltSizeInBits;
     99     APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
    100 
    101     // Only treat the element as UNDEF if all bits are UNDEF, otherwise
    102     // treat it as zero.
    103     if (EltUndef.isAllOnesValue()) {
    104       UndefElts.setBit(i);
    105       RawMask[i] = 0;
    106       continue;
    107     }
    108 
    109     APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
    110     RawMask[i] = EltBits.getZExtValue();
    111   }
    112 
    113   return true;
    114 }
    115 
    116 void DecodePSHUFBMask(const Constant *C, unsigned Width,
    117                       SmallVectorImpl<int> &ShuffleMask) {
    118   assert((Width == 128 || Width == 256 || Width == 512) &&
    119          C->getType()->getPrimitiveSizeInBits() >= Width &&
    120          "Unexpected vector size.");
    121 
    122   // The shuffle mask requires a byte vector.
    123   APInt UndefElts;
    124   SmallVector<uint64_t, 64> RawMask;
    125   if (!extractConstantMask(C, 8, UndefElts, RawMask))
    126     return;
    127 
    128   unsigned NumElts = Width / 8;
    129   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
    130          "Unexpected number of vector elements.");
    131 
    132   for (unsigned i = 0; i != NumElts; ++i) {
    133     if (UndefElts[i]) {
    134       ShuffleMask.push_back(SM_SentinelUndef);
    135       continue;
    136     }
    137 
    138     uint64_t Element = RawMask[i];
    139     // If the high bit (7) of the byte is set, the element is zeroed.
    140     if (Element & (1 << 7))
    141       ShuffleMask.push_back(SM_SentinelZero);
    142     else {
    143       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
    144       // lane of the vector we're inside.
    145       unsigned Base = i & ~0xf;
    146 
    147       // Only the least significant 4 bits of the byte are used.
    148       int Index = Base + (Element & 0xf);
    149       ShuffleMask.push_back(Index);
    150     }
    151   }
    152 }
    153 
    154 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
    155                         SmallVectorImpl<int> &ShuffleMask) {
    156   assert((Width == 128 || Width == 256 || Width == 512) &&
    157          C->getType()->getPrimitiveSizeInBits() >= Width &&
    158          "Unexpected vector size.");
    159   assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
    160 
    161   // The shuffle mask requires elements the same size as the target.
    162   APInt UndefElts;
    163   SmallVector<uint64_t, 16> RawMask;
    164   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    165     return;
    166 
    167   unsigned NumElts = Width / ElSize;
    168   unsigned NumEltsPerLane = 128 / ElSize;
    169   assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
    170          "Unexpected number of vector elements.");
    171 
    172   for (unsigned i = 0; i != NumElts; ++i) {
    173     if (UndefElts[i]) {
    174       ShuffleMask.push_back(SM_SentinelUndef);
    175       continue;
    176     }
    177 
    178     int Index = i & ~(NumEltsPerLane - 1);
    179     uint64_t Element = RawMask[i];
    180     if (ElSize == 64)
    181       Index += (Element >> 1) & 0x1;
    182     else
    183       Index += Element & 0x3;
    184 
    185     ShuffleMask.push_back(Index);
    186   }
    187 }
    188 
    189 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
    190                          unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
    191   Type *MaskTy = C->getType();
    192   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
    193   (void)MaskTySize;
    194   assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
    195          "Unexpected vector size.");
    196 
    197   // The shuffle mask requires elements the same size as the target.
    198   APInt UndefElts;
    199   SmallVector<uint64_t, 8> RawMask;
    200   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    201     return;
    202 
    203   unsigned NumElts = Width / ElSize;
    204   unsigned NumEltsPerLane = 128 / ElSize;
    205   assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
    206          "Unexpected number of vector elements.");
    207 
    208   for (unsigned i = 0; i != NumElts; ++i) {
    209     if (UndefElts[i]) {
    210       ShuffleMask.push_back(SM_SentinelUndef);
    211       continue;
    212     }
    213 
    214     // VPERMIL2 Operation.
    215     // Bits[3] - Match Bit.
    216     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    217     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    218     uint64_t Selector = RawMask[i];
    219     unsigned MatchBit = (Selector >> 3) & 0x1;
    220 
    221     // M2Z[0:1]     MatchBit
    222     //   0Xb           X        Source selected by Selector index.
    223     //   10b           0        Source selected by Selector index.
    224     //   10b           1        Zero.
    225     //   11b           0        Zero.
    226     //   11b           1        Source selected by Selector index.
    227     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
    228       ShuffleMask.push_back(SM_SentinelZero);
    229       continue;
    230     }
    231 
    232     int Index = i & ~(NumEltsPerLane - 1);
    233     if (ElSize == 64)
    234       Index += (Selector >> 1) & 0x1;
    235     else
    236       Index += Selector & 0x3;
    237 
    238     int Src = (Selector >> 2) & 0x1;
    239     Index += Src * NumElts;
    240     ShuffleMask.push_back(Index);
    241   }
    242 }
    243 
    244 void DecodeVPPERMMask(const Constant *C, unsigned Width,
    245                       SmallVectorImpl<int> &ShuffleMask) {
    246   Type *MaskTy = C->getType();
    247   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
    248   (void)MaskTySize;
    249   assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
    250 
    251   // The shuffle mask requires a byte vector.
    252   APInt UndefElts;
    253   SmallVector<uint64_t, 16> RawMask;
    254   if (!extractConstantMask(C, 8, UndefElts, RawMask))
    255     return;
    256 
    257   unsigned NumElts = Width / 8;
    258   assert(NumElts == 16 && "Unexpected number of vector elements.");
    259 
    260   for (unsigned i = 0; i != NumElts; ++i) {
    261     if (UndefElts[i]) {
    262       ShuffleMask.push_back(SM_SentinelUndef);
    263       continue;
    264     }
    265 
    266     // VPPERM Operation
    267     // Bits[4:0] - Byte Index (0 - 31)
    268     // Bits[7:5] - Permute Operation
    269     //
    270     // Permute Operation:
    271     // 0 - Source byte (no logical operation).
    272     // 1 - Invert source byte.
    273     // 2 - Bit reverse of source byte.
    274     // 3 - Bit reverse of inverted source byte.
    275     // 4 - 00h (zero - fill).
    276     // 5 - FFh (ones - fill).
    277     // 6 - Most significant bit of source byte replicated in all bit positions.
    278     // 7 - Invert most significant bit of source byte and replicate in all bit
    279     // positions.
    280     uint64_t Element = RawMask[i];
    281     uint64_t Index = Element & 0x1F;
    282     uint64_t PermuteOp = (Element >> 5) & 0x7;
    283 
    284     if (PermuteOp == 4) {
    285       ShuffleMask.push_back(SM_SentinelZero);
    286       continue;
    287     }
    288     if (PermuteOp != 0) {
    289       ShuffleMask.clear();
    290       return;
    291     }
    292     ShuffleMask.push_back((int)Index);
    293   }
    294 }
    295 
    296 } // namespace llvm
    297