1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Define several functions to decode x86 specific shuffle semantics using 10 // constants from the constant pool. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86ShuffleDecodeConstantPool.h" 15 #include "MCTargetDesc/X86ShuffleDecode.h" 16 #include "llvm/ADT/APInt.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/IR/Constants.h" 19 20 //===----------------------------------------------------------------------===// 21 // Vector Mask Decoding 22 //===----------------------------------------------------------------------===// 23 24 namespace llvm { 25 26 static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits, 27 APInt &UndefElts, 28 SmallVectorImpl<uint64_t> &RawMask) { 29 // It is not an error for shuffle masks to not be a vector of 30 // MaskEltSizeInBits because the constant pool uniques constants by their 31 // bit representation. 32 // e.g. the following take up the same space in the constant pool: 33 // i128 -170141183420855150465331762880109871104 34 // 35 // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> 36 // 37 // <4 x i32> <i32 -2147483648, i32 -2147483648, 38 // i32 -2147483648, i32 -2147483648> 39 auto *CstTy = dyn_cast<FixedVectorType>(C->getType()); 40 if (!CstTy) 41 return false; 42 43 Type *CstEltTy = CstTy->getElementType(); 44 if (!CstEltTy->isIntegerTy()) 45 return false; 46 47 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); 48 unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); 49 unsigned NumCstElts = CstTy->getNumElements(); 50 51 assert((CstSizeInBits % MaskEltSizeInBits) == 0 && 52 "Unaligned shuffle mask size"); 53 54 unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits; 55 UndefElts = APInt(NumMaskElts, 0); 56 RawMask.resize(NumMaskElts, 0); 57 58 // Fast path - if the constants match the mask size then copy direct. 59 if (MaskEltSizeInBits == CstEltSizeInBits) { 60 assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size"); 61 for (unsigned i = 0; i != NumMaskElts; ++i) { 62 Constant *COp = C->getAggregateElement(i); 63 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 64 return false; 65 66 if (isa<UndefValue>(COp)) { 67 UndefElts.setBit(i); 68 RawMask[i] = 0; 69 continue; 70 } 71 72 auto *Elt = cast<ConstantInt>(COp); 73 RawMask[i] = Elt->getValue().getZExtValue(); 74 } 75 return true; 76 } 77 78 // Extract all the undef/constant element data and pack into single bitsets. 79 APInt UndefBits(CstSizeInBits, 0); 80 APInt MaskBits(CstSizeInBits, 0); 81 for (unsigned i = 0; i != NumCstElts; ++i) { 82 Constant *COp = C->getAggregateElement(i); 83 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 84 return false; 85 86 unsigned BitOffset = i * CstEltSizeInBits; 87 88 if (isa<UndefValue>(COp)) { 89 UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits); 90 continue; 91 } 92 93 MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset); 94 } 95 96 // Now extract the undef/constant bit data into the raw shuffle masks. 97 for (unsigned i = 0; i != NumMaskElts; ++i) { 98 unsigned BitOffset = i * MaskEltSizeInBits; 99 APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset); 100 101 // Only treat the element as UNDEF if all bits are UNDEF, otherwise 102 // treat it as zero. 103 if (EltUndef.isAllOnesValue()) { 104 UndefElts.setBit(i); 105 RawMask[i] = 0; 106 continue; 107 } 108 109 APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset); 110 RawMask[i] = EltBits.getZExtValue(); 111 } 112 113 return true; 114 } 115 116 void DecodePSHUFBMask(const Constant *C, unsigned Width, 117 SmallVectorImpl<int> &ShuffleMask) { 118 assert((Width == 128 || Width == 256 || Width == 512) && 119 C->getType()->getPrimitiveSizeInBits() >= Width && 120 "Unexpected vector size."); 121 122 // The shuffle mask requires a byte vector. 123 APInt UndefElts; 124 SmallVector<uint64_t, 64> RawMask; 125 if (!extractConstantMask(C, 8, UndefElts, RawMask)) 126 return; 127 128 unsigned NumElts = Width / 8; 129 assert((NumElts == 16 || NumElts == 32 || NumElts == 64) && 130 "Unexpected number of vector elements."); 131 132 for (unsigned i = 0; i != NumElts; ++i) { 133 if (UndefElts[i]) { 134 ShuffleMask.push_back(SM_SentinelUndef); 135 continue; 136 } 137 138 uint64_t Element = RawMask[i]; 139 // If the high bit (7) of the byte is set, the element is zeroed. 140 if (Element & (1 << 7)) 141 ShuffleMask.push_back(SM_SentinelZero); 142 else { 143 // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte 144 // lane of the vector we're inside. 145 unsigned Base = i & ~0xf; 146 147 // Only the least significant 4 bits of the byte are used. 148 int Index = Base + (Element & 0xf); 149 ShuffleMask.push_back(Index); 150 } 151 } 152 } 153 154 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width, 155 SmallVectorImpl<int> &ShuffleMask) { 156 assert((Width == 128 || Width == 256 || Width == 512) && 157 C->getType()->getPrimitiveSizeInBits() >= Width && 158 "Unexpected vector size."); 159 assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size."); 160 161 // The shuffle mask requires elements the same size as the target. 162 APInt UndefElts; 163 SmallVector<uint64_t, 16> RawMask; 164 if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 165 return; 166 167 unsigned NumElts = Width / ElSize; 168 unsigned NumEltsPerLane = 128 / ElSize; 169 assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) && 170 "Unexpected number of vector elements."); 171 172 for (unsigned i = 0; i != NumElts; ++i) { 173 if (UndefElts[i]) { 174 ShuffleMask.push_back(SM_SentinelUndef); 175 continue; 176 } 177 178 int Index = i & ~(NumEltsPerLane - 1); 179 uint64_t Element = RawMask[i]; 180 if (ElSize == 64) 181 Index += (Element >> 1) & 0x1; 182 else 183 Index += Element & 0x3; 184 185 ShuffleMask.push_back(Index); 186 } 187 } 188 189 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize, 190 unsigned Width, SmallVectorImpl<int> &ShuffleMask) { 191 Type *MaskTy = C->getType(); 192 unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); 193 (void)MaskTySize; 194 assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize && 195 "Unexpected vector size."); 196 197 // The shuffle mask requires elements the same size as the target. 198 APInt UndefElts; 199 SmallVector<uint64_t, 8> RawMask; 200 if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 201 return; 202 203 unsigned NumElts = Width / ElSize; 204 unsigned NumEltsPerLane = 128 / ElSize; 205 assert((NumElts == 2 || NumElts == 4 || NumElts == 8) && 206 "Unexpected number of vector elements."); 207 208 for (unsigned i = 0; i != NumElts; ++i) { 209 if (UndefElts[i]) { 210 ShuffleMask.push_back(SM_SentinelUndef); 211 continue; 212 } 213 214 // VPERMIL2 Operation. 215 // Bits[3] - Match Bit. 216 // Bits[2:1] - (Per Lane) PD Shuffle Mask. 217 // Bits[2:0] - (Per Lane) PS Shuffle Mask. 218 uint64_t Selector = RawMask[i]; 219 unsigned MatchBit = (Selector >> 3) & 0x1; 220 221 // M2Z[0:1] MatchBit 222 // 0Xb X Source selected by Selector index. 223 // 10b 0 Source selected by Selector index. 224 // 10b 1 Zero. 225 // 11b 0 Zero. 226 // 11b 1 Source selected by Selector index. 227 if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) { 228 ShuffleMask.push_back(SM_SentinelZero); 229 continue; 230 } 231 232 int Index = i & ~(NumEltsPerLane - 1); 233 if (ElSize == 64) 234 Index += (Selector >> 1) & 0x1; 235 else 236 Index += Selector & 0x3; 237 238 int Src = (Selector >> 2) & 0x1; 239 Index += Src * NumElts; 240 ShuffleMask.push_back(Index); 241 } 242 } 243 244 void DecodeVPPERMMask(const Constant *C, unsigned Width, 245 SmallVectorImpl<int> &ShuffleMask) { 246 Type *MaskTy = C->getType(); 247 unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); 248 (void)MaskTySize; 249 assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size."); 250 251 // The shuffle mask requires a byte vector. 252 APInt UndefElts; 253 SmallVector<uint64_t, 16> RawMask; 254 if (!extractConstantMask(C, 8, UndefElts, RawMask)) 255 return; 256 257 unsigned NumElts = Width / 8; 258 assert(NumElts == 16 && "Unexpected number of vector elements."); 259 260 for (unsigned i = 0; i != NumElts; ++i) { 261 if (UndefElts[i]) { 262 ShuffleMask.push_back(SM_SentinelUndef); 263 continue; 264 } 265 266 // VPPERM Operation 267 // Bits[4:0] - Byte Index (0 - 31) 268 // Bits[7:5] - Permute Operation 269 // 270 // Permute Operation: 271 // 0 - Source byte (no logical operation). 272 // 1 - Invert source byte. 273 // 2 - Bit reverse of source byte. 274 // 3 - Bit reverse of inverted source byte. 275 // 4 - 00h (zero - fill). 276 // 5 - FFh (ones - fill). 277 // 6 - Most significant bit of source byte replicated in all bit positions. 278 // 7 - Invert most significant bit of source byte and replicate in all bit 279 // positions. 280 uint64_t Element = RawMask[i]; 281 uint64_t Index = Element & 0x1F; 282 uint64_t PermuteOp = (Element >> 5) & 0x7; 283 284 if (PermuteOp == 4) { 285 ShuffleMask.push_back(SM_SentinelZero); 286 continue; 287 } 288 if (PermuteOp != 0) { 289 ShuffleMask.clear(); 290 return; 291 } 292 ShuffleMask.push_back((int)Index); 293 } 294 } 295 296 } // namespace llvm 297