1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass converts vector operations into scalar operations (or, optionally, 10 // operations on smaller vector widths), in order to expose optimization 11 // opportunities on the individual scalar operations. 12 // It is mainly intended for targets that do not have vector units, but it 13 // may also be useful for revectorizing code to different vector widths. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/Scalarizer.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Analysis/VectorUtils.h" 22 #include "llvm/IR/Argument.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/IR/IRBuilder.h" 30 #include "llvm/IR/InstVisitor.h" 31 #include "llvm/IR/InstrTypes.h" 32 #include "llvm/IR/Instruction.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/Intrinsics.h" 35 #include "llvm/IR/LLVMContext.h" 36 #include "llvm/IR/Module.h" 37 #include "llvm/IR/Type.h" 38 #include "llvm/IR/Value.h" 39 #include "llvm/Support/Casting.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Transforms/Utils/Local.h" 42 #include <cassert> 43 #include <cstdint> 44 #include <iterator> 45 #include <map> 46 #include <utility> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "scalarizer" 51 52 static cl::opt<bool> ClScalarizeVariableInsertExtract( 53 "scalarize-variable-insert-extract", cl::init(true), cl::Hidden, 54 cl::desc("Allow the scalarizer pass to scalarize " 55 "insertelement/extractelement with variable index")); 56 57 // This is disabled by default because having separate loads and stores 58 // makes it more likely that the -combiner-alias-analysis limits will be 59 // reached. 60 static cl::opt<bool> ClScalarizeLoadStore( 61 "scalarize-load-store", cl::init(false), cl::Hidden, 62 cl::desc("Allow the scalarizer pass to scalarize loads and store")); 63 64 // Split vectors larger than this size into fragments, where each fragment is 65 // either a vector no larger than this size or a scalar. 66 // 67 // Instructions with operands or results of different sizes that would be split 68 // into a different number of fragments are currently left as-is. 69 static cl::opt<unsigned> ClScalarizeMinBits( 70 "scalarize-min-bits", cl::init(0), cl::Hidden, 71 cl::desc("Instruct the scalarizer pass to attempt to keep values of a " 72 "minimum number of bits")); 73 74 namespace { 75 76 BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { 77 BasicBlock *BB = Itr->getParent(); 78 if (isa<PHINode>(Itr)) 79 Itr = BB->getFirstInsertionPt(); 80 if (Itr != BB->end()) 81 Itr = skipDebugIntrinsics(Itr); 82 return Itr; 83 } 84 85 // Used to store the scattered form of a vector. 86 using ValueVector = SmallVector<Value *, 8>; 87 88 // Used to map a vector Value and associated type to its scattered form. 89 // The associated type is only non-null for pointer values that are "scattered" 90 // when used as pointer operands to load or store. 91 // 92 // We use std::map because we want iterators to persist across insertion and 93 // because the values are relatively large. 94 using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>; 95 96 // Lists Instructions that have been replaced with scalar implementations, 97 // along with a pointer to their scattered forms. 98 using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>; 99 100 struct VectorSplit { 101 // The type of the vector. 102 FixedVectorType *VecTy = nullptr; 103 104 // The number of elements packed in a fragment (other than the remainder). 105 unsigned NumPacked = 0; 106 107 // The number of fragments (scalars or smaller vectors) into which the vector 108 // shall be split. 109 unsigned NumFragments = 0; 110 111 // The type of each complete fragment. 112 Type *SplitTy = nullptr; 113 114 // The type of the remainder (last) fragment; null if all fragments are 115 // complete. 116 Type *RemainderTy = nullptr; 117 118 Type *getFragmentType(unsigned I) const { 119 return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy; 120 } 121 }; 122 123 // Provides a very limited vector-like interface for lazily accessing one 124 // component of a scattered vector or vector pointer. 125 class Scatterer { 126 public: 127 Scatterer() = default; 128 129 // Scatter V into Size components. If new instructions are needed, 130 // insert them before BBI in BB. If Cache is nonnull, use it to cache 131 // the results. 132 Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 133 const VectorSplit &VS, ValueVector *cachePtr = nullptr); 134 135 // Return component I, creating a new Value for it if necessary. 136 Value *operator[](unsigned I); 137 138 // Return the number of components. 139 unsigned size() const { return VS.NumFragments; } 140 141 private: 142 BasicBlock *BB; 143 BasicBlock::iterator BBI; 144 Value *V; 145 VectorSplit VS; 146 bool IsPointer; 147 ValueVector *CachePtr; 148 ValueVector Tmp; 149 }; 150 151 // FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp 152 // called Name that compares X and Y in the same way as FCI. 153 struct FCmpSplitter { 154 FCmpSplitter(FCmpInst &fci) : FCI(fci) {} 155 156 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 157 const Twine &Name) const { 158 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name); 159 } 160 161 FCmpInst &FCI; 162 }; 163 164 // ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp 165 // called Name that compares X and Y in the same way as ICI. 166 struct ICmpSplitter { 167 ICmpSplitter(ICmpInst &ici) : ICI(ici) {} 168 169 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 170 const Twine &Name) const { 171 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name); 172 } 173 174 ICmpInst &ICI; 175 }; 176 177 // UnarySplitter(UO)(Builder, X, Name) uses Builder to create 178 // a unary operator like UO called Name with operand X. 179 struct UnarySplitter { 180 UnarySplitter(UnaryOperator &uo) : UO(uo) {} 181 182 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const { 183 return Builder.CreateUnOp(UO.getOpcode(), Op, Name); 184 } 185 186 UnaryOperator &UO; 187 }; 188 189 // BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create 190 // a binary operator like BO called Name with operands X and Y. 191 struct BinarySplitter { 192 BinarySplitter(BinaryOperator &bo) : BO(bo) {} 193 194 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 195 const Twine &Name) const { 196 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name); 197 } 198 199 BinaryOperator &BO; 200 }; 201 202 // Information about a load or store that we're scalarizing. 203 struct VectorLayout { 204 VectorLayout() = default; 205 206 // Return the alignment of fragment Frag. 207 Align getFragmentAlign(unsigned Frag) { 208 return commonAlignment(VecAlign, Frag * SplitSize); 209 } 210 211 // The split of the underlying vector type. 212 VectorSplit VS; 213 214 // The alignment of the vector. 215 Align VecAlign; 216 217 // The size of each (non-remainder) fragment in bytes. 218 uint64_t SplitSize = 0; 219 }; 220 221 /// Concatenate the given fragments to a single vector value of the type 222 /// described in @p VS. 223 static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments, 224 const VectorSplit &VS, Twine Name) { 225 unsigned NumElements = VS.VecTy->getNumElements(); 226 SmallVector<int> ExtendMask; 227 SmallVector<int> InsertMask; 228 229 if (VS.NumPacked > 1) { 230 // Prepare the shufflevector masks once and re-use them for all 231 // fragments. 232 ExtendMask.resize(NumElements, -1); 233 for (unsigned I = 0; I < VS.NumPacked; ++I) 234 ExtendMask[I] = I; 235 236 InsertMask.resize(NumElements); 237 for (unsigned I = 0; I < NumElements; ++I) 238 InsertMask[I] = I; 239 } 240 241 Value *Res = PoisonValue::get(VS.VecTy); 242 for (unsigned I = 0; I < VS.NumFragments; ++I) { 243 Value *Fragment = Fragments[I]; 244 245 unsigned NumPacked = VS.NumPacked; 246 if (I == VS.NumFragments - 1 && VS.RemainderTy) { 247 if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy)) 248 NumPacked = RemVecTy->getNumElements(); 249 else 250 NumPacked = 1; 251 } 252 253 if (NumPacked == 1) { 254 Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked, 255 Name + ".upto" + Twine(I)); 256 } else { 257 Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask); 258 if (I == 0) { 259 Res = Fragment; 260 } else { 261 for (unsigned J = 0; J < NumPacked; ++J) 262 InsertMask[I * VS.NumPacked + J] = NumElements + J; 263 Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask, 264 Name + ".upto" + Twine(I)); 265 for (unsigned J = 0; J < NumPacked; ++J) 266 InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J; 267 } 268 } 269 } 270 271 return Res; 272 } 273 274 template <typename T> 275 T getWithDefaultOverride(const cl::opt<T> &ClOption, 276 const std::optional<T> &DefaultOverride) { 277 return ClOption.getNumOccurrences() ? ClOption 278 : DefaultOverride.value_or(ClOption); 279 } 280 281 class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { 282 public: 283 ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options) 284 : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride( 285 ClScalarizeVariableInsertExtract, 286 Options.ScalarizeVariableInsertExtract)), 287 ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore, 288 Options.ScalarizeLoadStore)), 289 ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits, 290 Options.ScalarizeMinBits)) {} 291 292 bool visit(Function &F); 293 294 // InstVisitor methods. They return true if the instruction was scalarized, 295 // false if nothing changed. 296 bool visitInstruction(Instruction &I) { return false; } 297 bool visitSelectInst(SelectInst &SI); 298 bool visitICmpInst(ICmpInst &ICI); 299 bool visitFCmpInst(FCmpInst &FCI); 300 bool visitUnaryOperator(UnaryOperator &UO); 301 bool visitBinaryOperator(BinaryOperator &BO); 302 bool visitGetElementPtrInst(GetElementPtrInst &GEPI); 303 bool visitCastInst(CastInst &CI); 304 bool visitBitCastInst(BitCastInst &BCI); 305 bool visitInsertElementInst(InsertElementInst &IEI); 306 bool visitExtractElementInst(ExtractElementInst &EEI); 307 bool visitShuffleVectorInst(ShuffleVectorInst &SVI); 308 bool visitPHINode(PHINode &PHI); 309 bool visitLoadInst(LoadInst &LI); 310 bool visitStoreInst(StoreInst &SI); 311 bool visitCallInst(CallInst &ICI); 312 bool visitFreezeInst(FreezeInst &FI); 313 314 private: 315 Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS); 316 void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS); 317 void replaceUses(Instruction *Op, Value *CV); 318 bool canTransferMetadata(unsigned Kind); 319 void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV); 320 std::optional<VectorSplit> getVectorSplit(Type *Ty); 321 std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment, 322 const DataLayout &DL); 323 bool finish(); 324 325 template<typename T> bool splitUnary(Instruction &, const T &); 326 template<typename T> bool splitBinary(Instruction &, const T &); 327 328 bool splitCall(CallInst &CI); 329 330 ScatterMap Scattered; 331 GatherList Gathered; 332 bool Scalarized; 333 334 SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs; 335 336 DominatorTree *DT; 337 338 const bool ScalarizeVariableInsertExtract; 339 const bool ScalarizeLoadStore; 340 const unsigned ScalarizeMinBits; 341 }; 342 343 } // end anonymous namespace 344 345 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 346 const VectorSplit &VS, ValueVector *cachePtr) 347 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { 348 IsPointer = V->getType()->isPointerTy(); 349 if (!CachePtr) { 350 Tmp.resize(VS.NumFragments, nullptr); 351 } else { 352 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() || 353 IsPointer) && 354 "Inconsistent vector sizes"); 355 if (VS.NumFragments > CachePtr->size()) 356 CachePtr->resize(VS.NumFragments, nullptr); 357 } 358 } 359 360 // Return fragment Frag, creating a new Value for it if necessary. 361 Value *Scatterer::operator[](unsigned Frag) { 362 ValueVector &CV = CachePtr ? *CachePtr : Tmp; 363 // Try to reuse a previous value. 364 if (CV[Frag]) 365 return CV[Frag]; 366 IRBuilder<> Builder(BB, BBI); 367 if (IsPointer) { 368 if (Frag == 0) 369 CV[Frag] = V; 370 else 371 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag, 372 V->getName() + ".i" + Twine(Frag)); 373 return CV[Frag]; 374 } 375 376 Type *FragmentTy = VS.getFragmentType(Frag); 377 378 if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) { 379 SmallVector<int> Mask; 380 for (unsigned J = 0; J < VecTy->getNumElements(); ++J) 381 Mask.push_back(Frag * VS.NumPacked + J); 382 CV[Frag] = 383 Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, 384 V->getName() + ".i" + Twine(Frag)); 385 } else { 386 // Search through a chain of InsertElementInsts looking for element Frag. 387 // Record other elements in the cache. The new V is still suitable 388 // for all uncached indices. 389 while (true) { 390 InsertElementInst *Insert = dyn_cast<InsertElementInst>(V); 391 if (!Insert) 392 break; 393 ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2)); 394 if (!Idx) 395 break; 396 unsigned J = Idx->getZExtValue(); 397 V = Insert->getOperand(0); 398 if (Frag * VS.NumPacked == J) { 399 CV[Frag] = Insert->getOperand(1); 400 return CV[Frag]; 401 } 402 403 if (VS.NumPacked == 1 && !CV[J]) { 404 // Only cache the first entry we find for each index we're not actively 405 // searching for. This prevents us from going too far up the chain and 406 // caching incorrect entries. 407 CV[J] = Insert->getOperand(1); 408 } 409 } 410 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked, 411 V->getName() + ".i" + Twine(Frag)); 412 } 413 414 return CV[Frag]; 415 } 416 417 bool ScalarizerVisitor::visit(Function &F) { 418 assert(Gathered.empty() && Scattered.empty()); 419 420 Scalarized = false; 421 422 // To ensure we replace gathered components correctly we need to do an ordered 423 // traversal of the basic blocks in the function. 424 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock()); 425 for (BasicBlock *BB : RPOT) { 426 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { 427 Instruction *I = &*II; 428 bool Done = InstVisitor::visit(I); 429 ++II; 430 if (Done && I->getType()->isVoidTy()) 431 I->eraseFromParent(); 432 } 433 } 434 return finish(); 435 } 436 437 // Return a scattered form of V that can be accessed by Point. V must be a 438 // vector or a pointer to a vector. 439 Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V, 440 const VectorSplit &VS) { 441 if (Argument *VArg = dyn_cast<Argument>(V)) { 442 // Put the scattered form of arguments in the entry block, 443 // so that it can be used everywhere. 444 Function *F = VArg->getParent(); 445 BasicBlock *BB = &F->getEntryBlock(); 446 return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]); 447 } 448 if (Instruction *VOp = dyn_cast<Instruction>(V)) { 449 // When scalarizing PHI nodes we might try to examine/rewrite InsertElement 450 // nodes in predecessors. If those predecessors are unreachable from entry, 451 // then the IR in those blocks could have unexpected properties resulting in 452 // infinite loops in Scatterer::operator[]. By simply treating values 453 // originating from instructions in unreachable blocks as undef we do not 454 // need to analyse them further. 455 if (!DT->isReachableFromEntry(VOp->getParent())) 456 return Scatterer(Point->getParent(), Point->getIterator(), 457 PoisonValue::get(V->getType()), VS); 458 // Put the scattered form of an instruction directly after the 459 // instruction, skipping over PHI nodes and debug intrinsics. 460 BasicBlock *BB = VOp->getParent(); 461 return Scatterer( 462 BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS, 463 &Scattered[{V, VS.SplitTy}]); 464 } 465 // In the fallback case, just put the scattered before Point and 466 // keep the result local to Point. 467 return Scatterer(Point->getParent(), Point->getIterator(), V, VS); 468 } 469 470 // Replace Op with the gathered form of the components in CV. Defer the 471 // deletion of Op and creation of the gathered form to the end of the pass, 472 // so that we can avoid creating the gathered form if all uses of Op are 473 // replaced with uses of CV. 474 void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV, 475 const VectorSplit &VS) { 476 transferMetadataAndIRFlags(Op, CV); 477 478 // If we already have a scattered form of Op (created from ExtractElements 479 // of Op itself), replace them with the new form. 480 ValueVector &SV = Scattered[{Op, VS.SplitTy}]; 481 if (!SV.empty()) { 482 for (unsigned I = 0, E = SV.size(); I != E; ++I) { 483 Value *V = SV[I]; 484 if (V == nullptr || SV[I] == CV[I]) 485 continue; 486 487 Instruction *Old = cast<Instruction>(V); 488 if (isa<Instruction>(CV[I])) 489 CV[I]->takeName(Old); 490 Old->replaceAllUsesWith(CV[I]); 491 PotentiallyDeadInstrs.emplace_back(Old); 492 } 493 } 494 SV = CV; 495 Gathered.push_back(GatherList::value_type(Op, &SV)); 496 } 497 498 // Replace Op with CV and collect Op has a potentially dead instruction. 499 void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) { 500 if (CV != Op) { 501 Op->replaceAllUsesWith(CV); 502 PotentiallyDeadInstrs.emplace_back(Op); 503 Scalarized = true; 504 } 505 } 506 507 // Return true if it is safe to transfer the given metadata tag from 508 // vector to scalar instructions. 509 bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { 510 return (Tag == LLVMContext::MD_tbaa 511 || Tag == LLVMContext::MD_fpmath 512 || Tag == LLVMContext::MD_tbaa_struct 513 || Tag == LLVMContext::MD_invariant_load 514 || Tag == LLVMContext::MD_alias_scope 515 || Tag == LLVMContext::MD_noalias 516 || Tag == LLVMContext::MD_mem_parallel_loop_access 517 || Tag == LLVMContext::MD_access_group); 518 } 519 520 // Transfer metadata from Op to the instructions in CV if it is known 521 // to be safe to do so. 522 void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op, 523 const ValueVector &CV) { 524 SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 525 Op->getAllMetadataOtherThanDebugLoc(MDs); 526 for (unsigned I = 0, E = CV.size(); I != E; ++I) { 527 if (Instruction *New = dyn_cast<Instruction>(CV[I])) { 528 for (const auto &MD : MDs) 529 if (canTransferMetadata(MD.first)) 530 New->setMetadata(MD.first, MD.second); 531 New->copyIRFlags(Op); 532 if (Op->getDebugLoc() && !New->getDebugLoc()) 533 New->setDebugLoc(Op->getDebugLoc()); 534 } 535 } 536 } 537 538 // Determine how Ty is split, if at all. 539 std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) { 540 VectorSplit Split; 541 Split.VecTy = dyn_cast<FixedVectorType>(Ty); 542 if (!Split.VecTy) 543 return {}; 544 545 unsigned NumElems = Split.VecTy->getNumElements(); 546 Type *ElemTy = Split.VecTy->getElementType(); 547 548 if (NumElems == 1 || ElemTy->isPointerTy() || 549 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) { 550 Split.NumPacked = 1; 551 Split.NumFragments = NumElems; 552 Split.SplitTy = ElemTy; 553 } else { 554 Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits(); 555 if (Split.NumPacked >= NumElems) 556 return {}; 557 558 Split.NumFragments = divideCeil(NumElems, Split.NumPacked); 559 Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked); 560 561 unsigned RemainderElems = NumElems % Split.NumPacked; 562 if (RemainderElems > 1) 563 Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems); 564 else if (RemainderElems == 1) 565 Split.RemainderTy = ElemTy; 566 } 567 568 return Split; 569 } 570 571 // Try to fill in Layout from Ty, returning true on success. Alignment is 572 // the alignment of the vector, or std::nullopt if the ABI default should be 573 // used. 574 std::optional<VectorLayout> 575 ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment, 576 const DataLayout &DL) { 577 std::optional<VectorSplit> VS = getVectorSplit(Ty); 578 if (!VS) 579 return {}; 580 581 VectorLayout Layout; 582 Layout.VS = *VS; 583 // Check that we're dealing with full-byte fragments. 584 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) || 585 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy))) 586 return {}; 587 Layout.VecAlign = Alignment; 588 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy); 589 return Layout; 590 } 591 592 // Scalarize one-operand instruction I, using Split(Builder, X, Name) 593 // to create an instruction like I with operand X and name Name. 594 template<typename Splitter> 595 bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) { 596 std::optional<VectorSplit> VS = getVectorSplit(I.getType()); 597 if (!VS) 598 return false; 599 600 std::optional<VectorSplit> OpVS; 601 if (I.getOperand(0)->getType() == I.getType()) { 602 OpVS = VS; 603 } else { 604 OpVS = getVectorSplit(I.getOperand(0)->getType()); 605 if (!OpVS || VS->NumPacked != OpVS->NumPacked) 606 return false; 607 } 608 609 IRBuilder<> Builder(&I); 610 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS); 611 assert(Op.size() == VS->NumFragments && "Mismatched unary operation"); 612 ValueVector Res; 613 Res.resize(VS->NumFragments); 614 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) 615 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag)); 616 gather(&I, Res, *VS); 617 return true; 618 } 619 620 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) 621 // to create an instruction like I with operands X and Y and name Name. 622 template<typename Splitter> 623 bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { 624 std::optional<VectorSplit> VS = getVectorSplit(I.getType()); 625 if (!VS) 626 return false; 627 628 std::optional<VectorSplit> OpVS; 629 if (I.getOperand(0)->getType() == I.getType()) { 630 OpVS = VS; 631 } else { 632 OpVS = getVectorSplit(I.getOperand(0)->getType()); 633 if (!OpVS || VS->NumPacked != OpVS->NumPacked) 634 return false; 635 } 636 637 IRBuilder<> Builder(&I); 638 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS); 639 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS); 640 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation"); 641 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation"); 642 ValueVector Res; 643 Res.resize(VS->NumFragments); 644 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) { 645 Value *Op0 = VOp0[Frag]; 646 Value *Op1 = VOp1[Frag]; 647 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag)); 648 } 649 gather(&I, Res, *VS); 650 return true; 651 } 652 653 static bool isTriviallyScalariable(Intrinsic::ID ID) { 654 return isTriviallyVectorizable(ID); 655 } 656 657 /// If a call to a vector typed intrinsic function, split into a scalar call per 658 /// element if possible for the intrinsic. 659 bool ScalarizerVisitor::splitCall(CallInst &CI) { 660 std::optional<VectorSplit> VS = getVectorSplit(CI.getType()); 661 if (!VS) 662 return false; 663 664 Function *F = CI.getCalledFunction(); 665 if (!F) 666 return false; 667 668 Intrinsic::ID ID = F->getIntrinsicID(); 669 if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID)) 670 return false; 671 672 // unsigned NumElems = VT->getNumElements(); 673 unsigned NumArgs = CI.arg_size(); 674 675 ValueVector ScalarOperands(NumArgs); 676 SmallVector<Scatterer, 8> Scattered(NumArgs); 677 SmallVector<int> OverloadIdx(NumArgs, -1); 678 679 SmallVector<llvm::Type *, 3> Tys; 680 // Add return type if intrinsic is overloaded on it. 681 if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) 682 Tys.push_back(VS->SplitTy); 683 684 // Assumes that any vector type has the same number of elements as the return 685 // vector type, which is true for all current intrinsics. 686 for (unsigned I = 0; I != NumArgs; ++I) { 687 Value *OpI = CI.getOperand(I); 688 if ([[maybe_unused]] auto *OpVecTy = 689 dyn_cast<FixedVectorType>(OpI->getType())) { 690 assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements()); 691 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType()); 692 if (!OpVS || OpVS->NumPacked != VS->NumPacked) { 693 // The natural split of the operand doesn't match the result. This could 694 // happen if the vector elements are different and the ScalarizeMinBits 695 // option is used. 696 // 697 // We could in principle handle this case as well, at the cost of 698 // complicating the scattering machinery to support multiple scattering 699 // granularities for a single value. 700 return false; 701 } 702 703 Scattered[I] = scatter(&CI, OpI, *OpVS); 704 if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) { 705 OverloadIdx[I] = Tys.size(); 706 Tys.push_back(OpVS->SplitTy); 707 } 708 } else { 709 ScalarOperands[I] = OpI; 710 if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) 711 Tys.push_back(OpI->getType()); 712 } 713 } 714 715 ValueVector Res(VS->NumFragments); 716 ValueVector ScalarCallOps(NumArgs); 717 718 Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); 719 IRBuilder<> Builder(&CI); 720 721 // Perform actual scalarization, taking care to preserve any scalar operands. 722 for (unsigned I = 0; I < VS->NumFragments; ++I) { 723 bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy; 724 ScalarCallOps.clear(); 725 726 if (IsRemainder) 727 Tys[0] = VS->RemainderTy; 728 729 for (unsigned J = 0; J != NumArgs; ++J) { 730 if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) { 731 ScalarCallOps.push_back(ScalarOperands[J]); 732 } else { 733 ScalarCallOps.push_back(Scattered[J][I]); 734 if (IsRemainder && OverloadIdx[J] >= 0) 735 Tys[OverloadIdx[J]] = Scattered[J][I]->getType(); 736 } 737 } 738 739 if (IsRemainder) 740 NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); 741 742 Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps, 743 CI.getName() + ".i" + Twine(I)); 744 } 745 746 gather(&CI, Res, *VS); 747 return true; 748 } 749 750 bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) { 751 std::optional<VectorSplit> VS = getVectorSplit(SI.getType()); 752 if (!VS) 753 return false; 754 755 std::optional<VectorSplit> CondVS; 756 if (isa<FixedVectorType>(SI.getCondition()->getType())) { 757 CondVS = getVectorSplit(SI.getCondition()->getType()); 758 if (!CondVS || CondVS->NumPacked != VS->NumPacked) { 759 // This happens when ScalarizeMinBits is used. 760 return false; 761 } 762 } 763 764 IRBuilder<> Builder(&SI); 765 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS); 766 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS); 767 assert(VOp1.size() == VS->NumFragments && "Mismatched select"); 768 assert(VOp2.size() == VS->NumFragments && "Mismatched select"); 769 ValueVector Res; 770 Res.resize(VS->NumFragments); 771 772 if (CondVS) { 773 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS); 774 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select"); 775 for (unsigned I = 0; I < VS->NumFragments; ++I) { 776 Value *Op0 = VOp0[I]; 777 Value *Op1 = VOp1[I]; 778 Value *Op2 = VOp2[I]; 779 Res[I] = Builder.CreateSelect(Op0, Op1, Op2, 780 SI.getName() + ".i" + Twine(I)); 781 } 782 } else { 783 Value *Op0 = SI.getOperand(0); 784 for (unsigned I = 0; I < VS->NumFragments; ++I) { 785 Value *Op1 = VOp1[I]; 786 Value *Op2 = VOp2[I]; 787 Res[I] = Builder.CreateSelect(Op0, Op1, Op2, 788 SI.getName() + ".i" + Twine(I)); 789 } 790 } 791 gather(&SI, Res, *VS); 792 return true; 793 } 794 795 bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) { 796 return splitBinary(ICI, ICmpSplitter(ICI)); 797 } 798 799 bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) { 800 return splitBinary(FCI, FCmpSplitter(FCI)); 801 } 802 803 bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) { 804 return splitUnary(UO, UnarySplitter(UO)); 805 } 806 807 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) { 808 return splitBinary(BO, BinarySplitter(BO)); 809 } 810 811 bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { 812 std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType()); 813 if (!VS) 814 return false; 815 816 IRBuilder<> Builder(&GEPI); 817 unsigned NumIndices = GEPI.getNumIndices(); 818 819 // The base pointer and indices might be scalar even if it's a vector GEP. 820 SmallVector<Value *, 8> ScalarOps{1 + NumIndices}; 821 SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices}; 822 823 for (unsigned I = 0; I < 1 + NumIndices; ++I) { 824 if (auto *VecTy = 825 dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) { 826 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy); 827 if (!OpVS || OpVS->NumPacked != VS->NumPacked) { 828 // This can happen when ScalarizeMinBits is used. 829 return false; 830 } 831 ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS); 832 } else { 833 ScalarOps[I] = GEPI.getOperand(I); 834 } 835 } 836 837 ValueVector Res; 838 Res.resize(VS->NumFragments); 839 for (unsigned I = 0; I < VS->NumFragments; ++I) { 840 SmallVector<Value *, 8> SplitOps; 841 SplitOps.resize(1 + NumIndices); 842 for (unsigned J = 0; J < 1 + NumIndices; ++J) { 843 if (ScalarOps[J]) 844 SplitOps[J] = ScalarOps[J]; 845 else 846 SplitOps[J] = ScatterOps[J][I]; 847 } 848 Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0], 849 ArrayRef(SplitOps).drop_front(), 850 GEPI.getName() + ".i" + Twine(I)); 851 if (GEPI.isInBounds()) 852 if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) 853 NewGEPI->setIsInBounds(); 854 } 855 gather(&GEPI, Res, *VS); 856 return true; 857 } 858 859 bool ScalarizerVisitor::visitCastInst(CastInst &CI) { 860 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy()); 861 if (!DestVS) 862 return false; 863 864 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy()); 865 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked) 866 return false; 867 868 IRBuilder<> Builder(&CI); 869 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS); 870 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast"); 871 ValueVector Res; 872 Res.resize(DestVS->NumFragments); 873 for (unsigned I = 0; I < DestVS->NumFragments; ++I) 874 Res[I] = 875 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I), 876 CI.getName() + ".i" + Twine(I)); 877 gather(&CI, Res, *DestVS); 878 return true; 879 } 880 881 bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) { 882 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy()); 883 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy()); 884 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy) 885 return false; 886 887 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy(); 888 889 // Vectors of pointers are always fully scalarized. 890 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1)); 891 892 IRBuilder<> Builder(&BCI); 893 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS); 894 ValueVector Res; 895 Res.resize(DstVS->NumFragments); 896 897 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits(); 898 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits(); 899 900 if (isPointerTy || DstSplitBits == SrcSplitBits) { 901 assert(DstVS->NumFragments == SrcVS->NumFragments); 902 for (unsigned I = 0; I < DstVS->NumFragments; ++I) { 903 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I), 904 BCI.getName() + ".i" + Twine(I)); 905 } 906 } else if (SrcSplitBits % DstSplitBits == 0) { 907 // Convert each source fragment to the same-sized destination vector and 908 // then scatter the result to the destination. 909 VectorSplit MidVS; 910 MidVS.NumPacked = DstVS->NumPacked; 911 MidVS.NumFragments = SrcSplitBits / DstSplitBits; 912 MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(), 913 MidVS.NumPacked * MidVS.NumFragments); 914 MidVS.SplitTy = DstVS->SplitTy; 915 916 unsigned ResI = 0; 917 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) { 918 Value *V = Op0[I]; 919 920 // Look through any existing bitcasts before converting to <N x t2>. 921 // In the best case, the resulting conversion might be a no-op. 922 Instruction *VI; 923 while ((VI = dyn_cast<Instruction>(V)) && 924 VI->getOpcode() == Instruction::BitCast) 925 V = VI->getOperand(0); 926 927 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast"); 928 929 Scatterer Mid = scatter(&BCI, V, MidVS); 930 for (unsigned J = 0; J < MidVS.NumFragments; ++J) 931 Res[ResI++] = Mid[J]; 932 } 933 } else if (DstSplitBits % SrcSplitBits == 0) { 934 // Gather enough source fragments to make up a destination fragment and 935 // then convert to the destination type. 936 VectorSplit MidVS; 937 MidVS.NumFragments = DstSplitBits / SrcSplitBits; 938 MidVS.NumPacked = SrcVS->NumPacked; 939 MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(), 940 MidVS.NumPacked * MidVS.NumFragments); 941 MidVS.SplitTy = SrcVS->SplitTy; 942 943 unsigned SrcI = 0; 944 SmallVector<Value *, 8> ConcatOps; 945 ConcatOps.resize(MidVS.NumFragments); 946 for (unsigned I = 0; I < DstVS->NumFragments; ++I) { 947 for (unsigned J = 0; J < MidVS.NumFragments; ++J) 948 ConcatOps[J] = Op0[SrcI++]; 949 Value *V = concatenate(Builder, ConcatOps, MidVS, 950 BCI.getName() + ".i" + Twine(I)); 951 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I), 952 BCI.getName() + ".i" + Twine(I)); 953 } 954 } else { 955 return false; 956 } 957 958 gather(&BCI, Res, *DstVS); 959 return true; 960 } 961 962 bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) { 963 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType()); 964 if (!VS) 965 return false; 966 967 IRBuilder<> Builder(&IEI); 968 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS); 969 Value *NewElt = IEI.getOperand(1); 970 Value *InsIdx = IEI.getOperand(2); 971 972 ValueVector Res; 973 Res.resize(VS->NumFragments); 974 975 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) { 976 unsigned Idx = CI->getZExtValue(); 977 unsigned Fragment = Idx / VS->NumPacked; 978 for (unsigned I = 0; I < VS->NumFragments; ++I) { 979 if (I == Fragment) { 980 bool IsPacked = VS->NumPacked > 1; 981 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && 982 !VS->RemainderTy->isVectorTy()) 983 IsPacked = false; 984 if (IsPacked) { 985 Res[I] = 986 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked); 987 } else { 988 Res[I] = NewElt; 989 } 990 } else { 991 Res[I] = Op0[I]; 992 } 993 } 994 } else { 995 // Never split a variable insertelement that isn't fully scalarized. 996 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) 997 return false; 998 999 for (unsigned I = 0; I < VS->NumFragments; ++I) { 1000 Value *ShouldReplace = 1001 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I), 1002 InsIdx->getName() + ".is." + Twine(I)); 1003 Value *OldElt = Op0[I]; 1004 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt, 1005 IEI.getName() + ".i" + Twine(I)); 1006 } 1007 } 1008 1009 gather(&IEI, Res, *VS); 1010 return true; 1011 } 1012 1013 bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { 1014 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType()); 1015 if (!VS) 1016 return false; 1017 1018 IRBuilder<> Builder(&EEI); 1019 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS); 1020 Value *ExtIdx = EEI.getOperand(1); 1021 1022 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) { 1023 unsigned Idx = CI->getZExtValue(); 1024 unsigned Fragment = Idx / VS->NumPacked; 1025 Value *Res = Op0[Fragment]; 1026 bool IsPacked = VS->NumPacked > 1; 1027 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && 1028 !VS->RemainderTy->isVectorTy()) 1029 IsPacked = false; 1030 if (IsPacked) 1031 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked); 1032 replaceUses(&EEI, Res); 1033 return true; 1034 } 1035 1036 // Never split a variable extractelement that isn't fully scalarized. 1037 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) 1038 return false; 1039 1040 Value *Res = PoisonValue::get(VS->VecTy->getElementType()); 1041 for (unsigned I = 0; I < VS->NumFragments; ++I) { 1042 Value *ShouldExtract = 1043 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I), 1044 ExtIdx->getName() + ".is." + Twine(I)); 1045 Value *Elt = Op0[I]; 1046 Res = Builder.CreateSelect(ShouldExtract, Elt, Res, 1047 EEI.getName() + ".upto" + Twine(I)); 1048 } 1049 replaceUses(&EEI, Res); 1050 return true; 1051 } 1052 1053 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 1054 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType()); 1055 std::optional<VectorSplit> VSOp = 1056 getVectorSplit(SVI.getOperand(0)->getType()); 1057 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1) 1058 return false; 1059 1060 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp); 1061 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp); 1062 ValueVector Res; 1063 Res.resize(VS->NumFragments); 1064 1065 for (unsigned I = 0; I < VS->NumFragments; ++I) { 1066 int Selector = SVI.getMaskValue(I); 1067 if (Selector < 0) 1068 Res[I] = PoisonValue::get(VS->VecTy->getElementType()); 1069 else if (unsigned(Selector) < Op0.size()) 1070 Res[I] = Op0[Selector]; 1071 else 1072 Res[I] = Op1[Selector - Op0.size()]; 1073 } 1074 gather(&SVI, Res, *VS); 1075 return true; 1076 } 1077 1078 bool ScalarizerVisitor::visitPHINode(PHINode &PHI) { 1079 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType()); 1080 if (!VS) 1081 return false; 1082 1083 IRBuilder<> Builder(&PHI); 1084 ValueVector Res; 1085 Res.resize(VS->NumFragments); 1086 1087 unsigned NumOps = PHI.getNumOperands(); 1088 for (unsigned I = 0; I < VS->NumFragments; ++I) { 1089 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps, 1090 PHI.getName() + ".i" + Twine(I)); 1091 } 1092 1093 for (unsigned I = 0; I < NumOps; ++I) { 1094 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS); 1095 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); 1096 for (unsigned J = 0; J < VS->NumFragments; ++J) 1097 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); 1098 } 1099 gather(&PHI, Res, *VS); 1100 return true; 1101 } 1102 1103 bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) { 1104 if (!ScalarizeLoadStore) 1105 return false; 1106 if (!LI.isSimple()) 1107 return false; 1108 1109 std::optional<VectorLayout> Layout = getVectorLayout( 1110 LI.getType(), LI.getAlign(), LI.getModule()->getDataLayout()); 1111 if (!Layout) 1112 return false; 1113 1114 IRBuilder<> Builder(&LI); 1115 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS); 1116 ValueVector Res; 1117 Res.resize(Layout->VS.NumFragments); 1118 1119 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { 1120 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I], 1121 Align(Layout->getFragmentAlign(I)), 1122 LI.getName() + ".i" + Twine(I)); 1123 } 1124 gather(&LI, Res, Layout->VS); 1125 return true; 1126 } 1127 1128 bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) { 1129 if (!ScalarizeLoadStore) 1130 return false; 1131 if (!SI.isSimple()) 1132 return false; 1133 1134 Value *FullValue = SI.getValueOperand(); 1135 std::optional<VectorLayout> Layout = getVectorLayout( 1136 FullValue->getType(), SI.getAlign(), SI.getModule()->getDataLayout()); 1137 if (!Layout) 1138 return false; 1139 1140 IRBuilder<> Builder(&SI); 1141 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS); 1142 Scatterer VVal = scatter(&SI, FullValue, Layout->VS); 1143 1144 ValueVector Stores; 1145 Stores.resize(Layout->VS.NumFragments); 1146 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { 1147 Value *Val = VVal[I]; 1148 Value *Ptr = VPtr[I]; 1149 Stores[I] = 1150 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I)); 1151 } 1152 transferMetadataAndIRFlags(&SI, Stores); 1153 return true; 1154 } 1155 1156 bool ScalarizerVisitor::visitCallInst(CallInst &CI) { 1157 return splitCall(CI); 1158 } 1159 1160 bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) { 1161 return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) { 1162 return Builder.CreateFreeze(Op, Name); 1163 }); 1164 } 1165 1166 // Delete the instructions that we scalarized. If a full vector result 1167 // is still needed, recreate it using InsertElements. 1168 bool ScalarizerVisitor::finish() { 1169 // The presence of data in Gathered or Scattered indicates changes 1170 // made to the Function. 1171 if (Gathered.empty() && Scattered.empty() && !Scalarized) 1172 return false; 1173 for (const auto &GMI : Gathered) { 1174 Instruction *Op = GMI.first; 1175 ValueVector &CV = *GMI.second; 1176 if (!Op->use_empty()) { 1177 // The value is still needed, so recreate it using a series of 1178 // insertelements and/or shufflevectors. 1179 Value *Res; 1180 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) { 1181 BasicBlock *BB = Op->getParent(); 1182 IRBuilder<> Builder(Op); 1183 if (isa<PHINode>(Op)) 1184 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 1185 1186 VectorSplit VS = *getVectorSplit(Ty); 1187 assert(VS.NumFragments == CV.size()); 1188 1189 Res = concatenate(Builder, CV, VS, Op->getName()); 1190 1191 Res->takeName(Op); 1192 } else { 1193 assert(CV.size() == 1 && Op->getType() == CV[0]->getType()); 1194 Res = CV[0]; 1195 if (Op == Res) 1196 continue; 1197 } 1198 Op->replaceAllUsesWith(Res); 1199 } 1200 PotentiallyDeadInstrs.emplace_back(Op); 1201 } 1202 Gathered.clear(); 1203 Scattered.clear(); 1204 Scalarized = false; 1205 1206 RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); 1207 1208 return true; 1209 } 1210 1211 PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { 1212 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1213 ScalarizerVisitor Impl(DT, Options); 1214 bool Changed = Impl.visit(F); 1215 PreservedAnalyses PA; 1216 PA.preserve<DominatorTreeAnalysis>(); 1217 return Changed ? PA : PreservedAnalyses::all(); 1218 } 1219