//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations (or, optionally,
// operations on smaller vector widths), in order to expose optimization
// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "scalarizer"

static cl::opt<bool> ClScalarizeVariableInsertExtract(
    "scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
    cl::desc("Allow the scalarizer pass to scalarize "
             "insertelement/extractelement with variable index"));

// This is disabled by default because having separate loads and stores
// makes it more likely that the -combiner-alias-analysis limits will be
// reached.
static cl::opt<bool> ClScalarizeLoadStore(
    "scalarize-load-store", cl::init(false), cl::Hidden,
    cl::desc("Allow the scalarizer pass to scalarize loads and stores"));

// Split vectors larger than this size into fragments, where each fragment is
// either a vector no larger than this size or a scalar.
//
// Instructions with operands or results of different sizes that would be split
// into a different number of fragments are currently left as-is.
static cl::opt<unsigned> ClScalarizeMinBits(
    "scalarize-min-bits", cl::init(0), cl::Hidden,
    cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
             "minimum number of bits"));

namespace {

BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
  BasicBlock *BB = Itr->getParent();
  if (isa<PHINode>(Itr))
    Itr = BB->getFirstInsertionPt();
  if (Itr != BB->end())
    Itr = skipDebugIntrinsics(Itr);
  return Itr;
}

// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;
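// For example, the fully scalarized ("scattered") form of a <4 x i32> value
// %v is the four scalar Values %v.i0 .. %v.i3 (illustrative names, matching
// the ".i<N>" suffix this pass gives to extracted fragments).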

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;

struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};

// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into VS.NumFragments components. If new instructions are
  // needed, insert them before BBI in BB. If cachePtr is nonnull, use it to
  // cache the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  BasicBlock *BB;
  BasicBlock::iterator BBI;
  Value *V;
  VectorSplit VS;
  bool IsPointer;
  ValueVector *CachePtr;
  ValueVector Tmp;
};

// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
// called Name that compares X and Y in the same way as FCI.
struct FCmpSplitter {
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  }

  FCmpInst &FCI;
};

// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
// called Name that compares X and Y in the same way as ICI.
struct ICmpSplitter {
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }

  ICmpInst &ICI;
};

// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
// a unary operator like UO called Name with operand X.
struct UnarySplitter {
  UnarySplitter(UnaryOperator &uo) : UO(uo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
  }

  UnaryOperator &UO;
};
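
// These splitter functors are handed to splitUnary/splitBinary below. As an
// illustrative example, given %sum = add <2 x i32> %a, %b, splitBinary calls
// BinarySplitter(BO)(Builder, %a.i0, %b.i0, "sum.i0") and
// BinarySplitter(BO)(Builder, %a.i1, %b.i1, "sum.i1"), producing two scalar
// adds that are gathered back into a vector only if one is still needed.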

// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  }

  BinaryOperator &BO;
};

// Information about a load or store that we're scalarizing.
struct VectorLayout {
  VectorLayout() = default;

  // Return the alignment of fragment Frag.
  Align getFragmentAlign(unsigned Frag) {
    return commonAlignment(VecAlign, Frag * SplitSize);
  }

  // The split of the underlying vector type.
  VectorSplit VS;

  // The alignment of the vector.
  Align VecAlign;

  // The size of each (non-remainder) fragment in bytes.
  uint64_t SplitSize = 0;
};

/// Concatenate the given fragments into a single vector value of the type
/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}

template <typename T>
T getWithDefaultOverride(const cl::opt<T> &ClOption,
                         const std::optional<T> &DefaultOverride) {
  return ClOption.getNumOccurrences() ? ClOption
                                      : DefaultOverride.value_or(ClOption);
}

class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT,
                    ScalarizerPassOptions Options)
      : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
        ScalarizeVariableInsertExtract(
            getWithDefaultOverride(ClScalarizeVariableInsertExtract,
                                   Options.ScalarizeVariableInsertExtract)),
        ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
                                                  Options.ScalarizeLoadStore)),
        ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
                                                Options.ScalarizeMinBits)) {}

  bool visit(Function &F);
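
  // Note on option precedence (see getWithDefaultOverride above): an explicit
  // command-line occurrence of a cl::opt always wins; otherwise the value from
  // ScalarizerPassOptions is used if set, and the cl::opt default applies
  // last.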

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &CI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  ScatterMap Scattered;
  GatherList Gathered;
  bool Scalarized;

  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  unsigned ParallelLoopAccessMDKind;

  DominatorTree *DT;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};

class ScalarizerLegacyPass : public FunctionPass {
public:
  static char ID;

  ScalarizerLegacyPass() : FunctionPass(ID) {
    initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }
};

} // end anonymous namespace

char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
                      "Scalarize vector operations", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
                    "Scalarize vector operations", false, false)

Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     const VectorSplit &VS, ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
  IsPointer = V->getType()->isPointerTy();
  if (!CachePtr) {
    Tmp.resize(VS.NumFragments, nullptr);
  } else {
    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
            IsPointer) &&
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
  }
}

// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
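  // Note: for a pointer V (IsPointer), fragment Frag is the address of the
  // Frag'th fragment of the pointee vector, e.g. for a ptr %p to <4 x i32>,
  // fragment 2 is 'getelementptr i32, ptr %p, i32 2' (illustrative IR).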
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    SmallVector<int> Mask;
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}

bool ScalarizerLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  Module &M = *F.getParent();
  unsigned ParallelLoopAccessMDKind =
      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions());
  return Impl.visit(F);
}

FunctionPass *llvm::createScalarizerPass() {
  return new ScalarizerLegacyPass();
}

bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly we need to do an
  // ordered traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      ++II;
      if (Done && I->getType()->isVoidTy())
        I->eraseFromParent();
    }
  }
  return finish();
}

// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting
    // in infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
        VS, &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case, just put the scattered form before Point and
  // keep the result local to Point.
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}

// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}

// Replace Op with CV and collect Op as a potentially dead instruction.
void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
  if (CV != Op) {
    Op->replaceAllUsesWith(CV);
    PotentiallyDeadInstrs.emplace_back(Op);
    Scalarized = true;
  }
}

// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == ParallelLoopAccessMDKind
          || Tag == LLVMContext::MD_access_group);
}

// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}

// Determine how Ty is split, if at all.
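// For example (illustrative), with ScalarizeMinBits=32:
//   <8 x i16> -> NumPacked=2, NumFragments=4, SplitTy=<2 x i16>
//   <7 x i16> -> NumPacked=2, NumFragments=4, SplitTy=<2 x i16>,
//                RemainderTy=i16
// With the default ScalarizeMinBits=0, every vector is fully scalarized
// (NumPacked=1, SplitTy = the element type).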
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}

// Try to fill in Layout from Ty, returning std::nullopt on failure.
// Alignment is the alignment of the vector.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);
  if (!VS)
    return {};

  VectorLayout Layout;
  Layout.VS = *VS;
  // Check that we're dealing with full-byte fragments.
  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
    return {};
  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
  return Layout;
}

// Scalarize one-operand instruction I, using Split(Builder, X, Name)
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
  return true;
}

// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
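// For example, for %c = fcmp olt <4 x float> %a, %b the result type <4 x i1>
// differs from the operand type <4 x float>, so the operands get their own
// VectorSplit; it must agree with the result's split in NumPacked, otherwise
// the instruction is left unscalarized.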
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  }
  gather(&I, Res, *VS);
  return true;
}

static bool isTriviallyScalarizable(Intrinsic::ID ID) {
  return isTriviallyVectorizable(ID);
}

/// If CI is a call to a vector-typed intrinsic function, split the call into
/// one scalar call per fragment, if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
  if (!VS)
    return false;

  Function *F = CI.getCalledFunction();
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();
  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID))
    return false;

  unsigned NumArgs = CI.arg_size();

  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  SmallVector<llvm::Type *, 3> Tys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
    Tys.push_back(VS->SplitTy);
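
  // For example (illustrative), scalarizing
  //   %r = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
  // produces four calls to @llvm.ctlz.i32: the vector operand is scattered,
  // while the i1 flag is a scalar operand and is passed unchanged to each
  // fragment's call.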

  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    if (auto *OpVecTy = dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This
        // could happen if the vector elements are different and the
        // ScalarizeMinBits option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) {
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      ScalarOperands[I] = OpI;
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}
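
// A select may have a single scalar i1 condition even when its operands are
// vectors, e.g. select i1 %c, <4 x i32> %a, <4 x i32> %b (illustrative IR);
// visitSelectInst handles both this form and the per-element <4 x i1>
// condition form.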
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (CondVS) {
    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  }
  gather(&SI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}

bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}
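
// A "vector GEP" produces a vector of pointers, e.g.
//   %ps = getelementptr i32, ptr %base, <4 x i64> %offs
// (illustrative IR). Any mix of scalar and vector operands is allowed; scalar
// operands are reused by every fragment, while vector operands are scattered.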
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
  if (!DestVS)
    return false;

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
    return false;

  IRBuilder<> Builder(&CI);
  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
  ValueVector Res;
  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)
    Res[I] =
        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
                           CI.getName() + ".i" + Twine(I));
  gather(&CI, Res, *DestVS);
  return true;
}

bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
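
  // For example (illustrative), with ScalarizeMinBits=32, bitcasting
  // <4 x i16> to <2 x i32> scatters the source into two <2 x i16> fragments
  // and bitcasts each one directly to an i32 fragment, since the per-fragment
  // bit widths match.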

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  ValueVector Res;
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      Instruction *VI;
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
                             BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else {
    return false;
  }

  gather(&BCI, Res, *DstVS);
  return true;
}
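
// Scalarizing %w = insertelement <4 x i32> %v, i32 %x, i32 2 (constant index,
// illustrative IR) just replaces fragment 2 of the scattered %v with %x; with
// a variable index, every fragment instead becomes a select between %x and
// the old element.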
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&IEI);
  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
  Value *NewElt = IEI.getOperand(1);
  Value *InsIdx = IEI.getOperand(2);

  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        bool IsPacked = VS->NumPacked > 1;
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
          IsPacked = false;
        if (IsPacked) {
          Res[I] =
              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
        } else {
          Res[I] = NewElt;
        }
      } else {
        Res[I] = Op0[I];
      }
    }
  } else {
    // Never split a variable insertelement that isn't fully scalarized.
    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
      return false;

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
                               InsIdx->getName() + ".is." + Twine(I));
      Value *OldElt = Op0[I];
      Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
                                    IEI.getName() + ".i" + Twine(I));
    }
  }

  gather(&IEI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
  std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&EEI);
  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
  Value *ExtIdx = EEI.getOperand(1);

  if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];
    bool IsPacked = VS->NumPacked > 1;
    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())
      IsPacked = false;
    if (IsPacked)
      Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);
    return true;
  }

  // Never split a variable extractelement that isn't fully scalarized.
  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
    return false;

  Value *Res = PoisonValue::get(VS->VecTy->getElementType());
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
                             ExtIdx->getName() + ".is." + Twine(I));
    Value *Elt = Op0[I];
    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                               EEI.getName() + ".upto" + Twine(I));
  }
  replaceUses(&EEI, Res);
  return true;
}

bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =
      getVectorSplit(SVI.getOperand(0)->getType());
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
    return false;

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = PoisonValue::get(VS->VecTy->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(&SVI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&PHI);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
                               PHI.getName() + ".i" + Twine(I));
  }

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res, *VS);
  return true;
}
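
// With -scalarize-load-store, a load of <4 x i32> from %p with align 8
// (illustrative) becomes four i32 loads at byte offsets 0, 4, 8 and 12;
// getFragmentAlign derives the per-fragment alignments via commonAlignment
// (8, 4, 8, 4 in this example).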
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  std::optional<VectorLayout> Layout = getVectorLayout(
      LI.getType(), LI.getAlign(), LI.getModule()->getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
  ValueVector Res;
  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),
                                       LI.getName() + ".i" + Twine(I));
  }
  gather(&LI, Res, Layout->VS);
  return true;
}

bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getModule()->getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&SI);
  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  ValueVector Stores;
  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Value *Val = VVal[I];
    Value *Ptr = VPtr[I];
    Stores[I] =
        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
  }
  transferMetadataAndIRFlags(&SI, Stores);
  return true;
}

bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
  return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
    return Builder.CreateFreeze(Op, Name);
  });
}

// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
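// For example, if %sum = fadd <4 x float> ... was scalarized but one of its
// users was not, the full vector is rebuilt just before %sum with
// insertelements (and shufflevectors when fragments are sub-vectors) and the
// remaining uses are redirected to it.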
bool ScalarizerVisitor::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty() && !Scalarized)
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // insertelements and/or shufflevectors.
      Value *Res;
      if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());

        Res = concatenate(Builder, CV, VS, Op->getName());

        Res->takeName(Op);
      } else {
        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
        Res = CV[0];
        if (Op == Res)
          continue;
      }
      Op->replaceAllUsesWith(Res);
    }
    PotentiallyDeadInstrs.emplace_back(Op);
  }
  Gathered.clear();
  Scattered.clear();
  Scalarized = false;

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);

  return true;
}

PreservedAnalyses ScalarizerPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  Module &M = *F.getParent();
  unsigned ParallelLoopAccessMDKind =
      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
  bool Changed = Impl.visit(F);
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return Changed ? PA : PreservedAnalyses::all();
}