Lines Matching +full:f +full:- +full:tile
1 //===-- X86LowerAMXIntrinsics.cpp -X86 Scalarize AMX Intrinsics------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass is always enabled and it skips when it is not -O0 and has no
11 /// optnone attributes. With -O0 or optnone attribute, the def of shape to amx
13 /// point which post-dominate all the shape and dominate all amx intrinsics.
17 //===----------------------------------------------------------------------===//
45 #define DEBUG_TYPE "lower-amx-intrinsics"
50 return FVT->getNumElements() == 256 && in isV256I32Ty()
51 FVT->getElementType()->isIntegerTy(32); in isV256I32Ty()
57 X86ScalarizeAMX("enable-x86-scalar-amx", cl::init(false), cl::Hidden,
65 X86LowerAMXIntrinsics(Function &F, DomTreeUpdater &DomTU, LoopInfo *LoopI) in X86LowerAMXIntrinsics() argument
66 : Func(F), DTU(DomTU), LI(LoopI) {} in X86LowerAMXIntrinsics()
78 Value *Ptr, Value *Stride, Value *Tile);
107 LLVMContext &Ctx = Preheader->getContext(); in createLoop()
109 BasicBlock::Create(Ctx, Name + ".header", Preheader->getParent(), Exit); in createLoop()
111 BasicBlock::Create(Ctx, Name + ".body", Header->getParent(), Exit); in createLoop()
113 BasicBlock::Create(Ctx, Name + ".latch", Header->getParent(), Exit); in createLoop()
119 PHINode::Create(I16Ty, 2, Name + ".iv", Header->getTerminator()->getIterator()); in createLoop()
120 IV->addIncoming(ConstantInt::get(I16Ty, 0), Preheader); in createLoop()
126 IV->addIncoming(Inc, Latch); in createLoop()
128 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); in createLoop()
129 BasicBlock *Tmp = PreheaderBr->getSuccessor(0); in createLoop()
130 PreheaderBr->setSuccessor(0, Header); in createLoop()
140 L->addBasicBlockToLoop(Header, *LI); in createLoop()
141 L->addBasicBlockToLoop(Body, *LI); in createLoop()
142 L->addBasicBlockToLoop(Latch, *LI); in createLoop()
150 Value *Col, Value *Ptr, Value *Stride, Value *Tile) { in createTileLoadStoreLoops() argument
155 RowLoop = LI->AllocateLoop(); in createTileLoadStoreLoops()
156 ColLoop = LI->AllocateLoop(); in createTileLoadStoreLoops()
157 RowLoop->addChildLoop(ColLoop); in createTileLoadStoreLoops()
158 if (Loop *ParentL = LI->getLoopFor(Start)) in createTileLoadStoreLoops()
159 ParentL->addChildLoop(RowLoop); in createTileLoadStoreLoops()
161 LI->addTopLevelLoop(RowLoop); in createTileLoadStoreLoops()
166 BasicBlock *RowLatch = RowBody->getSingleSuccessor(); in createTileLoadStoreLoops()
171 BasicBlock *ColLoopLatch = ColBody->getSingleSuccessor(); in createTileLoadStoreLoops()
172 BasicBlock *ColLoopHeader = ColBody->getSinglePredecessor(); in createTileLoadStoreLoops()
173 BasicBlock *RowLoopHeader = RowBody->getSinglePredecessor(); in createTileLoadStoreLoops()
174 Value *CurrentRow = &*RowLoopHeader->begin(); in createTileLoadStoreLoops()
175 Value *CurrentCol = &*ColLoopHeader->begin(); in createTileLoadStoreLoops()
182 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
183 Value *CurrentRowZExt = B.CreateZExt(CurrentRow, Stride->getType()); in createTileLoadStoreLoops()
184 Value *CurrentColZExt = B.CreateZExt(CurrentCol, Stride->getType()); in createTileLoadStoreLoops()
193 B.SetInsertPoint(RowLoopHeader->getTerminator()); in createTileLoadStoreLoops()
196 VecCPhiRowLoop->addIncoming(VecZero, Start); in createTileLoadStoreLoops()
201 B.SetInsertPoint(ColLoopHeader->getTerminator()); in createTileLoadStoreLoops()
203 VecPhi->addIncoming(VecCPhiRowLoop, RowBody); in createTileLoadStoreLoops()
210 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
213 VecPhi->addIncoming(ResVec, ColLoopLatch); in createTileLoadStoreLoops()
214 VecCPhiRowLoop->addIncoming(ResVec, RowLatch); in createTileLoadStoreLoops()
218 auto *BitCast = cast<BitCastInst>(Tile); in createTileLoadStoreLoops()
219 Value *Vec = BitCast->getOperand(0); in createTileLoadStoreLoops()
220 assert(isV256I32Ty(Vec->getType()) && "bitcast from non-v256i32 to x86amx"); in createTileLoadStoreLoops()
226 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
267 RowLoop = LI->AllocateLoop(); in createTileDPLoops()
268 ColLoop = LI->AllocateLoop(); in createTileDPLoops()
269 InnerLoop = LI->AllocateLoop(); in createTileDPLoops()
270 ColLoop->addChildLoop(InnerLoop); in createTileDPLoops()
271 RowLoop->addChildLoop(ColLoop); in createTileDPLoops()
272 if (Loop *ParentL = LI->getLoopFor(Start)) in createTileDPLoops()
273 ParentL->addChildLoop(RowLoop); in createTileDPLoops()
275 LI->addTopLevelLoop(RowLoop); in createTileDPLoops()
280 BasicBlock *RowLatch = RowBody->getSingleSuccessor(); in createTileDPLoops()
285 BasicBlock *ColLoopLatch = ColBody->getSingleSuccessor(); in createTileDPLoops()
287 B.SetInsertPoint(ColBody->getTerminator()); in createTileDPLoops()
292 BasicBlock *ColLoopHeader = ColBody->getSinglePredecessor(); in createTileDPLoops()
293 BasicBlock *RowLoopHeader = RowBody->getSinglePredecessor(); in createTileDPLoops()
294 BasicBlock *InnerLoopHeader = InnerBody->getSinglePredecessor(); in createTileDPLoops()
295 BasicBlock *InnerLoopLatch = InnerBody->getSingleSuccessor(); in createTileDPLoops()
296 Value *CurrentRow = &*RowLoopHeader->begin(); in createTileDPLoops()
297 Value *CurrentCol = &*ColLoopHeader->begin(); in createTileDPLoops()
298 Value *CurrentInner = &*InnerLoopHeader->begin(); in createTileDPLoops()
302 Value *VecC = BitCastAcc->getOperand(0); in createTileDPLoops()
303 assert(isV256I32Ty(VecC->getType()) && "bitcast from non-v256i32 to x86amx"); in createTileDPLoops()
306 // to vector. However with -O0, it doesn't happen. in createTileDPLoops()
308 Value *VecA = BitCastLHS->getOperand(0); in createTileDPLoops()
309 assert(isV256I32Ty(VecA->getType()) && "bitcast from non-v256i32 to x86amx"); in createTileDPLoops()
311 Value *VecB = BitCastRHS->getOperand(0); in createTileDPLoops()
312 assert(isV256I32Ty(VecB->getType()) && "bitcast from non-v256i32 to x86amx"); in createTileDPLoops()
320 B.SetInsertPoint(RowLoopHeader->getTerminator()); in createTileDPLoops()
322 VecCPhiRowLoop->addIncoming(VecC, Start); in createTileDPLoops()
325 VecDPhiRowLoop->addIncoming(VecZero, Start); in createTileDPLoops()
337 B.SetInsertPoint(ColLoopHeader->getTerminator()); in createTileDPLoops()
339 VecCPhiColLoop->addIncoming(VecCPhiRowLoop, RowBody); in createTileDPLoops()
341 VecDPhiColLoop->addIncoming(VecDPhiRowLoop, RowBody); in createTileDPLoops()
350 B.SetInsertPoint(InnerLoopHeader->getTerminator()); in createTileDPLoops()
352 VecCPhi->addIncoming(VecCPhiColLoop, ColBody); in createTileDPLoops()
354 B.SetInsertPoint(InnerBody->getTerminator()); in createTileDPLoops()
455 B.SetInsertPoint(ColLoopLatch->getTerminator()); in createTileDPLoops()
459 VecCPhi->addIncoming(NewVecC, InnerLoopLatch); in createTileDPLoops()
460 VecCPhiRowLoop->addIncoming(NewVecC, RowLatch); in createTileDPLoops()
461 VecCPhiColLoop->addIncoming(NewVecC, ColLoopLatch); in createTileDPLoops()
462 VecDPhiRowLoop->addIncoming(NewVecD, RowLatch); in createTileDPLoops()
463 VecDPhiColLoop->addIncoming(NewVecD, ColLoopLatch); in createTileDPLoops()
487 BasicBlock *Start = InsertI->getParent(); in lowerTileDP()
489 SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue"); in lowerTileDP()
495 Builder.SetInsertPoint(End, End->getFirstNonPHIIt()); in lowerTileDP()
498 // Delete TileDP intrinsic and do some clean-up. in lowerTileDP()
499 for (Use &U : llvm::make_early_inc_range(TileDP->uses())) { in lowerTileDP()
503 I->replaceAllUsesWith(ResVec); in lowerTileDP()
504 I->eraseFromParent(); in lowerTileDP()
507 TileDP->replaceAllUsesWith(ResAMX); in lowerTileDP()
508 TileDP->eraseFromParent(); in lowerTileDP()
514 Value *M, *N, *Ptr, *Stride, *Tile; in lowerTileLoadStore() local
522 m_Value(Stride), m_Value(Tile))); in lowerTileLoadStore()
529 BasicBlock *Start = InsertI->getParent(); in lowerTileLoadStore()
531 SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue"); in lowerTileLoadStore()
535 IsTileLoad ? nullptr : Tile); in lowerTileLoadStore()
539 Builder.SetInsertPoint(End, End->getFirstNonPHIIt()); in lowerTileLoadStore()
542 // Delete tileloadd64 intrinsic and do some clean-up in lowerTileLoadStore()
543 for (Use &U : llvm::make_early_inc_range(TileLoadStore->uses())) { in lowerTileLoadStore()
547 I->replaceAllUsesWith(ResVec); in lowerTileLoadStore()
548 I->eraseFromParent(); in lowerTileLoadStore()
551 TileLoadStore->replaceAllUsesWith(ResAMX); in lowerTileLoadStore()
553 TileLoadStore->eraseFromParent(); in lowerTileLoadStore()
561 for (Use &U : llvm::make_early_inc_range(TileZero->uses())) { in lowerTileZero()
565 I->replaceAllUsesWith(VecZero); in lowerTileZero()
566 I->eraseFromParent(); in lowerTileZero()
569 TileZero->eraseFromParent(); in lowerTileZero()
577 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { in visit()
579 switch (Inst->getIntrinsicID()) { in visit()
598 switch (Inst->getIntrinsicID()) { in visit()
641 bool runOnFunction(Function &F) override { in runOnFunction() argument
645 if (!F.hasFnAttribute(Attribute::OptimizeNone) && in runOnFunction()
646 TM->getOptLevel() != CodeGenOptLevel::None) in runOnFunction()
650 auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; in runOnFunction()
652 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; in runOnFunction()
655 X86LowerAMXIntrinsics LAT(F, DTU, LI); in runOnFunction()