Lines Matching +full:256 +full:b

50     return FVT->getNumElements() == 256 &&  in isV256I32Ty()
73 Value *Step, StringRef Name, IRBuilderBase &B,
77 IRBuilderBase &B, Value *Row, Value *Col,
86 createTileDPLoops(BasicBlock *Start, BasicBlock *End, IRBuilderBase &B,
106 IRBuilderBase &B, Loop *L) { in createLoop() argument
122 B.SetInsertPoint(Latch); in createLoop()
123 Value *Inc = B.CreateAdd(IV, Step, Name + ".step"); in createLoop()
124 Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); in createLoop()
149 BasicBlock *Start, BasicBlock *End, IRBuilderBase &B, Value *Row, in createTileLoadStoreLoops() argument
164 BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1), in createTileLoadStoreLoops()
165 IntrinName + ".scalarize.rows", B, RowLoop); in createTileLoadStoreLoops()
168 BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1), in createTileLoadStoreLoops()
169 IntrinName + ".scalarize.cols", B, ColLoop); in createTileLoadStoreLoops()
176 Type *EltTy = B.getInt32Ty(); in createTileLoadStoreLoops()
177 FixedVectorType *V256I32Ty = FixedVectorType::get(EltTy, 256); in createTileLoadStoreLoops()
182 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
183 Value *CurrentRowZExt = B.CreateZExt(CurrentRow, Stride->getType()); in createTileLoadStoreLoops()
184 Value *CurrentColZExt = B.CreateZExt(CurrentCol, Stride->getType()); in createTileLoadStoreLoops()
186 B.CreateAdd(B.CreateMul(CurrentRowZExt, Stride), CurrentColZExt); in createTileLoadStoreLoops()
187 Value *EltPtr = B.CreateGEP(EltTy, Ptr, Offset); in createTileLoadStoreLoops()
188 Value *Idx = B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentCol); in createTileLoadStoreLoops()
191 // %vec.phi.row = phi <256 x i32> [ zeroinitializer, %entry ], [ %ResVec, in createTileLoadStoreLoops()
193 B.SetInsertPoint(RowLoopHeader->getTerminator()); in createTileLoadStoreLoops()
195 PHINode *VecCPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.phi.row"); in createTileLoadStoreLoops()
199 // %vec.phi = phi <256 x i32> [ %vec.phi.row, %tileload.scalarize.rows.body in createTileLoadStoreLoops()
201 B.SetInsertPoint(ColLoopHeader->getTerminator()); in createTileLoadStoreLoops()
202 PHINode *VecPhi = B.CreatePHI(V256I32Ty, 2, "vec.phi"); in createTileLoadStoreLoops()
209 // %ResVec = insertelement <256 x i32> %vec.phi, i32 %elt, i16 %idxvec in createTileLoadStoreLoops()
210 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
211 Value *Elt = B.CreateLoad(EltTy, EltPtr); in createTileLoadStoreLoops()
212 Value *ResVec = B.CreateInsertElement(VecPhi, Elt, Idx); in createTileLoadStoreLoops()
226 B.SetInsertPoint(ColBody->getTerminator()); in createTileLoadStoreLoops()
227 Value *Elt = B.CreateExtractElement(Vec, Idx); in createTileLoadStoreLoops()
229 B.CreateStore(Elt, EltPtr); in createTileLoadStoreLoops()
242 IRBuilderBase &B, Value *Row, in createTileDPLoops() argument
278 BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1), in createTileDPLoops()
279 IntrinName + ".scalarize.rows", B, RowLoop); in createTileDPLoops()
282 BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1), in createTileDPLoops()
283 IntrinName + ".scalarize.cols", B, ColLoop); in createTileDPLoops()
287 B.SetInsertPoint(ColBody->getTerminator()); in createTileDPLoops()
289 createLoop(ColBody, ColLoopLatch, K, B.getInt16(1), in createTileDPLoops()
290 IntrinName + ".scalarize.inner", B, InnerLoop); in createTileDPLoops()
300 FixedVectorType *V256I32Ty = FixedVectorType::get(B.getInt32Ty(), 256); in createTileDPLoops()
315 // %vec.c.phi.row = phi <256 x i32> [ %VecC, %continue ], [ %NewVecC, in createTileDPLoops()
318 // %vec.d.phi.row = phi <256 x i32> [ zeroinitializer, %continue ], [ in createTileDPLoops()
320 B.SetInsertPoint(RowLoopHeader->getTerminator()); in createTileDPLoops()
321 PHINode *VecCPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.c.phi.row"); in createTileDPLoops()
324 PHINode *VecDPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.d.phi.row"); in createTileDPLoops()
328 // %vec.c.phi.col = phi <256 x i32> [ %vec.c.phi.row, in createTileDPLoops()
332 // %vec.d.phi.col = phi <256 x i32> [ in createTileDPLoops()
337 B.SetInsertPoint(ColLoopHeader->getTerminator()); in createTileDPLoops()
338 PHINode *VecCPhiColLoop = B.CreatePHI(V256I32Ty, 2, "vec.c.phi.col"); in createTileDPLoops()
340 PHINode *VecDPhiColLoop = B.CreatePHI(V256I32Ty, 2, "vec.d.phi.col"); in createTileDPLoops()
343 B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentCol); in createTileDPLoops()
346 // %vec.c.inner.phi = phi <256 x i32> [ %vec.c.phi.col, in createTileDPLoops()
350 B.SetInsertPoint(InnerLoopHeader->getTerminator()); in createTileDPLoops()
351 PHINode *VecCPhi = B.CreatePHI(V256I32Ty, 2, "vec.c.inner.phi"); in createTileDPLoops()
354 B.SetInsertPoint(InnerBody->getTerminator()); in createTileDPLoops()
356 B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentInner); in createTileDPLoops()
358 B.CreateAdd(B.CreateMul(CurrentInner, B.getInt16(16)), CurrentCol); in createTileDPLoops()
364 // %eltc = extractelement <256 x i32> %vec.c.inner.phi, i16 %idxc in createTileDPLoops()
365 // %elta = extractelement <256 x i32> %veca, i16 %idxa in createTileDPLoops()
367 // %eltb = extractelement <256 x i32> %vecb, i16 %idxb in createTileDPLoops()
374 // %NewVecC = insertelement <256 x i32> %vec.c.inner.phi, i32 %neweltc, in createTileDPLoops()
376 FixedVectorType *V4I8Ty = FixedVectorType::get(B.getInt8Ty(), 4); in createTileDPLoops()
377 FixedVectorType *V4I32Ty = FixedVectorType::get(B.getInt32Ty(), 4); in createTileDPLoops()
378 Value *EltC = B.CreateExtractElement(VecCPhi, IdxC); in createTileDPLoops()
379 Value *EltA = B.CreateExtractElement(VecA, IdxA); in createTileDPLoops()
380 Value *SubVecA = B.CreateBitCast(EltA, V4I8Ty); in createTileDPLoops()
381 Value *EltB = B.CreateExtractElement(VecB, IdxB); in createTileDPLoops()
382 Value *SubVecB = B.CreateBitCast(EltB, V4I8Ty); in createTileDPLoops()
387 SEXTSubVecB = B.CreateSExt(SubVecB, V4I32Ty); in createTileDPLoops()
388 SEXTSubVecA = B.CreateSExt(SubVecA, V4I32Ty); in createTileDPLoops()
391 SEXTSubVecB = B.CreateZExt(SubVecB, V4I32Ty); in createTileDPLoops()
392 SEXTSubVecA = B.CreateSExt(SubVecA, V4I32Ty); in createTileDPLoops()
395 SEXTSubVecB = B.CreateSExt(SubVecB, V4I32Ty); in createTileDPLoops()
396 SEXTSubVecA = B.CreateZExt(SubVecA, V4I32Ty); in createTileDPLoops()
399 SEXTSubVecB = B.CreateZExt(SubVecB, V4I32Ty); in createTileDPLoops()
400 SEXTSubVecA = B.CreateZExt(SubVecA, V4I32Ty); in createTileDPLoops()
405 Value *SubVecR = B.CreateAddReduce(B.CreateMul(SEXTSubVecA, SEXTSubVecB)); in createTileDPLoops()
406 Value *ResElt = B.CreateAdd(EltC, SubVecR); in createTileDPLoops()
407 NewVecC = B.CreateInsertElement(VecCPhi, ResElt, IdxC); in createTileDPLoops()
411 // %eltc = extractelement <256 x i32> %vec.c.inner.phi, i16 %idxc in createTileDPLoops()
413 // %elta = extractelement <256 x i32> %veca, i16 %idxa in createTileDPLoops()
415 // %eltb = extractelement <256 x i32> %vecb, i16 %idxb in createTileDPLoops()
427 // %NewVecC = insertelement <256 x i32> %vec.c.inner.phi, i32 %neweltc, in createTileDPLoops()
429 // %NewVecD = insertelement <256 x i32> %vec.d.inner.phi, i32 %neweltc, in createTileDPLoops()
431 FixedVectorType *V2I16Ty = FixedVectorType::get(B.getInt16Ty(), 2); in createTileDPLoops()
432 FixedVectorType *V2F32Ty = FixedVectorType::get(B.getFloatTy(), 2); in createTileDPLoops()
433 Value *EltC = B.CreateExtractElement(VecCPhi, IdxC); in createTileDPLoops()
434 Value *EltCF32 = B.CreateBitCast(EltC, B.getFloatTy()); in createTileDPLoops()
435 Value *EltA = B.CreateExtractElement(VecA, IdxA); in createTileDPLoops()
436 Value *SubVecA = B.CreateBitCast(EltA, V2I16Ty); in createTileDPLoops()
437 Value *EltB = B.CreateExtractElement(VecB, IdxB); in createTileDPLoops()
438 Value *SubVecB = B.CreateBitCast(EltB, V2I16Ty); in createTileDPLoops()
442 Value *AV2F32 = B.CreateBitCast( in createTileDPLoops()
443 B.CreateShuffleVector(SubVecA, ZeroV2I16, ShuffleArray), V2F32Ty); in createTileDPLoops()
444 Value *BV2F32 = B.CreateBitCast( in createTileDPLoops()
445 B.CreateShuffleVector(SubVecB, ZeroV2I16, ShuffleArray), V2F32Ty); in createTileDPLoops()
446 Value *SubVecR = B.CreateFAddReduce(EltCF32, B.CreateFMul(AV2F32, BV2F32)); in createTileDPLoops()
447 Value *ResElt = B.CreateBitCast(SubVecR, B.getInt32Ty()); in createTileDPLoops()
448 NewVecC = B.CreateInsertElement(VecCPhi, ResElt, IdxC); in createTileDPLoops()
452 // %NewEltC = extractelement <256 x i32> %vec.c.phi.col, i16 %idxc in createTileDPLoops()
453 // %NewVecD = insertelement <256 x i32> %vec.d.phi.col, i32 %NewEltC, in createTileDPLoops()
455 B.SetInsertPoint(ColLoopLatch->getTerminator()); in createTileDPLoops()
456 Value *NewEltC = B.CreateExtractElement(NewVecC, IdxC); in createTileDPLoops()
457 Value *NewVecD = B.CreateInsertElement(VecDPhiColLoop, NewEltC, IdxC); in createTileDPLoops()
476 Value *M, *N, *K, *C, *A, *B; in lowerTileDP() local
478 m_Value(C), m_Value(A), m_Value(B))); in lowerTileDP()
492 KDWord, C, A, B); in lowerTileDP()
559 FixedVectorType *V256I32Ty = FixedVectorType::get(Builder.getInt32Ty(), 256); in lowerTileZero()