//===- MVELaneInterleaving.cpp - Interleave for MVE instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass interleaves around sext/zext/trunc instructions. MVE does not have
// a single sext/zext or trunc instruction that takes the bottom half of a
// vector and extends to a full width, like NEON has with MOVL. Instead it is
// expected that this happens through top/bottom instructions. So the MVE
// equivalent VMOVLT/B instructions take either the even or odd elements of the
// input and extend them to the larger type, producing a vector with half the
// number of elements each of double the bitwidth. As there is no simple
// instruction, we often have to turn sext/zext/trunc into a series of lane
// moves (or stack loads/stores, which we do not do yet).
//
// This pass takes vector code that starts at truncs, looks for interconnected
// blobs of operations that end with sext/zext (or constants/splats) of the
// form:
//   %sa = sext v8i16 %a to v8i32
//   %sb = sext v8i16 %b to v8i32
//   %add = add v8i32 %sa, %sb
//   %r = trunc %add to v8i16
// And adds shuffles to allow the use of VMOVL/VMOVN instructions:
//   %sha = shuffle v8i16 %a, undef, <0, 2, 4, 6, 1, 3, 5, 7>
//   %sa = sext v8i16 %sha to v8i32
//   %shb = shuffle v8i16 %b, undef, <0, 2, 4, 6, 1, 3, 5, 7>
//   %sb = sext v8i16 %shb to v8i32
//   %add = add v8i32 %sa, %sb
//   %r = trunc %add to v8i16
//   %shr = shuffle v8i16 %r, undef, <0, 4, 1, 5, 2, 6, 3, 7>
// Which can then be split and lowered to MVE instructions efficiently:
//   %sa_b = VMOVLB.s16 %a
//   %sa_t = VMOVLT.s16 %a
//   %sb_b = VMOVLB.s16 %b
//   %sb_t = VMOVLT.s16 %b
//   %add_b = VADD.i32 %sa_b, %sb_b
//   %add_t = VADD.i32 %sa_t, %sb_t
//   %r = VMOVNT.i16 %add_b, %add_t
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "mve-laneinterleave"

// Escape hatch to disable this transform without rebuilding the compiler.
cl::opt<bool> EnableInterleave(
    "enable-mve-interleave", cl::Hidden, cl::init(true),
    cl::desc("Enable interleave MVE vector operation lowering"));

namespace {

// Legacy-PM function pass that drives the lane-interleaving rewrite described
// in the file header. It only inspects/alters instructions (no CFG changes,
// see setPreservesCFG below) and needs TargetPassConfig to reach the
// ARMSubtarget for the MVE feature check.
class MVELaneInterleaving : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit MVELaneInterleaving() : FunctionPass(ID) {
    initializeMVELaneInterleavingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "MVE lane interleaving"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char MVELaneInterleaving::ID = 0;

INITIALIZE_PASS(MVELaneInterleaving, DEBUG_TYPE, "MVE lane interleaving", false,
                false)

Pass *llvm::createMVELaneInterleavingPass() {
  return new MVELaneInterleaving();
}

// Heuristic: decide whether rewriting this group of Exts/Truncs with
// interleaving shuffles is likely a win. Returns true if any ext or trunc in
// the group would otherwise need real instructions (i.e. is not simply
// folded into an adjacent load/store), or if the remaining load(ext)s all
// feed multiplies (which can become VMULLs).
static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts,
                                     SmallSetVector<Instruction *, 4> &Truncs) {
  // This is not always beneficial to transform. Exts can be incorporated into
  // loads, Truncs can be folded into stores.
  // Truncs are usually the same number of instructions,
  //  VSTRH.32(A);VSTRH.32(B) vs VSTRH.16(VMOVNT A, B) with interleaving
  // Exts are unfortunately more instructions in the general case:
  //  A=VLDRH.32; B=VLDRH.32;
  // vs with interleaving:
  //  T=VLDRH.16; A=VMOVLB T; B=VMOVLT T
  // But those VMOVL may be folded into a VMULL.

  // But expensive extends/truncs are always good to remove. FPExts always
  // involve extra VCVT's so are always considered to be beneficial to convert.
  for (auto *E : Exts) {
    if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) {
      LLVM_DEBUG(dbgs() << "Beneficial due to " << *E << "\n");
      return true;
    }
  }
  for (auto *T : Truncs) {
    if (T->hasOneUse() && !isa<StoreInst>(*T->user_begin())) {
      LLVM_DEBUG(dbgs() << "Beneficial due to " << *T << "\n");
      return true;
    }
  }

  // Otherwise, we know we have a load(ext), see if any of the Extends are a
  // vmull. This is a simple heuristic and certainly not perfect.
  for (auto *E : Exts) {
    if (!E->hasOneUse() ||
        cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) {
      LLVM_DEBUG(dbgs() << "Not beneficial due to " << *E << "\n");
      return false;
    }
  }
  return true;
}

// Starting from the trunc `Start`, walk the connected graph of vector
// operations (through both operands and users) to collect:
//  - Truncs:     the trunc/fptrunc sinks of the blob,
//  - Exts:       the sext/zext/fpext leaves feeding it,
//  - Ops:        lane-wise intermediate operations (binops, compares, selects,
//                and a whitelist of lane-wise intrinsics),
//  - OtherLeafs: non-instruction vector operands (constants/splats).
// If any unhandled instruction is reached the walk bails out. When the blob
// is well-typed (half-width truncs of double-width ops on 8x16 / 16x8 base
// vectors) and profitable, an interleaving shuffle (LeafMask) is inserted on
// every leaf and a deinterleaving shuffle (TruncMask) after every trunc, so
// ISel can use VMOVLB/T and VMOVNB/T. Returns true if IR was changed.
// Truncs reached during the walk are recorded in `Visited` (even on failure)
// so runOnFunction does not restart from the same blob.
static bool tryInterleave(Instruction *Start,
                          SmallPtrSetImpl<Instruction *> &Visited) {
  LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n");
  auto *VT = cast<FixedVectorType>(Start->getType());

  if (!isa<Instruction>(Start->getOperand(0)))
    return false;

  // Look for connected operations starting from Ext's, terminating at Truncs.
  // Seed the walk with the trunc and the value it truncates.
  std::vector<Instruction *> Worklist;
  Worklist.push_back(Start);
  Worklist.push_back(cast<Instruction>(Start->getOperand(0)));

  SmallSetVector<Instruction *, 4> Truncs;
  SmallSetVector<Instruction *, 4> Exts;
  SmallSetVector<Use *, 4> OtherLeafs;
  SmallSetVector<Instruction *, 4> Ops;

  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();

    switch (I->getOpcode()) {
    // Truncs
    case Instruction::Trunc:
    case Instruction::FPTrunc:
      if (Truncs.count(I))
        continue;
      Truncs.insert(I);
      Visited.insert(I);
      break;

    // Extend leafs. We do not walk through their operands, but we do walk
    // their other users so the whole connected blob is discovered.
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::FPExt:
      if (Exts.count(I))
        continue;
      for (auto *Use : I->users())
        Worklist.push_back(cast<Instruction>(Use));
      Exts.insert(I);
      break;

    case Instruction::Call: {
      // Only a whitelist of lane-wise intrinsics is allowed; anything else
      // aborts the whole transform.
      IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
      if (!II)
        return false;

      switch (II->getIntrinsicID()) {
      case Intrinsic::abs:
      case Intrinsic::smin:
      case Intrinsic::smax:
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::sadd_sat:
      case Intrinsic::ssub_sat:
      case Intrinsic::uadd_sat:
      case Intrinsic::usub_sat:
      case Intrinsic::minnum:
      case Intrinsic::maxnum:
      case Intrinsic::fabs:
      case Intrinsic::fma:
      case Intrinsic::ceil:
      case Intrinsic::floor:
      case Intrinsic::rint:
      case Intrinsic::round:
      case Intrinsic::trunc:
        break;
      default:
        return false;
      }
      LLVM_FALLTHROUGH; // Fall through to treating these like an operator below.
    }
    // Binary/tertiary ops
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::AShr:
    case Instruction::LShr:
    case Instruction::Shl:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::FAdd:
    case Instruction::FMul:
    case Instruction::Select:
      if (Ops.count(I))
        continue;
      Ops.insert(I);

      // Vector operands are either walked further (instructions) or recorded
      // as leaf Uses that will need an interleaving shuffle inserted.
      for (Use &Op : I->operands()) {
        if (!isa<FixedVectorType>(Op->getType()))
          continue;
        if (isa<Instruction>(Op))
          Worklist.push_back(cast<Instruction>(&Op));
        else
          OtherLeafs.insert(&Op);
      }

      for (auto *Use : I->users())
        Worklist.push_back(cast<Instruction>(Use));
      break;

    case Instruction::ShuffleVector:
      // A shuffle of a splat is a splat. Splats are invariant under the lane
      // permutation, so they can be ignored entirely.
      if (cast<ShuffleVectorInst>(I)->isZeroEltSplat())
        continue;
      LLVM_FALLTHROUGH;

    default:
      LLVM_DEBUG(dbgs() << " Unhandled instruction: " << *I << "\n");
      return false;
    }
  }

  // Nothing to shuffle on the input side, so nothing to gain.
  if (Exts.empty() && OtherLeafs.empty())
    return false;

  LLVM_DEBUG({
    dbgs() << "Found group:\n Exts:";
    for (auto *I : Exts)
      dbgs() << " " << *I << "\n";
    dbgs() << " Ops:";
    for (auto *I : Ops)
      dbgs() << " " << *I << "\n";
    dbgs() << " OtherLeafs:";
    for (auto *I : OtherLeafs)
      dbgs() << " " << *I->get() << " of " << *I->getUser() << "\n";
    dbgs() << "Truncs:";
    for (auto *I : Truncs)
      dbgs() << " " << *I << "\n";
  });

  assert(!Truncs.empty() && "Expected some truncs");

  // Check types. BaseElts is the number of narrow elements that fill one
  // 128-bit MVE vector (8 x i16 or 16 x i8); wider vectors must be a whole
  // multiple of that so the masks below repeat per base vector.
  unsigned NumElts = VT->getNumElements();
  unsigned BaseElts = VT->getScalarSizeInBits() == 16
                          ? 8
                          : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
  if (BaseElts == 0 || NumElts % BaseElts != 0) {
    LLVM_DEBUG(dbgs() << " Type is unsupported\n");
    return false;
  }
  // The blob must be an exact narrow/double-width sandwich.
  if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
      VT->getScalarSizeInBits() * 2) {
    LLVM_DEBUG(dbgs() << " Type not double sized\n");
    return false;
  }
  for (Instruction *I : Exts)
    if (I->getOperand(0)->getType() != VT) {
      LLVM_DEBUG(dbgs() << " Wrong type on " << *I << "\n");
      return false;
    }
  for (Instruction *I : Truncs)
    if (I->getType() != VT) {
      LLVM_DEBUG(dbgs() << " Wrong type on " << *I << "\n");
      return false;
    }

  // Check that it looks beneficial
  if (!isProfitableToInterleave(Exts, Truncs))
    return false;

  // Create new shuffles around the extends / truncs / other leaves.
  IRBuilder<> Builder(Start);

  SmallVector<int, 16> LeafMask;
  SmallVector<int, 16> TruncMask;
  // LeafMask gathers evens then odds (what VMOVLB/VMOVLT consume);
  // TruncMask re-interleaves them (inverse permutation, for after VMOVNB/T).
  // LeafMask : 0, 2, 4, 6, 1, 3, 5, 7   8, 10, 12, 14,  9, 11, 13, 15
  // TruncMask: 0, 4, 1, 5, 2, 6, 3, 7   8, 12,  9, 13, 10, 14, 11, 15
  for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
    for (unsigned i = 0; i < BaseElts / 2; i++)
      LeafMask.push_back(Base + i * 2);
    for (unsigned i = 0; i < BaseElts / 2; i++)
      LeafMask.push_back(Base + i * 2 + 1);
  }
  for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
    for (unsigned i = 0; i < BaseElts / 2; i++) {
      TruncMask.push_back(Base + i);
      TruncMask.push_back(Base + i + BaseElts / 2);
    }
  }

  // Shuffle each ext's input, then re-extend; uses of the old ext move to the
  // new one.
  for (Instruction *I : Exts) {
    LLVM_DEBUG(dbgs() << "Replacing ext " << *I << "\n");
    Builder.SetInsertPoint(I);
    Value *Shuffle = Builder.CreateShuffleVector(I->getOperand(0), LeafMask);
    bool FPext = isa<FPExtInst>(I);
    bool Sext = isa<SExtInst>(I);
    Value *Ext = FPext ? Builder.CreateFPExt(Shuffle, I->getType())
                       : Sext ? Builder.CreateSExt(Shuffle, I->getType())
                              : Builder.CreateZExt(Shuffle, I->getType());
    I->replaceAllUsesWith(Ext);
    LLVM_DEBUG(dbgs() << "  with " << *Shuffle << "\n");
  }

  // Non-instruction leaf operands (constants etc.) get shuffled in place, at
  // the specific Use that consumes them.
  for (Use *I : OtherLeafs) {
    LLVM_DEBUG(dbgs() << "Replacing leaf " << *I << "\n");
    Builder.SetInsertPoint(cast<Instruction>(I->getUser()));
    Value *Shuffle = Builder.CreateShuffleVector(I->get(), LeafMask);
    I->getUser()->setOperand(I->getOperandNo(), Shuffle);
    LLVM_DEBUG(dbgs() << "  with " << *Shuffle << "\n");
  }

  for (Instruction *I : Truncs) {
    LLVM_DEBUG(dbgs() << "Replacing trunc " << *I << "\n");

    // Insert the deinterleaving shuffle immediately after the trunc.
    Builder.SetInsertPoint(I->getParent(), ++I->getIterator());
    Value *Shuf = Builder.CreateShuffleVector(I, TruncMask);
    I->replaceAllUsesWith(Shuf);
    // RAUW also rewrote Shuf's own operand to point at itself; restore it so
    // the shuffle still reads the trunc.
    cast<Instruction>(Shuf)->setOperand(0, I);

    LLVM_DEBUG(dbgs() << "  with " << *Shuf << "\n");
  }

  return true;
}

// Pass driver: on MVE-integer targets, scan the function bottom-up for vector
// trunc/fptrunc instructions and try to interleave the blob each one sinks.
// `Visited` prevents re-attempting truncs already absorbed into an earlier
// (attempted) group.
bool MVELaneInterleaving::runOnFunction(Function &F) {
  if (!EnableInterleave)
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<TargetMachine>();
  auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
  if (!ST->hasMVEIntegerOps())
    return false;

  bool Changed = false;

  SmallPtrSet<Instruction *, 16> Visited;
  for (Instruction &I : reverse(instructions(F))) {
    if (I.getType()->isVectorTy() &&
        (isa<TruncInst>(I) || isa<FPTruncInst>(I)) && !Visited.count(&I))
      Changed |= tryInterleave(&I, Visited);
  }

  return Changed;
}