10b57cec5SDimitry Andric //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// This file implements SLP analysis based on VPlan. The analysis is based on
90b57cec5SDimitry Andric /// the ideas described in
100b57cec5SDimitry Andric ///
110b57cec5SDimitry Andric /// Look-ahead SLP: auto-vectorization in the presence of commutative
120b57cec5SDimitry Andric /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
130b57cec5SDimitry Andric /// Luís F. W. Góes
140b57cec5SDimitry Andric ///
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #include "VPlan.h"
1881ad6265SDimitry Andric #include "VPlanValue.h"
1981ad6265SDimitry Andric #include "llvm/ADT/DenseMap.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
220b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
230b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
240b57cec5SDimitry Andric #include "llvm/IR/Type.h"
250b57cec5SDimitry Andric #include "llvm/IR/Value.h"
260b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
270b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
280b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
290b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
30fe6060f1SDimitry Andric #include <algorithm>
310b57cec5SDimitry Andric #include <cassert>
32*bdd1243dSDimitry Andric #include <optional>
33fe6060f1SDimitry Andric #include <utility>
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #define DEBUG_TYPE "vplan-slp"
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric // Number of levels to look ahead when re-ordering multi node operands.
400b57cec5SDimitry Andric static unsigned LookaheadMaxDepth = 5;
410b57cec5SDimitry Andric
markFailed()420b57cec5SDimitry Andric VPInstruction *VPlanSlp::markFailed() {
430b57cec5SDimitry Andric // FIXME: Currently this is used to signal we hit instructions we cannot
440b57cec5SDimitry Andric // trivially SLP'ize.
450b57cec5SDimitry Andric CompletelySLP = false;
460b57cec5SDimitry Andric return nullptr;
470b57cec5SDimitry Andric }
480b57cec5SDimitry Andric
addCombined(ArrayRef<VPValue * > Operands,VPInstruction * New)490b57cec5SDimitry Andric void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
500b57cec5SDimitry Andric if (all_of(Operands, [](VPValue *V) {
510b57cec5SDimitry Andric return cast<VPInstruction>(V)->getUnderlyingInstr();
520b57cec5SDimitry Andric })) {
530b57cec5SDimitry Andric unsigned BundleSize = 0;
540b57cec5SDimitry Andric for (VPValue *V : Operands) {
550b57cec5SDimitry Andric Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
560b57cec5SDimitry Andric assert(!T->isVectorTy() && "Only scalar types supported for now");
570b57cec5SDimitry Andric BundleSize += T->getScalarSizeInBits();
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric WidestBundleBits = std::max(WidestBundleBits, BundleSize);
600b57cec5SDimitry Andric }
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
630b57cec5SDimitry Andric assert(Res.second &&
640b57cec5SDimitry Andric "Already created a combined instruction for the operand bundle");
650b57cec5SDimitry Andric (void)Res;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric
areVectorizable(ArrayRef<VPValue * > Operands) const680b57cec5SDimitry Andric bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
690b57cec5SDimitry Andric // Currently we only support VPInstructions.
700b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
710b57cec5SDimitry Andric return Op && isa<VPInstruction>(Op) &&
720b57cec5SDimitry Andric cast<VPInstruction>(Op)->getUnderlyingInstr();
730b57cec5SDimitry Andric })) {
740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
750b57cec5SDimitry Andric return false;
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric // Check if opcodes and type width agree for all instructions in the bundle.
790b57cec5SDimitry Andric // FIXME: Differing widths/opcodes can be handled by inserting additional
800b57cec5SDimitry Andric // instructions.
810b57cec5SDimitry Andric // FIXME: Deal with non-primitive types.
820b57cec5SDimitry Andric const Instruction *OriginalInstr =
830b57cec5SDimitry Andric cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
840b57cec5SDimitry Andric unsigned Opcode = OriginalInstr->getOpcode();
850b57cec5SDimitry Andric unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
860b57cec5SDimitry Andric if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
870b57cec5SDimitry Andric const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
880b57cec5SDimitry Andric return I->getOpcode() == Opcode &&
890b57cec5SDimitry Andric I->getType()->getPrimitiveSizeInBits() == Width;
900b57cec5SDimitry Andric })) {
910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
920b57cec5SDimitry Andric return false;
930b57cec5SDimitry Andric }
940b57cec5SDimitry Andric
950b57cec5SDimitry Andric // For now, all operands must be defined in the same BB.
960b57cec5SDimitry Andric if (any_of(Operands, [this](VPValue *Op) {
970b57cec5SDimitry Andric return cast<VPInstruction>(Op)->getParent() != &this->BB;
980b57cec5SDimitry Andric })) {
990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
1000b57cec5SDimitry Andric return false;
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric if (any_of(Operands,
1040b57cec5SDimitry Andric [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
1050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
1060b57cec5SDimitry Andric return false;
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric
1090b57cec5SDimitry Andric // For loads, check that there are no instructions writing to memory in
1100b57cec5SDimitry Andric // between them.
1110b57cec5SDimitry Andric // TODO: we only have to forbid instructions writing to memory that could
1120b57cec5SDimitry Andric // interfere with any of the loads in the bundle
1130b57cec5SDimitry Andric if (Opcode == Instruction::Load) {
1140b57cec5SDimitry Andric unsigned LoadsSeen = 0;
1150b57cec5SDimitry Andric VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
1160b57cec5SDimitry Andric for (auto &I : *Parent) {
117fe6060f1SDimitry Andric auto *VPI = dyn_cast<VPInstruction>(&I);
118fe6060f1SDimitry Andric if (!VPI)
119fe6060f1SDimitry Andric break;
1200b57cec5SDimitry Andric if (VPI->getOpcode() == Instruction::Load &&
121e8d8bef9SDimitry Andric llvm::is_contained(Operands, VPI))
1220b57cec5SDimitry Andric LoadsSeen++;
1230b57cec5SDimitry Andric
1240b57cec5SDimitry Andric if (LoadsSeen == Operands.size())
1250b57cec5SDimitry Andric break;
1260b57cec5SDimitry Andric if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
1270b57cec5SDimitry Andric LLVM_DEBUG(
1280b57cec5SDimitry Andric dbgs() << "VPSLP: instruction modifying memory between loads\n");
1290b57cec5SDimitry Andric return false;
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric
1330b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
1340b57cec5SDimitry Andric return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
1350b57cec5SDimitry Andric ->isSimple();
1360b57cec5SDimitry Andric })) {
1370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
1380b57cec5SDimitry Andric return false;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric }
1410b57cec5SDimitry Andric
1420b57cec5SDimitry Andric if (Opcode == Instruction::Store)
1430b57cec5SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
1440b57cec5SDimitry Andric return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
1450b57cec5SDimitry Andric ->isSimple();
1460b57cec5SDimitry Andric })) {
1470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
1480b57cec5SDimitry Andric return false;
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric
1510b57cec5SDimitry Andric return true;
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric
getOperands(ArrayRef<VPValue * > Values,unsigned OperandIndex)1540b57cec5SDimitry Andric static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
1550b57cec5SDimitry Andric unsigned OperandIndex) {
1560b57cec5SDimitry Andric SmallVector<VPValue *, 4> Operands;
1570b57cec5SDimitry Andric for (VPValue *V : Values) {
158e8d8bef9SDimitry Andric // Currently we only support VPInstructions.
159e8d8bef9SDimitry Andric auto *U = cast<VPInstruction>(V);
1600b57cec5SDimitry Andric Operands.push_back(U->getOperand(OperandIndex));
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric return Operands;
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric
areCommutative(ArrayRef<VPValue * > Values)1650b57cec5SDimitry Andric static bool areCommutative(ArrayRef<VPValue *> Values) {
1660b57cec5SDimitry Andric return Instruction::isCommutative(
1670b57cec5SDimitry Andric cast<VPInstruction>(Values[0])->getOpcode());
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andric static SmallVector<SmallVector<VPValue *, 4>, 4>
getOperands(ArrayRef<VPValue * > Values)1710b57cec5SDimitry Andric getOperands(ArrayRef<VPValue *> Values) {
1720b57cec5SDimitry Andric SmallVector<SmallVector<VPValue *, 4>, 4> Result;
1730b57cec5SDimitry Andric auto *VPI = cast<VPInstruction>(Values[0]);
1740b57cec5SDimitry Andric
1750b57cec5SDimitry Andric switch (VPI->getOpcode()) {
1760b57cec5SDimitry Andric case Instruction::Load:
1770b57cec5SDimitry Andric llvm_unreachable("Loads terminate a tree, no need to get operands");
1780b57cec5SDimitry Andric case Instruction::Store:
1790b57cec5SDimitry Andric Result.push_back(getOperands(Values, 0));
1800b57cec5SDimitry Andric break;
1810b57cec5SDimitry Andric default:
1820b57cec5SDimitry Andric for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
1830b57cec5SDimitry Andric Result.push_back(getOperands(Values, I));
1840b57cec5SDimitry Andric break;
1850b57cec5SDimitry Andric }
1860b57cec5SDimitry Andric
1870b57cec5SDimitry Andric return Result;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric
1900b57cec5SDimitry Andric /// Returns the opcode of Values or ~0 if they do not all agree.
getOpcode(ArrayRef<VPValue * > Values)191*bdd1243dSDimitry Andric static std::optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
1920b57cec5SDimitry Andric unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
1930b57cec5SDimitry Andric if (any_of(Values, [Opcode](VPValue *V) {
1940b57cec5SDimitry Andric return cast<VPInstruction>(V)->getOpcode() != Opcode;
1950b57cec5SDimitry Andric }))
196*bdd1243dSDimitry Andric return std::nullopt;
1970b57cec5SDimitry Andric return {Opcode};
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric /// Returns true if A and B access sequential memory if they are loads or
2010b57cec5SDimitry Andric /// stores or if they have identical opcodes otherwise.
areConsecutiveOrMatch(VPInstruction * A,VPInstruction * B,VPInterleavedAccessInfo & IAI)2020b57cec5SDimitry Andric static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
2030b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) {
2040b57cec5SDimitry Andric if (A->getOpcode() != B->getOpcode())
2050b57cec5SDimitry Andric return false;
2060b57cec5SDimitry Andric
2070b57cec5SDimitry Andric if (A->getOpcode() != Instruction::Load &&
2080b57cec5SDimitry Andric A->getOpcode() != Instruction::Store)
2090b57cec5SDimitry Andric return true;
2100b57cec5SDimitry Andric auto *GA = IAI.getInterleaveGroup(A);
2110b57cec5SDimitry Andric auto *GB = IAI.getInterleaveGroup(B);
2120b57cec5SDimitry Andric
2130b57cec5SDimitry Andric return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric
2160b57cec5SDimitry Andric /// Implements getLAScore from Listing 7 in the paper.
2170b57cec5SDimitry Andric /// Traverses and compares operands of V1 and V2 to MaxLevel.
getLAScore(VPValue * V1,VPValue * V2,unsigned MaxLevel,VPInterleavedAccessInfo & IAI)2180b57cec5SDimitry Andric static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
2190b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) {
220e8d8bef9SDimitry Andric auto *I1 = dyn_cast<VPInstruction>(V1);
221e8d8bef9SDimitry Andric auto *I2 = dyn_cast<VPInstruction>(V2);
222e8d8bef9SDimitry Andric // Currently we only support VPInstructions.
223e8d8bef9SDimitry Andric if (!I1 || !I2)
2240b57cec5SDimitry Andric return 0;
2250b57cec5SDimitry Andric
2260b57cec5SDimitry Andric if (MaxLevel == 0)
227e8d8bef9SDimitry Andric return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
2280b57cec5SDimitry Andric
2290b57cec5SDimitry Andric unsigned Score = 0;
230e8d8bef9SDimitry Andric for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
231e8d8bef9SDimitry Andric for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
232e8d8bef9SDimitry Andric Score +=
233e8d8bef9SDimitry Andric getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
2340b57cec5SDimitry Andric return Score;
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric
2370b57cec5SDimitry Andric std::pair<VPlanSlp::OpMode, VPValue *>
getBest(OpMode Mode,VPValue * Last,SmallPtrSetImpl<VPValue * > & Candidates,VPInterleavedAccessInfo & IAI)2380b57cec5SDimitry Andric VPlanSlp::getBest(OpMode Mode, VPValue *Last,
2390b57cec5SDimitry Andric SmallPtrSetImpl<VPValue *> &Candidates,
2400b57cec5SDimitry Andric VPInterleavedAccessInfo &IAI) {
2410b57cec5SDimitry Andric assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
2420b57cec5SDimitry Andric "Currently we only handle load and commutative opcodes");
2430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " getBest\n");
2440b57cec5SDimitry Andric
2450b57cec5SDimitry Andric SmallVector<VPValue *, 4> BestCandidates;
2460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates for "
2470b57cec5SDimitry Andric << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
2480b57cec5SDimitry Andric for (auto *Candidate : Candidates) {
2490b57cec5SDimitry Andric auto *LastI = cast<VPInstruction>(Last);
2500b57cec5SDimitry Andric auto *CandidateI = cast<VPInstruction>(Candidate);
2510b57cec5SDimitry Andric if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
2520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
2530b57cec5SDimitry Andric << " ");
2540b57cec5SDimitry Andric BestCandidates.push_back(Candidate);
2550b57cec5SDimitry Andric }
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric if (BestCandidates.empty())
2600b57cec5SDimitry Andric return {OpMode::Failed, nullptr};
2610b57cec5SDimitry Andric
2620b57cec5SDimitry Andric if (BestCandidates.size() == 1)
2630b57cec5SDimitry Andric return {Mode, BestCandidates[0]};
2640b57cec5SDimitry Andric
2650b57cec5SDimitry Andric VPValue *Best = nullptr;
2660b57cec5SDimitry Andric unsigned BestScore = 0;
2670b57cec5SDimitry Andric for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
2680b57cec5SDimitry Andric unsigned PrevScore = ~0u;
2690b57cec5SDimitry Andric bool AllSame = true;
2700b57cec5SDimitry Andric
2710b57cec5SDimitry Andric // FIXME: Avoid visiting the same operands multiple times.
2720b57cec5SDimitry Andric for (auto *Candidate : BestCandidates) {
2730b57cec5SDimitry Andric unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
2740b57cec5SDimitry Andric if (PrevScore == ~0u)
2750b57cec5SDimitry Andric PrevScore = Score;
2760b57cec5SDimitry Andric if (PrevScore != Score)
2770b57cec5SDimitry Andric AllSame = false;
2780b57cec5SDimitry Andric PrevScore = Score;
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric if (Score > BestScore) {
2810b57cec5SDimitry Andric BestScore = Score;
2820b57cec5SDimitry Andric Best = Candidate;
2830b57cec5SDimitry Andric }
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric if (!AllSame)
2860b57cec5SDimitry Andric break;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found best "
2890b57cec5SDimitry Andric << *cast<VPInstruction>(Best)->getUnderlyingInstr()
2900b57cec5SDimitry Andric << "\n");
2910b57cec5SDimitry Andric Candidates.erase(Best);
2920b57cec5SDimitry Andric
2930b57cec5SDimitry Andric return {Mode, Best};
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric
reorderMultiNodeOps()2960b57cec5SDimitry Andric SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
2970b57cec5SDimitry Andric SmallVector<MultiNodeOpTy, 4> FinalOrder;
2980b57cec5SDimitry Andric SmallVector<OpMode, 4> Mode;
2990b57cec5SDimitry Andric FinalOrder.reserve(MultiNodeOps.size());
3000b57cec5SDimitry Andric Mode.reserve(MultiNodeOps.size());
3010b57cec5SDimitry Andric
3020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reordering multinode\n");
3030b57cec5SDimitry Andric
3040b57cec5SDimitry Andric for (auto &Operands : MultiNodeOps) {
3050b57cec5SDimitry Andric FinalOrder.push_back({Operands.first, {Operands.second[0]}});
3060b57cec5SDimitry Andric if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
3070b57cec5SDimitry Andric Instruction::Load)
3080b57cec5SDimitry Andric Mode.push_back(OpMode::Load);
3090b57cec5SDimitry Andric else
3100b57cec5SDimitry Andric Mode.push_back(OpMode::Opcode);
3110b57cec5SDimitry Andric }
3120b57cec5SDimitry Andric
3130b57cec5SDimitry Andric for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
3150b57cec5SDimitry Andric SmallPtrSet<VPValue *, 4> Candidates;
3160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates ");
3170b57cec5SDimitry Andric for (auto Ops : MultiNodeOps) {
3180b57cec5SDimitry Andric LLVM_DEBUG(
3190b57cec5SDimitry Andric dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
3200b57cec5SDimitry Andric << " ");
3210b57cec5SDimitry Andric Candidates.insert(Ops.second[Lane]);
3220b57cec5SDimitry Andric }
3230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
3240b57cec5SDimitry Andric
3250b57cec5SDimitry Andric for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
3260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
3270b57cec5SDimitry Andric if (Mode[Op] == OpMode::Failed)
3280b57cec5SDimitry Andric continue;
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric VPValue *Last = FinalOrder[Op].second[Lane - 1];
3310b57cec5SDimitry Andric std::pair<OpMode, VPValue *> Res =
3320b57cec5SDimitry Andric getBest(Mode[Op], Last, Candidates, IAI);
3330b57cec5SDimitry Andric if (Res.second)
3340b57cec5SDimitry Andric FinalOrder[Op].second.push_back(Res.second);
3350b57cec5SDimitry Andric else
3360b57cec5SDimitry Andric // TODO: handle this case
3370b57cec5SDimitry Andric FinalOrder[Op].second.push_back(markFailed());
3380b57cec5SDimitry Andric }
3390b57cec5SDimitry Andric }
3400b57cec5SDimitry Andric
3410b57cec5SDimitry Andric return FinalOrder;
3420b57cec5SDimitry Andric }
3430b57cec5SDimitry Andric
344fe6060f1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dumpBundle(ArrayRef<VPValue * > Values)3450b57cec5SDimitry Andric void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
3460b57cec5SDimitry Andric dbgs() << " Ops: ";
347*bdd1243dSDimitry Andric for (auto *Op : Values) {
3488bcb0991SDimitry Andric if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
3498bcb0991SDimitry Andric if (auto *Instr = VPInstr->getUnderlyingInstr()) {
3500b57cec5SDimitry Andric dbgs() << *Instr << " | ";
3518bcb0991SDimitry Andric continue;
3528bcb0991SDimitry Andric }
3530b57cec5SDimitry Andric dbgs() << " nullptr | ";
3548bcb0991SDimitry Andric }
3550b57cec5SDimitry Andric dbgs() << "\n";
3560b57cec5SDimitry Andric }
357fe6060f1SDimitry Andric #endif
3580b57cec5SDimitry Andric
buildGraph(ArrayRef<VPValue * > Values)3590b57cec5SDimitry Andric VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
3600b57cec5SDimitry Andric assert(!Values.empty() && "Need some operands!");
3610b57cec5SDimitry Andric
3620b57cec5SDimitry Andric // If we already visited this instruction bundle, re-use the existing node
3630b57cec5SDimitry Andric auto I = BundleToCombined.find(to_vector<4>(Values));
3640b57cec5SDimitry Andric if (I != BundleToCombined.end()) {
3650b57cec5SDimitry Andric #ifndef NDEBUG
3660b57cec5SDimitry Andric // Check that the resulting graph is a tree. If we re-use a node, this means
3670b57cec5SDimitry Andric // its values have multiple users. We only allow this, if all users of each
3680b57cec5SDimitry Andric // value are the same instruction.
3690b57cec5SDimitry Andric for (auto *V : Values) {
3700b57cec5SDimitry Andric auto UI = V->user_begin();
3710b57cec5SDimitry Andric auto *FirstUser = *UI++;
3720b57cec5SDimitry Andric while (UI != V->user_end()) {
3730b57cec5SDimitry Andric assert(*UI == FirstUser && "Currently we only support SLP trees.");
3740b57cec5SDimitry Andric UI++;
3750b57cec5SDimitry Andric }
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric #endif
3780b57cec5SDimitry Andric return I->second;
3790b57cec5SDimitry Andric }
3800b57cec5SDimitry Andric
3810b57cec5SDimitry Andric // Dump inputs
3820b57cec5SDimitry Andric LLVM_DEBUG({
3830b57cec5SDimitry Andric dbgs() << "buildGraph: ";
3840b57cec5SDimitry Andric dumpBundle(Values);
3850b57cec5SDimitry Andric });
3860b57cec5SDimitry Andric
3870b57cec5SDimitry Andric if (!areVectorizable(Values))
3880b57cec5SDimitry Andric return markFailed();
3890b57cec5SDimitry Andric
3900b57cec5SDimitry Andric assert(getOpcode(Values) && "Opcodes for all values must match");
39181ad6265SDimitry Andric unsigned ValuesOpcode = *getOpcode(Values);
3920b57cec5SDimitry Andric
3930b57cec5SDimitry Andric SmallVector<VPValue *, 4> CombinedOperands;
3940b57cec5SDimitry Andric if (areCommutative(Values)) {
3950b57cec5SDimitry Andric bool MultiNodeRoot = !MultiNodeActive;
3960b57cec5SDimitry Andric MultiNodeActive = true;
3970b57cec5SDimitry Andric for (auto &Operands : getOperands(Values)) {
3980b57cec5SDimitry Andric LLVM_DEBUG({
3990b57cec5SDimitry Andric dbgs() << " Visiting Commutative";
4000b57cec5SDimitry Andric dumpBundle(Operands);
4010b57cec5SDimitry Andric });
4020b57cec5SDimitry Andric
4030b57cec5SDimitry Andric auto OperandsOpcode = getOpcode(Operands);
4040b57cec5SDimitry Andric if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
4050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
4060b57cec5SDimitry Andric CombinedOperands.push_back(buildGraph(Operands));
4070b57cec5SDimitry Andric } else {
4080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
4090b57cec5SDimitry Andric // Create dummy VPInstruction, which will we replace later by the
4100b57cec5SDimitry Andric // re-ordered operand.
4110b57cec5SDimitry Andric VPInstruction *Op = new VPInstruction(0, {});
4120b57cec5SDimitry Andric CombinedOperands.push_back(Op);
4130b57cec5SDimitry Andric MultiNodeOps.emplace_back(Op, Operands);
4140b57cec5SDimitry Andric }
4150b57cec5SDimitry Andric }
4160b57cec5SDimitry Andric
4170b57cec5SDimitry Andric if (MultiNodeRoot) {
4180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reorder \n");
4190b57cec5SDimitry Andric MultiNodeActive = false;
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andric auto FinalOrder = reorderMultiNodeOps();
4220b57cec5SDimitry Andric
4230b57cec5SDimitry Andric MultiNodeOps.clear();
4240b57cec5SDimitry Andric for (auto &Ops : FinalOrder) {
4250b57cec5SDimitry Andric VPInstruction *NewOp = buildGraph(Ops.second);
4260b57cec5SDimitry Andric Ops.first->replaceAllUsesWith(NewOp);
4270b57cec5SDimitry Andric for (unsigned i = 0; i < CombinedOperands.size(); i++)
4280b57cec5SDimitry Andric if (CombinedOperands[i] == Ops.first)
4290b57cec5SDimitry Andric CombinedOperands[i] = NewOp;
4300b57cec5SDimitry Andric delete Ops.first;
4310b57cec5SDimitry Andric Ops.first = NewOp;
4320b57cec5SDimitry Andric }
4330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found final order\n");
4340b57cec5SDimitry Andric }
4350b57cec5SDimitry Andric } else {
4360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " NonCommuntative\n");
4370b57cec5SDimitry Andric if (ValuesOpcode == Instruction::Load)
4380b57cec5SDimitry Andric for (VPValue *V : Values)
4390b57cec5SDimitry Andric CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
4400b57cec5SDimitry Andric else
4410b57cec5SDimitry Andric for (auto &Operands : getOperands(Values))
4420b57cec5SDimitry Andric CombinedOperands.push_back(buildGraph(Operands));
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric
4450b57cec5SDimitry Andric unsigned Opcode;
4460b57cec5SDimitry Andric switch (ValuesOpcode) {
4470b57cec5SDimitry Andric case Instruction::Load:
4480b57cec5SDimitry Andric Opcode = VPInstruction::SLPLoad;
4490b57cec5SDimitry Andric break;
4500b57cec5SDimitry Andric case Instruction::Store:
4510b57cec5SDimitry Andric Opcode = VPInstruction::SLPStore;
4520b57cec5SDimitry Andric break;
4530b57cec5SDimitry Andric default:
4540b57cec5SDimitry Andric Opcode = ValuesOpcode;
4550b57cec5SDimitry Andric break;
4560b57cec5SDimitry Andric }
4570b57cec5SDimitry Andric
4580b57cec5SDimitry Andric if (!CompletelySLP)
4590b57cec5SDimitry Andric return markFailed();
4600b57cec5SDimitry Andric
4610b57cec5SDimitry Andric assert(CombinedOperands.size() > 0 && "Need more some operands");
4620eae32dcSDimitry Andric auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
4630eae32dcSDimitry Andric auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
4640b57cec5SDimitry Andric
465e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
466e8d8bef9SDimitry Andric << *cast<VPInstruction>(Values[0]) << "\n");
4670b57cec5SDimitry Andric addCombined(Values, VPI);
4680b57cec5SDimitry Andric return VPI;
4690b57cec5SDimitry Andric }
470