xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
10b57cec5SDimitry Andric //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "Hexagon.h"
100b57cec5SDimitry Andric #include "HexagonISelDAGToDAG.h"
110b57cec5SDimitry Andric #include "HexagonISelLowering.h"
120b57cec5SDimitry Andric #include "HexagonTargetMachine.h"
13bdd1243dSDimitry Andric #include "llvm/ADT/BitVector.h"
140b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGISel.h"
170b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
18480093f4SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h"
190b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
200b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
21bdd1243dSDimitry Andric #include "llvm/Support/MathExtras.h"
220b57cec5SDimitry Andric 
23bdd1243dSDimitry Andric #include <algorithm>
24bdd1243dSDimitry Andric #include <cmath>
250b57cec5SDimitry Andric #include <deque>
26bdd1243dSDimitry Andric #include <functional>
270b57cec5SDimitry Andric #include <map>
28bdd1243dSDimitry Andric #include <optional>
290b57cec5SDimitry Andric #include <set>
30bdd1243dSDimitry Andric #include <unordered_map>
310b57cec5SDimitry Andric #include <utility>
320b57cec5SDimitry Andric #include <vector>
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define DEBUG_TYPE "hexagon-isel"
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric namespace {
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric // --------------------------------------------------------------------
400b57cec5SDimitry Andric // Implementation of permutation networks.
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric // Implementation of the node routing through butterfly networks:
430b57cec5SDimitry Andric // - Forward delta.
440b57cec5SDimitry Andric // - Reverse delta.
450b57cec5SDimitry Andric // - Benes.
460b57cec5SDimitry Andric //
470b57cec5SDimitry Andric //
480b57cec5SDimitry Andric // Forward delta network consists of log(N) steps, where N is the number
490b57cec5SDimitry Andric // of inputs. In each step, an input can stay in place, or it can get
500b57cec5SDimitry Andric // routed to another position[1]. The step after that consists of two
510b57cec5SDimitry Andric // networks, each half in size in terms of the number of nodes. In those
520b57cec5SDimitry Andric // terms, in the given step, an input can go to either the upper or the
530b57cec5SDimitry Andric // lower network in the next step.
540b57cec5SDimitry Andric //
550b57cec5SDimitry Andric // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
560b57cec5SDimitry Andric // positions as long as there is no conflict.
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric // Here's a delta network for 8 inputs, only the switching routes are
590b57cec5SDimitry Andric // shown:
600b57cec5SDimitry Andric //
610b57cec5SDimitry Andric //         Steps:
620b57cec5SDimitry Andric //         |- 1 ---------------|- 2 -----|- 3 -|
630b57cec5SDimitry Andric //
640b57cec5SDimitry Andric // Inp[0] ***                 ***       ***   *** Out[0]
650b57cec5SDimitry Andric //           \               /   \     /   \ /
660b57cec5SDimitry Andric //            \             /     \   /     X
670b57cec5SDimitry Andric //             \           /       \ /     / \
680b57cec5SDimitry Andric // Inp[1] ***   \         /   ***   X   ***   *** Out[1]
690b57cec5SDimitry Andric //           \   \       /   /   \ / \ /
700b57cec5SDimitry Andric //            \   \     /   /     X   X
710b57cec5SDimitry Andric //             \   \   /   /     / \ / \
720b57cec5SDimitry Andric // Inp[2] ***   \   \ /   /   ***   X   ***   *** Out[2]
730b57cec5SDimitry Andric //           \   \   X   /   /     / \     \ /
740b57cec5SDimitry Andric //            \   \ / \ /   /     /   \     X
750b57cec5SDimitry Andric //             \   X   X   /     /     \   / \
760b57cec5SDimitry Andric // Inp[3] ***   \ / \ / \ /   ***       ***   *** Out[3]
770b57cec5SDimitry Andric //           \   X   X   X   /
780b57cec5SDimitry Andric //            \ / \ / \ / \ /
790b57cec5SDimitry Andric //             X   X   X   X
800b57cec5SDimitry Andric //            / \ / \ / \ / \
810b57cec5SDimitry Andric //           /   X   X   X   \
820b57cec5SDimitry Andric // Inp[4] ***   / \ / \ / \   ***       ***   *** Out[4]
830b57cec5SDimitry Andric //             /   X   X   \     \     /   \ /
840b57cec5SDimitry Andric //            /   / \ / \   \     \   /     X
850b57cec5SDimitry Andric //           /   /   X   \   \     \ /     / \
860b57cec5SDimitry Andric // Inp[5] ***   /   / \   \   ***   X   ***   *** Out[5]
870b57cec5SDimitry Andric //             /   /   \   \     \ / \ /
880b57cec5SDimitry Andric //            /   /     \   \     X   X
890b57cec5SDimitry Andric //           /   /       \   \   / \ / \
900b57cec5SDimitry Andric // Inp[6] ***   /         \   ***   X   ***   *** Out[6]
910b57cec5SDimitry Andric //             /           \       / \     \ /
920b57cec5SDimitry Andric //            /             \     /   \     X
930b57cec5SDimitry Andric //           /               \   /     \   / \
940b57cec5SDimitry Andric // Inp[7] ***                 ***       ***   *** Out[7]
950b57cec5SDimitry Andric //
960b57cec5SDimitry Andric //
970b57cec5SDimitry Andric // Reverse delta network is same as delta network, with the steps in
980b57cec5SDimitry Andric // the opposite order.
990b57cec5SDimitry Andric //
1000b57cec5SDimitry Andric //
1010b57cec5SDimitry Andric // Benes network is a forward delta network immediately followed by
1020b57cec5SDimitry Andric // a reverse delta network.
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric enum class ColorKind { None, Red, Black };
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric // Graph coloring utility used to partition nodes into two groups:
1070b57cec5SDimitry Andric // they will correspond to nodes routed to the upper and lower networks.
1080b57cec5SDimitry Andric struct Coloring {
1090b57cec5SDimitry Andric   using Node = int;
1100b57cec5SDimitry Andric   using MapType = std::map<Node, ColorKind>;
1110b57cec5SDimitry Andric   static constexpr Node Ignore = Node(-1);
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   Coloring(ArrayRef<Node> Ord) : Order(Ord) {
1140b57cec5SDimitry Andric     build();
1150b57cec5SDimitry Andric     if (!color())
1160b57cec5SDimitry Andric       Colors.clear();
1170b57cec5SDimitry Andric   }
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   const MapType &colors() const {
1200b57cec5SDimitry Andric     return Colors;
1210b57cec5SDimitry Andric   }
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric   ColorKind other(ColorKind Color) {
1240b57cec5SDimitry Andric     if (Color == ColorKind::None)
1250b57cec5SDimitry Andric       return ColorKind::Red;
1260b57cec5SDimitry Andric     return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
1270b57cec5SDimitry Andric   }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   LLVM_DUMP_METHOD void dump() const;
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric private:
1320b57cec5SDimitry Andric   ArrayRef<Node> Order;
1330b57cec5SDimitry Andric   MapType Colors;
1340b57cec5SDimitry Andric   std::set<Node> Needed;
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   using NodeSet = std::set<Node>;
1370b57cec5SDimitry Andric   std::map<Node,NodeSet> Edges;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric   Node conj(Node Pos) {
1400b57cec5SDimitry Andric     Node Num = Order.size();
1410b57cec5SDimitry Andric     return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
1420b57cec5SDimitry Andric   }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   ColorKind getColor(Node N) {
1450b57cec5SDimitry Andric     auto F = Colors.find(N);
1460b57cec5SDimitry Andric     return F != Colors.end() ? F->second : ColorKind::None;
1470b57cec5SDimitry Andric   }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   void build();
1520b57cec5SDimitry Andric   bool color();
1530b57cec5SDimitry Andric };
1540b57cec5SDimitry Andric } // namespace
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
1570b57cec5SDimitry Andric   auto Color = ColorKind::None;
1580b57cec5SDimitry Andric   for (Node N : Nodes) {
1590b57cec5SDimitry Andric     ColorKind ColorN = getColor(N);
1600b57cec5SDimitry Andric     if (ColorN == ColorKind::None)
1610b57cec5SDimitry Andric       continue;
1620b57cec5SDimitry Andric     if (Color == ColorKind::None)
1630b57cec5SDimitry Andric       Color = ColorN;
1640b57cec5SDimitry Andric     else if (Color != ColorKind::None && Color != ColorN)
1650b57cec5SDimitry Andric       return { false, ColorKind::None };
1660b57cec5SDimitry Andric   }
1670b57cec5SDimitry Andric   return { true, Color };
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric void Coloring::build() {
1710b57cec5SDimitry Andric   // Add Order[P] and Order[conj(P)] to Edges.
1720b57cec5SDimitry Andric   for (unsigned P = 0; P != Order.size(); ++P) {
1730b57cec5SDimitry Andric     Node I = Order[P];
1740b57cec5SDimitry Andric     if (I != Ignore) {
1750b57cec5SDimitry Andric       Needed.insert(I);
1760b57cec5SDimitry Andric       Node PC = Order[conj(P)];
1770b57cec5SDimitry Andric       if (PC != Ignore && PC != I)
1780b57cec5SDimitry Andric         Edges[I].insert(PC);
1790b57cec5SDimitry Andric     }
1800b57cec5SDimitry Andric   }
1810b57cec5SDimitry Andric   // Add I and conj(I) to Edges.
1820b57cec5SDimitry Andric   for (unsigned I = 0; I != Order.size(); ++I) {
1830b57cec5SDimitry Andric     if (!Needed.count(I))
1840b57cec5SDimitry Andric       continue;
1850b57cec5SDimitry Andric     Node C = conj(I);
1860b57cec5SDimitry Andric     // This will create an entry in the edge table, even if I is not
1870b57cec5SDimitry Andric     // connected to any other node. This is necessary, because it still
1880b57cec5SDimitry Andric     // needs to be colored.
1890b57cec5SDimitry Andric     NodeSet &Is = Edges[I];
1900b57cec5SDimitry Andric     if (Needed.count(C))
1910b57cec5SDimitry Andric       Is.insert(C);
1920b57cec5SDimitry Andric   }
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric bool Coloring::color() {
1960b57cec5SDimitry Andric   SetVector<Node> FirstQ;
1970b57cec5SDimitry Andric   auto Enqueue = [this,&FirstQ] (Node N) {
1980b57cec5SDimitry Andric     SetVector<Node> Q;
1990b57cec5SDimitry Andric     Q.insert(N);
2000b57cec5SDimitry Andric     for (unsigned I = 0; I != Q.size(); ++I) {
2010b57cec5SDimitry Andric       NodeSet &Ns = Edges[Q[I]];
2020b57cec5SDimitry Andric       Q.insert(Ns.begin(), Ns.end());
2030b57cec5SDimitry Andric     }
2040b57cec5SDimitry Andric     FirstQ.insert(Q.begin(), Q.end());
2050b57cec5SDimitry Andric   };
2060b57cec5SDimitry Andric   for (Node N : Needed)
2070b57cec5SDimitry Andric     Enqueue(N);
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   for (Node N : FirstQ) {
2100b57cec5SDimitry Andric     if (Colors.count(N))
2110b57cec5SDimitry Andric       continue;
2120b57cec5SDimitry Andric     NodeSet &Ns = Edges[N];
2130b57cec5SDimitry Andric     auto P = getUniqueColor(Ns);
2140b57cec5SDimitry Andric     if (!P.first)
2150b57cec5SDimitry Andric       return false;
2160b57cec5SDimitry Andric     Colors[N] = other(P.second);
2170b57cec5SDimitry Andric   }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric   // First, color nodes that don't have any dups.
2200b57cec5SDimitry Andric   for (auto E : Edges) {
2210b57cec5SDimitry Andric     Node N = E.first;
2220b57cec5SDimitry Andric     if (!Needed.count(conj(N)) || Colors.count(N))
2230b57cec5SDimitry Andric       continue;
2240b57cec5SDimitry Andric     auto P = getUniqueColor(E.second);
2250b57cec5SDimitry Andric     if (!P.first)
2260b57cec5SDimitry Andric       return false;
2270b57cec5SDimitry Andric     Colors[N] = other(P.second);
2280b57cec5SDimitry Andric   }
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   // Now, nodes that are still uncolored. Since the graph can be modified
2310b57cec5SDimitry Andric   // in this step, create a work queue.
2320b57cec5SDimitry Andric   std::vector<Node> WorkQ;
2330b57cec5SDimitry Andric   for (auto E : Edges) {
2340b57cec5SDimitry Andric     Node N = E.first;
2350b57cec5SDimitry Andric     if (!Colors.count(N))
2360b57cec5SDimitry Andric       WorkQ.push_back(N);
2370b57cec5SDimitry Andric   }
2380b57cec5SDimitry Andric 
23904eeddc0SDimitry Andric   for (Node N : WorkQ) {
2400b57cec5SDimitry Andric     NodeSet &Ns = Edges[N];
2410b57cec5SDimitry Andric     auto P = getUniqueColor(Ns);
2420b57cec5SDimitry Andric     if (P.first) {
2430b57cec5SDimitry Andric       Colors[N] = other(P.second);
2440b57cec5SDimitry Andric       continue;
2450b57cec5SDimitry Andric     }
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric     // Coloring failed. Split this node.
2480b57cec5SDimitry Andric     Node C = conj(N);
2490b57cec5SDimitry Andric     ColorKind ColorN = other(ColorKind::None);
2500b57cec5SDimitry Andric     ColorKind ColorC = other(ColorN);
2510b57cec5SDimitry Andric     NodeSet &Cs = Edges[C];
2520b57cec5SDimitry Andric     NodeSet CopyNs = Ns;
2530b57cec5SDimitry Andric     for (Node M : CopyNs) {
2540b57cec5SDimitry Andric       ColorKind ColorM = getColor(M);
2550b57cec5SDimitry Andric       if (ColorM == ColorC) {
2560b57cec5SDimitry Andric         // Connect M with C, disconnect M from N.
2570b57cec5SDimitry Andric         Cs.insert(M);
2580b57cec5SDimitry Andric         Edges[M].insert(C);
2590b57cec5SDimitry Andric         Ns.erase(M);
2600b57cec5SDimitry Andric         Edges[M].erase(N);
2610b57cec5SDimitry Andric       }
2620b57cec5SDimitry Andric     }
2630b57cec5SDimitry Andric     Colors[N] = ColorN;
2640b57cec5SDimitry Andric     Colors[C] = ColorC;
2650b57cec5SDimitry Andric   }
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric   // Explicitly assign "None" to all uncolored nodes.
2680b57cec5SDimitry Andric   for (unsigned I = 0; I != Order.size(); ++I)
2690b57cec5SDimitry Andric     if (Colors.count(I) == 0)
2700b57cec5SDimitry Andric       Colors[I] = ColorKind::None;
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   return true;
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2760b57cec5SDimitry Andric void Coloring::dump() const {
2770b57cec5SDimitry Andric   dbgs() << "{ Order:   {";
27804eeddc0SDimitry Andric   for (Node P : Order) {
2790b57cec5SDimitry Andric     if (P != Ignore)
2800b57cec5SDimitry Andric       dbgs() << ' ' << P;
2810b57cec5SDimitry Andric     else
2820b57cec5SDimitry Andric       dbgs() << " -";
2830b57cec5SDimitry Andric   }
2840b57cec5SDimitry Andric   dbgs() << " }\n";
2850b57cec5SDimitry Andric   dbgs() << "  Needed: {";
2860b57cec5SDimitry Andric   for (Node N : Needed)
2870b57cec5SDimitry Andric     dbgs() << ' ' << N;
2880b57cec5SDimitry Andric   dbgs() << " }\n";
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric   dbgs() << "  Edges: {\n";
2910b57cec5SDimitry Andric   for (auto E : Edges) {
2920b57cec5SDimitry Andric     dbgs() << "    " << E.first << " -> {";
2930b57cec5SDimitry Andric     for (auto N : E.second)
2940b57cec5SDimitry Andric       dbgs() << ' ' << N;
2950b57cec5SDimitry Andric     dbgs() << " }\n";
2960b57cec5SDimitry Andric   }
2970b57cec5SDimitry Andric   dbgs() << "  }\n";
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   auto ColorKindToName = [](ColorKind C) {
3000b57cec5SDimitry Andric     switch (C) {
3010b57cec5SDimitry Andric     case ColorKind::None:
3020b57cec5SDimitry Andric       return "None";
3030b57cec5SDimitry Andric     case ColorKind::Red:
3040b57cec5SDimitry Andric       return "Red";
3050b57cec5SDimitry Andric     case ColorKind::Black:
3060b57cec5SDimitry Andric       return "Black";
3070b57cec5SDimitry Andric     }
3080b57cec5SDimitry Andric     llvm_unreachable("all ColorKinds should be handled by the switch above");
3090b57cec5SDimitry Andric   };
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   dbgs() << "  Colors: {\n";
3120b57cec5SDimitry Andric   for (auto C : Colors)
3130b57cec5SDimitry Andric     dbgs() << "    " << C.first << " -> " << ColorKindToName(C.second) << "\n";
3140b57cec5SDimitry Andric   dbgs() << "  }\n}\n";
3150b57cec5SDimitry Andric }
3160b57cec5SDimitry Andric #endif
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric namespace {
3190b57cec5SDimitry Andric // Base class of for reordering networks. They don't strictly need to be
3200b57cec5SDimitry Andric // permutations, as outputs with repeated occurrences of an input element
3210b57cec5SDimitry Andric // are allowed.
3220b57cec5SDimitry Andric struct PermNetwork {
3230b57cec5SDimitry Andric   using Controls = std::vector<uint8_t>;
3240b57cec5SDimitry Andric   using ElemType = int;
3250b57cec5SDimitry Andric   static constexpr ElemType Ignore = ElemType(-1);
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric   enum : uint8_t {
3280b57cec5SDimitry Andric     None,
3290b57cec5SDimitry Andric     Pass,
3300b57cec5SDimitry Andric     Switch
3310b57cec5SDimitry Andric   };
3320b57cec5SDimitry Andric   enum : uint8_t {
3330b57cec5SDimitry Andric     Forward,
3340b57cec5SDimitry Andric     Reverse
3350b57cec5SDimitry Andric   };
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric   PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
3380b57cec5SDimitry Andric     Order.assign(Ord.data(), Ord.data()+Ord.size());
3390b57cec5SDimitry Andric     Log = 0;
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric     unsigned S = Order.size();
3420b57cec5SDimitry Andric     while (S >>= 1)
3430b57cec5SDimitry Andric       ++Log;
3440b57cec5SDimitry Andric 
3450b57cec5SDimitry Andric     Table.resize(Order.size());
3460b57cec5SDimitry Andric     for (RowType &Row : Table)
3470b57cec5SDimitry Andric       Row.resize(Mult*Log, None);
3480b57cec5SDimitry Andric   }
3490b57cec5SDimitry Andric 
3500b57cec5SDimitry Andric   void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
3510b57cec5SDimitry Andric     unsigned Size = Order.size();
3520b57cec5SDimitry Andric     V.resize(Size);
3530b57cec5SDimitry Andric     for (unsigned I = 0; I != Size; ++I) {
3540b57cec5SDimitry Andric       unsigned W = 0;
3550b57cec5SDimitry Andric       for (unsigned L = 0; L != Log; ++L) {
3560b57cec5SDimitry Andric         unsigned C = ctl(I, StartAt+L) == Switch;
3570b57cec5SDimitry Andric         if (Dir == Forward)
3580b57cec5SDimitry Andric           W |= C << (Log-1-L);
3590b57cec5SDimitry Andric         else
3600b57cec5SDimitry Andric           W |= C << L;
3610b57cec5SDimitry Andric       }
3620b57cec5SDimitry Andric       assert(isUInt<8>(W));
3630b57cec5SDimitry Andric       V[I] = uint8_t(W);
3640b57cec5SDimitry Andric     }
3650b57cec5SDimitry Andric   }
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric   uint8_t ctl(ElemType Pos, unsigned Step) const {
3680b57cec5SDimitry Andric     return Table[Pos][Step];
3690b57cec5SDimitry Andric   }
3700b57cec5SDimitry Andric   unsigned size() const {
3710b57cec5SDimitry Andric     return Order.size();
3720b57cec5SDimitry Andric   }
3730b57cec5SDimitry Andric   unsigned steps() const {
3740b57cec5SDimitry Andric     return Log;
3750b57cec5SDimitry Andric   }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric protected:
3780b57cec5SDimitry Andric   unsigned Log;
3790b57cec5SDimitry Andric   std::vector<ElemType> Order;
3800b57cec5SDimitry Andric   using RowType = std::vector<uint8_t>;
3810b57cec5SDimitry Andric   std::vector<RowType> Table;
3820b57cec5SDimitry Andric };
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric struct ForwardDeltaNetwork : public PermNetwork {
3850b57cec5SDimitry Andric   ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   bool run(Controls &V) {
3880b57cec5SDimitry Andric     if (!route(Order.data(), Table.data(), size(), 0))
3890b57cec5SDimitry Andric       return false;
3900b57cec5SDimitry Andric     getControls(V, 0, Forward);
3910b57cec5SDimitry Andric     return true;
3920b57cec5SDimitry Andric   }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric private:
3950b57cec5SDimitry Andric   bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
3960b57cec5SDimitry Andric };
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric struct ReverseDeltaNetwork : public PermNetwork {
3990b57cec5SDimitry Andric   ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
4000b57cec5SDimitry Andric 
4010b57cec5SDimitry Andric   bool run(Controls &V) {
4020b57cec5SDimitry Andric     if (!route(Order.data(), Table.data(), size(), 0))
4030b57cec5SDimitry Andric       return false;
4040b57cec5SDimitry Andric     getControls(V, 0, Reverse);
4050b57cec5SDimitry Andric     return true;
4060b57cec5SDimitry Andric   }
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric private:
4090b57cec5SDimitry Andric   bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
4100b57cec5SDimitry Andric };
4110b57cec5SDimitry Andric 
4120b57cec5SDimitry Andric struct BenesNetwork : public PermNetwork {
4130b57cec5SDimitry Andric   BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric   bool run(Controls &F, Controls &R) {
4160b57cec5SDimitry Andric     if (!route(Order.data(), Table.data(), size(), 0))
4170b57cec5SDimitry Andric       return false;
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric     getControls(F, 0, Forward);
4200b57cec5SDimitry Andric     getControls(R, Log, Reverse);
4210b57cec5SDimitry Andric     return true;
4220b57cec5SDimitry Andric   }
4230b57cec5SDimitry Andric 
4240b57cec5SDimitry Andric private:
4250b57cec5SDimitry Andric   bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
4260b57cec5SDimitry Andric };
4270b57cec5SDimitry Andric } // namespace
4280b57cec5SDimitry Andric 
4290b57cec5SDimitry Andric bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
4300b57cec5SDimitry Andric                                 unsigned Step) {
4310b57cec5SDimitry Andric   bool UseUp = false, UseDown = false;
4320b57cec5SDimitry Andric   ElemType Num = Size;
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric   // Cannot use coloring here, because coloring is used to determine
4350b57cec5SDimitry Andric   // the "big" switch, i.e. the one that changes halves, and in a forward
4360b57cec5SDimitry Andric   // network, a color can be simultaneously routed to both halves in the
4370b57cec5SDimitry Andric   // step we're working on.
4380b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J) {
4390b57cec5SDimitry Andric     ElemType I = P[J];
4400b57cec5SDimitry Andric     // I is the position in the input,
4410b57cec5SDimitry Andric     // J is the position in the output.
4420b57cec5SDimitry Andric     if (I == Ignore)
4430b57cec5SDimitry Andric       continue;
4440b57cec5SDimitry Andric     uint8_t S;
4450b57cec5SDimitry Andric     if (I < Num/2)
4460b57cec5SDimitry Andric       S = (J < Num/2) ? Pass : Switch;
4470b57cec5SDimitry Andric     else
4480b57cec5SDimitry Andric       S = (J < Num/2) ? Switch : Pass;
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric     // U is the element in the table that needs to be updated.
4510b57cec5SDimitry Andric     ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
4520b57cec5SDimitry Andric     if (U < Num/2)
4530b57cec5SDimitry Andric       UseUp = true;
4540b57cec5SDimitry Andric     else
4550b57cec5SDimitry Andric       UseDown = true;
4560b57cec5SDimitry Andric     if (T[U][Step] != S && T[U][Step] != None)
4570b57cec5SDimitry Andric       return false;
4580b57cec5SDimitry Andric     T[U][Step] = S;
4590b57cec5SDimitry Andric   }
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J)
4620b57cec5SDimitry Andric     if (P[J] != Ignore && P[J] >= Num/2)
4630b57cec5SDimitry Andric       P[J] -= Num/2;
4640b57cec5SDimitry Andric 
4650b57cec5SDimitry Andric   if (Step+1 < Log) {
4660b57cec5SDimitry Andric     if (UseUp   && !route(P,        T,        Size/2, Step+1))
4670b57cec5SDimitry Andric       return false;
4680b57cec5SDimitry Andric     if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
4690b57cec5SDimitry Andric       return false;
4700b57cec5SDimitry Andric   }
4710b57cec5SDimitry Andric   return true;
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
4750b57cec5SDimitry Andric                                 unsigned Step) {
4760b57cec5SDimitry Andric   unsigned Pets = Log-1 - Step;
4770b57cec5SDimitry Andric   bool UseUp = false, UseDown = false;
4780b57cec5SDimitry Andric   ElemType Num = Size;
4790b57cec5SDimitry Andric 
4800b57cec5SDimitry Andric   // In this step half-switching occurs, so coloring can be used.
4810b57cec5SDimitry Andric   Coloring G({P,Size});
4820b57cec5SDimitry Andric   const Coloring::MapType &M = G.colors();
4830b57cec5SDimitry Andric   if (M.empty())
4840b57cec5SDimitry Andric     return false;
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric   ColorKind ColorUp = ColorKind::None;
4870b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J) {
4880b57cec5SDimitry Andric     ElemType I = P[J];
4890b57cec5SDimitry Andric     // I is the position in the input,
4900b57cec5SDimitry Andric     // J is the position in the output.
4910b57cec5SDimitry Andric     if (I == Ignore)
4920b57cec5SDimitry Andric       continue;
4930b57cec5SDimitry Andric     ColorKind C = M.at(I);
4940b57cec5SDimitry Andric     if (C == ColorKind::None)
4950b57cec5SDimitry Andric       continue;
4960b57cec5SDimitry Andric     // During "Step", inputs cannot switch halves, so if the "up" color
4970b57cec5SDimitry Andric     // is still unknown, make sure that it is selected in such a way that
4980b57cec5SDimitry Andric     // "I" will stay in the same half.
4990b57cec5SDimitry Andric     bool InpUp = I < Num/2;
5000b57cec5SDimitry Andric     if (ColorUp == ColorKind::None)
5010b57cec5SDimitry Andric       ColorUp = InpUp ? C : G.other(C);
5020b57cec5SDimitry Andric     if ((C == ColorUp) != InpUp) {
5030b57cec5SDimitry Andric       // If I should go to a different half than where is it now, give up.
5040b57cec5SDimitry Andric       return false;
5050b57cec5SDimitry Andric     }
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric     uint8_t S;
5080b57cec5SDimitry Andric     if (InpUp) {
5090b57cec5SDimitry Andric       S = (J < Num/2) ? Pass : Switch;
5100b57cec5SDimitry Andric       UseUp = true;
5110b57cec5SDimitry Andric     } else {
5120b57cec5SDimitry Andric       S = (J < Num/2) ? Switch : Pass;
5130b57cec5SDimitry Andric       UseDown = true;
5140b57cec5SDimitry Andric     }
5150b57cec5SDimitry Andric     T[J][Pets] = S;
5160b57cec5SDimitry Andric   }
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric   // Reorder the working permutation according to the computed switch table
5190b57cec5SDimitry Andric   // for the last step (i.e. Pets).
5200b57cec5SDimitry Andric   for (ElemType J = 0, E = Size / 2; J != E; ++J) {
5210b57cec5SDimitry Andric     ElemType PJ = P[J];         // Current values of P[J]
5220b57cec5SDimitry Andric     ElemType PC = P[J+Size/2];  // and P[conj(J)]
5230b57cec5SDimitry Andric     ElemType QJ = PJ;           // New values of P[J]
5240b57cec5SDimitry Andric     ElemType QC = PC;           // and P[conj(J)]
5250b57cec5SDimitry Andric     if (T[J][Pets] == Switch)
5260b57cec5SDimitry Andric       QC = PJ;
5270b57cec5SDimitry Andric     if (T[J+Size/2][Pets] == Switch)
5280b57cec5SDimitry Andric       QJ = PC;
5290b57cec5SDimitry Andric     P[J] = QJ;
5300b57cec5SDimitry Andric     P[J+Size/2] = QC;
5310b57cec5SDimitry Andric   }
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J)
5340b57cec5SDimitry Andric     if (P[J] != Ignore && P[J] >= Num/2)
5350b57cec5SDimitry Andric       P[J] -= Num/2;
5360b57cec5SDimitry Andric 
5370b57cec5SDimitry Andric   if (Step+1 < Log) {
5380b57cec5SDimitry Andric     if (UseUp && !route(P, T, Size/2, Step+1))
5390b57cec5SDimitry Andric       return false;
5400b57cec5SDimitry Andric     if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
5410b57cec5SDimitry Andric       return false;
5420b57cec5SDimitry Andric   }
5430b57cec5SDimitry Andric   return true;
5440b57cec5SDimitry Andric }
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
5470b57cec5SDimitry Andric                          unsigned Step) {
5480b57cec5SDimitry Andric   Coloring G({P,Size});
5490b57cec5SDimitry Andric   const Coloring::MapType &M = G.colors();
5500b57cec5SDimitry Andric   if (M.empty())
5510b57cec5SDimitry Andric     return false;
5520b57cec5SDimitry Andric   ElemType Num = Size;
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric   unsigned Pets = 2*Log-1 - Step;
5550b57cec5SDimitry Andric   bool UseUp = false, UseDown = false;
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric   // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
5580b57cec5SDimitry Andric   // result in different controls. Let's pick the one where the first
5590b57cec5SDimitry Andric   // control will be "Pass".
5600b57cec5SDimitry Andric   ColorKind ColorUp = ColorKind::None;
5610b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J) {
5620b57cec5SDimitry Andric     ElemType I = P[J];
5630b57cec5SDimitry Andric     if (I == Ignore)
5640b57cec5SDimitry Andric       continue;
5650b57cec5SDimitry Andric     ColorKind C = M.at(I);
5660b57cec5SDimitry Andric     if (C == ColorKind::None)
5670b57cec5SDimitry Andric       continue;
5680b57cec5SDimitry Andric     if (ColorUp == ColorKind::None) {
5690b57cec5SDimitry Andric       ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
5700b57cec5SDimitry Andric     }
5710b57cec5SDimitry Andric     unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
5720b57cec5SDimitry Andric     if (C == ColorUp) {
5730b57cec5SDimitry Andric       if (I < Num/2)
5740b57cec5SDimitry Andric         T[I][Step] = Pass;
5750b57cec5SDimitry Andric       else
5760b57cec5SDimitry Andric         T[CI][Step] = Switch;
5770b57cec5SDimitry Andric       T[J][Pets] = (J < Num/2) ? Pass : Switch;
5780b57cec5SDimitry Andric       UseUp = true;
5790b57cec5SDimitry Andric     } else { // Down
5800b57cec5SDimitry Andric       if (I < Num/2)
5810b57cec5SDimitry Andric         T[CI][Step] = Switch;
5820b57cec5SDimitry Andric       else
5830b57cec5SDimitry Andric         T[I][Step] = Pass;
5840b57cec5SDimitry Andric       T[J][Pets] = (J < Num/2) ? Switch : Pass;
5850b57cec5SDimitry Andric       UseDown = true;
5860b57cec5SDimitry Andric     }
5870b57cec5SDimitry Andric   }
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   // Reorder the working permutation according to the computed switch table
5900b57cec5SDimitry Andric   // for the last step (i.e. Pets).
5910b57cec5SDimitry Andric   for (ElemType J = 0; J != Num/2; ++J) {
5920b57cec5SDimitry Andric     ElemType PJ = P[J];         // Current values of P[J]
5930b57cec5SDimitry Andric     ElemType PC = P[J+Num/2];   // and P[conj(J)]
5940b57cec5SDimitry Andric     ElemType QJ = PJ;           // New values of P[J]
5950b57cec5SDimitry Andric     ElemType QC = PC;           // and P[conj(J)]
5960b57cec5SDimitry Andric     if (T[J][Pets] == Switch)
5970b57cec5SDimitry Andric       QC = PJ;
5980b57cec5SDimitry Andric     if (T[J+Num/2][Pets] == Switch)
5990b57cec5SDimitry Andric       QJ = PC;
6000b57cec5SDimitry Andric     P[J] = QJ;
6010b57cec5SDimitry Andric     P[J+Num/2] = QC;
6020b57cec5SDimitry Andric   }
6030b57cec5SDimitry Andric 
6040b57cec5SDimitry Andric   for (ElemType J = 0; J != Num; ++J)
6050b57cec5SDimitry Andric     if (P[J] != Ignore && P[J] >= Num/2)
6060b57cec5SDimitry Andric       P[J] -= Num/2;
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric   if (Step+1 < Log) {
6090b57cec5SDimitry Andric     if (UseUp && !route(P, T, Size/2, Step+1))
6100b57cec5SDimitry Andric       return false;
6110b57cec5SDimitry Andric     if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
6120b57cec5SDimitry Andric       return false;
6130b57cec5SDimitry Andric   }
6140b57cec5SDimitry Andric   return true;
6150b57cec5SDimitry Andric }
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric // --------------------------------------------------------------------
6180b57cec5SDimitry Andric // Support for building selection results (output instructions that are
6190b57cec5SDimitry Andric // parts of the final selection).
6200b57cec5SDimitry Andric 
6210b57cec5SDimitry Andric namespace {
6220b57cec5SDimitry Andric struct OpRef {
6230b57cec5SDimitry Andric   OpRef(SDValue V) : OpV(V) {}
6240b57cec5SDimitry Andric   bool isValue() const { return OpV.getNode() != nullptr; }
6250b57cec5SDimitry Andric   bool isValid() const { return isValue() || !(OpN & Invalid); }
626bdd1243dSDimitry Andric   bool isUndef() const { return OpN & Undef; }
6270b57cec5SDimitry Andric   static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
6280b57cec5SDimitry Andric   static OpRef fail() { return OpRef(Invalid); }
6290b57cec5SDimitry Andric 
6300b57cec5SDimitry Andric   static OpRef lo(const OpRef &R) {
6310b57cec5SDimitry Andric     assert(!R.isValue());
6320b57cec5SDimitry Andric     return OpRef(R.OpN & (Undef | Index | LoHalf));
6330b57cec5SDimitry Andric   }
6340b57cec5SDimitry Andric   static OpRef hi(const OpRef &R) {
6350b57cec5SDimitry Andric     assert(!R.isValue());
6360b57cec5SDimitry Andric     return OpRef(R.OpN & (Undef | Index | HiHalf));
6370b57cec5SDimitry Andric   }
6380b57cec5SDimitry Andric   static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
6390b57cec5SDimitry Andric 
6400b57cec5SDimitry Andric   // Direct value.
6410b57cec5SDimitry Andric   SDValue OpV = SDValue();
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric   // Reference to the operand of the input node:
6440b57cec5SDimitry Andric   // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
6450b57cec5SDimitry Andric   // operand index:
6460b57cec5SDimitry Andric   // If bit 30 is set, it's the high half of the operand.
6470b57cec5SDimitry Andric   // If bit 29 is set, it's the low half of the operand.
6480b57cec5SDimitry Andric   unsigned OpN = 0;
6490b57cec5SDimitry Andric 
6500b57cec5SDimitry Andric   enum : unsigned {
6510b57cec5SDimitry Andric     Invalid = 0x10000000,
6520b57cec5SDimitry Andric     LoHalf  = 0x20000000,
6530b57cec5SDimitry Andric     HiHalf  = 0x40000000,
6540b57cec5SDimitry Andric     Whole   = LoHalf | HiHalf,
6550b57cec5SDimitry Andric     Undef   = 0x80000000,
6560b57cec5SDimitry Andric     Index   = 0x0FFFFFFF,  // Mask of the index value.
6570b57cec5SDimitry Andric     IndexBits = 28,
6580b57cec5SDimitry Andric   };
6590b57cec5SDimitry Andric 
6600b57cec5SDimitry Andric   LLVM_DUMP_METHOD
6610b57cec5SDimitry Andric   void print(raw_ostream &OS, const SelectionDAG &G) const;
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric private:
6640b57cec5SDimitry Andric   OpRef(unsigned N) : OpN(N) {}
6650b57cec5SDimitry Andric };
6660b57cec5SDimitry Andric 
6670b57cec5SDimitry Andric struct NodeTemplate {
6680b57cec5SDimitry Andric   NodeTemplate() = default;
6690b57cec5SDimitry Andric   unsigned Opc = 0;
6700b57cec5SDimitry Andric   MVT Ty = MVT::Other;
6710b57cec5SDimitry Andric   std::vector<OpRef> Ops;
6720b57cec5SDimitry Andric 
6730b57cec5SDimitry Andric   LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
6740b57cec5SDimitry Andric };
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric struct ResultStack {
6770b57cec5SDimitry Andric   ResultStack(SDNode *Inp)
6780b57cec5SDimitry Andric     : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
6790b57cec5SDimitry Andric   SDNode *InpNode;
6800b57cec5SDimitry Andric   MVT InpTy;
6810b57cec5SDimitry Andric   unsigned push(const NodeTemplate &Res) {
6820b57cec5SDimitry Andric     List.push_back(Res);
6830b57cec5SDimitry Andric     return List.size()-1;
6840b57cec5SDimitry Andric   }
6850b57cec5SDimitry Andric   unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
6860b57cec5SDimitry Andric     NodeTemplate Res;
6870b57cec5SDimitry Andric     Res.Opc = Opc;
6880b57cec5SDimitry Andric     Res.Ty = Ty;
6890b57cec5SDimitry Andric     Res.Ops = Ops;
6900b57cec5SDimitry Andric     return push(Res);
6910b57cec5SDimitry Andric   }
6920b57cec5SDimitry Andric   bool empty() const { return List.empty(); }
6930b57cec5SDimitry Andric   unsigned size() const { return List.size(); }
6940b57cec5SDimitry Andric   unsigned top() const { return size()-1; }
6950b57cec5SDimitry Andric   const NodeTemplate &operator[](unsigned I) const { return List[I]; }
6960b57cec5SDimitry Andric   unsigned reset(unsigned NewTop) {
6970b57cec5SDimitry Andric     List.resize(NewTop+1);
6980b57cec5SDimitry Andric     return NewTop;
6990b57cec5SDimitry Andric   }
7000b57cec5SDimitry Andric 
7010b57cec5SDimitry Andric   using BaseType = std::vector<NodeTemplate>;
7020b57cec5SDimitry Andric   BaseType::iterator begin() { return List.begin(); }
7030b57cec5SDimitry Andric   BaseType::iterator end()   { return List.end(); }
7040b57cec5SDimitry Andric   BaseType::const_iterator begin() const { return List.begin(); }
7050b57cec5SDimitry Andric   BaseType::const_iterator end() const   { return List.end(); }
7060b57cec5SDimitry Andric 
7070b57cec5SDimitry Andric   BaseType List;
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric   LLVM_DUMP_METHOD
7100b57cec5SDimitry Andric   void print(raw_ostream &OS, const SelectionDAG &G) const;
7110b57cec5SDimitry Andric };
7120b57cec5SDimitry Andric } // namespace
7130b57cec5SDimitry Andric 
7140b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
7150b57cec5SDimitry Andric void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
7160b57cec5SDimitry Andric   if (isValue()) {
7170b57cec5SDimitry Andric     OpV.getNode()->print(OS, &G);
7180b57cec5SDimitry Andric     return;
7190b57cec5SDimitry Andric   }
7200b57cec5SDimitry Andric   if (OpN & Invalid) {
7210b57cec5SDimitry Andric     OS << "invalid";
7220b57cec5SDimitry Andric     return;
7230b57cec5SDimitry Andric   }
7240b57cec5SDimitry Andric   if (OpN & Undef) {
7250b57cec5SDimitry Andric     OS << "undef";
7260b57cec5SDimitry Andric     return;
7270b57cec5SDimitry Andric   }
7280b57cec5SDimitry Andric   if ((OpN & Whole) != Whole) {
7290b57cec5SDimitry Andric     assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
7300b57cec5SDimitry Andric     if (OpN & LoHalf)
7310b57cec5SDimitry Andric       OS << "lo ";
7320b57cec5SDimitry Andric     else
7330b57cec5SDimitry Andric       OS << "hi ";
7340b57cec5SDimitry Andric   }
7350b57cec5SDimitry Andric   OS << '#' << SignExtend32(OpN & Index, IndexBits);
7360b57cec5SDimitry Andric }
7370b57cec5SDimitry Andric 
7380b57cec5SDimitry Andric void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
7390b57cec5SDimitry Andric   const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
7400b57cec5SDimitry Andric   OS << format("%8s", EVT(Ty).getEVTString().c_str()) << "  "
7410b57cec5SDimitry Andric      << TII.getName(Opc);
7420b57cec5SDimitry Andric   bool Comma = false;
7430b57cec5SDimitry Andric   for (const auto &R : Ops) {
7440b57cec5SDimitry Andric     if (Comma)
7450b57cec5SDimitry Andric       OS << ',';
7460b57cec5SDimitry Andric     Comma = true;
7470b57cec5SDimitry Andric     OS << ' ';
7480b57cec5SDimitry Andric     R.print(OS, G);
7490b57cec5SDimitry Andric   }
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
7530b57cec5SDimitry Andric   OS << "Input node:\n";
7540b57cec5SDimitry Andric #ifndef NDEBUG
7550b57cec5SDimitry Andric   InpNode->dumpr(&G);
7560b57cec5SDimitry Andric #endif
7570b57cec5SDimitry Andric   OS << "Result templates:\n";
7580b57cec5SDimitry Andric   for (unsigned I = 0, E = List.size(); I != E; ++I) {
7590b57cec5SDimitry Andric     OS << '[' << I << "] ";
7600b57cec5SDimitry Andric     List[I].print(OS, G);
7610b57cec5SDimitry Andric     OS << '\n';
7620b57cec5SDimitry Andric   }
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric #endif
7650b57cec5SDimitry Andric 
7660b57cec5SDimitry Andric namespace {
7670b57cec5SDimitry Andric struct ShuffleMask {
7680b57cec5SDimitry Andric   ShuffleMask(ArrayRef<int> M) : Mask(M) {
76904eeddc0SDimitry Andric     for (int M : Mask) {
7700b57cec5SDimitry Andric       if (M == -1)
7710b57cec5SDimitry Andric         continue;
7720b57cec5SDimitry Andric       MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
7730b57cec5SDimitry Andric       MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
7740b57cec5SDimitry Andric     }
7750b57cec5SDimitry Andric   }
7760b57cec5SDimitry Andric 
7770b57cec5SDimitry Andric   ArrayRef<int> Mask;
7780b57cec5SDimitry Andric   int MinSrc = -1, MaxSrc = -1;
7790b57cec5SDimitry Andric 
7800b57cec5SDimitry Andric   ShuffleMask lo() const {
7810b57cec5SDimitry Andric     size_t H = Mask.size()/2;
7820b57cec5SDimitry Andric     return ShuffleMask(Mask.take_front(H));
7830b57cec5SDimitry Andric   }
7840b57cec5SDimitry Andric   ShuffleMask hi() const {
7850b57cec5SDimitry Andric     size_t H = Mask.size()/2;
7860b57cec5SDimitry Andric     return ShuffleMask(Mask.take_back(H));
7870b57cec5SDimitry Andric   }
7880b57cec5SDimitry Andric 
7890b57cec5SDimitry Andric   void print(raw_ostream &OS) const {
7900b57cec5SDimitry Andric     OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
7910b57cec5SDimitry Andric     for (int M : Mask)
7920b57cec5SDimitry Andric       OS << ' ' << M;
7930b57cec5SDimitry Andric     OS << " }";
7940b57cec5SDimitry Andric   }
7950b57cec5SDimitry Andric };
796e8d8bef9SDimitry Andric 
797e8d8bef9SDimitry Andric LLVM_ATTRIBUTE_UNUSED
798e8d8bef9SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
799e8d8bef9SDimitry Andric   SM.print(OS);
800e8d8bef9SDimitry Andric   return OS;
801e8d8bef9SDimitry Andric }
8020b57cec5SDimitry Andric } // namespace
8030b57cec5SDimitry Andric 
804bdd1243dSDimitry Andric namespace shuffles {
805bdd1243dSDimitry Andric using MaskT = SmallVector<int, 128>;
806bdd1243dSDimitry Andric // Vdd = vshuffvdd(Vu, Vv, Rt)
807bdd1243dSDimitry Andric // Vdd = vdealvdd(Vu, Vv, Rt)
808bdd1243dSDimitry Andric // Vd  = vpack(Vu, Vv, Size, TakeOdd)
809bdd1243dSDimitry Andric // Vd  = vshuff(Vu, Vv, Size, TakeOdd)
810bdd1243dSDimitry Andric // Vd  = vdeal(Vu, Vv, Size, TakeOdd)
811bdd1243dSDimitry Andric // Vd  = vdealb4w(Vu, Vv)
812bdd1243dSDimitry Andric 
813bdd1243dSDimitry Andric ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
814bdd1243dSDimitry Andric ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
815bdd1243dSDimitry Andric 
816bdd1243dSDimitry Andric MaskT vshuffvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) {
817bdd1243dSDimitry Andric   int Len = Vu.size();
818bdd1243dSDimitry Andric   MaskT Vdd(2 * Len);
819bdd1243dSDimitry Andric   std::copy(Vv.begin(), Vv.end(), Vdd.begin());
820bdd1243dSDimitry Andric   std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
821bdd1243dSDimitry Andric 
822bdd1243dSDimitry Andric   auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
823bdd1243dSDimitry Andric   auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
824bdd1243dSDimitry Andric 
825bdd1243dSDimitry Andric   for (int Offset = 1; Offset < Len; Offset *= 2) {
826bdd1243dSDimitry Andric     if ((Rt & Offset) == 0)
827bdd1243dSDimitry Andric       continue;
828bdd1243dSDimitry Andric     for (int i = 0; i != Len; ++i) {
829bdd1243dSDimitry Andric       if ((i & Offset) == 0)
830bdd1243dSDimitry Andric         std::swap(Vd1[i], Vd0[i + Offset]);
831bdd1243dSDimitry Andric     }
832bdd1243dSDimitry Andric   }
833bdd1243dSDimitry Andric   return Vdd;
834bdd1243dSDimitry Andric }
835bdd1243dSDimitry Andric 
836bdd1243dSDimitry Andric MaskT vdealvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) {
837bdd1243dSDimitry Andric   int Len = Vu.size();
838bdd1243dSDimitry Andric   MaskT Vdd(2 * Len);
839bdd1243dSDimitry Andric   std::copy(Vv.begin(), Vv.end(), Vdd.begin());
840bdd1243dSDimitry Andric   std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
841bdd1243dSDimitry Andric 
842bdd1243dSDimitry Andric   auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
843bdd1243dSDimitry Andric   auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
844bdd1243dSDimitry Andric 
845bdd1243dSDimitry Andric   for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
846bdd1243dSDimitry Andric     if ((Rt & Offset) == 0)
847bdd1243dSDimitry Andric       continue;
848bdd1243dSDimitry Andric     for (int i = 0; i != Len; ++i) {
849bdd1243dSDimitry Andric       if ((i & Offset) == 0)
850bdd1243dSDimitry Andric         std::swap(Vd1[i], Vd0[i + Offset]);
851bdd1243dSDimitry Andric     }
852bdd1243dSDimitry Andric   }
853bdd1243dSDimitry Andric   return Vdd;
854bdd1243dSDimitry Andric }
855bdd1243dSDimitry Andric 
856bdd1243dSDimitry Andric MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
857bdd1243dSDimitry Andric   int Len = Vu.size();
858bdd1243dSDimitry Andric   MaskT Vd(Len);
859bdd1243dSDimitry Andric   auto Odd = static_cast<int>(TakeOdd);
860bdd1243dSDimitry Andric   for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
861bdd1243dSDimitry Andric     for (int b = 0; b != static_cast<int>(Size); ++b) {
862bdd1243dSDimitry Andric       // clang-format off
863bdd1243dSDimitry Andric       Vd[i * Size + b]           = Vv[(2 * i + Odd) * Size + b];
864bdd1243dSDimitry Andric       Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
865bdd1243dSDimitry Andric       // clang-format on
866bdd1243dSDimitry Andric     }
867bdd1243dSDimitry Andric   }
868bdd1243dSDimitry Andric   return Vd;
869bdd1243dSDimitry Andric }
870bdd1243dSDimitry Andric 
871bdd1243dSDimitry Andric MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
872bdd1243dSDimitry Andric   int Len = Vu.size();
873bdd1243dSDimitry Andric   MaskT Vd(Len);
874bdd1243dSDimitry Andric   auto Odd = static_cast<int>(TakeOdd);
875bdd1243dSDimitry Andric   for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
876bdd1243dSDimitry Andric     for (int b = 0; b != static_cast<int>(Size); ++b) {
877bdd1243dSDimitry Andric       Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
878bdd1243dSDimitry Andric       Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
879bdd1243dSDimitry Andric     }
880bdd1243dSDimitry Andric   }
881bdd1243dSDimitry Andric   return Vd;
882bdd1243dSDimitry Andric }
883bdd1243dSDimitry Andric 
884bdd1243dSDimitry Andric MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
885bdd1243dSDimitry Andric   int Len = Vu.size();
886bdd1243dSDimitry Andric   MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
887bdd1243dSDimitry Andric   return vpack(hi(T), lo(T), Size, TakeOdd);
888bdd1243dSDimitry Andric }
889bdd1243dSDimitry Andric 
890bdd1243dSDimitry Andric MaskT vdealb4w(ArrayRef<int> Vu, ArrayRef<int> Vv) {
891bdd1243dSDimitry Andric   int Len = Vu.size();
892bdd1243dSDimitry Andric   MaskT Vd(Len);
893bdd1243dSDimitry Andric   for (int i = 0, e = Len / 4; i != e; ++i) {
894bdd1243dSDimitry Andric     Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
895bdd1243dSDimitry Andric     Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
896bdd1243dSDimitry Andric     Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
897bdd1243dSDimitry Andric     Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
898bdd1243dSDimitry Andric   }
899bdd1243dSDimitry Andric   return Vd;
900bdd1243dSDimitry Andric }
901bdd1243dSDimitry Andric 
902bdd1243dSDimitry Andric template <typename ShuffFunc, typename... OptArgs>
903bdd1243dSDimitry Andric auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
904bdd1243dSDimitry Andric   MaskT Vu(Length), Vv(Length);
905bdd1243dSDimitry Andric   std::iota(Vu.begin(), Vu.end(), Length); // High
906bdd1243dSDimitry Andric   std::iota(Vv.begin(), Vv.end(), 0);      // Low
907bdd1243dSDimitry Andric   return S(Vu, Vv, args...);
908bdd1243dSDimitry Andric }
909bdd1243dSDimitry Andric 
910bdd1243dSDimitry Andric } // namespace shuffles
911bdd1243dSDimitry Andric 
9120b57cec5SDimitry Andric // --------------------------------------------------------------------
9130b57cec5SDimitry Andric // The HvxSelector class.
9140b57cec5SDimitry Andric 
9150b57cec5SDimitry Andric static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) {
9160b57cec5SDimitry Andric   return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
9170b57cec5SDimitry Andric }
9180b57cec5SDimitry Andric static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) {
91981ad6265SDimitry Andric   return G.getSubtarget<HexagonSubtarget>();
9200b57cec5SDimitry Andric }
9210b57cec5SDimitry Andric 
9220b57cec5SDimitry Andric namespace llvm {
9230b57cec5SDimitry Andric   struct HvxSelector {
9240b57cec5SDimitry Andric     const HexagonTargetLowering &Lower;
9250b57cec5SDimitry Andric     HexagonDAGToDAGISel &ISel;
9260b57cec5SDimitry Andric     SelectionDAG &DAG;
9270b57cec5SDimitry Andric     const HexagonSubtarget &HST;
9280b57cec5SDimitry Andric     const unsigned HwLen;
9290b57cec5SDimitry Andric 
9300b57cec5SDimitry Andric     HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
9310b57cec5SDimitry Andric         : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
9320b57cec5SDimitry Andric           HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
9330b57cec5SDimitry Andric 
9340b57cec5SDimitry Andric     MVT getSingleVT(MVT ElemTy) const {
935fe6060f1SDimitry Andric       assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
9360b57cec5SDimitry Andric       unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
9370b57cec5SDimitry Andric       return MVT::getVectorVT(ElemTy, NumElems);
9380b57cec5SDimitry Andric     }
9390b57cec5SDimitry Andric 
9400b57cec5SDimitry Andric     MVT getPairVT(MVT ElemTy) const {
941fe6060f1SDimitry Andric       assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
9420b57cec5SDimitry Andric       unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
9430b57cec5SDimitry Andric       return MVT::getVectorVT(ElemTy, NumElems);
9440b57cec5SDimitry Andric     }
9450b57cec5SDimitry Andric 
946fe6060f1SDimitry Andric     MVT getBoolVT() const {
947fe6060f1SDimitry Andric       // Return HwLen x i1.
948fe6060f1SDimitry Andric       return MVT::getVectorVT(MVT::i1, HwLen);
949fe6060f1SDimitry Andric     }
950fe6060f1SDimitry Andric 
951bdd1243dSDimitry Andric     void selectExtractSubvector(SDNode *N);
9520b57cec5SDimitry Andric     void selectShuffle(SDNode *N);
9530b57cec5SDimitry Andric     void selectRor(SDNode *N);
9540b57cec5SDimitry Andric     void selectVAlign(SDNode *N);
9550b57cec5SDimitry Andric 
956bdd1243dSDimitry Andric     static SmallVector<uint32_t, 8> getPerfectCompletions(ShuffleMask SM,
957bdd1243dSDimitry Andric                                                           unsigned Width);
958bdd1243dSDimitry Andric     static SmallVector<uint32_t, 8> completeToPerfect(
959bdd1243dSDimitry Andric         ArrayRef<uint32_t> Completions, unsigned Width);
960bdd1243dSDimitry Andric     static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
961bdd1243dSDimitry Andric 
9620b57cec5SDimitry Andric   private:
963e8d8bef9SDimitry Andric     void select(SDNode *ISelN);
9640b57cec5SDimitry Andric     void materialize(const ResultStack &Results);
9650b57cec5SDimitry Andric 
966fe6060f1SDimitry Andric     SDValue getConst32(int Val, const SDLoc &dl);
9670b57cec5SDimitry Andric     SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric     enum : unsigned {
9700b57cec5SDimitry Andric       None,
9710b57cec5SDimitry Andric       PackMux,
9720b57cec5SDimitry Andric     };
973fe6060f1SDimitry Andric     OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
974bdd1243dSDimitry Andric     OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
975bdd1243dSDimitry Andric 
9760b57cec5SDimitry Andric     OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
9770b57cec5SDimitry Andric                 MutableArrayRef<int> NewMask, unsigned Options = None);
9780b57cec5SDimitry Andric     OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
9790b57cec5SDimitry Andric                 MutableArrayRef<int> NewMask);
9800b57cec5SDimitry Andric     OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
9810b57cec5SDimitry Andric                 ResultStack &Results);
9820b57cec5SDimitry Andric     OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
9830b57cec5SDimitry Andric                 ResultStack &Results);
9840b57cec5SDimitry Andric 
9850b57cec5SDimitry Andric     OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
9860b57cec5SDimitry Andric     OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
9870b57cec5SDimitry Andric     OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
9880b57cec5SDimitry Andric     OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
9890b57cec5SDimitry Andric 
9900b57cec5SDimitry Andric     OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
9910b57cec5SDimitry Andric     OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
9920b57cec5SDimitry Andric     OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
9930b57cec5SDimitry Andric     OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
9940b57cec5SDimitry Andric 
9950b57cec5SDimitry Andric     bool selectVectorConstants(SDNode *N);
9960b57cec5SDimitry Andric     bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
9970b57cec5SDimitry Andric                           SDValue Va, SDValue Vb, SDNode *N);
9980b57cec5SDimitry Andric   };
999bdd1243dSDimitry Andric } // namespace llvm
10000b57cec5SDimitry Andric 
10010b57cec5SDimitry Andric static void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL,
10020b57cec5SDimitry Andric                       MutableArrayRef<int> MaskR) {
10030b57cec5SDimitry Andric   unsigned VecLen = Mask.size();
10040b57cec5SDimitry Andric   assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
10050b57cec5SDimitry Andric   for (unsigned I = 0; I != VecLen; ++I) {
10060b57cec5SDimitry Andric     int M = Mask[I];
10070b57cec5SDimitry Andric     if (M < 0) {
10080b57cec5SDimitry Andric       MaskL[I] = MaskR[I] = -1;
10090b57cec5SDimitry Andric     } else if (unsigned(M) < VecLen) {
10100b57cec5SDimitry Andric       MaskL[I] = M;
10110b57cec5SDimitry Andric       MaskR[I] = -1;
10120b57cec5SDimitry Andric     } else {
10130b57cec5SDimitry Andric       MaskL[I] = -1;
10140b57cec5SDimitry Andric       MaskR[I] = M-VecLen;
10150b57cec5SDimitry Andric     }
10160b57cec5SDimitry Andric   }
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric 
10190b57cec5SDimitry Andric static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
10200b57cec5SDimitry Andric                                          unsigned MaxLen) {
10210b57cec5SDimitry Andric   assert(A.size() > 0 && A.size() >= MaxLen);
10220b57cec5SDimitry Andric   int F = A[0];
10230b57cec5SDimitry Andric   int E = F;
10240b57cec5SDimitry Andric   for (unsigned I = 1; I != MaxLen; ++I) {
10250b57cec5SDimitry Andric     if (A[I] - E != Inc)
10260b57cec5SDimitry Andric       return { F, I };
10270b57cec5SDimitry Andric     E = A[I];
10280b57cec5SDimitry Andric   }
10290b57cec5SDimitry Andric   return { F, MaxLen };
10300b57cec5SDimitry Andric }
10310b57cec5SDimitry Andric 
10320b57cec5SDimitry Andric static bool isUndef(ArrayRef<int> Mask) {
10330b57cec5SDimitry Andric   for (int Idx : Mask)
10340b57cec5SDimitry Andric     if (Idx != -1)
10350b57cec5SDimitry Andric       return false;
10360b57cec5SDimitry Andric   return true;
10370b57cec5SDimitry Andric }
10380b57cec5SDimitry Andric 
10390b57cec5SDimitry Andric static bool isIdentity(ArrayRef<int> Mask) {
10400b57cec5SDimitry Andric   for (int I = 0, E = Mask.size(); I != E; ++I) {
10410b57cec5SDimitry Andric     int M = Mask[I];
10420b57cec5SDimitry Andric     if (M >= 0 && M != I)
10430b57cec5SDimitry Andric       return false;
10440b57cec5SDimitry Andric   }
10450b57cec5SDimitry Andric   return true;
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric 
1048bdd1243dSDimitry Andric static bool isLowHalfOnly(ArrayRef<int> Mask) {
1049bdd1243dSDimitry Andric   int L = Mask.size();
1050bdd1243dSDimitry Andric   assert(L % 2 == 0);
1051bdd1243dSDimitry Andric   // Check if the second half of the mask is all-undef.
1052bdd1243dSDimitry Andric   return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1053bdd1243dSDimitry Andric }
1054bdd1243dSDimitry Andric 
1055fe6060f1SDimitry Andric static SmallVector<unsigned, 4> getInputSegmentList(ShuffleMask SM,
1056fe6060f1SDimitry Andric                                                     unsigned SegLen) {
1057fe6060f1SDimitry Andric   assert(isPowerOf2_32(SegLen));
1058fe6060f1SDimitry Andric   SmallVector<unsigned, 4> SegList;
1059fe6060f1SDimitry Andric   if (SM.MaxSrc == -1)
1060fe6060f1SDimitry Andric     return SegList;
1061fe6060f1SDimitry Andric 
1062fe6060f1SDimitry Andric   unsigned Shift = Log2_32(SegLen);
1063fe6060f1SDimitry Andric   BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1064fe6060f1SDimitry Andric 
106504eeddc0SDimitry Andric   for (int M : SM.Mask) {
1066fe6060f1SDimitry Andric     if (M >= 0)
1067fe6060f1SDimitry Andric       Segs.set(M >> Shift);
1068fe6060f1SDimitry Andric   }
1069fe6060f1SDimitry Andric 
1070fe6060f1SDimitry Andric   for (unsigned B : Segs.set_bits())
1071fe6060f1SDimitry Andric     SegList.push_back(B);
1072fe6060f1SDimitry Andric   return SegList;
1073fe6060f1SDimitry Andric }
1074fe6060f1SDimitry Andric 
1075fe6060f1SDimitry Andric static SmallVector<unsigned, 4> getOutputSegmentMap(ShuffleMask SM,
1076fe6060f1SDimitry Andric                                                     unsigned SegLen) {
1077fe6060f1SDimitry Andric   // Calculate the layout of the output segments in terms of the input
1078fe6060f1SDimitry Andric   // segments.
1079fe6060f1SDimitry Andric   // For example [1,3,1,0] means that the output consists of 4 output
1080fe6060f1SDimitry Andric   // segments, where the first output segment has only elements of the
1081fe6060f1SDimitry Andric   // input segment at index 1. The next output segment only has elements
1082fe6060f1SDimitry Andric   // of the input segment 3, etc.
1083fe6060f1SDimitry Andric   // If an output segment only has undef elements, the value will be ~0u.
1084fe6060f1SDimitry Andric   // If an output segment has elements from more than one input segment,
1085fe6060f1SDimitry Andric   // the corresponding value will be ~1u.
1086fe6060f1SDimitry Andric   unsigned MaskLen = SM.Mask.size();
1087fe6060f1SDimitry Andric   assert(MaskLen % SegLen == 0);
1088fe6060f1SDimitry Andric   SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1089fe6060f1SDimitry Andric 
1090fe6060f1SDimitry Andric   for (int S = 0, E = Map.size(); S != E; ++S) {
1091fe6060f1SDimitry Andric     unsigned Idx = ~0u;
1092fe6060f1SDimitry Andric     for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1093fe6060f1SDimitry Andric       int M = SM.Mask[S*SegLen + I];
1094fe6060f1SDimitry Andric       if (M < 0)
1095fe6060f1SDimitry Andric         continue;
1096fe6060f1SDimitry Andric       unsigned G = M / SegLen; // Input segment of this element.
1097fe6060f1SDimitry Andric       if (Idx == ~0u) {
1098fe6060f1SDimitry Andric         Idx = G;
1099fe6060f1SDimitry Andric       } else if (Idx != G) {
1100fe6060f1SDimitry Andric         Idx = ~1u;
1101fe6060f1SDimitry Andric         break;
1102fe6060f1SDimitry Andric       }
1103fe6060f1SDimitry Andric     }
1104fe6060f1SDimitry Andric     Map[S] = Idx;
1105fe6060f1SDimitry Andric   }
1106fe6060f1SDimitry Andric 
1107fe6060f1SDimitry Andric   return Map;
1108fe6060f1SDimitry Andric }
1109fe6060f1SDimitry Andric 
1110fe6060f1SDimitry Andric static void packSegmentMask(ArrayRef<int> Mask, ArrayRef<unsigned> OutSegMap,
1111fe6060f1SDimitry Andric                             unsigned SegLen, MutableArrayRef<int> PackedMask) {
1112fe6060f1SDimitry Andric   SmallVector<unsigned, 4> InvMap;
1113fe6060f1SDimitry Andric   for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1114fe6060f1SDimitry Andric     unsigned S = OutSegMap[I];
1115fe6060f1SDimitry Andric     assert(S != ~0u && "Unexpected undef");
1116fe6060f1SDimitry Andric     assert(S != ~1u && "Unexpected multi");
1117fe6060f1SDimitry Andric     if (InvMap.size() <= S)
1118fe6060f1SDimitry Andric       InvMap.resize(S+1);
1119fe6060f1SDimitry Andric     InvMap[S] = I;
1120fe6060f1SDimitry Andric   }
1121fe6060f1SDimitry Andric 
1122fe6060f1SDimitry Andric   unsigned Shift = Log2_32(SegLen);
1123fe6060f1SDimitry Andric   for (int I = 0, E = Mask.size(); I != E; ++I) {
1124fe6060f1SDimitry Andric     int M = Mask[I];
1125fe6060f1SDimitry Andric     if (M >= 0) {
1126fe6060f1SDimitry Andric       int OutIdx = InvMap[M >> Shift];
1127fe6060f1SDimitry Andric       M = (M & (SegLen-1)) + SegLen*OutIdx;
1128fe6060f1SDimitry Andric     }
1129fe6060f1SDimitry Andric     PackedMask[I] = M;
1130fe6060f1SDimitry Andric   }
1131fe6060f1SDimitry Andric }
1132fe6060f1SDimitry Andric 
11330b57cec5SDimitry Andric bool HvxSelector::selectVectorConstants(SDNode *N) {
11340b57cec5SDimitry Andric   // Constant vectors are generated as loads from constant pools or as
11350b57cec5SDimitry Andric   // splats of a constant value. Since they are generated during the
11360b57cec5SDimitry Andric   // selection process, the main selection algorithm is not aware of them.
11370b57cec5SDimitry Andric   // Select them directly here.
11380b57cec5SDimitry Andric   SmallVector<SDNode*,4> Nodes;
11390b57cec5SDimitry Andric   SetVector<SDNode*> WorkQ;
11400b57cec5SDimitry Andric 
11410b57cec5SDimitry Andric   // The DAG can change (due to CSE) during selection, so cache all the
11420b57cec5SDimitry Andric   // unselected nodes first to avoid traversing a mutating DAG.
11430b57cec5SDimitry Andric   WorkQ.insert(N);
11440b57cec5SDimitry Andric   for (unsigned i = 0; i != WorkQ.size(); ++i) {
11450b57cec5SDimitry Andric     SDNode *W = WorkQ[i];
1146e8d8bef9SDimitry Andric     if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
11470b57cec5SDimitry Andric       Nodes.push_back(W);
11480b57cec5SDimitry Andric     for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
11490b57cec5SDimitry Andric       WorkQ.insert(W->getOperand(j).getNode());
11500b57cec5SDimitry Andric   }
11510b57cec5SDimitry Andric 
11520b57cec5SDimitry Andric   for (SDNode *L : Nodes)
1153e8d8bef9SDimitry Andric     select(L);
11540b57cec5SDimitry Andric 
11550b57cec5SDimitry Andric   return !Nodes.empty();
11560b57cec5SDimitry Andric }
11570b57cec5SDimitry Andric 
11580b57cec5SDimitry Andric void HvxSelector::materialize(const ResultStack &Results) {
11590b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {
11600b57cec5SDimitry Andric     dbgs() << "Materializing\n";
11610b57cec5SDimitry Andric     Results.print(dbgs(), DAG);
11620b57cec5SDimitry Andric   });
11630b57cec5SDimitry Andric   if (Results.empty())
11640b57cec5SDimitry Andric     return;
11650b57cec5SDimitry Andric   const SDLoc &dl(Results.InpNode);
11660b57cec5SDimitry Andric   std::vector<SDValue> Output;
11670b57cec5SDimitry Andric 
11680b57cec5SDimitry Andric   for (unsigned I = 0, E = Results.size(); I != E; ++I) {
11690b57cec5SDimitry Andric     const NodeTemplate &Node = Results[I];
11700b57cec5SDimitry Andric     std::vector<SDValue> Ops;
11710b57cec5SDimitry Andric     for (const OpRef &R : Node.Ops) {
11720b57cec5SDimitry Andric       assert(R.isValid());
11730b57cec5SDimitry Andric       if (R.isValue()) {
11740b57cec5SDimitry Andric         Ops.push_back(R.OpV);
11750b57cec5SDimitry Andric         continue;
11760b57cec5SDimitry Andric       }
11770b57cec5SDimitry Andric       if (R.OpN & OpRef::Undef) {
11780b57cec5SDimitry Andric         MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
11790b57cec5SDimitry Andric         Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
11800b57cec5SDimitry Andric         continue;
11810b57cec5SDimitry Andric       }
11820b57cec5SDimitry Andric       // R is an index of a result.
11830b57cec5SDimitry Andric       unsigned Part = R.OpN & OpRef::Whole;
11840b57cec5SDimitry Andric       int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
11850b57cec5SDimitry Andric       if (Idx < 0)
11860b57cec5SDimitry Andric         Idx += I;
11870b57cec5SDimitry Andric       assert(Idx >= 0 && unsigned(Idx) < Output.size());
11880b57cec5SDimitry Andric       SDValue Op = Output[Idx];
11890b57cec5SDimitry Andric       MVT OpTy = Op.getValueType().getSimpleVT();
11900b57cec5SDimitry Andric       if (Part != OpRef::Whole) {
11910b57cec5SDimitry Andric         assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
11920b57cec5SDimitry Andric         MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
11930b57cec5SDimitry Andric                                       OpTy.getVectorNumElements()/2);
11940b57cec5SDimitry Andric         unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
11950b57cec5SDimitry Andric                                                : Hexagon::vsub_hi;
11960b57cec5SDimitry Andric         Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
11970b57cec5SDimitry Andric       }
11980b57cec5SDimitry Andric       Ops.push_back(Op);
11990b57cec5SDimitry Andric     } // for (Node : Results)
12000b57cec5SDimitry Andric 
12010b57cec5SDimitry Andric     assert(Node.Ty != MVT::Other);
12020b57cec5SDimitry Andric     SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
12030b57cec5SDimitry Andric                       ? Ops.front().getNode()
12040b57cec5SDimitry Andric                       : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
12050b57cec5SDimitry Andric     Output.push_back(SDValue(ResN, 0));
12060b57cec5SDimitry Andric   }
12070b57cec5SDimitry Andric 
12080b57cec5SDimitry Andric   SDNode *OutN = Output.back().getNode();
12090b57cec5SDimitry Andric   SDNode *InpN = Results.InpNode;
12100b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {
12110b57cec5SDimitry Andric     dbgs() << "Generated node:\n";
12120b57cec5SDimitry Andric     OutN->dumpr(&DAG);
12130b57cec5SDimitry Andric   });
12140b57cec5SDimitry Andric 
12150b57cec5SDimitry Andric   ISel.ReplaceNode(InpN, OutN);
12160b57cec5SDimitry Andric   selectVectorConstants(OutN);
12170b57cec5SDimitry Andric   DAG.RemoveDeadNodes();
12180b57cec5SDimitry Andric }
12190b57cec5SDimitry Andric 
1220fe6060f1SDimitry Andric OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
12210b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
12220b57cec5SDimitry Andric   const SDLoc &dl(Results.InpNode);
12230b57cec5SDimitry Andric   Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1224fe6060f1SDimitry Andric     getConst32(Hexagon::HvxWRRegClassID, dl),
1225fe6060f1SDimitry Andric     Lo, getConst32(Hexagon::vsub_lo, dl),
1226fe6060f1SDimitry Andric     Hi, getConst32(Hexagon::vsub_hi, dl),
12270b57cec5SDimitry Andric   });
12280b57cec5SDimitry Andric   return OpRef::res(Results.top());
12290b57cec5SDimitry Andric }
12300b57cec5SDimitry Andric 
1231bdd1243dSDimitry Andric OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1232bdd1243dSDimitry Andric                            ResultStack &Results) {
1233bdd1243dSDimitry Andric   // Do a funnel shift towards the low end (shift right) by Amount bytes.
1234bdd1243dSDimitry Andric   // If Amount < 0, treat it as shift left, i.e. do a shift right by
1235bdd1243dSDimitry Andric   // Amount + HwLen.
1236bdd1243dSDimitry Andric   auto VecLen = static_cast<int>(HwLen);
1237bdd1243dSDimitry Andric 
1238bdd1243dSDimitry Andric   if (Amount == 0)
1239bdd1243dSDimitry Andric     return Va;
1240bdd1243dSDimitry Andric   if (Amount == VecLen)
1241bdd1243dSDimitry Andric     return Vb;
1242bdd1243dSDimitry Andric 
1243bdd1243dSDimitry Andric   MVT Ty = getSingleVT(MVT::i8);
1244bdd1243dSDimitry Andric   const SDLoc &dl(Results.InpNode);
1245bdd1243dSDimitry Andric 
1246bdd1243dSDimitry Andric   if (Amount < 0)
1247bdd1243dSDimitry Andric     Amount += VecLen;
1248bdd1243dSDimitry Andric   if (Amount > VecLen) {
1249bdd1243dSDimitry Andric     Amount -= VecLen;
1250bdd1243dSDimitry Andric     std::swap(Va, Vb);
1251bdd1243dSDimitry Andric   }
1252bdd1243dSDimitry Andric 
1253bdd1243dSDimitry Andric   if (isUInt<3>(Amount)) {
1254bdd1243dSDimitry Andric     SDValue A = getConst32(Amount, dl);
1255bdd1243dSDimitry Andric     Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1256bdd1243dSDimitry Andric   } else if (isUInt<3>(VecLen - Amount)) {
1257bdd1243dSDimitry Andric     SDValue A = getConst32(VecLen - Amount, dl);
1258bdd1243dSDimitry Andric     Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1259bdd1243dSDimitry Andric   } else {
1260bdd1243dSDimitry Andric     SDValue A = getConst32(Amount, dl);
1261bdd1243dSDimitry Andric     Results.push(Hexagon::A2_tfrsi, Ty, {A});
1262bdd1243dSDimitry Andric     Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1263bdd1243dSDimitry Andric   }
1264bdd1243dSDimitry Andric   return OpRef::res(Results.top());
1265bdd1243dSDimitry Andric }
1266bdd1243dSDimitry Andric 
1267fe6060f1SDimitry Andric // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1268fe6060f1SDimitry Andric // pack these halves into a single vector, and remap SM into NewMask to use
1269fe6060f1SDimitry Andric // the new vector instead.
12700b57cec5SDimitry Andric OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
12710b57cec5SDimitry Andric                          ResultStack &Results, MutableArrayRef<int> NewMask,
12720b57cec5SDimitry Andric                          unsigned Options) {
12730b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
12740b57cec5SDimitry Andric   if (!Va.isValid() || !Vb.isValid())
12750b57cec5SDimitry Andric     return OpRef::fail();
12760b57cec5SDimitry Andric 
1277bdd1243dSDimitry Andric   if (Vb.isUndef()) {
1278bdd1243dSDimitry Andric     std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1279bdd1243dSDimitry Andric     return Va;
1280bdd1243dSDimitry Andric   }
1281bdd1243dSDimitry Andric   if (Va.isUndef()) {
1282bdd1243dSDimitry Andric     std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1283bdd1243dSDimitry Andric     ShuffleVectorSDNode::commuteMask(NewMask);
1284bdd1243dSDimitry Andric     return Vb;
1285bdd1243dSDimitry Andric   }
1286bdd1243dSDimitry Andric 
12870b57cec5SDimitry Andric   MVT Ty = getSingleVT(MVT::i8);
1288fe6060f1SDimitry Andric   MVT PairTy = getPairVT(MVT::i8);
1289fe6060f1SDimitry Andric   OpRef Inp[2] = {Va, Vb};
1290fe6060f1SDimitry Andric   unsigned VecLen = SM.Mask.size();
12910b57cec5SDimitry Andric 
1292fe6060f1SDimitry Andric   auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1293fe6060f1SDimitry Andric                        ResultStack &Results) {
1294fe6060f1SDimitry Andric     if (Amt == 0)
1295fe6060f1SDimitry Andric       return Lo;
12960b57cec5SDimitry Andric     const SDLoc &dl(Results.InpNode);
1297fe6060f1SDimitry Andric     if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1298fe6060f1SDimitry Andric       bool IsRight = isUInt<3>(Amt); // Right align.
1299fe6060f1SDimitry Andric       SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1300fe6060f1SDimitry Andric       unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1301fe6060f1SDimitry Andric       Results.push(Opc, Ty, {Hi, Lo, S});
13020b57cec5SDimitry Andric       return OpRef::res(Results.top());
13030b57cec5SDimitry Andric     }
1304fe6060f1SDimitry Andric     Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1305fe6060f1SDimitry Andric     OpRef A = OpRef::res(Results.top());
1306fe6060f1SDimitry Andric     Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1307fe6060f1SDimitry Andric     return OpRef::res(Results.top());
1308fe6060f1SDimitry Andric   };
1309fe6060f1SDimitry Andric 
1310fe6060f1SDimitry Andric   // Segment is a vector half.
1311fe6060f1SDimitry Andric   unsigned SegLen = HwLen / 2;
1312fe6060f1SDimitry Andric 
1313fe6060f1SDimitry Andric   // Check if we can shuffle vector halves around to get the used elements
1314fe6060f1SDimitry Andric   // into a single vector.
1315bdd1243dSDimitry Andric   shuffles::MaskT MaskH(SM.Mask);
1316fe6060f1SDimitry Andric   SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1317fe6060f1SDimitry Andric   unsigned SegCount = SegList.size();
1318fe6060f1SDimitry Andric   SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1319fe6060f1SDimitry Andric 
1320fe6060f1SDimitry Andric   if (SegList.empty())
1321fe6060f1SDimitry Andric     return OpRef::undef(Ty);
1322fe6060f1SDimitry Andric 
1323fe6060f1SDimitry Andric   // NOTE:
1324fe6060f1SDimitry Andric   // In the following part of the function, where the segments are rearranged,
1325fe6060f1SDimitry Andric   // the shuffle mask SM can be of any length that is a multiple of a vector
1326fe6060f1SDimitry Andric   // (i.e. a multiple of 2*SegLen), and non-zero.
1327fe6060f1SDimitry Andric   // The output segment map is computed, and it may have any even number of
1328fe6060f1SDimitry Andric   // entries, but the rearrangement of input segments will be done based only
1329fe6060f1SDimitry Andric   // on the first two (non-undef) entries in the segment map.
1330fe6060f1SDimitry Andric   // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1331fe6060f1SDimitry Andric   // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1332fe6060f1SDimitry Andric   // a single vector V = 3:1. The output mask will then be updated to use
1333fe6060f1SDimitry Andric   // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1334fe6060f1SDimitry Andric   //
1335fe6060f1SDimitry Andric   // Picking the segments based on the output map is an optimization. For
1336fe6060f1SDimitry Andric   // correctness it is only necessary that Seg0 and Seg1 are the two input
1337fe6060f1SDimitry Andric   // segments that are used in the output.
1338fe6060f1SDimitry Andric 
1339fe6060f1SDimitry Andric   unsigned Seg0 = ~0u, Seg1 = ~0u;
1340cb14a3feSDimitry Andric   for (unsigned X : SegMap) {
1341fe6060f1SDimitry Andric     if (X == ~0u)
1342fe6060f1SDimitry Andric       continue;
1343fe6060f1SDimitry Andric     if (Seg0 == ~0u)
1344fe6060f1SDimitry Andric       Seg0 = X;
1345fe6060f1SDimitry Andric     else if (Seg1 != ~0u)
1346fe6060f1SDimitry Andric       break;
1347fe6060f1SDimitry Andric     if (X == ~1u || X != Seg0)
1348fe6060f1SDimitry Andric       Seg1 = X;
1349fe6060f1SDimitry Andric   }
1350fe6060f1SDimitry Andric 
1351fe6060f1SDimitry Andric   if (SegCount == 1) {
1352fe6060f1SDimitry Andric     unsigned SrcOp = SegList[0] / 2;
1353fe6060f1SDimitry Andric     for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1354fe6060f1SDimitry Andric       int M = SM.Mask[I];
1355fe6060f1SDimitry Andric       if (M >= 0) {
1356fe6060f1SDimitry Andric         M -= SrcOp * HwLen;
1357fe6060f1SDimitry Andric         assert(M >= 0);
1358fe6060f1SDimitry Andric       }
1359fe6060f1SDimitry Andric       NewMask[I] = M;
1360fe6060f1SDimitry Andric     }
1361fe6060f1SDimitry Andric     return Inp[SrcOp];
1362fe6060f1SDimitry Andric   }
1363fe6060f1SDimitry Andric 
1364fe6060f1SDimitry Andric   if (SegCount == 2) {
1365fe6060f1SDimitry Andric     // Seg0 should not be undef here: this would imply a SegList
1366fe6060f1SDimitry Andric     // with <= 1 elements, which was checked earlier.
1367fe6060f1SDimitry Andric     assert(Seg0 != ~0u);
1368fe6060f1SDimitry Andric 
1369fe6060f1SDimitry Andric     // If Seg0 or Seg1 are "multi-defined", pick them from the input
1370fe6060f1SDimitry Andric     // segment list instead.
1371fe6060f1SDimitry Andric     if (Seg0 == ~1u || Seg1 == ~1u) {
1372fe6060f1SDimitry Andric       if (Seg0 == Seg1) {
1373fe6060f1SDimitry Andric         Seg0 = SegList[0];
1374fe6060f1SDimitry Andric         Seg1 = SegList[1];
1375fe6060f1SDimitry Andric       } else if (Seg0 == ~1u) {
1376fe6060f1SDimitry Andric         Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1377fe6060f1SDimitry Andric       } else {
13784824e7fdSDimitry Andric         assert(Seg1 == ~1u);
1379fe6060f1SDimitry Andric         Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1380fe6060f1SDimitry Andric       }
1381fe6060f1SDimitry Andric     }
1382fe6060f1SDimitry Andric     assert(Seg0 != ~1u && Seg1 != ~1u);
1383fe6060f1SDimitry Andric 
1384fe6060f1SDimitry Andric     assert(Seg0 != Seg1 && "Expecting different segments");
1385fe6060f1SDimitry Andric     const SDLoc &dl(Results.InpNode);
1386fe6060f1SDimitry Andric     Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1387fe6060f1SDimitry Andric     OpRef HL = OpRef::res(Results.top());
1388fe6060f1SDimitry Andric 
1389fe6060f1SDimitry Andric     // Va = AB, Vb = CD
1390fe6060f1SDimitry Andric 
1391fe6060f1SDimitry Andric     if (Seg0 / 2 == Seg1 / 2) {
1392fe6060f1SDimitry Andric       // Same input vector.
1393fe6060f1SDimitry Andric       Va = Inp[Seg0 / 2];
1394fe6060f1SDimitry Andric       if (Seg0 > Seg1) {
1395fe6060f1SDimitry Andric         // Swap halves.
1396fe6060f1SDimitry Andric         Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1397fe6060f1SDimitry Andric         Va = OpRef::res(Results.top());
1398fe6060f1SDimitry Andric       }
1399fe6060f1SDimitry Andric       packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1400fe6060f1SDimitry Andric     } else if (Seg0 % 2 == Seg1 % 2) {
1401fe6060f1SDimitry Andric       // Picking AC, BD, CA, or DB.
1402fe6060f1SDimitry Andric       // vshuff(CD,AB,HL) -> BD:AC
1403fe6060f1SDimitry Andric       // vshuff(AB,CD,HL) -> DB:CA
1404fe6060f1SDimitry Andric       auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va)  // AC or BD
1405fe6060f1SDimitry Andric                                          : std::make_pair(Va, Vb); // CA or DB
1406fe6060f1SDimitry Andric       Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1407fe6060f1SDimitry Andric       OpRef P = OpRef::res(Results.top());
1408fe6060f1SDimitry Andric       Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1409fe6060f1SDimitry Andric       packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1410fe6060f1SDimitry Andric     } else {
1411fe6060f1SDimitry Andric       // Picking AD, BC, CB, or DA.
1412fe6060f1SDimitry Andric       if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1413fe6060f1SDimitry Andric         // AD or BC: this can be done using vmux.
1414fe6060f1SDimitry Andric         // Q = V6_pred_scalar2 SegLen
1415fe6060f1SDimitry Andric         // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1416fe6060f1SDimitry Andric         Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1417fe6060f1SDimitry Andric         OpRef Qt = OpRef::res(Results.top());
1418fe6060f1SDimitry Andric         auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb)  // AD
1419fe6060f1SDimitry Andric                               : std::make_pair(Vb, Va); // CB
1420fe6060f1SDimitry Andric         Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1421fe6060f1SDimitry Andric         Va = OpRef::res(Results.top());
1422fe6060f1SDimitry Andric         packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1423fe6060f1SDimitry Andric       } else {
1424fe6060f1SDimitry Andric         // BC or DA: this could be done via valign by SegLen.
1425fe6060f1SDimitry Andric         // Do nothing here, because valign (if possible) will be generated
14264824e7fdSDimitry Andric         // later on (make sure the Seg0 values are as expected).
1427fe6060f1SDimitry Andric         assert(Seg0 == 1 || Seg0 == 3);
1428fe6060f1SDimitry Andric       }
1429fe6060f1SDimitry Andric     }
1430fe6060f1SDimitry Andric   }
1431fe6060f1SDimitry Andric 
1432fe6060f1SDimitry Andric   // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1433bdd1243dSDimitry Andric   // FIXME: maybe remove this?
1434fe6060f1SDimitry Andric   ShuffleMask SMH(MaskH);
1435fe6060f1SDimitry Andric   assert(SMH.Mask.size() == VecLen);
1436bdd1243dSDimitry Andric   shuffles::MaskT MaskA(SMH.Mask);
1437fe6060f1SDimitry Andric 
1438fe6060f1SDimitry Andric   if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1439fe6060f1SDimitry Andric     // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1440bdd1243dSDimitry Andric     shuffles::MaskT Swapped(SMH.Mask);
1441fe6060f1SDimitry Andric     ShuffleVectorSDNode::commuteMask(Swapped);
1442fe6060f1SDimitry Andric     ShuffleMask SW(Swapped);
1443fe6060f1SDimitry Andric     if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1444fe6060f1SDimitry Andric       MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1445fe6060f1SDimitry Andric       std::swap(Va, Vb);
1446fe6060f1SDimitry Andric     }
1447fe6060f1SDimitry Andric   }
1448fe6060f1SDimitry Andric   ShuffleMask SMA(MaskA);
1449fe6060f1SDimitry Andric   assert(SMA.Mask.size() == VecLen);
1450fe6060f1SDimitry Andric 
1451fe6060f1SDimitry Andric   if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1452fe6060f1SDimitry Andric     int ShiftR = SMA.MinSrc;
1453fe6060f1SDimitry Andric     if (ShiftR >= static_cast<int>(HwLen)) {
1454fe6060f1SDimitry Andric       Va = Vb;
1455fe6060f1SDimitry Andric       Vb = OpRef::undef(Ty);
1456fe6060f1SDimitry Andric       ShiftR -= HwLen;
1457fe6060f1SDimitry Andric     }
1458fe6060f1SDimitry Andric     OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1459fe6060f1SDimitry Andric 
1460fe6060f1SDimitry Andric     for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1461fe6060f1SDimitry Andric       int M = SMA.Mask[I];
1462fe6060f1SDimitry Andric       if (M != -1)
1463fe6060f1SDimitry Andric         M -= SMA.MinSrc;
1464fe6060f1SDimitry Andric       NewMask[I] = M;
1465fe6060f1SDimitry Andric     }
1466fe6060f1SDimitry Andric     return RetVal;
1467fe6060f1SDimitry Andric   }
1468fe6060f1SDimitry Andric 
1469fe6060f1SDimitry Andric   // By here, packing by segment (half-vector) shuffling, and vector alignment
1470fe6060f1SDimitry Andric   // failed. Try vmux.
1471fe6060f1SDimitry Andric   // Note: since this is using the original mask, Va and Vb must not have been
1472fe6060f1SDimitry Andric   // modified.
14730b57cec5SDimitry Andric 
14740b57cec5SDimitry Andric   if (Options & PackMux) {
14750b57cec5SDimitry Andric     // If elements picked from Va and Vb have all different (source) indexes
14760b57cec5SDimitry Andric     // (relative to the start of the argument), do a mux, and update the mask.
14770b57cec5SDimitry Andric     BitVector Picked(HwLen);
14780b57cec5SDimitry Andric     SmallVector<uint8_t,128> MuxBytes(HwLen);
14790b57cec5SDimitry Andric     bool CanMux = true;
1480fe6060f1SDimitry Andric     for (int I = 0; I != static_cast<int>(VecLen); ++I) {
14810b57cec5SDimitry Andric       int M = SM.Mask[I];
14820b57cec5SDimitry Andric       if (M == -1)
14830b57cec5SDimitry Andric         continue;
1484fe6060f1SDimitry Andric       if (M >= static_cast<int>(HwLen))
14850b57cec5SDimitry Andric         M -= HwLen;
14860b57cec5SDimitry Andric       else
14870b57cec5SDimitry Andric         MuxBytes[M] = 0xFF;
14880b57cec5SDimitry Andric       if (Picked[M]) {
14890b57cec5SDimitry Andric         CanMux = false;
14900b57cec5SDimitry Andric         break;
14910b57cec5SDimitry Andric       }
14920b57cec5SDimitry Andric       NewMask[I] = M;
14930b57cec5SDimitry Andric     }
14940b57cec5SDimitry Andric     if (CanMux)
14950b57cec5SDimitry Andric       return vmuxs(MuxBytes, Va, Vb, Results);
14960b57cec5SDimitry Andric   }
14970b57cec5SDimitry Andric   return OpRef::fail();
14980b57cec5SDimitry Andric }
14990b57cec5SDimitry Andric 
1500fe6060f1SDimitry Andric // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1501fe6060f1SDimitry Andric // pack these vectors into a pair, and remap SM into NewMask to use the
1502fe6060f1SDimitry Andric // new pair instead.
15030b57cec5SDimitry Andric OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
15040b57cec5SDimitry Andric                          ResultStack &Results, MutableArrayRef<int> NewMask) {
15050b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1506fe6060f1SDimitry Andric   SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, HwLen);
1507fe6060f1SDimitry Andric   if (SegList.empty())
15080b57cec5SDimitry Andric     return OpRef::undef(getPairVT(MVT::i8));
15090b57cec5SDimitry Andric 
15100b57cec5SDimitry Andric   // If more than two halves are used, bail.
15110b57cec5SDimitry Andric   // TODO: be more aggressive here?
1512fe6060f1SDimitry Andric   unsigned SegCount = SegList.size();
1513fe6060f1SDimitry Andric   if (SegCount > 2)
15140b57cec5SDimitry Andric     return OpRef::fail();
15150b57cec5SDimitry Andric 
15160b57cec5SDimitry Andric   MVT HalfTy = getSingleVT(MVT::i8);
15170b57cec5SDimitry Andric 
15180b57cec5SDimitry Andric   OpRef Inp[2] = { Va, Vb };
15190b57cec5SDimitry Andric   OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
15200b57cec5SDimitry Andric 
1521fe6060f1SDimitry Andric   // Really make sure we have at most 2 vectors used in the mask.
1522fe6060f1SDimitry Andric   assert(SegCount <= 2);
1523fe6060f1SDimitry Andric 
1524fe6060f1SDimitry Andric   for (int I = 0, E = SegList.size(); I != E; ++I) {
1525fe6060f1SDimitry Andric     unsigned S = SegList[I];
1526fe6060f1SDimitry Andric     OpRef Op = Inp[S / 2];
1527fe6060f1SDimitry Andric     Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
15280b57cec5SDimitry Andric   }
15290b57cec5SDimitry Andric 
1530fe6060f1SDimitry Andric   // NOTE: Using SegList as the packing map here (not SegMap). This works,
1531fe6060f1SDimitry Andric   // because we're not concerned here about the order of the segments (i.e.
1532fe6060f1SDimitry Andric   // single vectors) in the output pair. Changing the order of vectors is
1533fe6060f1SDimitry Andric   // free (as opposed to changing the order of vector halves as in packs),
1534fe6060f1SDimitry Andric   // and so there is no extra cost added in case the order needs to be
1535fe6060f1SDimitry Andric   // changed later.
1536fe6060f1SDimitry Andric   packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1537fe6060f1SDimitry Andric   return concats(Out[0], Out[1], Results);
15380b57cec5SDimitry Andric }
15390b57cec5SDimitry Andric 
15400b57cec5SDimitry Andric OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
15410b57cec5SDimitry Andric                          ResultStack &Results) {
15420b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
15430b57cec5SDimitry Andric   MVT ByteTy = getSingleVT(MVT::i8);
15445ffd83dbSDimitry Andric   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
15450b57cec5SDimitry Andric   const SDLoc &dl(Results.InpNode);
15460b57cec5SDimitry Andric   SDValue B = getVectorConstant(Bytes, dl);
15470b57cec5SDimitry Andric   Results.push(Hexagon::V6_vd0, ByteTy, {});
15480b57cec5SDimitry Andric   Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
15490b57cec5SDimitry Andric   Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
15500b57cec5SDimitry Andric   return OpRef::res(Results.top());
15510b57cec5SDimitry Andric }
15520b57cec5SDimitry Andric 
15530b57cec5SDimitry Andric OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
15540b57cec5SDimitry Andric                          ResultStack &Results) {
15550b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
15560b57cec5SDimitry Andric   size_t S = Bytes.size() / 2;
15570b57cec5SDimitry Andric   OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
15580b57cec5SDimitry Andric   OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1559fe6060f1SDimitry Andric   return concats(L, H, Results);
15600b57cec5SDimitry Andric }
15610b57cec5SDimitry Andric 
15620b57cec5SDimitry Andric OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
15630b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
15640b57cec5SDimitry Andric   unsigned VecLen = SM.Mask.size();
15650b57cec5SDimitry Andric   assert(HwLen == VecLen);
15660b57cec5SDimitry Andric   (void)VecLen;
15670b57cec5SDimitry Andric   assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
15680b57cec5SDimitry Andric 
15690b57cec5SDimitry Andric   if (isIdentity(SM.Mask))
15700b57cec5SDimitry Andric     return Va;
15710b57cec5SDimitry Andric   if (isUndef(SM.Mask))
15720b57cec5SDimitry Andric     return OpRef::undef(getSingleVT(MVT::i8));
15730b57cec5SDimitry Andric 
1574bdd1243dSDimitry Andric   // First, check for rotations.
1575bdd1243dSDimitry Andric   if (auto Dist = rotationDistance(SM, VecLen)) {
1576bdd1243dSDimitry Andric     OpRef Rotate = funnels(Va, Va, *Dist, Results);
1577bdd1243dSDimitry Andric     if (Rotate.isValid())
1578bdd1243dSDimitry Andric       return Rotate;
1579bdd1243dSDimitry Andric   }
1580fe6060f1SDimitry Andric   unsigned HalfLen = HwLen / 2;
15814824e7fdSDimitry Andric   assert(isPowerOf2_32(HalfLen));
1582fe6060f1SDimitry Andric 
1583fe6060f1SDimitry Andric   // Handle special case where the output is the same half of the input
1584fe6060f1SDimitry Andric   // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1585fe6060f1SDimitry Andric   std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1586fe6060f1SDimitry Andric   if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1587fe6060f1SDimitry Andric     std::pair<int, unsigned> Strip2 =
1588fe6060f1SDimitry Andric         findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1589fe6060f1SDimitry Andric     if (Strip1 == Strip2) {
1590fe6060f1SDimitry Andric       const SDLoc &dl(Results.InpNode);
1591fe6060f1SDimitry Andric       Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1592fe6060f1SDimitry Andric       Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1593fe6060f1SDimitry Andric                    {Va, Va, OpRef::res(Results.top())});
1594fe6060f1SDimitry Andric       OpRef S = OpRef::res(Results.top());
1595fe6060f1SDimitry Andric       return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1596fe6060f1SDimitry Andric     }
1597fe6060f1SDimitry Andric   }
1598fe6060f1SDimitry Andric 
15990b57cec5SDimitry Andric   OpRef P = perfect(SM, Va, Results);
16000b57cec5SDimitry Andric   if (P.isValid())
16010b57cec5SDimitry Andric     return P;
16020b57cec5SDimitry Andric   return butterfly(SM, Va, Results);
16030b57cec5SDimitry Andric }
16040b57cec5SDimitry Andric 
16050b57cec5SDimitry Andric OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
16060b57cec5SDimitry Andric                            ResultStack &Results) {
16070b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
16080b57cec5SDimitry Andric   if (isUndef(SM.Mask))
16090b57cec5SDimitry Andric     return OpRef::undef(getSingleVT(MVT::i8));
16100b57cec5SDimitry Andric 
16110b57cec5SDimitry Andric   OpRef C = contracting(SM, Va, Vb, Results);
16120b57cec5SDimitry Andric   if (C.isValid())
16130b57cec5SDimitry Andric     return C;
16140b57cec5SDimitry Andric 
16150b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
1616bdd1243dSDimitry Andric   shuffles::MaskT PackedMask(VecLen);
1617fe6060f1SDimitry Andric   OpRef P = packs(SM, Va, Vb, Results, PackedMask);
16180b57cec5SDimitry Andric   if (P.isValid())
1619fe6060f1SDimitry Andric     return shuffs1(ShuffleMask(PackedMask), P, Results);
1620fe6060f1SDimitry Andric 
1621fe6060f1SDimitry Andric   // TODO: Before we split the mask, try perfect shuffle on concatenated
1622bdd1243dSDimitry Andric   // operands.
16230b57cec5SDimitry Andric 
1624bdd1243dSDimitry Andric   shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
16250b57cec5SDimitry Andric   splitMask(SM.Mask, MaskL, MaskR);
16260b57cec5SDimitry Andric 
16270b57cec5SDimitry Andric   OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
16280b57cec5SDimitry Andric   OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
16290b57cec5SDimitry Andric   if (!L.isValid() || !R.isValid())
16300b57cec5SDimitry Andric     return OpRef::fail();
16310b57cec5SDimitry Andric 
16320b57cec5SDimitry Andric   SmallVector<uint8_t, 128> Bytes(VecLen);
16330b57cec5SDimitry Andric   for (int I = 0; I != VecLen; ++I) {
16340b57cec5SDimitry Andric     if (MaskL[I] != -1)
16350b57cec5SDimitry Andric       Bytes[I] = 0xFF;
16360b57cec5SDimitry Andric   }
16370b57cec5SDimitry Andric   return vmuxs(Bytes, L, R, Results);
16380b57cec5SDimitry Andric }
16390b57cec5SDimitry Andric 
16400b57cec5SDimitry Andric OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
16410b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
16420b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
16430b57cec5SDimitry Andric 
16440b57cec5SDimitry Andric   if (isIdentity(SM.Mask))
16450b57cec5SDimitry Andric     return Va;
16460b57cec5SDimitry Andric   if (isUndef(SM.Mask))
16470b57cec5SDimitry Andric     return OpRef::undef(getPairVT(MVT::i8));
16480b57cec5SDimitry Andric 
1649bdd1243dSDimitry Andric   shuffles::MaskT PackedMask(VecLen);
16500b57cec5SDimitry Andric   OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
16510b57cec5SDimitry Andric   if (P.isValid()) {
16520b57cec5SDimitry Andric     ShuffleMask PM(PackedMask);
16530b57cec5SDimitry Andric     OpRef E = expanding(PM, P, Results);
16540b57cec5SDimitry Andric     if (E.isValid())
16550b57cec5SDimitry Andric       return E;
16560b57cec5SDimitry Andric 
16570b57cec5SDimitry Andric     OpRef L = shuffs1(PM.lo(), P, Results);
16580b57cec5SDimitry Andric     OpRef H = shuffs1(PM.hi(), P, Results);
16590b57cec5SDimitry Andric     if (L.isValid() && H.isValid())
1660fe6060f1SDimitry Andric       return concats(L, H, Results);
16610b57cec5SDimitry Andric   }
16620b57cec5SDimitry Andric 
1663bdd1243dSDimitry Andric   if (!isLowHalfOnly(SM.Mask)) {
1664bdd1243dSDimitry Andric     // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1665bdd1243dSDimitry Andric     // is all-undef) may produce a perfect shuffle that generates legitimate
1666bdd1243dSDimitry Andric     // upper half. This isn't wrong, but if the perfect shuffle was possible,
1667bdd1243dSDimitry Andric     // then there is a good chance that a shorter (contracting) code may be
1668bdd1243dSDimitry Andric     // used as well (e.g. V6_vshuffeb, etc).
16690b57cec5SDimitry Andric     OpRef R = perfect(SM, Va, Results);
16700b57cec5SDimitry Andric     if (R.isValid())
16710b57cec5SDimitry Andric       return R;
16720b57cec5SDimitry Andric     // TODO commute the mask and try the opposite order of the halves.
1673bdd1243dSDimitry Andric   }
16740b57cec5SDimitry Andric 
16750b57cec5SDimitry Andric   OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
16760b57cec5SDimitry Andric   OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
16770b57cec5SDimitry Andric   if (L.isValid() && H.isValid())
1678fe6060f1SDimitry Andric     return concats(L, H, Results);
16790b57cec5SDimitry Andric 
16800b57cec5SDimitry Andric   return OpRef::fail();
16810b57cec5SDimitry Andric }
16820b57cec5SDimitry Andric 
16830b57cec5SDimitry Andric OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
16840b57cec5SDimitry Andric                            ResultStack &Results) {
16850b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
16860b57cec5SDimitry Andric   if (isUndef(SM.Mask))
16870b57cec5SDimitry Andric     return OpRef::undef(getPairVT(MVT::i8));
16880b57cec5SDimitry Andric 
16890b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
16900b57cec5SDimitry Andric   SmallVector<int,256> PackedMask(VecLen);
16910b57cec5SDimitry Andric   OpRef P = packp(SM, Va, Vb, Results, PackedMask);
16920b57cec5SDimitry Andric   if (P.isValid())
16930b57cec5SDimitry Andric     return shuffp1(ShuffleMask(PackedMask), P, Results);
16940b57cec5SDimitry Andric 
16950b57cec5SDimitry Andric   SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
16960b57cec5SDimitry Andric   splitMask(SM.Mask, MaskL, MaskR);
16970b57cec5SDimitry Andric 
16980b57cec5SDimitry Andric   OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
16990b57cec5SDimitry Andric   OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
17000b57cec5SDimitry Andric   if (!L.isValid() || !R.isValid())
17010b57cec5SDimitry Andric     return OpRef::fail();
17020b57cec5SDimitry Andric 
17030b57cec5SDimitry Andric   // Mux the results.
17040b57cec5SDimitry Andric   SmallVector<uint8_t,256> Bytes(VecLen);
17050b57cec5SDimitry Andric   for (int I = 0; I != VecLen; ++I) {
17060b57cec5SDimitry Andric     if (MaskL[I] != -1)
17070b57cec5SDimitry Andric       Bytes[I] = 0xFF;
17080b57cec5SDimitry Andric   }
17090b57cec5SDimitry Andric   return vmuxp(Bytes, L, R, Results);
17100b57cec5SDimitry Andric }
17110b57cec5SDimitry Andric 
17120b57cec5SDimitry Andric namespace {
17130b57cec5SDimitry Andric   struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
17140b57cec5SDimitry Andric     template <typename T>
17150b57cec5SDimitry Andric     Deleter(SelectionDAG &D, T &C)
17160b57cec5SDimitry Andric       : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
17170b57cec5SDimitry Andric                                                   C.erase(N);
17180b57cec5SDimitry Andric                                                 }) {}
17190b57cec5SDimitry Andric   };
17200b57cec5SDimitry Andric 
17210b57cec5SDimitry Andric   template <typename T>
17220b57cec5SDimitry Andric   struct NullifyingVector : public T {
17230b57cec5SDimitry Andric     DenseMap<SDNode*, SDNode**> Refs;
17240b57cec5SDimitry Andric     NullifyingVector(T &&V) : T(V) {
17250b57cec5SDimitry Andric       for (unsigned i = 0, e = T::size(); i != e; ++i) {
17260b57cec5SDimitry Andric         SDNode *&N = T::operator[](i);
17270b57cec5SDimitry Andric         Refs[N] = &N;
17280b57cec5SDimitry Andric       }
17290b57cec5SDimitry Andric     }
17300b57cec5SDimitry Andric     void erase(SDNode *N) {
17310b57cec5SDimitry Andric       auto F = Refs.find(N);
17320b57cec5SDimitry Andric       if (F != Refs.end())
17330b57cec5SDimitry Andric         *F->second = nullptr;
17340b57cec5SDimitry Andric     }
17350b57cec5SDimitry Andric   };
17360b57cec5SDimitry Andric }
17370b57cec5SDimitry Andric 
1738e8d8bef9SDimitry Andric void HvxSelector::select(SDNode *ISelN) {
1739e8d8bef9SDimitry Andric   // What's important here is to select the right set of nodes. The main
1740e8d8bef9SDimitry Andric   // selection algorithm loops over nodes in a topological order, i.e. users
1741e8d8bef9SDimitry Andric   // are visited before their operands.
1742e8d8bef9SDimitry Andric   //
1743e8d8bef9SDimitry Andric   // It is an error to have an unselected node with a selected operand, and
1744e8d8bef9SDimitry Andric   // there is an assertion in the main selector code to enforce that.
1745e8d8bef9SDimitry Andric   //
1746e8d8bef9SDimitry Andric   // Such a situation could occur if we selected a node, which is both a
1747e8d8bef9SDimitry Andric   // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1748e8d8bef9SDimitry Andric   // node in the DAG.
1749e8d8bef9SDimitry Andric   assert(ISelN->getOpcode() == HexagonISD::ISEL);
1750e8d8bef9SDimitry Andric   SDNode *N0 = ISelN->getOperand(0).getNode();
1751e8d8bef9SDimitry Andric 
1752e8d8bef9SDimitry Andric   // There could have been nodes created (i.e. inserted into the DAG)
1753e8d8bef9SDimitry Andric   // that are now dead. Remove them, in case they use any of the nodes
1754e8d8bef9SDimitry Andric   // to select (and make them look shared).
1755e8d8bef9SDimitry Andric   DAG.RemoveDeadNodes();
1756e8d8bef9SDimitry Andric 
175706c3fb27SDimitry Andric   SetVector<SDNode *> SubNodes;
1758e8d8bef9SDimitry Andric 
175906c3fb27SDimitry Andric   if (!N0->isMachineOpcode()) {
1760e8d8bef9SDimitry Andric     // Don't want to select N0 if it's shared with another node, except if
1761e8d8bef9SDimitry Andric     // it's shared with other ISELs.
1762e8d8bef9SDimitry Andric     auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1763e8d8bef9SDimitry Andric     if (llvm::all_of(N0->uses(), IsISelN))
1764e8d8bef9SDimitry Andric       SubNodes.insert(N0);
176506c3fb27SDimitry Andric   }
176606c3fb27SDimitry Andric   if (SubNodes.empty()) {
176706c3fb27SDimitry Andric     ISel.ReplaceNode(ISelN, N0);
176806c3fb27SDimitry Andric     return;
176906c3fb27SDimitry Andric   }
1770e8d8bef9SDimitry Andric 
177106c3fb27SDimitry Andric   // Need to manually select the nodes that are dominated by the ISEL. Other
177206c3fb27SDimitry Andric   // nodes are reachable from the rest of the DAG, and so will be selected
177306c3fb27SDimitry Andric   // by the DAG selection routine.
177406c3fb27SDimitry Andric   SetVector<SDNode*> Dom, NonDom;
177506c3fb27SDimitry Andric   Dom.insert(N0);
177606c3fb27SDimitry Andric 
177706c3fb27SDimitry Andric   auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
177806c3fb27SDimitry Andric     if (Dom.count(T))
177906c3fb27SDimitry Andric       return true;
178006c3fb27SDimitry Andric     if (T->use_empty() || NonDom.count(T))
178106c3fb27SDimitry Andric       return false;
178206c3fb27SDimitry Andric     for (SDNode *U : T->uses()) {
178306c3fb27SDimitry Andric       // If T is reachable from a known non-dominated node, then T itself
178406c3fb27SDimitry Andric       // is non-dominated.
178506c3fb27SDimitry Andric       if (!Rec(U, Rec)) {
178606c3fb27SDimitry Andric         NonDom.insert(T);
178706c3fb27SDimitry Andric         return false;
178806c3fb27SDimitry Andric       }
178906c3fb27SDimitry Andric     }
179006c3fb27SDimitry Andric     Dom.insert(T);
179106c3fb27SDimitry Andric     return true;
179206c3fb27SDimitry Andric   };
179306c3fb27SDimitry Andric 
179406c3fb27SDimitry Andric   auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
179506c3fb27SDimitry Andric 
179606c3fb27SDimitry Andric   // Add the rest of nodes dominated by ISEL to SubNodes.
1797e8d8bef9SDimitry Andric   for (unsigned I = 0; I != SubNodes.size(); ++I) {
179806c3fb27SDimitry Andric     for (SDValue Op : SubNodes[I]->ops()) {
1799e8d8bef9SDimitry Andric       SDNode *O = Op.getNode();
180006c3fb27SDimitry Andric       if (IsDom(O))
1801e8d8bef9SDimitry Andric         SubNodes.insert(O);
1802e8d8bef9SDimitry Andric     }
1803e8d8bef9SDimitry Andric   }
180406c3fb27SDimitry Andric 
180506c3fb27SDimitry Andric   // Do a topological sort of nodes from Dom.
180606c3fb27SDimitry Andric   SetVector<SDNode*> TmpQ;
180706c3fb27SDimitry Andric 
180806c3fb27SDimitry Andric   std::map<SDNode *, unsigned> OpCount;
180906c3fb27SDimitry Andric   for (SDNode *T : Dom) {
181006c3fb27SDimitry Andric     unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
181106c3fb27SDimitry Andric                              return Dom.count(U.getNode());
181206c3fb27SDimitry Andric                            });
181306c3fb27SDimitry Andric 
181406c3fb27SDimitry Andric     OpCount.insert({T, NumDomOps});
181506c3fb27SDimitry Andric     if (NumDomOps == 0)
181606c3fb27SDimitry Andric       TmpQ.insert(T);
1817e8d8bef9SDimitry Andric   }
1818e8d8bef9SDimitry Andric 
1819e8d8bef9SDimitry Andric   for (unsigned I = 0; I != TmpQ.size(); ++I) {
1820e8d8bef9SDimitry Andric     SDNode *S = TmpQ[I];
1821e8d8bef9SDimitry Andric     for (SDNode *U : S->uses()) {
1822e8d8bef9SDimitry Andric       if (U == ISelN)
1823e8d8bef9SDimitry Andric         continue;
182406c3fb27SDimitry Andric       auto F = OpCount.find(U);
182506c3fb27SDimitry Andric       assert(F != OpCount.end());
1826e8d8bef9SDimitry Andric       if (F->second > 0 && !--F->second)
1827e8d8bef9SDimitry Andric         TmpQ.insert(F->first);
1828e8d8bef9SDimitry Andric     }
1829e8d8bef9SDimitry Andric   }
1830e8d8bef9SDimitry Andric 
1831e8d8bef9SDimitry Andric   // Remove the marker.
1832e8d8bef9SDimitry Andric   ISel.ReplaceNode(ISelN, N0);
1833e8d8bef9SDimitry Andric 
1834e8d8bef9SDimitry Andric   assert(SubNodes.size() == TmpQ.size());
1835e8d8bef9SDimitry Andric   NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1836e8d8bef9SDimitry Andric 
1837e8d8bef9SDimitry Andric   Deleter DUQ(DAG, Queue);
1838e8d8bef9SDimitry Andric   for (SDNode *S : reverse(Queue)) {
1839e8d8bef9SDimitry Andric     if (S == nullptr)
1840e8d8bef9SDimitry Andric       continue;
1841e8d8bef9SDimitry Andric     DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1842e8d8bef9SDimitry Andric     ISel.Select(S);
1843e8d8bef9SDimitry Andric   }
1844e8d8bef9SDimitry Andric }
1845e8d8bef9SDimitry Andric 
18460b57cec5SDimitry Andric bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
18470b57cec5SDimitry Andric                                    MVT ResTy, SDValue Va, SDValue Vb,
18480b57cec5SDimitry Andric                                    SDNode *N) {
18490b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
18500b57cec5SDimitry Andric   MVT ElemTy = ResTy.getVectorElementType();
18510b57cec5SDimitry Andric   assert(ElemTy == MVT::i8);
18520b57cec5SDimitry Andric   unsigned VecLen = Mask.size();
18530b57cec5SDimitry Andric   bool HavePairs = (2*HwLen == VecLen);
18540b57cec5SDimitry Andric   MVT SingleTy = getSingleVT(MVT::i8);
18550b57cec5SDimitry Andric 
18560b57cec5SDimitry Andric   // The prior attempts to handle this shuffle may have left a bunch of
18570b57cec5SDimitry Andric   // dead nodes in the DAG (such as constants). These nodes will be added
18580b57cec5SDimitry Andric   // at the end of DAG's node list, which at that point had already been
18590b57cec5SDimitry Andric   // sorted topologically. In the main selection loop, the node list is
18600b57cec5SDimitry Andric   // traversed backwards from the root node, which means that any new
18610b57cec5SDimitry Andric   // nodes (from the end of the list) will not be visited.
18620b57cec5SDimitry Andric   // Scalarization will replace the shuffle node with the scalarized
18630b57cec5SDimitry Andric   // expression, and if that expression reused any if the leftoever (dead)
18640b57cec5SDimitry Andric   // nodes, these nodes would not be selected (since the "local" selection
18650b57cec5SDimitry Andric   // only visits nodes that are not in AllNodes).
18660b57cec5SDimitry Andric   // To avoid this issue, remove all dead nodes from the DAG now.
1867e8d8bef9SDimitry Andric //  DAG.RemoveDeadNodes();
18680b57cec5SDimitry Andric 
18690b57cec5SDimitry Andric   SmallVector<SDValue,128> Ops;
18700b57cec5SDimitry Andric   LLVMContext &Ctx = *DAG.getContext();
18710b57cec5SDimitry Andric   MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
18720b57cec5SDimitry Andric   for (int I : Mask) {
18730b57cec5SDimitry Andric     if (I < 0) {
18740b57cec5SDimitry Andric       Ops.push_back(ISel.selectUndef(dl, LegalTy));
18750b57cec5SDimitry Andric       continue;
18760b57cec5SDimitry Andric     }
18770b57cec5SDimitry Andric     SDValue Vec;
18780b57cec5SDimitry Andric     unsigned M = I;
18790b57cec5SDimitry Andric     if (M < VecLen) {
18800b57cec5SDimitry Andric       Vec = Va;
18810b57cec5SDimitry Andric     } else {
18820b57cec5SDimitry Andric       Vec = Vb;
18830b57cec5SDimitry Andric       M -= VecLen;
18840b57cec5SDimitry Andric     }
18850b57cec5SDimitry Andric     if (HavePairs) {
18860b57cec5SDimitry Andric       if (M < HwLen) {
18870b57cec5SDimitry Andric         Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
18880b57cec5SDimitry Andric       } else {
18890b57cec5SDimitry Andric         Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
18900b57cec5SDimitry Andric         M -= HwLen;
18910b57cec5SDimitry Andric       }
18920b57cec5SDimitry Andric     }
18930b57cec5SDimitry Andric     SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
18940b57cec5SDimitry Andric     SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
18950b57cec5SDimitry Andric     SDValue L = Lower.LowerOperation(Ex, DAG);
18960b57cec5SDimitry Andric     assert(L.getNode());
18970b57cec5SDimitry Andric     Ops.push_back(L);
18980b57cec5SDimitry Andric   }
18990b57cec5SDimitry Andric 
19000b57cec5SDimitry Andric   SDValue LV;
19010b57cec5SDimitry Andric   if (2*HwLen == VecLen) {
19020b57cec5SDimitry Andric     SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
19030b57cec5SDimitry Andric     SDValue L0 = Lower.LowerOperation(B0, DAG);
19040b57cec5SDimitry Andric     SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
19050b57cec5SDimitry Andric     SDValue L1 = Lower.LowerOperation(B1, DAG);
19060b57cec5SDimitry Andric     // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
19070b57cec5SDimitry Andric     // functions may expect to be called only for illegal operations, so
19080b57cec5SDimitry Andric     // make sure that they are not called for legal ones. Develop a better
19090b57cec5SDimitry Andric     // mechanism for dealing with this.
19100b57cec5SDimitry Andric     LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
19110b57cec5SDimitry Andric   } else {
19120b57cec5SDimitry Andric     SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
19130b57cec5SDimitry Andric     LV = Lower.LowerOperation(BV, DAG);
19140b57cec5SDimitry Andric   }
19150b57cec5SDimitry Andric 
19160b57cec5SDimitry Andric   assert(!N->use_empty());
1917e8d8bef9SDimitry Andric   SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1918e8d8bef9SDimitry Andric   ISel.ReplaceNode(N, IS.getNode());
1919e8d8bef9SDimitry Andric   select(IS.getNode());
19200b57cec5SDimitry Andric   DAG.RemoveDeadNodes();
19210b57cec5SDimitry Andric   return true;
19220b57cec5SDimitry Andric }
19230b57cec5SDimitry Andric 
1924bdd1243dSDimitry Andric SmallVector<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM,
1925bdd1243dSDimitry Andric                                                             unsigned Width) {
1926bdd1243dSDimitry Andric   auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1927bdd1243dSDimitry Andric     unsigned Impossible = ~(1u << Width) + 1;
1928bdd1243dSDimitry Andric     for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1929bdd1243dSDimitry Andric       uint8_t B = Bs[I];
1930bdd1243dSDimitry Andric       if (B == 0xff)
1931bdd1243dSDimitry Andric         continue;
1932bdd1243dSDimitry Andric       if (~Impossible == 0)
1933bdd1243dSDimitry Andric         break;
1934bdd1243dSDimitry Andric       for (unsigned Log = 0; Log != Width; ++Log) {
1935bdd1243dSDimitry Andric         if (Impossible & (1u << Log))
1936bdd1243dSDimitry Andric           continue;
1937bdd1243dSDimitry Andric         unsigned Expected = (I >> Log) % 2;
1938bdd1243dSDimitry Andric         if (B != Expected)
1939bdd1243dSDimitry Andric           Impossible |= (1u << Log);
1940bdd1243dSDimitry Andric       }
1941bdd1243dSDimitry Andric     }
1942bdd1243dSDimitry Andric     return ~Impossible;
1943bdd1243dSDimitry Andric   };
1944bdd1243dSDimitry Andric 
1945bdd1243dSDimitry Andric   SmallVector<uint32_t, 8> Worklist(Width);
1946bdd1243dSDimitry Andric 
1947bdd1243dSDimitry Andric   for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1948bdd1243dSDimitry Andric     SmallVector<uint8_t> BitValues(SM.Mask.size());
1949bdd1243dSDimitry Andric     for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1950bdd1243dSDimitry Andric       int M = SM.Mask[i];
1951bdd1243dSDimitry Andric       if (M < 0)
1952bdd1243dSDimitry Andric         BitValues[i] = 0xff;
1953bdd1243dSDimitry Andric       else
1954bdd1243dSDimitry Andric         BitValues[i] = (M & (1u << BitIdx)) != 0;
1955bdd1243dSDimitry Andric     }
1956bdd1243dSDimitry Andric     Worklist[BitIdx] = possibilities(BitValues, Width);
1957bdd1243dSDimitry Andric   }
1958bdd1243dSDimitry Andric 
1959bdd1243dSDimitry Andric   // If there is a word P in Worklist that matches multiple possibilities,
1960bdd1243dSDimitry Andric   // then if any other word Q matches any of the possibilities matched by P,
1961bdd1243dSDimitry Andric   // then Q matches all the possibilities matched by P. In fact, P == Q.
1962bdd1243dSDimitry Andric   // In other words, for each words P, Q, the sets of possibilities matched
1963bdd1243dSDimitry Andric   // by P and Q are either equal or disjoint (no partial overlap).
1964bdd1243dSDimitry Andric   //
1965bdd1243dSDimitry Andric   // Illustration: For 4-bit values there are 4 complete sequences:
1966bdd1243dSDimitry Andric   // a:  0 1 0 1  0 1 0 1  0 1 0 1  0 1 0 1
1967bdd1243dSDimitry Andric   // b:  0 0 1 1  0 0 1 1  0 0 1 1  0 0 1 1
1968bdd1243dSDimitry Andric   // c:  0 0 0 0  1 1 1 1  0 0 0 0  1 1 1 1
1969bdd1243dSDimitry Andric   // d:  0 0 0 0  0 0 0 0  1 1 1 1  1 1 1 1
1970bdd1243dSDimitry Andric   //
1971bdd1243dSDimitry Andric   // Words containing unknown bits that match two of the complete
1972bdd1243dSDimitry Andric   // sequences:
1973bdd1243dSDimitry Andric   // ab: 0 u u 1  0 u u 1  0 u u 1  0 u u 1
1974bdd1243dSDimitry Andric   // ac: 0 u 0 u  u 1 u 1  0 u 0 u  u 1 u 1
1975bdd1243dSDimitry Andric   // ad: 0 u 0 u  0 u 0 u  u 1 u 1  u 1 u 1
1976bdd1243dSDimitry Andric   // bc: 0 0 u u  u u 1 1  0 0 u u  u u 1 1
1977bdd1243dSDimitry Andric   // bd: 0 0 u u  0 0 u u  u u 1 1  u u 1 1
1978bdd1243dSDimitry Andric   // cd: 0 0 0 0  u u u u  u u u u  1 1 1 1
1979bdd1243dSDimitry Andric   //
1980bdd1243dSDimitry Andric   // Proof of the claim above:
1981bdd1243dSDimitry Andric   // Let P be a word that matches s0 and s1. For that to happen, all known
1982bdd1243dSDimitry Andric   // bits in P must match s0 and s1 exactly.
1983bdd1243dSDimitry Andric   // Assume there is Q that matches s1. Note that since P and Q came from
1984bdd1243dSDimitry Andric   // the same shuffle mask, the positions of unknown bits in P and Q match
1985bdd1243dSDimitry Andric   // exactly, which makes the indices of known bits be exactly the same
1986bdd1243dSDimitry Andric   // between P and Q. Since P matches s0 and s1, the known bits of P much
1987bdd1243dSDimitry Andric   // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1988bdd1243dSDimitry Andric   // are exactly the same as in s1, which means that they are exactly the
1989bdd1243dSDimitry Andric   // same as in P. This implies that P == Q.
1990bdd1243dSDimitry Andric 
1991bdd1243dSDimitry Andric   // There can be a situation where there are more entries with the same
1992bdd1243dSDimitry Andric   // bits set than there are set bits (e.g. value 9 occuring more than 2
1993bdd1243dSDimitry Andric   // times). In such cases it will be impossible to complete this to a
1994bdd1243dSDimitry Andric   // perfect shuffle.
1995bdd1243dSDimitry Andric   SmallVector<uint32_t, 8> Sorted(Worklist);
1996bdd1243dSDimitry Andric   llvm::sort(Sorted.begin(), Sorted.end());
1997bdd1243dSDimitry Andric 
1998bdd1243dSDimitry Andric   for (unsigned I = 0, E = Sorted.size(); I != E;) {
1999bdd1243dSDimitry Andric     unsigned P = Sorted[I], Count = 1;
2000bdd1243dSDimitry Andric     while (++I != E && P == Sorted[I])
2001bdd1243dSDimitry Andric       ++Count;
2002bdd1243dSDimitry Andric     if ((unsigned)llvm::popcount(P) < Count) {
2003bdd1243dSDimitry Andric       // Reset all occurences of P, if there are more occurrences of P
2004bdd1243dSDimitry Andric       // than there are bits in P.
20055f757f3fSDimitry Andric       for (unsigned &Q : Worklist) {
2006bdd1243dSDimitry Andric         if (Q == P)
2007bdd1243dSDimitry Andric           Q = 0;
20085f757f3fSDimitry Andric       }
2009bdd1243dSDimitry Andric     }
2010bdd1243dSDimitry Andric   }
2011bdd1243dSDimitry Andric 
2012bdd1243dSDimitry Andric   return Worklist;
2013bdd1243dSDimitry Andric }
2014bdd1243dSDimitry Andric 
2015bdd1243dSDimitry Andric SmallVector<uint32_t, 8>
2016bdd1243dSDimitry Andric HvxSelector::completeToPerfect(ArrayRef<uint32_t> Completions, unsigned Width) {
2017bdd1243dSDimitry Andric   // Pick a completion if there are multiple possibilities. For now just
2018bdd1243dSDimitry Andric   // select any valid completion.
2019bdd1243dSDimitry Andric   SmallVector<uint32_t, 8> Comps(Completions);
2020bdd1243dSDimitry Andric 
2021bdd1243dSDimitry Andric   for (unsigned I = 0; I != Width; ++I) {
2022bdd1243dSDimitry Andric     uint32_t P = Comps[I];
2023bdd1243dSDimitry Andric     assert(P != 0);
2024bdd1243dSDimitry Andric     if (isPowerOf2_32(P))
2025bdd1243dSDimitry Andric       continue;
2026bdd1243dSDimitry Andric     // T = least significant bit of P.
2027bdd1243dSDimitry Andric     uint32_t T = P ^ ((P - 1) & P);
2028bdd1243dSDimitry Andric     // Clear T in all remaining words matching P.
2029bdd1243dSDimitry Andric     for (unsigned J = I + 1; J != Width; ++J) {
2030bdd1243dSDimitry Andric       if (Comps[J] == P)
2031bdd1243dSDimitry Andric         Comps[J] ^= T;
2032bdd1243dSDimitry Andric     }
2033bdd1243dSDimitry Andric     Comps[I] = T;
2034bdd1243dSDimitry Andric   }
2035bdd1243dSDimitry Andric 
2036bdd1243dSDimitry Andric #ifndef NDEBUG
2037bdd1243dSDimitry Andric   // Check that we have generated a valid completion.
2038bdd1243dSDimitry Andric   uint32_t OrAll = 0;
2039cb14a3feSDimitry Andric   for (uint32_t C : Comps) {
2040bdd1243dSDimitry Andric     assert(isPowerOf2_32(C));
2041bdd1243dSDimitry Andric     OrAll |= C;
2042bdd1243dSDimitry Andric   }
2043bdd1243dSDimitry Andric   assert(OrAll == (1u << Width) -1);
2044bdd1243dSDimitry Andric #endif
2045bdd1243dSDimitry Andric 
2046bdd1243dSDimitry Andric   return Comps;
2047bdd1243dSDimitry Andric }
2048bdd1243dSDimitry Andric 
2049bdd1243dSDimitry Andric std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2050bdd1243dSDimitry Andric                                                  unsigned WrapAt) {
2051bdd1243dSDimitry Andric   std::optional<int> Dist;
2052bdd1243dSDimitry Andric   for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2053bdd1243dSDimitry Andric     int M = SM.Mask[I];
2054bdd1243dSDimitry Andric     if (M < 0)
2055bdd1243dSDimitry Andric       continue;
2056bdd1243dSDimitry Andric     if (Dist) {
2057bdd1243dSDimitry Andric       if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2058bdd1243dSDimitry Andric         return std::nullopt;
2059bdd1243dSDimitry Andric     } else {
2060bdd1243dSDimitry Andric       // Integer a%b operator assumes rounding towards zero by /, so it
2061bdd1243dSDimitry Andric       // "misbehaves" when a crosses 0 (the remainder also changes sign).
2062bdd1243dSDimitry Andric       // Add WrapAt in an attempt to keep I+Dist non-negative.
2063bdd1243dSDimitry Andric       Dist = M - I;
2064bdd1243dSDimitry Andric       if (Dist < 0)
2065bdd1243dSDimitry Andric         Dist = *Dist + WrapAt;
2066bdd1243dSDimitry Andric     }
2067bdd1243dSDimitry Andric   }
2068bdd1243dSDimitry Andric   return Dist;
2069bdd1243dSDimitry Andric }
2070bdd1243dSDimitry Andric 
20710b57cec5SDimitry Andric OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
20720b57cec5SDimitry Andric                                ResultStack &Results) {
20730b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
20740b57cec5SDimitry Andric   if (!Va.isValid() || !Vb.isValid())
20750b57cec5SDimitry Andric     return OpRef::fail();
20760b57cec5SDimitry Andric 
20770b57cec5SDimitry Andric   // Contracting shuffles, i.e. instructions that always discard some bytes
20780b57cec5SDimitry Andric   // from the operand vectors.
20790b57cec5SDimitry Andric   //
2080bdd1243dSDimitry Andric   // Funnel shifts
20810b57cec5SDimitry Andric   // V6_vshuff{e,o}b
2082bdd1243dSDimitry Andric   // V6_vshuf{e,o}h
20830b57cec5SDimitry Andric   // V6_vdealb4w
20840b57cec5SDimitry Andric   // V6_vpack{e,o}{b,h}
20850b57cec5SDimitry Andric 
20860b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
20870b57cec5SDimitry Andric 
2088bdd1243dSDimitry Andric   // First, check for funnel shifts.
2089bdd1243dSDimitry Andric   if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2090bdd1243dSDimitry Andric     OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2091bdd1243dSDimitry Andric     if (Funnel.isValid())
2092bdd1243dSDimitry Andric       return Funnel;
2093bdd1243dSDimitry Andric   }
20940b57cec5SDimitry Andric 
2095bdd1243dSDimitry Andric   MVT SingleTy = getSingleVT(MVT::i8);
2096bdd1243dSDimitry Andric   MVT PairTy = getPairVT(MVT::i8);
20970b57cec5SDimitry Andric 
2098bdd1243dSDimitry Andric   auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2099bdd1243dSDimitry Andric     return Mask1 == Mask2;
2100bdd1243dSDimitry Andric   };
21010b57cec5SDimitry Andric 
2102bdd1243dSDimitry Andric   using PackConfig = std::pair<unsigned, bool>;
2103bdd1243dSDimitry Andric   PackConfig Packs[] = {
2104bdd1243dSDimitry Andric       {1, false}, // byte, even
2105bdd1243dSDimitry Andric       {1, true},  // byte, odd
2106bdd1243dSDimitry Andric       {2, false}, // half, even
2107bdd1243dSDimitry Andric       {2, true},  // half, odd
2108bdd1243dSDimitry Andric   };
2109bdd1243dSDimitry Andric 
2110bdd1243dSDimitry Andric   { // Check vpack
2111bdd1243dSDimitry Andric     unsigned Opcodes[] = {
2112bdd1243dSDimitry Andric         Hexagon::V6_vpackeb,
2113bdd1243dSDimitry Andric         Hexagon::V6_vpackob,
2114bdd1243dSDimitry Andric         Hexagon::V6_vpackeh,
2115bdd1243dSDimitry Andric         Hexagon::V6_vpackoh,
2116bdd1243dSDimitry Andric     };
2117bdd1243dSDimitry Andric     for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2118bdd1243dSDimitry Andric       auto [Size, Odd] = Packs[i];
2119bdd1243dSDimitry Andric       if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2120bdd1243dSDimitry Andric         Results.push(Opcodes[i], SingleTy, {Vb, Va});
2121bdd1243dSDimitry Andric         return OpRef::res(Results.top());
2122bdd1243dSDimitry Andric       }
2123bdd1243dSDimitry Andric     }
2124bdd1243dSDimitry Andric   }
2125bdd1243dSDimitry Andric 
2126bdd1243dSDimitry Andric   { // Check vshuff
2127bdd1243dSDimitry Andric     unsigned Opcodes[] = {
2128bdd1243dSDimitry Andric         Hexagon::V6_vshuffeb,
2129bdd1243dSDimitry Andric         Hexagon::V6_vshuffob,
2130bdd1243dSDimitry Andric         Hexagon::V6_vshufeh,
2131bdd1243dSDimitry Andric         Hexagon::V6_vshufoh,
2132bdd1243dSDimitry Andric     };
2133bdd1243dSDimitry Andric     for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2134bdd1243dSDimitry Andric       auto [Size, Odd] = Packs[i];
2135bdd1243dSDimitry Andric       if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2136bdd1243dSDimitry Andric         Results.push(Opcodes[i], SingleTy, {Vb, Va});
2137bdd1243dSDimitry Andric         return OpRef::res(Results.top());
2138bdd1243dSDimitry Andric       }
2139bdd1243dSDimitry Andric     }
2140bdd1243dSDimitry Andric   }
2141bdd1243dSDimitry Andric 
2142bdd1243dSDimitry Andric   { // Check vdeal
2143bdd1243dSDimitry Andric     // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2144bdd1243dSDimitry Andric     // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2145bdd1243dSDimitry Andric     // variants of "deal" can be done similarly.
2146bdd1243dSDimitry Andric     unsigned Opcodes[] = {
2147bdd1243dSDimitry Andric         Hexagon::V6_vpackeb,
2148bdd1243dSDimitry Andric         Hexagon::V6_vpackob,
2149bdd1243dSDimitry Andric         Hexagon::V6_vpackeh,
2150bdd1243dSDimitry Andric         Hexagon::V6_vpackoh,
2151bdd1243dSDimitry Andric     };
2152bdd1243dSDimitry Andric     const SDLoc &dl(Results.InpNode);
2153bdd1243dSDimitry Andric 
2154bdd1243dSDimitry Andric     for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2155bdd1243dSDimitry Andric       auto [Size, Odd] = Packs[i];
2156bdd1243dSDimitry Andric       if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2157bdd1243dSDimitry Andric         Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2158bdd1243dSDimitry Andric         Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2159bdd1243dSDimitry Andric         auto vdeal = OpRef::res(Results.top());
2160bdd1243dSDimitry Andric         Results.push(Opcodes[i], SingleTy,
2161bdd1243dSDimitry Andric                      {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2162bdd1243dSDimitry Andric         return OpRef::res(Results.top());
2163bdd1243dSDimitry Andric       }
2164bdd1243dSDimitry Andric     }
2165bdd1243dSDimitry Andric   }
2166bdd1243dSDimitry Andric 
2167bdd1243dSDimitry Andric   if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2168bdd1243dSDimitry Andric     Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
21690b57cec5SDimitry Andric     return OpRef::res(Results.top());
21700b57cec5SDimitry Andric   }
21710b57cec5SDimitry Andric 
21720b57cec5SDimitry Andric   return OpRef::fail();
21730b57cec5SDimitry Andric }
21740b57cec5SDimitry Andric 
21750b57cec5SDimitry Andric OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
21760b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
21770b57cec5SDimitry Andric   // Expanding shuffles (using all elements and inserting into larger vector):
21780b57cec5SDimitry Andric   //
21790b57cec5SDimitry Andric   // V6_vunpacku{b,h} [*]
21800b57cec5SDimitry Andric   //
21810b57cec5SDimitry Andric   // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
21820b57cec5SDimitry Andric   //
21830b57cec5SDimitry Andric   // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
21840b57cec5SDimitry Andric   // they are not shuffles.
21850b57cec5SDimitry Andric   //
21860b57cec5SDimitry Andric   // The argument is a single vector.
21870b57cec5SDimitry Andric 
21880b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
21890b57cec5SDimitry Andric   assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
21900b57cec5SDimitry Andric 
21910b57cec5SDimitry Andric   std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
21920b57cec5SDimitry Andric 
21930b57cec5SDimitry Andric   // The patterns for the unpacks, in terms of the starting offsets of the
21940b57cec5SDimitry Andric   // consecutive strips (L = length of the strip, N = VecLen):
21950b57cec5SDimitry Andric   //
21960b57cec5SDimitry Andric   // vunpacku:  0, -1, L, -1, 2L, -1 ...
21970b57cec5SDimitry Andric 
21980b57cec5SDimitry Andric   if (Strip.first != 0)
21990b57cec5SDimitry Andric     return OpRef::fail();
22000b57cec5SDimitry Andric 
22010b57cec5SDimitry Andric   // The vunpackus only handle byte and half-word.
22020b57cec5SDimitry Andric   if (Strip.second != 1 && Strip.second != 2)
22030b57cec5SDimitry Andric     return OpRef::fail();
22040b57cec5SDimitry Andric 
22050b57cec5SDimitry Andric   int N = VecLen;
22060b57cec5SDimitry Andric   int L = Strip.second;
22070b57cec5SDimitry Andric 
22080b57cec5SDimitry Andric   // First, check the non-ignored strips.
2209fe6060f1SDimitry Andric   for (int I = 2*L; I < N; I += 2*L) {
22100b57cec5SDimitry Andric     auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
22110b57cec5SDimitry Andric     if (S.second != unsigned(L))
22120b57cec5SDimitry Andric       return OpRef::fail();
22130b57cec5SDimitry Andric     if (2*S.first != I)
22140b57cec5SDimitry Andric       return OpRef::fail();
22150b57cec5SDimitry Andric   }
22160b57cec5SDimitry Andric   // Check the -1s.
2217fe6060f1SDimitry Andric   for (int I = L; I < N; I += 2*L) {
22180b57cec5SDimitry Andric     auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
22190b57cec5SDimitry Andric     if (S.first != -1 || S.second != unsigned(L))
22200b57cec5SDimitry Andric       return OpRef::fail();
22210b57cec5SDimitry Andric   }
22220b57cec5SDimitry Andric 
22230b57cec5SDimitry Andric   unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
22240b57cec5SDimitry Andric                                    : Hexagon::V6_vunpackuh;
22250b57cec5SDimitry Andric   Results.push(Opc, getPairVT(MVT::i8), {Va});
22260b57cec5SDimitry Andric   return OpRef::res(Results.top());
22270b57cec5SDimitry Andric }
22280b57cec5SDimitry Andric 
22290b57cec5SDimitry Andric OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
22300b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
22310b57cec5SDimitry Andric   // V6_vdeal{b,h}
22320b57cec5SDimitry Andric   // V6_vshuff{b,h}
22330b57cec5SDimitry Andric 
22340b57cec5SDimitry Andric   // V6_vshufoe{b,h}  those are quivalent to vshuffvdd(..,{1,2})
22350b57cec5SDimitry Andric   // V6_vshuffvdd (V6_vshuff)
22360b57cec5SDimitry Andric   // V6_dealvdd (V6_vdeal)
22370b57cec5SDimitry Andric 
22380b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
22390b57cec5SDimitry Andric   assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
22400b57cec5SDimitry Andric   unsigned LogLen = Log2_32(VecLen);
22410b57cec5SDimitry Andric   unsigned HwLog = Log2_32(HwLen);
22420b57cec5SDimitry Andric   // The result length must be the same as the length of a single vector,
22430b57cec5SDimitry Andric   // or a vector pair.
22440b57cec5SDimitry Andric   assert(LogLen == HwLog || LogLen == HwLog + 1);
2245e8d8bef9SDimitry Andric   bool HavePairs = LogLen == HwLog + 1;
22460b57cec5SDimitry Andric 
22470b57cec5SDimitry Andric   SmallVector<unsigned, 8> Perm(LogLen);
22480b57cec5SDimitry Andric 
22490b57cec5SDimitry Andric   // Check if this could be a perfect shuffle, or a combination of perfect
22500b57cec5SDimitry Andric   // shuffles.
22510b57cec5SDimitry Andric   //
22520b57cec5SDimitry Andric   // Consider this permutation (using hex digits to make the ASCII diagrams
22530b57cec5SDimitry Andric   // easier to read):
22540b57cec5SDimitry Andric   //   { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
22550b57cec5SDimitry Andric   // This is a "deal" operation: divide the input into two halves, and
22560b57cec5SDimitry Andric   // create the output by picking elements by alternating between these two
22570b57cec5SDimitry Andric   // halves:
22580b57cec5SDimitry Andric   //   0 1 2 3 4 5 6 7    -->    0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F  [*]
22590b57cec5SDimitry Andric   //   8 9 A B C D E F
22600b57cec5SDimitry Andric   //
22610b57cec5SDimitry Andric   // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
22620b57cec5SDimitry Andric   // a somwehat different mechanism that could be used to perform shuffle/
22630b57cec5SDimitry Andric   // deal operations: a 2x2 transpose.
22640b57cec5SDimitry Andric   // Consider the halves of inputs again, they can be interpreted as a 2x8
22650b57cec5SDimitry Andric   // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
22660b57cec5SDimitry Andric   // together. Now, when considering 2 elements at a time, it will be a 2x4
22670b57cec5SDimitry Andric   // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
22680b57cec5SDimitry Andric   //   01 23  45 67
22690b57cec5SDimitry Andric   //   89 AB  CD EF
22700b57cec5SDimitry Andric   // With groups of 4, this will become a single 2x2 matrix, and so on.
22710b57cec5SDimitry Andric   //
22720b57cec5SDimitry Andric   // The 2x2 transpose instruction works by transposing each of the 2x2
22730b57cec5SDimitry Andric   // matrices (or "sub-matrices"), given a specific group size. For example,
22740b57cec5SDimitry Andric   // if the group size is 1 (i.e. each element is its own group), there
22750b57cec5SDimitry Andric   // will be four transposes of the four 2x2 matrices that form the 2x8.
22760b57cec5SDimitry Andric   // For example, with the inputs as above, the result will be:
22770b57cec5SDimitry Andric   //   0 8  2 A  4 C  6 E
22780b57cec5SDimitry Andric   //   1 9  3 B  5 D  7 F
22790b57cec5SDimitry Andric   // Now, this result can be tranposed again, but with the group size of 2:
22800b57cec5SDimitry Andric   //   08 19  4C 5D
22810b57cec5SDimitry Andric   //   2A 3B  6E 7F
22820b57cec5SDimitry Andric   // If we then transpose that result, but with the group size of 4, we get:
22830b57cec5SDimitry Andric   //   0819 2A3B
22840b57cec5SDimitry Andric   //   4C5D 6E7F
22850b57cec5SDimitry Andric   // If we concatenate these two rows, it will be
22860b57cec5SDimitry Andric   //   0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
22870b57cec5SDimitry Andric   // which is the same as the "deal" [*] above.
22880b57cec5SDimitry Andric   //
22890b57cec5SDimitry Andric   // In general, a "deal" of individual elements is a series of 2x2 transposes,
22900b57cec5SDimitry Andric   // with changing group size. HVX has two instructions:
22910b57cec5SDimitry Andric   //   Vdd = V6_vdealvdd Vu, Vv, Rt
22920b57cec5SDimitry Andric   //   Vdd = V6_shufvdd  Vu, Vv, Rt
22930b57cec5SDimitry Andric   // that perform exactly that. The register Rt controls which transposes are
22940b57cec5SDimitry Andric   // going to happen: a bit at position n (counting from 0) indicates that a
22950b57cec5SDimitry Andric   // transpose with a group size of 2^n will take place. If multiple bits are
22960b57cec5SDimitry Andric   // set, multiple transposes will happen: vdealvdd will perform them starting
22970b57cec5SDimitry Andric   // with the largest group size, vshuffvdd will do them in the reverse order.
22980b57cec5SDimitry Andric   //
22990b57cec5SDimitry Andric   // The main observation is that each 2x2 transpose corresponds to swapping
23000b57cec5SDimitry Andric   // columns of bits in the binary representation of the values.
23010b57cec5SDimitry Andric   //
23020b57cec5SDimitry Andric   // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
23030b57cec5SDimitry Andric   // in a given column. The * denote the columns that will be swapped.
23040b57cec5SDimitry Andric   // The transpose with the group size 2^n corresponds to swapping columns
23050b57cec5SDimitry Andric   // 3 (the highest log) and log2(n):
23060b57cec5SDimitry Andric   //
23070b57cec5SDimitry Andric   //     3 2 1 0         0 2 1 3         0 2 3 1
23080b57cec5SDimitry Andric   //     *     *             * *           * *
23090b57cec5SDimitry Andric   //  0  0 0 0 0      0  0 0 0 0      0  0 0 0 0      0  0 0 0 0
23100b57cec5SDimitry Andric   //  1  0 0 0 1      8  1 0 0 0      8  1 0 0 0      8  1 0 0 0
23110b57cec5SDimitry Andric   //  2  0 0 1 0      2  0 0 1 0      1  0 0 0 1      1  0 0 0 1
23120b57cec5SDimitry Andric   //  3  0 0 1 1      A  1 0 1 0      9  1 0 0 1      9  1 0 0 1
23130b57cec5SDimitry Andric   //  4  0 1 0 0      4  0 1 0 0      4  0 1 0 0      2  0 0 1 0
23140b57cec5SDimitry Andric   //  5  0 1 0 1      C  1 1 0 0      C  1 1 0 0      A  1 0 1 0
23150b57cec5SDimitry Andric   //  6  0 1 1 0      6  0 1 1 0      5  0 1 0 1      3  0 0 1 1
23160b57cec5SDimitry Andric   //  7  0 1 1 1      E  1 1 1 0      D  1 1 0 1      B  1 0 1 1
23170b57cec5SDimitry Andric   //  8  1 0 0 0      1  0 0 0 1      2  0 0 1 0      4  0 1 0 0
23180b57cec5SDimitry Andric   //  9  1 0 0 1      9  1 0 0 1      A  1 0 1 0      C  1 1 0 0
23190b57cec5SDimitry Andric   //  A  1 0 1 0      3  0 0 1 1      3  0 0 1 1      5  0 1 0 1
23200b57cec5SDimitry Andric   //  B  1 0 1 1      B  1 0 1 1      B  1 0 1 1      D  1 1 0 1
23210b57cec5SDimitry Andric   //  C  1 1 0 0      5  0 1 0 1      6  0 1 1 0      6  0 1 1 0
23220b57cec5SDimitry Andric   //  D  1 1 0 1      D  1 1 0 1      E  1 1 1 0      E  1 1 1 0
23230b57cec5SDimitry Andric   //  E  1 1 1 0      7  0 1 1 1      7  0 1 1 1      7  0 1 1 1
23240b57cec5SDimitry Andric   //  F  1 1 1 1      F  1 1 1 1      F  1 1 1 1      F  1 1 1 1
23250b57cec5SDimitry Andric 
2326bdd1243dSDimitry Andric   // There is one special case that is not a perfect shuffle, but can be
2327bdd1243dSDimitry Andric   // turned into one easily: when the shuffle operates on a vector pair,
2328bdd1243dSDimitry Andric   // but the two vectors in the pair are swapped. The code that identifies
2329bdd1243dSDimitry Andric   // perfect shuffles will reject it, unless the order is reversed.
2330bdd1243dSDimitry Andric   shuffles::MaskT MaskStorage(SM.Mask);
2331e8d8bef9SDimitry Andric   bool InvertedPair = false;
2332e8d8bef9SDimitry Andric   if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2333e8d8bef9SDimitry Andric     for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2334e8d8bef9SDimitry Andric       int M = SM.Mask[i];
2335e8d8bef9SDimitry Andric       MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2336e8d8bef9SDimitry Andric     }
2337e8d8bef9SDimitry Andric     InvertedPair = true;
2338bdd1243dSDimitry Andric     SM = ShuffleMask(MaskStorage);
2339e8d8bef9SDimitry Andric   }
2340e8d8bef9SDimitry Andric 
2341bdd1243dSDimitry Andric   auto Comps = getPerfectCompletions(SM, LogLen);
23425f757f3fSDimitry Andric   if (llvm::is_contained(Comps, 0))
2343bdd1243dSDimitry Andric     return OpRef::fail();
23440b57cec5SDimitry Andric 
2345bdd1243dSDimitry Andric   auto Pick = completeToPerfect(Comps, LogLen);
2346bdd1243dSDimitry Andric   for (unsigned I = 0; I != LogLen; ++I)
2347bdd1243dSDimitry Andric     Perm[I] = Log2_32(Pick[I]);
23480b57cec5SDimitry Andric 
23490b57cec5SDimitry Andric   // Once we have Perm, represent it as cycles. Denote the maximum log2
23500b57cec5SDimitry Andric   // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
23510b57cec5SDimitry Andric   // written as (M a1 a2 a3 ... an). That cycle can be broken up into
23520b57cec5SDimitry Andric   // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
23530b57cec5SDimitry Andric   // order being from left to right. Any (contiguous) segment where the
23540b57cec5SDimitry Andric   // values ai, ai+1...aj are either all increasing or all decreasing,
23550b57cec5SDimitry Andric   // can be implemented via a single vshuffvdd/vdealvdd respectively.
23560b57cec5SDimitry Andric   //
23570b57cec5SDimitry Andric   // If there is a cycle (a1 a2 ... an) that does not involve M, it can
23580b57cec5SDimitry Andric   // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
23590b57cec5SDimitry Andric   // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
23600b57cec5SDimitry Andric   // can be used to generate a sequence of vshuffvdd/vdealvdd.
23610b57cec5SDimitry Andric   //
23620b57cec5SDimitry Andric   // Example:
23630b57cec5SDimitry Andric   // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
23640b57cec5SDimitry Andric   // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
23650b57cec5SDimitry Andric   //   (4 0 1)(4 0)(4 2 3)(4 2).
23660b57cec5SDimitry Andric   // It can then be expanded into swaps as
23670b57cec5SDimitry Andric   //   (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
23680b57cec5SDimitry Andric   // and broken up into "increasing" segments as
23690b57cec5SDimitry Andric   //   [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
23700b57cec5SDimitry Andric   // This is equivalent to
23710b57cec5SDimitry Andric   //   (4 0 1)(4 0 2 3)(4 2),
23720b57cec5SDimitry Andric   // which can be implemented as 3 vshufvdd instructions.
23730b57cec5SDimitry Andric 
23740b57cec5SDimitry Andric   using CycleType = SmallVector<unsigned, 8>;
23750b57cec5SDimitry Andric   std::set<CycleType> Cycles;
23760b57cec5SDimitry Andric   std::set<unsigned> All;
23770b57cec5SDimitry Andric 
23780b57cec5SDimitry Andric   for (unsigned I : Perm)
23790b57cec5SDimitry Andric     All.insert(I);
23800b57cec5SDimitry Andric 
23810b57cec5SDimitry Andric   // If the cycle contains LogLen-1, move it to the front of the cycle.
23820b57cec5SDimitry Andric   // Otherwise, return the cycle unchanged.
23830b57cec5SDimitry Andric   auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
23840b57cec5SDimitry Andric     unsigned LogPos, N = C.size();
23850b57cec5SDimitry Andric     for (LogPos = 0; LogPos != N; ++LogPos)
23860b57cec5SDimitry Andric       if (C[LogPos] == LogLen - 1)
23870b57cec5SDimitry Andric         break;
23880b57cec5SDimitry Andric     if (LogPos == N)
23890b57cec5SDimitry Andric       return C;
23900b57cec5SDimitry Andric 
23910b57cec5SDimitry Andric     CycleType NewC(C.begin() + LogPos, C.end());
23920b57cec5SDimitry Andric     NewC.append(C.begin(), C.begin() + LogPos);
23930b57cec5SDimitry Andric     return NewC;
23940b57cec5SDimitry Andric   };
23950b57cec5SDimitry Andric 
23960b57cec5SDimitry Andric   auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
23970b57cec5SDimitry Andric     // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
23980b57cec5SDimitry Andric     // for bytes zero is included, for halfwords is not.
23990b57cec5SDimitry Andric     if (Cs.size() != 1)
24000b57cec5SDimitry Andric       return 0u;
24010b57cec5SDimitry Andric     const CycleType &C = *Cs.begin();
24020b57cec5SDimitry Andric     if (C[0] != Len - 1)
24030b57cec5SDimitry Andric       return 0u;
24040b57cec5SDimitry Andric     int D = Len - C.size();
24050b57cec5SDimitry Andric     if (D != 0 && D != 1)
24060b57cec5SDimitry Andric       return 0u;
24070b57cec5SDimitry Andric 
24080b57cec5SDimitry Andric     bool IsDeal = true, IsShuff = true;
24090b57cec5SDimitry Andric     for (unsigned I = 1; I != Len - D; ++I) {
24100b57cec5SDimitry Andric       if (C[I] != Len - 1 - I)
24110b57cec5SDimitry Andric         IsDeal = false;
24120b57cec5SDimitry Andric       if (C[I] != I - (1 - D)) // I-1, I
24130b57cec5SDimitry Andric         IsShuff = false;
24140b57cec5SDimitry Andric     }
24150b57cec5SDimitry Andric     // At most one, IsDeal or IsShuff, can be non-zero.
24160b57cec5SDimitry Andric     assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
24170b57cec5SDimitry Andric     static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
24180b57cec5SDimitry Andric     static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
24190b57cec5SDimitry Andric     return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
24200b57cec5SDimitry Andric   };
24210b57cec5SDimitry Andric 
24220b57cec5SDimitry Andric   while (!All.empty()) {
24230b57cec5SDimitry Andric     unsigned A = *All.begin();
24240b57cec5SDimitry Andric     All.erase(A);
24250b57cec5SDimitry Andric     CycleType C;
24260b57cec5SDimitry Andric     C.push_back(A);
24270b57cec5SDimitry Andric     for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
24280b57cec5SDimitry Andric       C.push_back(B);
24290b57cec5SDimitry Andric       All.erase(B);
24300b57cec5SDimitry Andric     }
24310b57cec5SDimitry Andric     if (C.size() <= 1)
24320b57cec5SDimitry Andric       continue;
24330b57cec5SDimitry Andric     Cycles.insert(canonicalize(C));
24340b57cec5SDimitry Andric   }
24350b57cec5SDimitry Andric 
24360b57cec5SDimitry Andric   MVT SingleTy = getSingleVT(MVT::i8);
24370b57cec5SDimitry Andric   MVT PairTy = getPairVT(MVT::i8);
24380b57cec5SDimitry Andric 
24390b57cec5SDimitry Andric   // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
24400b57cec5SDimitry Andric   if (unsigned(VecLen) == HwLen) {
24410b57cec5SDimitry Andric     if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
24420b57cec5SDimitry Andric       Results.push(SingleOpc, SingleTy, {Va});
24430b57cec5SDimitry Andric       return OpRef::res(Results.top());
24440b57cec5SDimitry Andric     }
24450b57cec5SDimitry Andric   }
24460b57cec5SDimitry Andric 
2447e8d8bef9SDimitry Andric   // From the cycles, construct the sequence of values that will
2448e8d8bef9SDimitry Andric   // then form the control values for vdealvdd/vshuffvdd, i.e.
2449e8d8bef9SDimitry Andric   // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2450e8d8bef9SDimitry Andric   // This essentially strips the M value from the cycles where
2451e8d8bef9SDimitry Andric   // it's present, and performs the insertion of M (then stripping)
2452e8d8bef9SDimitry Andric   // for cycles without M (as described in an earlier comment).
24530b57cec5SDimitry Andric   SmallVector<unsigned, 8> SwapElems;
2454e8d8bef9SDimitry Andric   // When the input is extended (i.e. single vector becomes a pair),
2455e8d8bef9SDimitry Andric   // this is done by using an "undef" vector as the second input.
2456e8d8bef9SDimitry Andric   // However, then we get
2457e8d8bef9SDimitry Andric   //   input 1: GOODBITS
2458e8d8bef9SDimitry Andric   //   input 2: ........
2459e8d8bef9SDimitry Andric   // but we need
2460e8d8bef9SDimitry Andric   //   input 1: ....BITS
2461e8d8bef9SDimitry Andric   //   input 2: ....GOOD
2462e8d8bef9SDimitry Andric   // Then at the end, this needs to be undone. To accomplish this,
2463e8d8bef9SDimitry Andric   // artificially add "LogLen-1" at both ends of the sequence.
2464e8d8bef9SDimitry Andric   if (!HavePairs)
24650b57cec5SDimitry Andric     SwapElems.push_back(LogLen - 1);
24660b57cec5SDimitry Andric   for (const CycleType &C : Cycles) {
2467e8d8bef9SDimitry Andric     // Do the transformation: (a1..an) -> (M a1..an)(M a1).
24680b57cec5SDimitry Andric     unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
24690b57cec5SDimitry Andric     SwapElems.append(C.begin() + First, C.end());
24700b57cec5SDimitry Andric     if (First == 0)
24710b57cec5SDimitry Andric       SwapElems.push_back(C[0]);
24720b57cec5SDimitry Andric   }
2473e8d8bef9SDimitry Andric   if (!HavePairs)
2474e8d8bef9SDimitry Andric     SwapElems.push_back(LogLen - 1);
24750b57cec5SDimitry Andric 
24760b57cec5SDimitry Andric   const SDLoc &dl(Results.InpNode);
2477bdd1243dSDimitry Andric   OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2478e8d8bef9SDimitry Andric   if (InvertedPair)
2479fe6060f1SDimitry Andric     Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
24800b57cec5SDimitry Andric 
24810b57cec5SDimitry Andric   for (unsigned I = 0, E = SwapElems.size(); I != E;) {
24820b57cec5SDimitry Andric     bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
24830b57cec5SDimitry Andric     unsigned S = (1u << SwapElems[I]);
24840b57cec5SDimitry Andric     if (I < E - 1) {
24850b57cec5SDimitry Andric       while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
24860b57cec5SDimitry Andric         S |= 1u << SwapElems[I];
24870b57cec5SDimitry Andric       // The above loop will not add a bit for the final SwapElems[I+1],
24880b57cec5SDimitry Andric       // so add it here.
24890b57cec5SDimitry Andric       S |= 1u << SwapElems[I];
24900b57cec5SDimitry Andric     }
24910b57cec5SDimitry Andric     ++I;
24920b57cec5SDimitry Andric 
24930b57cec5SDimitry Andric     NodeTemplate Res;
2494fe6060f1SDimitry Andric     Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
24950b57cec5SDimitry Andric     Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
24960b57cec5SDimitry Andric     Res.Ty = PairTy;
24970b57cec5SDimitry Andric     Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
24980b57cec5SDimitry Andric     Results.push(Res);
24990b57cec5SDimitry Andric     Arg = OpRef::res(Results.top());
25000b57cec5SDimitry Andric   }
25010b57cec5SDimitry Andric 
2502e8d8bef9SDimitry Andric   return HavePairs ? Arg : OpRef::lo(Arg);
25030b57cec5SDimitry Andric }
25040b57cec5SDimitry Andric 
25050b57cec5SDimitry Andric OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
25060b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
25070b57cec5SDimitry Andric   // Butterfly shuffles.
25080b57cec5SDimitry Andric   //
25090b57cec5SDimitry Andric   // V6_vdelta
25100b57cec5SDimitry Andric   // V6_vrdelta
25110b57cec5SDimitry Andric   // V6_vror
25120b57cec5SDimitry Andric 
25130b57cec5SDimitry Andric   // The assumption here is that all elements picked by Mask are in the
25140b57cec5SDimitry Andric   // first operand to the vector_shuffle. This assumption is enforced
25150b57cec5SDimitry Andric   // by the caller.
25160b57cec5SDimitry Andric 
25170b57cec5SDimitry Andric   MVT ResTy = getSingleVT(MVT::i8);
25180b57cec5SDimitry Andric   PermNetwork::Controls FC, RC;
25190b57cec5SDimitry Andric   const SDLoc &dl(Results.InpNode);
25200b57cec5SDimitry Andric   int VecLen = SM.Mask.size();
25210b57cec5SDimitry Andric 
25220b57cec5SDimitry Andric   for (int M : SM.Mask) {
25230b57cec5SDimitry Andric     if (M != -1 && M >= VecLen)
25240b57cec5SDimitry Andric       return OpRef::fail();
25250b57cec5SDimitry Andric   }
25260b57cec5SDimitry Andric 
25270b57cec5SDimitry Andric   // Try the deltas/benes for both single vectors and vector pairs.
25280b57cec5SDimitry Andric   ForwardDeltaNetwork FN(SM.Mask);
25290b57cec5SDimitry Andric   if (FN.run(FC)) {
25300b57cec5SDimitry Andric     SDValue Ctl = getVectorConstant(FC, dl);
25310b57cec5SDimitry Andric     Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
25320b57cec5SDimitry Andric     return OpRef::res(Results.top());
25330b57cec5SDimitry Andric   }
25340b57cec5SDimitry Andric 
25350b57cec5SDimitry Andric   // Try reverse delta.
25360b57cec5SDimitry Andric   ReverseDeltaNetwork RN(SM.Mask);
25370b57cec5SDimitry Andric   if (RN.run(RC)) {
25380b57cec5SDimitry Andric     SDValue Ctl = getVectorConstant(RC, dl);
25390b57cec5SDimitry Andric     Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
25400b57cec5SDimitry Andric     return OpRef::res(Results.top());
25410b57cec5SDimitry Andric   }
25420b57cec5SDimitry Andric 
25430b57cec5SDimitry Andric   // Do Benes.
25440b57cec5SDimitry Andric   BenesNetwork BN(SM.Mask);
25450b57cec5SDimitry Andric   if (BN.run(FC, RC)) {
25460b57cec5SDimitry Andric     SDValue CtlF = getVectorConstant(FC, dl);
25470b57cec5SDimitry Andric     SDValue CtlR = getVectorConstant(RC, dl);
25480b57cec5SDimitry Andric     Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
25490b57cec5SDimitry Andric     Results.push(Hexagon::V6_vrdelta, ResTy,
25500b57cec5SDimitry Andric                  {OpRef::res(-1), OpRef(CtlR)});
25510b57cec5SDimitry Andric     return OpRef::res(Results.top());
25520b57cec5SDimitry Andric   }
25530b57cec5SDimitry Andric 
25540b57cec5SDimitry Andric   return OpRef::fail();
25550b57cec5SDimitry Andric }
25560b57cec5SDimitry Andric 
2557fe6060f1SDimitry Andric SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2558fe6060f1SDimitry Andric   return DAG.getTargetConstant(Val, dl, MVT::i32);
2559fe6060f1SDimitry Andric }
2560fe6060f1SDimitry Andric 
25610b57cec5SDimitry Andric SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
25620b57cec5SDimitry Andric                                        const SDLoc &dl) {
25630b57cec5SDimitry Andric   SmallVector<SDValue, 128> Elems;
25640b57cec5SDimitry Andric   for (uint8_t C : Data)
25650b57cec5SDimitry Andric     Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
25660b57cec5SDimitry Andric   MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
25670b57cec5SDimitry Andric   SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
25680b57cec5SDimitry Andric   SDValue LV = Lower.LowerOperation(BV, DAG);
25690b57cec5SDimitry Andric   DAG.RemoveDeadNode(BV.getNode());
2570e8d8bef9SDimitry Andric   return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
25710b57cec5SDimitry Andric }
25720b57cec5SDimitry Andric 
2573bdd1243dSDimitry Andric void HvxSelector::selectExtractSubvector(SDNode *N) {
2574bdd1243dSDimitry Andric   SDValue Inp = N->getOperand(0);
2575bdd1243dSDimitry Andric   MVT ResTy = N->getValueType(0).getSimpleVT();
2576*7a6dacacSDimitry Andric   unsigned Idx = N->getConstantOperandVal(1);
2577bdd1243dSDimitry Andric 
2578bdd1243dSDimitry Andric   [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2579bdd1243dSDimitry Andric   [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2580bdd1243dSDimitry Andric   assert(InpTy.getVectorElementType() == ResTy.getVectorElementType());
2581bdd1243dSDimitry Andric   assert(2 * ResLen == InpTy.getVectorNumElements());
2582bdd1243dSDimitry Andric   assert(Idx == 0 || Idx == ResLen);
2583bdd1243dSDimitry Andric 
2584bdd1243dSDimitry Andric   unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2585bdd1243dSDimitry Andric   SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2586bdd1243dSDimitry Andric 
2587bdd1243dSDimitry Andric   ISel.ReplaceNode(N, Ext.getNode());
2588bdd1243dSDimitry Andric }
2589bdd1243dSDimitry Andric 
25900b57cec5SDimitry Andric void HvxSelector::selectShuffle(SDNode *N) {
25910b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {
25920b57cec5SDimitry Andric     dbgs() << "Starting " << __func__ << " on node:\n";
25930b57cec5SDimitry Andric     N->dump(&DAG);
25940b57cec5SDimitry Andric   });
25950b57cec5SDimitry Andric   MVT ResTy = N->getValueType(0).getSimpleVT();
25960b57cec5SDimitry Andric   // Assume that vector shuffles operate on vectors of bytes.
25970b57cec5SDimitry Andric   assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
25980b57cec5SDimitry Andric 
25990b57cec5SDimitry Andric   auto *SN = cast<ShuffleVectorSDNode>(N);
26000b57cec5SDimitry Andric   std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
26010b57cec5SDimitry Andric   // This shouldn't really be necessary. Is it?
26020b57cec5SDimitry Andric   for (int &Idx : Mask)
26030b57cec5SDimitry Andric     if (Idx != -1 && Idx < 0)
26040b57cec5SDimitry Andric       Idx = -1;
26050b57cec5SDimitry Andric 
26060b57cec5SDimitry Andric   unsigned VecLen = Mask.size();
26070b57cec5SDimitry Andric   bool HavePairs = (2*HwLen == VecLen);
26080b57cec5SDimitry Andric   assert(ResTy.getSizeInBits() / 8 == VecLen);
26090b57cec5SDimitry Andric 
26100b57cec5SDimitry Andric   // Vd = vector_shuffle Va, Vb, Mask
26110b57cec5SDimitry Andric   //
26120b57cec5SDimitry Andric 
26130b57cec5SDimitry Andric   bool UseLeft = false, UseRight = false;
26140b57cec5SDimitry Andric   for (unsigned I = 0; I != VecLen; ++I) {
26150b57cec5SDimitry Andric     if (Mask[I] == -1)
26160b57cec5SDimitry Andric       continue;
26170b57cec5SDimitry Andric     unsigned Idx = Mask[I];
26180b57cec5SDimitry Andric     assert(Idx < 2*VecLen);
26190b57cec5SDimitry Andric     if (Idx < VecLen)
26200b57cec5SDimitry Andric       UseLeft = true;
26210b57cec5SDimitry Andric     else
26220b57cec5SDimitry Andric       UseRight = true;
26230b57cec5SDimitry Andric   }
26240b57cec5SDimitry Andric 
26250b57cec5SDimitry Andric   DEBUG_WITH_TYPE("isel", {
26260b57cec5SDimitry Andric     dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
26270b57cec5SDimitry Andric            << UseLeft << " UseRight=" << UseRight << " HavePairs="
26280b57cec5SDimitry Andric            << HavePairs << '\n';
26290b57cec5SDimitry Andric   });
26300b57cec5SDimitry Andric   // If the mask is all -1's, generate "undef".
26310b57cec5SDimitry Andric   if (!UseLeft && !UseRight) {
26320b57cec5SDimitry Andric     ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
26330b57cec5SDimitry Andric     return;
26340b57cec5SDimitry Andric   }
26350b57cec5SDimitry Andric 
26360b57cec5SDimitry Andric   SDValue Vec0 = N->getOperand(0);
26370b57cec5SDimitry Andric   SDValue Vec1 = N->getOperand(1);
2638bdd1243dSDimitry Andric   assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2639bdd1243dSDimitry Andric 
26400b57cec5SDimitry Andric   ResultStack Results(SN);
2641bdd1243dSDimitry Andric   OpRef Va = OpRef::undef(ResTy);
2642bdd1243dSDimitry Andric   OpRef Vb = OpRef::undef(ResTy);
2643bdd1243dSDimitry Andric 
2644bdd1243dSDimitry Andric   if (!Vec0.isUndef()) {
26450b57cec5SDimitry Andric     Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2646bdd1243dSDimitry Andric     Va = OpRef::OpRef::res(Results.top());
2647bdd1243dSDimitry Andric   }
2648bdd1243dSDimitry Andric   if (!Vec1.isUndef()) {
26490b57cec5SDimitry Andric     Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2650bdd1243dSDimitry Andric     Vb = OpRef::res(Results.top());
2651bdd1243dSDimitry Andric   }
26520b57cec5SDimitry Andric 
26530b57cec5SDimitry Andric   OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
26540b57cec5SDimitry Andric                          : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
26550b57cec5SDimitry Andric 
26560b57cec5SDimitry Andric   bool Done = Res.isValid();
26570b57cec5SDimitry Andric   if (Done) {
26580b57cec5SDimitry Andric     // Make sure that Res is on the stack before materializing.
26590b57cec5SDimitry Andric     Results.push(TargetOpcode::COPY, ResTy, {Res});
26600b57cec5SDimitry Andric     materialize(Results);
26610b57cec5SDimitry Andric   } else {
26620b57cec5SDimitry Andric     Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
26630b57cec5SDimitry Andric   }
26640b57cec5SDimitry Andric 
26650b57cec5SDimitry Andric   if (!Done) {
26660b57cec5SDimitry Andric #ifndef NDEBUG
26670b57cec5SDimitry Andric     dbgs() << "Unhandled shuffle:\n";
26680b57cec5SDimitry Andric     SN->dumpr(&DAG);
26690b57cec5SDimitry Andric #endif
26700b57cec5SDimitry Andric     llvm_unreachable("Failed to select vector shuffle");
26710b57cec5SDimitry Andric   }
26720b57cec5SDimitry Andric }
26730b57cec5SDimitry Andric 
26740b57cec5SDimitry Andric void HvxSelector::selectRor(SDNode *N) {
26750b57cec5SDimitry Andric   // If this is a rotation by less than 8, use V6_valignbi.
26760b57cec5SDimitry Andric   MVT Ty = N->getValueType(0).getSimpleVT();
26770b57cec5SDimitry Andric   const SDLoc &dl(N);
26780b57cec5SDimitry Andric   SDValue VecV = N->getOperand(0);
26790b57cec5SDimitry Andric   SDValue RotV = N->getOperand(1);
26800b57cec5SDimitry Andric   SDNode *NewN = nullptr;
26810b57cec5SDimitry Andric 
26820b57cec5SDimitry Andric   if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
26830b57cec5SDimitry Andric     unsigned S = CN->getZExtValue() % HST.getVectorLength();
26840b57cec5SDimitry Andric     if (S == 0) {
26850b57cec5SDimitry Andric       NewN = VecV.getNode();
26860b57cec5SDimitry Andric     } else if (isUInt<3>(S)) {
26870b57cec5SDimitry Andric       NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2688fe6060f1SDimitry Andric                                 {VecV, VecV, getConst32(S, dl)});
26890b57cec5SDimitry Andric     }
26900b57cec5SDimitry Andric   }
26910b57cec5SDimitry Andric 
26920b57cec5SDimitry Andric   if (!NewN)
26930b57cec5SDimitry Andric     NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
26940b57cec5SDimitry Andric 
26950b57cec5SDimitry Andric   ISel.ReplaceNode(N, NewN);
26960b57cec5SDimitry Andric }
26970b57cec5SDimitry Andric 
26980b57cec5SDimitry Andric void HvxSelector::selectVAlign(SDNode *N) {
26990b57cec5SDimitry Andric   SDValue Vv = N->getOperand(0);
27000b57cec5SDimitry Andric   SDValue Vu = N->getOperand(1);
27010b57cec5SDimitry Andric   SDValue Rt = N->getOperand(2);
27020b57cec5SDimitry Andric   SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
27030b57cec5SDimitry Andric                                     N->getValueType(0), {Vv, Vu, Rt});
27040b57cec5SDimitry Andric   ISel.ReplaceNode(N, NewN);
27050b57cec5SDimitry Andric   DAG.RemoveDeadNode(N);
27060b57cec5SDimitry Andric }
27070b57cec5SDimitry Andric 
2708bdd1243dSDimitry Andric void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2709bdd1243dSDimitry Andric   auto getNodes = [this]() -> std::vector<SDNode *> {
2710bdd1243dSDimitry Andric     std::vector<SDNode *> T;
2711bdd1243dSDimitry Andric     T.reserve(CurDAG->allnodes_size());
2712bdd1243dSDimitry Andric     for (SDNode &N : CurDAG->allnodes())
2713bdd1243dSDimitry Andric       T.push_back(&N);
2714bdd1243dSDimitry Andric     return T;
2715bdd1243dSDimitry Andric   };
2716bdd1243dSDimitry Andric 
2717bdd1243dSDimitry Andric   ppHvxShuffleOfShuffle(getNodes());
2718bdd1243dSDimitry Andric }
2719bdd1243dSDimitry Andric 
2720bdd1243dSDimitry Andric template <> struct std::hash<SDValue> {
2721bdd1243dSDimitry Andric   std::size_t operator()(SDValue V) const {
2722bdd1243dSDimitry Andric     return std::hash<const void *>()(V.getNode()) +
2723bdd1243dSDimitry Andric            std::hash<unsigned>()(V.getResNo());
2724bdd1243dSDimitry Andric   };
2725bdd1243dSDimitry Andric };
2726bdd1243dSDimitry Andric 
2727bdd1243dSDimitry Andric void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2728bdd1243dSDimitry Andric   // Motivating case:
2729bdd1243dSDimitry Andric   //   t10: v64i32 = ...
2730bdd1243dSDimitry Andric   //         t46: v128i8 = vector_shuffle<...> t44, t45
2731bdd1243dSDimitry Andric   //         t48: v128i8 = vector_shuffle<...> t44, t45
2732bdd1243dSDimitry Andric   //       t42: v128i8 = vector_shuffle<...> t46, t48
2733bdd1243dSDimitry Andric   //     t12: v32i32 = extract_subvector t10, Constant:i32<0>
2734bdd1243dSDimitry Andric   //   t44: v128i8 = bitcast t12
2735bdd1243dSDimitry Andric   //     t15: v32i32 = extract_subvector t10, Constant:i32<32>
2736bdd1243dSDimitry Andric   //   t45: v128i8 = bitcast t15
2737bdd1243dSDimitry Andric   SelectionDAG &DAG = *CurDAG;
2738bdd1243dSDimitry Andric   unsigned HwLen = HST->getVectorLength();
2739bdd1243dSDimitry Andric 
2740bdd1243dSDimitry Andric   struct SubVectorInfo {
2741bdd1243dSDimitry Andric     SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2742bdd1243dSDimitry Andric     SDValue Src;
2743bdd1243dSDimitry Andric     unsigned HalfIdx;
2744bdd1243dSDimitry Andric   };
2745bdd1243dSDimitry Andric 
2746bdd1243dSDimitry Andric   using MapType = std::unordered_map<SDValue, unsigned>;
2747bdd1243dSDimitry Andric 
2748bdd1243dSDimitry Andric   auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2749bdd1243dSDimitry Andric                         ShuffleVectorSDNode *Shuff1,
2750bdd1243dSDimitry Andric                         const MapType &OpMap) -> int {
2751bdd1243dSDimitry Andric     // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2752bdd1243dSDimitry Andric     // Idx as a (non-undef) element of the top level shuffle's mask, that
2753bdd1243dSDimitry Andric     // is, index into concat(Shuff0, Shuff1).
2754bdd1243dSDimitry Andric     // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2755bdd1243dSDimitry Andric     // same source vector (as described by OpMap), return the index of
2756bdd1243dSDimitry Andric     // that source vector corresponding to Idx.
2757bdd1243dSDimitry Andric     ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2758bdd1243dSDimitry Andric     if (Idx >= HwLen)
2759bdd1243dSDimitry Andric       Idx -= HwLen;
2760bdd1243dSDimitry Andric 
2761bdd1243dSDimitry Andric     // Get the mask index that M points at in the corresponding operand.
2762bdd1243dSDimitry Andric     int MaybeN = OpShuff->getMaskElt(Idx);
2763bdd1243dSDimitry Andric     if (MaybeN < 0)
2764bdd1243dSDimitry Andric       return -1;
2765bdd1243dSDimitry Andric 
2766bdd1243dSDimitry Andric     auto N = static_cast<unsigned>(MaybeN);
2767bdd1243dSDimitry Andric     unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2768bdd1243dSDimitry Andric                                  : OpMap.at(OpShuff->getOperand(1));
2769bdd1243dSDimitry Andric     if (N >= HwLen)
2770bdd1243dSDimitry Andric       N -= HwLen;
2771bdd1243dSDimitry Andric 
2772bdd1243dSDimitry Andric     return N + SrcBase;
2773bdd1243dSDimitry Andric   };
2774bdd1243dSDimitry Andric 
2775bdd1243dSDimitry Andric   auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2776bdd1243dSDimitry Andric     // Fold all 3 shuffles into a single one.
2777bdd1243dSDimitry Andric     auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2778bdd1243dSDimitry Andric     auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2779bdd1243dSDimitry Andric     auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2780bdd1243dSDimitry Andric     ArrayRef<int> TopMask = This->getMask();
2781bdd1243dSDimitry Andric     // This should be guaranteed by type checks in the caller, and the fact
2782bdd1243dSDimitry Andric     // that all shuffles should have been promoted to operate on MVT::i8.
2783bdd1243dSDimitry Andric     assert(TopMask.size() == S0->getMask().size() &&
2784bdd1243dSDimitry Andric            TopMask.size() == S1->getMask().size());
2785bdd1243dSDimitry Andric     assert(TopMask.size() == HwLen);
2786bdd1243dSDimitry Andric 
2787bdd1243dSDimitry Andric     SmallVector<int, 256> FoldedMask(2 * HwLen);
2788bdd1243dSDimitry Andric     for (unsigned I = 0; I != HwLen; ++I) {
2789bdd1243dSDimitry Andric       int MaybeM = TopMask[I];
2790bdd1243dSDimitry Andric       if (MaybeM >= 0) {
2791bdd1243dSDimitry Andric         FoldedMask[I] =
2792bdd1243dSDimitry Andric             getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2793bdd1243dSDimitry Andric       } else {
2794bdd1243dSDimitry Andric         FoldedMask[I] = -1;
2795bdd1243dSDimitry Andric       }
2796bdd1243dSDimitry Andric     }
2797bdd1243dSDimitry Andric     // The second half of the result will be all-undef.
2798bdd1243dSDimitry Andric     std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2799bdd1243dSDimitry Andric 
2800bdd1243dSDimitry Andric     // Return
2801bdd1243dSDimitry Andric     //   FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2802bdd1243dSDimitry Andric     //   (LoHalf FoldedShuffle)
2803bdd1243dSDimitry Andric     const SDLoc &dl(TopShuff);
2804bdd1243dSDimitry Andric     MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2805bdd1243dSDimitry Andric     MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2806bdd1243dSDimitry Andric     SDValue FoldedShuff =
2807bdd1243dSDimitry Andric         DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2808bdd1243dSDimitry Andric                              DAG.getUNDEF(PairTy), FoldedMask);
2809bdd1243dSDimitry Andric     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2810bdd1243dSDimitry Andric                        DAG.getConstant(0, dl, MVT::i32));
2811bdd1243dSDimitry Andric   };
2812bdd1243dSDimitry Andric 
2813bdd1243dSDimitry Andric   auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2814bdd1243dSDimitry Andric     while (V.getOpcode() == ISD::BITCAST)
2815bdd1243dSDimitry Andric       V = V.getOperand(0);
2816bdd1243dSDimitry Andric     if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2817bdd1243dSDimitry Andric       return std::nullopt;
2818bdd1243dSDimitry Andric     return SubVectorInfo(V.getOperand(0),
2819bdd1243dSDimitry Andric                          !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2820bdd1243dSDimitry Andric   };
2821bdd1243dSDimitry Andric 
2822bdd1243dSDimitry Andric   for (SDNode *N : Nodes) {
2823bdd1243dSDimitry Andric     if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2824bdd1243dSDimitry Andric       continue;
2825bdd1243dSDimitry Andric     EVT ResTy = N->getValueType(0);
2826bdd1243dSDimitry Andric     if (ResTy.getVectorElementType() != MVT::i8)
2827bdd1243dSDimitry Andric       continue;
2828bdd1243dSDimitry Andric     if (ResTy.getVectorNumElements() != HwLen)
2829bdd1243dSDimitry Andric       continue;
2830bdd1243dSDimitry Andric 
2831bdd1243dSDimitry Andric     SDValue V0 = N->getOperand(0);
2832bdd1243dSDimitry Andric     SDValue V1 = N->getOperand(1);
2833bdd1243dSDimitry Andric     if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2834bdd1243dSDimitry Andric       continue;
2835bdd1243dSDimitry Andric     if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2836bdd1243dSDimitry Andric       continue;
2837bdd1243dSDimitry Andric     if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2838bdd1243dSDimitry Andric       continue;
2839bdd1243dSDimitry Andric 
2840bdd1243dSDimitry Andric     // Check if all operands of the two operand shuffles are extract_subvectors
2841bdd1243dSDimitry Andric     // from the same vector pair.
2842bdd1243dSDimitry Andric     auto V0A = getSourceInfo(V0.getOperand(0));
2843bdd1243dSDimitry Andric     if (!V0A.has_value())
2844bdd1243dSDimitry Andric       continue;
2845bdd1243dSDimitry Andric     auto V0B = getSourceInfo(V0.getOperand(1));
2846bdd1243dSDimitry Andric     if (!V0B.has_value() || V0B->Src != V0A->Src)
2847bdd1243dSDimitry Andric       continue;
2848bdd1243dSDimitry Andric     auto V1A = getSourceInfo(V1.getOperand(0));
2849bdd1243dSDimitry Andric     if (!V1A.has_value() || V1A->Src != V0A->Src)
2850bdd1243dSDimitry Andric       continue;
2851bdd1243dSDimitry Andric     auto V1B = getSourceInfo(V1.getOperand(1));
2852bdd1243dSDimitry Andric     if (!V1B.has_value() || V1B->Src != V0A->Src)
2853bdd1243dSDimitry Andric       continue;
2854bdd1243dSDimitry Andric 
2855bdd1243dSDimitry Andric     // The source must be a pair. This should be guaranteed here,
2856bdd1243dSDimitry Andric     // but check just in case.
2857bdd1243dSDimitry Andric     assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2858bdd1243dSDimitry Andric 
2859bdd1243dSDimitry Andric     MapType OpMap = {
2860bdd1243dSDimitry Andric         {V0.getOperand(0), V0A->HalfIdx * HwLen},
2861bdd1243dSDimitry Andric         {V0.getOperand(1), V0B->HalfIdx * HwLen},
2862bdd1243dSDimitry Andric         {V1.getOperand(0), V1A->HalfIdx * HwLen},
2863bdd1243dSDimitry Andric         {V1.getOperand(1), V1B->HalfIdx * HwLen},
2864bdd1243dSDimitry Andric     };
2865bdd1243dSDimitry Andric     SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2866bdd1243dSDimitry Andric     ReplaceNode(N, NewS.getNode());
2867bdd1243dSDimitry Andric   }
2868bdd1243dSDimitry Andric }
2869bdd1243dSDimitry Andric 
2870bdd1243dSDimitry Andric void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2871bdd1243dSDimitry Andric   HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2872bdd1243dSDimitry Andric }
2873bdd1243dSDimitry Andric 
28740b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
28750b57cec5SDimitry Andric   HvxSelector(*this, *CurDAG).selectShuffle(N);
28760b57cec5SDimitry Andric }
28770b57cec5SDimitry Andric 
28780b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
28790b57cec5SDimitry Andric   HvxSelector(*this, *CurDAG).selectRor(N);
28800b57cec5SDimitry Andric }
28810b57cec5SDimitry Andric 
28820b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
28830b57cec5SDimitry Andric   HvxSelector(*this, *CurDAG).selectVAlign(N);
28840b57cec5SDimitry Andric }
28850b57cec5SDimitry Andric 
28860b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
28870b57cec5SDimitry Andric   const SDLoc &dl(N);
28880b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
28890b57cec5SDimitry Andric   SDValue Address = N->getOperand(2);
28900b57cec5SDimitry Andric   SDValue Predicate = N->getOperand(3);
28910b57cec5SDimitry Andric   SDValue Base = N->getOperand(4);
28920b57cec5SDimitry Andric   SDValue Modifier = N->getOperand(5);
28930b57cec5SDimitry Andric   SDValue Offset = N->getOperand(6);
289404eeddc0SDimitry Andric   SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
28950b57cec5SDimitry Andric 
28960b57cec5SDimitry Andric   unsigned Opcode;
2897647cbc5dSDimitry Andric   unsigned IntNo = N->getConstantOperandVal(1);
28980b57cec5SDimitry Andric   switch (IntNo) {
28990b57cec5SDimitry Andric   default:
29000b57cec5SDimitry Andric     llvm_unreachable("Unexpected HVX gather intrinsic.");
29010b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhq:
29020b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhq_128B:
29030b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermhq_pseudo;
29040b57cec5SDimitry Andric     break;
29050b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermwq:
29060b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermwq_128B:
29070b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermwq_pseudo;
29080b57cec5SDimitry Andric     break;
29090b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhwq:
29100b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhwq_128B:
29110b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermhwq_pseudo;
29120b57cec5SDimitry Andric     break;
29130b57cec5SDimitry Andric   }
29140b57cec5SDimitry Andric 
29150b57cec5SDimitry Andric   SDVTList VTs = CurDAG->getVTList(MVT::Other);
291604eeddc0SDimitry Andric   SDValue Ops[] = { Address, ImmOperand,
291704eeddc0SDimitry Andric                     Predicate, Base, Modifier, Offset, Chain };
29180b57cec5SDimitry Andric   SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
29190b57cec5SDimitry Andric 
29200b57cec5SDimitry Andric   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
29210b57cec5SDimitry Andric   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
29220b57cec5SDimitry Andric 
29230b57cec5SDimitry Andric   ReplaceNode(N, Result);
29240b57cec5SDimitry Andric }
29250b57cec5SDimitry Andric 
29260b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
29270b57cec5SDimitry Andric   const SDLoc &dl(N);
29280b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
29290b57cec5SDimitry Andric   SDValue Address = N->getOperand(2);
29300b57cec5SDimitry Andric   SDValue Base = N->getOperand(3);
29310b57cec5SDimitry Andric   SDValue Modifier = N->getOperand(4);
29320b57cec5SDimitry Andric   SDValue Offset = N->getOperand(5);
293304eeddc0SDimitry Andric   SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
29340b57cec5SDimitry Andric 
29350b57cec5SDimitry Andric   unsigned Opcode;
2936647cbc5dSDimitry Andric   unsigned IntNo = N->getConstantOperandVal(1);
29370b57cec5SDimitry Andric   switch (IntNo) {
29380b57cec5SDimitry Andric   default:
29390b57cec5SDimitry Andric     llvm_unreachable("Unexpected HVX gather intrinsic.");
29400b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermh:
29410b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermh_128B:
29420b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermh_pseudo;
29430b57cec5SDimitry Andric     break;
29440b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermw:
29450b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermw_128B:
29460b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermw_pseudo;
29470b57cec5SDimitry Andric     break;
29480b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhw:
29490b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vgathermhw_128B:
29500b57cec5SDimitry Andric     Opcode = Hexagon::V6_vgathermhw_pseudo;
29510b57cec5SDimitry Andric     break;
29520b57cec5SDimitry Andric   }
29530b57cec5SDimitry Andric 
29540b57cec5SDimitry Andric   SDVTList VTs = CurDAG->getVTList(MVT::Other);
295504eeddc0SDimitry Andric   SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
29560b57cec5SDimitry Andric   SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
29570b57cec5SDimitry Andric 
29580b57cec5SDimitry Andric   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
29590b57cec5SDimitry Andric   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
29600b57cec5SDimitry Andric 
29610b57cec5SDimitry Andric   ReplaceNode(N, Result);
29620b57cec5SDimitry Andric }
29630b57cec5SDimitry Andric 
29640b57cec5SDimitry Andric void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
2965647cbc5dSDimitry Andric   unsigned IID = N->getConstantOperandVal(0);
29660b57cec5SDimitry Andric   SDNode *Result;
29670b57cec5SDimitry Andric   switch (IID) {
29680b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vaddcarry: {
29695ffd83dbSDimitry Andric     std::array<SDValue, 3> Ops = {
29705ffd83dbSDimitry Andric         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
29715ffd83dbSDimitry Andric     SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
29720b57cec5SDimitry Andric     Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
29730b57cec5SDimitry Andric     break;
29740b57cec5SDimitry Andric   }
29750b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vaddcarry_128B: {
29765ffd83dbSDimitry Andric     std::array<SDValue, 3> Ops = {
29775ffd83dbSDimitry Andric         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
29785ffd83dbSDimitry Andric     SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
29790b57cec5SDimitry Andric     Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
29800b57cec5SDimitry Andric     break;
29810b57cec5SDimitry Andric   }
29820b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vsubcarry: {
29835ffd83dbSDimitry Andric     std::array<SDValue, 3> Ops = {
29845ffd83dbSDimitry Andric         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
29855ffd83dbSDimitry Andric     SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
29860b57cec5SDimitry Andric     Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
29870b57cec5SDimitry Andric     break;
29880b57cec5SDimitry Andric   }
29890b57cec5SDimitry Andric   case Intrinsic::hexagon_V6_vsubcarry_128B: {
29905ffd83dbSDimitry Andric     std::array<SDValue, 3> Ops = {
29915ffd83dbSDimitry Andric         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
29925ffd83dbSDimitry Andric     SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
29930b57cec5SDimitry Andric     Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
29940b57cec5SDimitry Andric     break;
29950b57cec5SDimitry Andric   }
29960b57cec5SDimitry Andric   default:
29970b57cec5SDimitry Andric     llvm_unreachable("Unexpected HVX dual output intrinsic.");
29980b57cec5SDimitry Andric   }
29990b57cec5SDimitry Andric   ReplaceUses(N, Result);
30000b57cec5SDimitry Andric   ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
30010b57cec5SDimitry Andric   ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
30020b57cec5SDimitry Andric   CurDAG->RemoveDeadNode(N);
30030b57cec5SDimitry Andric }
3004