Lines Matching +full:num +full:- +full:vectors
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
34 #define DEBUG_TYPE "hexagon-isel"
39 // --------------------------------------------------------------------
43 // - Forward delta.
44 // - Reverse delta.
45 // - Benes.
62 // |- 1 ---------------|- 2 -----|- 3 -|
111 static constexpr Node Ignore = Node(-1);
140 Node Num = Order.size();
141 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
146 return F != Colors.end() ? F->second : ColorKind::None;
282 dbgs() << " -";
292 dbgs() << " " << E.first << " -> {";
313 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
325 static constexpr ElemType Ignore = ElemType(-1);
358 W |= C << (Log-1-L);
432 ElemType Num = Size;
438 for (ElemType J = 0; J != Num; ++J) {
445 if (I < Num/2)
446 S = (J < Num/2) ? Pass : Switch;
448 S = (J < Num/2) ? Switch : Pass;
451 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
452 if (U < Num/2)
461 for (ElemType J = 0; J != Num; ++J)
462 if (P[J] != Ignore && P[J] >= Num/2)
463 P[J] -= Num/2;
476 unsigned Pets = Log-1 - Step;
478 ElemType Num = Size;
480 // In this step half-switching occurs, so coloring can be used.
487 for (ElemType J = 0; J != Num; ++J) {
499 bool InpUp = I < Num/2;
509 S = (J < Num/2) ? Pass : Switch;
512 S = (J < Num/2) ? Switch : Pass;
533 for (ElemType J = 0; J != Num; ++J)
534 if (P[J] != Ignore && P[J] >= Num/2)
535 P[J] -= Num/2;
552 ElemType Num = Size;
554 unsigned Pets = 2*Log-1 - Step;
557 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
561 for (ElemType J = 0; J != Num; ++J) {
569 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
571 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
573 if (I < Num/2)
577 T[J][Pets] = (J < Num/2) ? Pass : Switch;
580 if (I < Num/2)
584 T[J][Pets] = (J < Num/2) ? Switch : Pass;
591 for (ElemType J = 0; J != Num/2; ++J) {
593 ElemType PC = P[J+Num/2]; // and P[conj(J)]
598 if (T[J+Num/2][Pets] == Switch)
601 P[J+Num/2] = QC;
604 for (ElemType J = 0; J != Num; ++J)
605 if (P[J] != Ignore && P[J] >= Num/2)
606 P[J] -= Num/2;
617 // --------------------------------------------------------------------
678 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
683 return List.size()-1;
694 unsigned top() const { return size()-1; }
717 OpV.getNode()->print(OS, &G);
755 InpNode->dumpr(&G);
770 if (M == -1)
772 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
773 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
778 int MinSrc = -1, MaxSrc = -1;
862 // clang-format off
865 // clang-format on
886 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
903 auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
912 // --------------------------------------------------------------------
1008 MaskL[I] = MaskR[I] = -1;
1011 MaskR[I] = -1;
1013 MaskL[I] = -1;
1014 MaskR[I] = M-VecLen;
1025 if (A[I] - E != Inc)
1034 if (Idx != -1)
1051 // Check if the second half of the mask is all-undef.
1059 if (SM.MaxSrc == -1)
1113 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1127 M = (M & (SegLen-1)) + SegLen*OutIdx;
1134 // Constant vectors are generated as loads from constant pools or as
1146 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1148 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1149 WorkQ.insert(W->getOperand(j).getNode());
1212 OutN->dumpr(&DAG);
1249 Amount -= VecLen;
1256 } else if (isUInt<3>(VecLen - Amount)) {
1257 SDValue A = getConst32(VecLen - Amount, dl);
1262 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1267 // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1297 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1299 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1326 // (i.e. a multiple of 2*SegLen), and non-zero.
1329 // on the first two (non-undef) entries in the segment map.
1356 M -= SrcOp * HwLen;
1369 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1402 // vshuff(CD,AB,HL) -> BD:AC
1403 // vshuff(AB,CD,HL) -> DB:CA
1438 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1443 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1451 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1456 ShiftR -= HwLen;
1462 if (M != -1)
1463 M -= SMA.MinSrc;
1469 // By here, packing by segment (half-vector) shuffling, and vector alignment
1482 if (M == -1)
1485 M -= HwLen;
1500 // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1501 // pack these vectors into a pair, and remap SM into NewMask to use the
1521 // Really make sure we have at most 2 vectors used in the mask.
1532 // single vectors) in the output pair. Changing the order of vectors is
1548 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1549 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1567 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1634 if (MaskL[I] != -1)
1664 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1665 // is all-undef) may produce a perfect shuffle that generates legitimate
1706 if (MaskL[I] != -1)
1733 *F->second = nullptr;
1749 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1750 SDNode *N0 = ISelN->getOperand(0).getNode();
1759 if (!N0->isMachineOpcode()) {
1762 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1763 if (llvm::all_of(N0->uses(), IsISelN))
1777 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1780 if (T->use_empty() || NonDom.count(T))
1782 for (SDNode *U : T->uses()) {
1783 // If T is reachable from a known non-dominated node, then T itself
1784 // is non-dominated.
1798 for (SDValue Op : SubNodes[I]->ops()) {
1810 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1821 for (SDNode *U : S->uses()) {
1826 if (F->second > 0 && !--F->second)
1827 TmpQ.insert(F->first);
1841 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1883 M -= VecLen;
1890 M -= HwLen;
1906 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1916 assert(!N->use_empty());
1926 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1965 // Illustration: For 4-bit values there are 4 complete sequences:
2027 uint32_t T = P ^ ((P - 1) & P);
2043 assert(OrAll == (1u << Width) -1);
2062 // Add WrapAt in an attempt to keep I+Dist non-negative.
2063 Dist = M - I;
2078 // from the operand vectors.
2098 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2144 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2157 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2158 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2183 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2189 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2196 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2201 // The vunpackus only handle byte and half-word.
2208 // First, check the non-ignored strips.
2210 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2216 // Check the -1s.
2218 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2219 if (S.first != -1 || S.second != unsigned(L))
2258 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2273 // matrices (or "sub-matrices"), given a specific group size. For example,
2328 // but the two vectors in the pair are swapped. The code that identifies
2335 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2350 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2381 // If the cycle contains LogLen-1, move it to the front of the cycle.
2383 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2386 if (C[LogPos] == LogLen - 1)
2402 if (C[0] != Len - 1)
2404 int D = Len - C.size();
2409 for (unsigned I = 1; I != Len - D; ++I) {
2410 if (C[I] != Len - 1 - I)
2412 if (C[I] != I - (1 - D)) // I-1, I
2415 // At most one, IsDeal or IsShuff, can be non-zero.
2449 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2463 // artificially add "LogLen-1" at both ends of the sequence.
2465 SwapElems.push_back(LogLen - 1);
2467 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2468 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2474 SwapElems.push_back(LogLen - 1);
2482 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2484 if (I < E - 1) {
2485 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2497 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2523 if (M != -1 && M >= VecLen)
2527 // Try the deltas/benes for both single vectors and vector pairs.
2550 {OpRef::res(-1), OpRef(CtlR)});
2574 SDValue Inp = N->getOperand(0);
2575 MVT ResTy = N->getValueType(0).getSimpleVT();
2576 unsigned Idx = N->getConstantOperandVal(1);
2593 N->dump(&DAG);
2595 MVT ResTy = N->getValueType(0).getSimpleVT();
2596 // Assume that vector shuffles operate on vectors of bytes.
2600 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2603 if (Idx != -1 && Idx < 0)
2604 Idx = -1;
2615 if (Mask[I] == -1)
2630 // If the mask is all -1's, generate "undef".
2636 SDValue Vec0 = N->getOperand(0);
2637 SDValue Vec1 = N->getOperand(1);
2668 SN->dumpr(&DAG);
2676 MVT Ty = N->getValueType(0).getSimpleVT();
2678 SDValue VecV = N->getOperand(0);
2679 SDValue RotV = N->getOperand(1);
2683 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2699 SDValue Vv = N->getOperand(0);
2700 SDValue Vu = N->getOperand(1);
2701 SDValue Rt = N->getOperand(2);
2703 N->getValueType(0), {Vv, Vu, Rt});
2709 auto getNodes = [this]() -> std::vector<SDNode *> {
2711 T.reserve(CurDAG->allnodes_size());
2712 for (SDNode &N : CurDAG->allnodes())
2738 unsigned HwLen = HST->getVectorLength();
2750 const MapType &OpMap) -> int {
2752 // Idx as a (non-undef) element of the top level shuffle's mask, that
2759 Idx -= HwLen;
2762 int MaybeN = OpShuff->getMaskElt(Idx);
2764 return -1;
2767 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2768 : OpMap.at(OpShuff->getOperand(1));
2770 N -= HwLen;
2775 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2780 ArrayRef<int> TopMask = This->getMask();
2783 assert(TopMask.size() == S0->getMask().size() &&
2784 TopMask.size() == S1->getMask().size());
2794 FoldedMask[I] = -1;
2797 // The second half of the result will be all-undef.
2798 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2813 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2819 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2823 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2825 EVT ResTy = N->getValueType(0);
2831 SDValue V0 = N->getOperand(0);
2832 SDValue V1 = N->getOperand(1);
2846 if (!V0B.has_value() || V0B->Src != V0A->Src)
2849 if (!V1A.has_value() || V1A->Src != V0A->Src)
2852 if (!V1B.has_value() || V1B->Src != V0A->Src)
2857 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2860 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2861 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2862 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2863 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2865 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2888 SDValue Chain = N->getOperand(0);
2889 SDValue Address = N->getOperand(2);
2890 SDValue Predicate = N->getOperand(3);
2891 SDValue Base = N->getOperand(4);
2892 SDValue Modifier = N->getOperand(5);
2893 SDValue Offset = N->getOperand(6);
2894 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2897 unsigned IntNo = N->getConstantOperandVal(1);
2915 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2918 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2920 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2921 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2928 SDValue Chain = N->getOperand(0);
2929 SDValue Address = N->getOperand(2);
2930 SDValue Base = N->getOperand(3);
2931 SDValue Modifier = N->getOperand(4);
2932 SDValue Offset = N->getOperand(5);
2933 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2936 unsigned IntNo = N->getConstantOperandVal(1);
2954 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2956 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2958 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2959 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2965 unsigned IID = N->getConstantOperandVal(0);
2970 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2971 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2972 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2977 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2978 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2979 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2984 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2985 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2986 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2991 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2992 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2993 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
3002 CurDAG->RemoveDeadNode(N);