10b57cec5SDimitry Andric //===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the Hexagon specific subclass of TargetSubtarget.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
13e8d8bef9SDimitry Andric #include "HexagonSubtarget.h"
140b57cec5SDimitry Andric #include "Hexagon.h"
150b57cec5SDimitry Andric #include "HexagonInstrInfo.h"
160b57cec5SDimitry Andric #include "HexagonRegisterInfo.h"
170b57cec5SDimitry Andric #include "MCTargetDesc/HexagonMCTargetDesc.h"
180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
210b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAGInstrs.h"
27bdd1243dSDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h"
280b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
290b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
30e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
310b57cec5SDimitry Andric #include <algorithm>
320b57cec5SDimitry Andric #include <cassert>
330b57cec5SDimitry Andric #include <map>
34bdd1243dSDimitry Andric #include <optional>
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric using namespace llvm;
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric #define DEBUG_TYPE "hexagon-subtarget"
390b57cec5SDimitry Andric
400b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
410b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
420b57cec5SDimitry Andric #include "HexagonGenSubtargetInfo.inc"
430b57cec5SDimitry Andric
// Command-line tuning knobs for the Hexagon subtarget. All are cl::Hidden.

// Back-to-back (BSB) scheduling; feeds UseBSBScheduling in
// initializeSubtargetDependencies and halves latencies in updateLatency.
static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

// NOTE(review): not referenced in this file; presumably consumed elsewhere
// in the Hexagon backend.
static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

// Overrides the feature-derived UseLongCalls setting when present on the
// command line (see initializeSubtargetDependencies).
static cl::opt<bool> OverrideLongCalls(
    "hexagon-long-calls", cl::Hidden,
    cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

// When set (default), CallMutation keeps A2_tfrpi close to a preceding call
// by adding a barrier edge.
static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

// When set (default), CallMutation adds barrier edges to avoid redundant
// physreg copies around calls.
static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

// Gates BankConflictMutation entirely.
static cl::opt<bool> EnableCheckBankConflict(
    "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
    cl::desc("Enable checking for cache bank conflicts"));
750b57cec5SDimitry Andric
// Construct the subtarget: resolve the CPU name, parse the feature string,
// and set up instruction, register, lowering, and itinerary info.
// Note: initializeSubtargetDependencies is invoked from the InstrInfo
// member initializer, so it runs before RegInfo/TLInfo/InstrItins are
// constructed (assuming member declaration order matches this list —
// TODO confirm against the header).
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}
890b57cec5SDimitry Andric
// Parse the CPU/feature strings and initialize all subtarget state that the
// other members (InstrInfo etc.) depend on. Returns *this so it can be used
// in the constructor's member-initializer list.
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  // Map the resolved CPU name to an architecture version enum.
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  // Reset feature-derived flags; ParseSubtargetFeatures below sets them.
  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
  // Only infer a qfloat setting when the feature string does not mention it
  // explicitly (in either polarity).
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    // Determine the effective HVX setting: prefer the last explicit
    // "+hvxv<N>" version; otherwise the last plain "+hvx"/"-hvx" toggle.
    // Later features override earlier ones, hence the reverse scans.
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.starts_with("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.starts_with("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.starts_with("+hvxv")) {
      // Explicit HVX version: qfloat is implied from v68 onwards.
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      // Unversioned "+hvx": fall back to the core architecture version.
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  // HVX floating point (IEEE or QFloat) requires HVX v68+.
  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(
        dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");

  // An explicit -hexagon-long-calls option (either polarity) overrides the
  // feature-derived default.
  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread: turn BSB scheduling off unless the
    // -enable-bsb-sched flag was given explicitly.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}
1710b57cec5SDimitry Andric
isHVXElementType(MVT Ty,bool IncludeBool) const172e8d8bef9SDimitry Andric bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
173e8d8bef9SDimitry Andric if (!useHVXOps())
174e8d8bef9SDimitry Andric return false;
175e8d8bef9SDimitry Andric if (Ty.isVector())
176e8d8bef9SDimitry Andric Ty = Ty.getVectorElementType();
177e8d8bef9SDimitry Andric if (IncludeBool && Ty == MVT::i1)
178e8d8bef9SDimitry Andric return true;
179e8d8bef9SDimitry Andric ArrayRef<MVT> ElemTypes = getHVXElementTypes();
180e8d8bef9SDimitry Andric return llvm::is_contained(ElemTypes, Ty);
181e8d8bef9SDimitry Andric }
182e8d8bef9SDimitry Andric
isHVXVectorType(EVT VecTy,bool IncludeBool) const183bdd1243dSDimitry Andric bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
184bdd1243dSDimitry Andric if (!VecTy.isSimple())
185bdd1243dSDimitry Andric return false;
186e8d8bef9SDimitry Andric if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
187e8d8bef9SDimitry Andric return false;
188bdd1243dSDimitry Andric MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
189e8d8bef9SDimitry Andric if (!IncludeBool && ElemTy == MVT::i1)
190e8d8bef9SDimitry Andric return false;
191e8d8bef9SDimitry Andric
192e8d8bef9SDimitry Andric unsigned HwLen = getVectorLength();
193e8d8bef9SDimitry Andric unsigned NumElems = VecTy.getVectorNumElements();
194e8d8bef9SDimitry Andric ArrayRef<MVT> ElemTypes = getHVXElementTypes();
195e8d8bef9SDimitry Andric
196e8d8bef9SDimitry Andric if (IncludeBool && ElemTy == MVT::i1) {
197e8d8bef9SDimitry Andric // Boolean HVX vector types are formed from regular HVX vector types
198e8d8bef9SDimitry Andric // by replacing the element type with i1.
199e8d8bef9SDimitry Andric for (MVT T : ElemTypes)
200e8d8bef9SDimitry Andric if (NumElems * T.getSizeInBits() == 8 * HwLen)
201e8d8bef9SDimitry Andric return true;
202e8d8bef9SDimitry Andric return false;
203e8d8bef9SDimitry Andric }
204e8d8bef9SDimitry Andric
205e8d8bef9SDimitry Andric unsigned VecWidth = VecTy.getSizeInBits();
206e8d8bef9SDimitry Andric if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
207e8d8bef9SDimitry Andric return false;
208e8d8bef9SDimitry Andric return llvm::is_contained(ElemTypes, ElemTy);
209e8d8bef9SDimitry Andric }
210e8d8bef9SDimitry Andric
isTypeForHVX(Type * VecTy,bool IncludeBool) const211e8d8bef9SDimitry Andric bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
212e8d8bef9SDimitry Andric if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
213e8d8bef9SDimitry Andric return false;
214e8d8bef9SDimitry Andric // Avoid types like <2 x i32*>.
21504eeddc0SDimitry Andric Type *ScalTy = VecTy->getScalarType();
21604eeddc0SDimitry Andric if (!ScalTy->isIntegerTy() &&
21704eeddc0SDimitry Andric !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
218e8d8bef9SDimitry Andric return false;
219e8d8bef9SDimitry Andric // The given type may be something like <17 x i32>, which is not MVT,
220e8d8bef9SDimitry Andric // but can be represented as (non-simple) EVT.
221e8d8bef9SDimitry Andric EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
222bdd1243dSDimitry Andric if (!Ty.getVectorElementType().isSimple())
223e8d8bef9SDimitry Andric return false;
224e8d8bef9SDimitry Andric
225e8d8bef9SDimitry Andric auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
226e8d8bef9SDimitry Andric if (isHVXVectorType(SimpleTy, IncludeBool))
227e8d8bef9SDimitry Andric return true;
228e8d8bef9SDimitry Andric auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
229e8d8bef9SDimitry Andric return Action == TargetLoweringBase::TypeWidenVector;
230e8d8bef9SDimitry Andric };
231e8d8bef9SDimitry Andric
232e8d8bef9SDimitry Andric // Round up EVT to have power-of-2 elements, and keep checking if it
233e8d8bef9SDimitry Andric // qualifies for HVX, dividing it in half after each step.
234e8d8bef9SDimitry Andric MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
235e8d8bef9SDimitry Andric unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
236bdd1243dSDimitry Andric while (VecLen > 1) {
237e8d8bef9SDimitry Andric MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
238e8d8bef9SDimitry Andric if (SimpleTy.isValid() && isHvxTy(SimpleTy))
239e8d8bef9SDimitry Andric return true;
240e8d8bef9SDimitry Andric VecLen /= 2;
241e8d8bef9SDimitry Andric }
242e8d8bef9SDimitry Andric
243e8d8bef9SDimitry Andric return false;
244e8d8bef9SDimitry Andric }
245e8d8bef9SDimitry Andric
apply(ScheduleDAGInstrs * DAG)2460b57cec5SDimitry Andric void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
2470b57cec5SDimitry Andric for (SUnit &SU : DAG->SUnits) {
2480b57cec5SDimitry Andric if (!SU.isInstr())
2490b57cec5SDimitry Andric continue;
2500b57cec5SDimitry Andric SmallVector<SDep, 4> Erase;
2510b57cec5SDimitry Andric for (auto &D : SU.Preds)
2520b57cec5SDimitry Andric if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
2530b57cec5SDimitry Andric Erase.push_back(D);
2540b57cec5SDimitry Andric for (auto &E : Erase)
2550b57cec5SDimitry Andric SU.removePred(E);
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric
// Raise zero-latency chain edges between HVX memory accesses of the same
// kind (load-load, store-store) to latency 1, in both edge directions.
void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instruction cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    // Only HVX vector memory accesses are affected.
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      // Only zero-latency ordering (chain) edges need adjusting.
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}
2900b57cec5SDimitry Andric
2910b57cec5SDimitry Andric // Check if a call and subsequent A2_tfrpi instructions should maintain
2920b57cec5SDimitry Andric // scheduling affinity. We are looking for the TFRI to be consumed in
2930b57cec5SDimitry Andric // the next instruction. This should help reduce the instances of
2940b57cec5SDimitry Andric // double register pairs being allocated and scheduled before a call
2950b57cec5SDimitry Andric // when not used until after the call. This situation is exacerbated
2960b57cec5SDimitry Andric // by the fact that we allocate the pair from the callee saves list,
2970b57cec5SDimitry Andric // leading to excess spills and restores.
shouldTFRICallBind(const HexagonInstrInfo & HII,const SUnit & Inst1,const SUnit & Inst2) const2980b57cec5SDimitry Andric bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
2990b57cec5SDimitry Andric const HexagonInstrInfo &HII, const SUnit &Inst1,
3000b57cec5SDimitry Andric const SUnit &Inst2) const {
3010b57cec5SDimitry Andric if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
3020b57cec5SDimitry Andric return false;
3030b57cec5SDimitry Andric
3040b57cec5SDimitry Andric // TypeXTYPE are 64 bit operations.
3050b57cec5SDimitry Andric unsigned Type = HII.getType(*Inst2.getInstr());
3060b57cec5SDimitry Andric return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
3070b57cec5SDimitry Andric Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
3080b57cec5SDimitry Andric }
3090b57cec5SDimitry Andric
// Add barrier edges around calls: keep compares and bound TFRIs from being
// scheduled across a preceding call, and (optionally) prevent physreg
// copy/def reordering that would force extra register copies.
void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when compare gets scheduled
  // before preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which are caused both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            // A def of the physreg (or any alias) ends the tracked window:
            // add a barrier after the last use of the copied vreg.
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (LastVRegUse.count(*AI) &&
                  LastVRegUse[*AI] != &DAG->SUnits[su])
                // %r0 = ...
                DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
              LastVRegUse.erase(*AI);
            }
          }
        }
      }
    }
  }
}
3770b57cec5SDimitry Andric
apply(ScheduleDAGInstrs * DAG)3780b57cec5SDimitry Andric void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
3790b57cec5SDimitry Andric if (!EnableCheckBankConflict)
3800b57cec5SDimitry Andric return;
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);
3830b57cec5SDimitry Andric
3840b57cec5SDimitry Andric // Create artificial edges between loads that could likely cause a bank
3850b57cec5SDimitry Andric // conflict. Since such loads would normally not have any dependency
3860b57cec5SDimitry Andric // between them, we cannot rely on existing edges.
3870b57cec5SDimitry Andric for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
3880b57cec5SDimitry Andric SUnit &S0 = DAG->SUnits[i];
3890b57cec5SDimitry Andric MachineInstr &L0 = *S0.getInstr();
3900b57cec5SDimitry Andric if (!L0.mayLoad() || L0.mayStore() ||
3910b57cec5SDimitry Andric HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
3920b57cec5SDimitry Andric continue;
3930b57cec5SDimitry Andric int64_t Offset0;
394*0fca6ea1SDimitry Andric LocationSize Size0 = 0;
3950b57cec5SDimitry Andric MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
3960b57cec5SDimitry Andric // Is the access size is longer than the L1 cache line, skip the check.
397*0fca6ea1SDimitry Andric if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
398*0fca6ea1SDimitry Andric Size0.getValue() >= 32)
3990b57cec5SDimitry Andric continue;
4000b57cec5SDimitry Andric // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
4010b57cec5SDimitry Andric for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
4020b57cec5SDimitry Andric SUnit &S1 = DAG->SUnits[j];
4030b57cec5SDimitry Andric MachineInstr &L1 = *S1.getInstr();
4040b57cec5SDimitry Andric if (!L1.mayLoad() || L1.mayStore() ||
4050b57cec5SDimitry Andric HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
4060b57cec5SDimitry Andric continue;
4070b57cec5SDimitry Andric int64_t Offset1;
408*0fca6ea1SDimitry Andric LocationSize Size1 = 0;
4090b57cec5SDimitry Andric MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
410*0fca6ea1SDimitry Andric if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size0.hasValue() ||
411*0fca6ea1SDimitry Andric Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
4120b57cec5SDimitry Andric continue;
4130b57cec5SDimitry Andric // Check bits 3 and 4 of the offset: if they differ, a bank conflict
4140b57cec5SDimitry Andric // is unlikely.
4150b57cec5SDimitry Andric if (((Offset0 ^ Offset1) & 0x18) != 0)
4160b57cec5SDimitry Andric continue;
4170b57cec5SDimitry Andric // Bits 3 and 4 are the same, add an artificial edge and set extra
4180b57cec5SDimitry Andric // latency.
4190b57cec5SDimitry Andric SDep A(&S0, SDep::Artificial);
4200b57cec5SDimitry Andric A.setLatency(1);
4210b57cec5SDimitry Andric S1.addPred(A, true);
4220b57cec5SDimitry Andric }
4230b57cec5SDimitry Andric }
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric
4260b57cec5SDimitry Andric /// Enable use of alias analysis during code generation (during MI
4270b57cec5SDimitry Andric /// scheduling, DAGCombine, etc.).
useAA() const4280b57cec5SDimitry Andric bool HexagonSubtarget::useAA() const {
4295f757f3fSDimitry Andric if (OptLevel != CodeGenOptLevel::None)
4300b57cec5SDimitry Andric return true;
4310b57cec5SDimitry Andric return false;
4320b57cec5SDimitry Andric }
4330b57cec5SDimitry Andric
/// Perform target specific adjustments to the latency of a schedule
/// dependency: zero latency for bundleable (.new) pairs, copies, and
/// .cur-forming uses; otherwise apply the generic Hexagon adjustments
/// via updateLatency.
void HexagonSubtarget::adjustSchedDependency(
    SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallSet<SUnit *, 4> ExclSrc;
  SmallSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that is will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
  // only if the latencies on all the uses are equal, otherwise set it to
  // default.
  if ((DstInst->isRegSequence() || DstInst->isCopy())) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      // Find the operand of this successor that reads the COPY/REG_SEQUENCE
      // result register.
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    // No consistent use latency found: fall back to zero.
    Dep.setLatency(DLatency ? *DLatency : 0);
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  // Apply the generic adjustments (artificial edges, BSB halving) last.
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}
5120b57cec5SDimitry Andric
getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>> & Mutations) const5130b57cec5SDimitry Andric void HexagonSubtarget::getPostRAMutations(
5140b57cec5SDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
5158bcb0991SDimitry Andric Mutations.push_back(std::make_unique<UsrOverflowMutation>());
5168bcb0991SDimitry Andric Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
5178bcb0991SDimitry Andric Mutations.push_back(std::make_unique<BankConflictMutation>());
5180b57cec5SDimitry Andric }
5190b57cec5SDimitry Andric
/// Register the DAG mutations used by the software-pipelining (SMS)
/// scheduler. Note: unlike the post-RA set, this intentionally omits the
/// bank-conflict mutation.
void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}
5250b57cec5SDimitry Andric
5260b57cec5SDimitry Andric // Pin the vtable to this file.
anchor()5270b57cec5SDimitry Andric void HexagonSubtarget::anchor() {}
5280b57cec5SDimitry Andric
enableMachineScheduler() const5290b57cec5SDimitry Andric bool HexagonSubtarget::enableMachineScheduler() const {
5300b57cec5SDimitry Andric if (DisableHexagonMISched.getNumOccurrences())
5310b57cec5SDimitry Andric return !DisableHexagonMISched;
5320b57cec5SDimitry Andric return true;
5330b57cec5SDimitry Andric }
5340b57cec5SDimitry Andric
/// Return true if predicated calls are enabled (controlled by the
/// EnablePredicatedCalls command-line option).
bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}
5380b57cec5SDimitry Andric
updateLatency(MachineInstr & SrcInst,MachineInstr & DstInst,bool IsArtificial,int Latency) const53904eeddc0SDimitry Andric int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
54004eeddc0SDimitry Andric MachineInstr &DstInst, bool IsArtificial,
54104eeddc0SDimitry Andric int Latency) const {
54204eeddc0SDimitry Andric if (IsArtificial)
54304eeddc0SDimitry Andric return 1;
5440b57cec5SDimitry Andric if (!hasV60Ops())
54504eeddc0SDimitry Andric return Latency;
5460b57cec5SDimitry Andric
5470b57cec5SDimitry Andric auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
5480b57cec5SDimitry Andric // BSB scheduling.
5490b57cec5SDimitry Andric if (QII.isHVXVec(SrcInst) || useBSBScheduling())
55004eeddc0SDimitry Andric Latency = (Latency + 1) >> 1;
55104eeddc0SDimitry Andric return Latency;
5520b57cec5SDimitry Andric }
5530b57cec5SDimitry Andric
/// Recompute the latency of the assigned register-dependence edge(s) from
/// Src to Dst using the instruction itineraries, and mirror the updated
/// value onto the corresponding predecessor edge stored on Dst.
void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    // Locate the operand of SrcI that defines the dependent register.
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          // Virtual registers must match exactly.
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          // Physical registers: also accept a def of DepR itself or a
          // super-register containing it.
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    // Copy the edge BEFORE its latency is modified: the find() below relies
    // on the pre-update latency to locate the mirror edge in Dst->Preds.
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}
6010b57cec5SDimitry Andric
6020b57cec5SDimitry Andric /// Change the latency between the two SUnits.
changeLatency(SUnit * Src,SUnit * Dst,unsigned Lat) const6030b57cec5SDimitry Andric void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
6040b57cec5SDimitry Andric const {
6050b57cec5SDimitry Andric for (auto &I : Src->Succs) {
6060b57cec5SDimitry Andric if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
6070b57cec5SDimitry Andric continue;
6080b57cec5SDimitry Andric SDep T = I;
6090b57cec5SDimitry Andric I.setLatency(Lat);
6100b57cec5SDimitry Andric
6110b57cec5SDimitry Andric // Update the latency of opposite edge too.
6120b57cec5SDimitry Andric T.setSUnit(Src);
613e8d8bef9SDimitry Andric auto F = find(Dst->Preds, T);
6140b57cec5SDimitry Andric assert(F != Dst->Preds.end());
6150b57cec5SDimitry Andric F->setLatency(Lat);
6160b57cec5SDimitry Andric }
6170b57cec5SDimitry Andric }
6180b57cec5SDimitry Andric
6190b57cec5SDimitry Andric /// If the SUnit has a zero latency edge, return the other SUnit.
getZeroLatency(SUnit * N,SmallVector<SDep,4> & Deps)6200b57cec5SDimitry Andric static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
6210b57cec5SDimitry Andric for (auto &I : Deps)
6220b57cec5SDimitry Andric if (I.isAssignedRegDep() && I.getLatency() == 0 &&
6230b57cec5SDimitry Andric !I.getSUnit()->getInstr()->isPseudo())
6240b57cec5SDimitry Andric return I.getSUnit();
6250b57cec5SDimitry Andric return nullptr;
6260b57cec5SDimitry Andric }
6270b57cec5SDimitry Andric
6280b57cec5SDimitry Andric // Return true if these are the best two instructions to schedule
6290b57cec5SDimitry Andric // together with a zero latency. Only one dependence should have a zero
6300b57cec5SDimitry Andric // latency. If there are multiple choices, choose the best, and change
6310b57cec5SDimitry Andric // the others, if needed.
isBestZeroLatency(SUnit * Src,SUnit * Dst,const HexagonInstrInfo * TII,SmallSet<SUnit *,4> & ExclSrc,SmallSet<SUnit *,4> & ExclDst) const6320b57cec5SDimitry Andric bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
6330b57cec5SDimitry Andric const HexagonInstrInfo *TII, SmallSet<SUnit*, 4> &ExclSrc,
6340b57cec5SDimitry Andric SmallSet<SUnit*, 4> &ExclDst) const {
6350b57cec5SDimitry Andric MachineInstr &SrcInst = *Src->getInstr();
6360b57cec5SDimitry Andric MachineInstr &DstInst = *Dst->getInstr();
6370b57cec5SDimitry Andric
6380b57cec5SDimitry Andric // Ignore Boundary SU nodes as these have null instructions.
6390b57cec5SDimitry Andric if (Dst->isBoundaryNode())
6400b57cec5SDimitry Andric return false;
6410b57cec5SDimitry Andric
6420b57cec5SDimitry Andric if (SrcInst.isPHI() || DstInst.isPHI())
6430b57cec5SDimitry Andric return false;
6440b57cec5SDimitry Andric
6450b57cec5SDimitry Andric if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
6460b57cec5SDimitry Andric !TII->canExecuteInBundle(SrcInst, DstInst))
6470b57cec5SDimitry Andric return false;
6480b57cec5SDimitry Andric
6490b57cec5SDimitry Andric // The architecture doesn't allow three dependent instructions in the same
6500b57cec5SDimitry Andric // packet. So, if the destination has a zero latency successor, then it's
6510b57cec5SDimitry Andric // not a candidate for a zero latency predecessor.
6520b57cec5SDimitry Andric if (getZeroLatency(Dst, Dst->Succs) != nullptr)
6530b57cec5SDimitry Andric return false;
6540b57cec5SDimitry Andric
6550b57cec5SDimitry Andric // Check if the Dst instruction is the best candidate first.
6560b57cec5SDimitry Andric SUnit *Best = nullptr;
6570b57cec5SDimitry Andric SUnit *DstBest = nullptr;
6580b57cec5SDimitry Andric SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
6590b57cec5SDimitry Andric if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
6600b57cec5SDimitry Andric // Check that Src doesn't have a better candidate.
6610b57cec5SDimitry Andric DstBest = getZeroLatency(Src, Src->Succs);
6620b57cec5SDimitry Andric if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
6630b57cec5SDimitry Andric Best = Dst;
6640b57cec5SDimitry Andric }
6650b57cec5SDimitry Andric if (Best != Dst)
6660b57cec5SDimitry Andric return false;
6670b57cec5SDimitry Andric
6680b57cec5SDimitry Andric // The caller frequently adds the same dependence twice. If so, then
6690b57cec5SDimitry Andric // return true for this case too.
6700b57cec5SDimitry Andric if ((Src == SrcBest && Dst == DstBest ) ||
6710b57cec5SDimitry Andric (SrcBest == nullptr && Dst == DstBest) ||
6720b57cec5SDimitry Andric (Src == SrcBest && Dst == nullptr))
6730b57cec5SDimitry Andric return true;
6740b57cec5SDimitry Andric
6750b57cec5SDimitry Andric // Reassign the latency for the previous bests, which requires setting
6760b57cec5SDimitry Andric // the dependence edge in both directions.
6770b57cec5SDimitry Andric if (SrcBest != nullptr) {
6780b57cec5SDimitry Andric if (!hasV60Ops())
6790b57cec5SDimitry Andric changeLatency(SrcBest, Dst, 1);
6800b57cec5SDimitry Andric else
6810b57cec5SDimitry Andric restoreLatency(SrcBest, Dst);
6820b57cec5SDimitry Andric }
6830b57cec5SDimitry Andric if (DstBest != nullptr) {
6840b57cec5SDimitry Andric if (!hasV60Ops())
6850b57cec5SDimitry Andric changeLatency(Src, DstBest, 1);
6860b57cec5SDimitry Andric else
6870b57cec5SDimitry Andric restoreLatency(Src, DstBest);
6880b57cec5SDimitry Andric }
6890b57cec5SDimitry Andric
6900b57cec5SDimitry Andric // Attempt to find another opprotunity for zero latency in a different
6910b57cec5SDimitry Andric // dependence.
6920b57cec5SDimitry Andric if (SrcBest && DstBest)
6930b57cec5SDimitry Andric // If there is an edge from SrcBest to DstBst, then try to change that
6940b57cec5SDimitry Andric // to 0 now.
6950b57cec5SDimitry Andric changeLatency(SrcBest, DstBest, 0);
6960b57cec5SDimitry Andric else if (DstBest) {
6970b57cec5SDimitry Andric // Check if the previous best destination instruction has a new zero
6980b57cec5SDimitry Andric // latency dependence opportunity.
6990b57cec5SDimitry Andric ExclSrc.insert(Src);
7000b57cec5SDimitry Andric for (auto &I : DstBest->Preds)
7010b57cec5SDimitry Andric if (ExclSrc.count(I.getSUnit()) == 0 &&
7020b57cec5SDimitry Andric isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
7030b57cec5SDimitry Andric changeLatency(I.getSUnit(), DstBest, 0);
7040b57cec5SDimitry Andric } else if (SrcBest) {
7050b57cec5SDimitry Andric // Check if previous best source instruction has a new zero latency
7060b57cec5SDimitry Andric // dependence opportunity.
7070b57cec5SDimitry Andric ExclDst.insert(Dst);
7080b57cec5SDimitry Andric for (auto &I : SrcBest->Succs)
7090b57cec5SDimitry Andric if (ExclDst.count(I.getSUnit()) == 0 &&
7100b57cec5SDimitry Andric isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
7110b57cec5SDimitry Andric changeLatency(SrcBest, I.getSUnit(), 0);
7120b57cec5SDimitry Andric }
7130b57cec5SDimitry Andric
7140b57cec5SDimitry Andric return true;
7150b57cec5SDimitry Andric }
7160b57cec5SDimitry Andric
/// L1 data-cache line size in bytes for Hexagon.
unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}
7200b57cec5SDimitry Andric
/// Prefetch distance (in bytes) used when issuing L1 prefetches.
unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}
7240b57cec5SDimitry Andric
enableSubRegLiveness() const725*0fca6ea1SDimitry Andric bool HexagonSubtarget::enableSubRegLiveness() const { return true; }
726bdd1243dSDimitry Andric
/// Map a Hexagon machine opcode to its corresponding intrinsic ID.
///
/// Scalar opcodes map to a single intrinsic; HVX opcodes map to one of two
/// intrinsics, selected by the configured vector length (64 or 128 bytes).
/// Aborts via llvm_unreachable if the opcode has no mapping.
Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  // Table entry for scalar instructions: opcode -> intrinsic.
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  // Table entry for HVX instructions: opcode -> {64-byte, 128-byte} ids.
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  // Tables are generated from HexagonDepInstrIntrinsics.inc.
  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };

  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  // Sort each table exactly once (thread-safe static-local initialization)
  // so the binary searches below are valid.
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  // Binary-search the scalar table first.
  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  // Then the HVX table; pick the intrinsic matching the HW vector length.
  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}
775