10b57cec5SDimitry Andric //===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass analyzes vector computations and removes unnecessary
100b57cec5SDimitry Andric // doubleword swaps (xxswapd instructions). This pass is performed
110b57cec5SDimitry Andric // only for little-endian VSX code generation.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // For this specific case, loads and stores of v4i32, v4f32, v2i64,
140b57cec5SDimitry Andric // and v2f64 vectors are inefficient. These are implemented using
150b57cec5SDimitry Andric // the lxvd2x and stxvd2x instructions, which invert the order of
160b57cec5SDimitry Andric // doublewords in a vector register. Thus code generation inserts
170b57cec5SDimitry Andric // an xxswapd after each such load, and prior to each such store.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // The extra xxswapd instructions reduce performance. The purpose
200b57cec5SDimitry Andric // of this pass is to reduce the number of xxswapd instructions
210b57cec5SDimitry Andric // required for correctness.
220b57cec5SDimitry Andric //
// The primary insight is that much code that operates on vectors
// does not care about the relative order of elements in a register,
// so long as the correct memory order is preserved.  If we have a
// computation where all input values are provided by lxvd2x/xxswapd,
// all outputs are stored using xxswapd/stxvd2x, and all intermediate
// computations are lane-insensitive (independent of element order),
// then all the xxswapd instructions associated with the loads and
// stores may be removed without changing observable semantics.
310b57cec5SDimitry Andric //
320b57cec5SDimitry Andric // This pass uses standard equivalence class infrastructure to create
330b57cec5SDimitry Andric // maximal webs of computations fitting the above description. Each
340b57cec5SDimitry Andric // such web is then optimized by removing its unnecessary xxswapd
350b57cec5SDimitry Andric // instructions.
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric // There are some lane-sensitive operations for which we can still
380b57cec5SDimitry Andric // permit the optimization, provided we modify those operations
390b57cec5SDimitry Andric // accordingly. Such operations are identified as using "special
400b57cec5SDimitry Andric // handling" within this module.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric #include "PPC.h"
450b57cec5SDimitry Andric #include "PPCInstrBuilder.h"
460b57cec5SDimitry Andric #include "PPCInstrInfo.h"
470b57cec5SDimitry Andric #include "PPCTargetMachine.h"
480b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
490b57cec5SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h"
500b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
510b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
520b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
530b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h"
540b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
550b57cec5SDimitry Andric #include "llvm/Support/Format.h"
560b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
570b57cec5SDimitry Andric
580b57cec5SDimitry Andric using namespace llvm;
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric #define DEBUG_TYPE "ppc-vsx-swaps"
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric namespace {
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric // A PPCVSXSwapEntry is created for each machine instruction that
650b57cec5SDimitry Andric // is relevant to a vector computation.
660b57cec5SDimitry Andric struct PPCVSXSwapEntry {
670b57cec5SDimitry Andric // Pointer to the instruction.
680b57cec5SDimitry Andric MachineInstr *VSEMI;
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric // Unique ID (position in the swap vector).
710b57cec5SDimitry Andric int VSEId;
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric // Attributes of this node.
740b57cec5SDimitry Andric unsigned int IsLoad : 1;
750b57cec5SDimitry Andric unsigned int IsStore : 1;
760b57cec5SDimitry Andric unsigned int IsSwap : 1;
770b57cec5SDimitry Andric unsigned int MentionsPhysVR : 1;
780b57cec5SDimitry Andric unsigned int IsSwappable : 1;
790b57cec5SDimitry Andric unsigned int MentionsPartialVR : 1;
800b57cec5SDimitry Andric unsigned int SpecialHandling : 3;
810b57cec5SDimitry Andric unsigned int WebRejected : 1;
820b57cec5SDimitry Andric unsigned int WillRemove : 1;
830b57cec5SDimitry Andric };
840b57cec5SDimitry Andric
// Codes stored in PPCVSXSwapEntry::SpecialHandling.  The numeric values
// are spelled out because the field that holds them is only three bits
// wide, so every enumerator must stay within the range 0..7.
enum SHValues {
  SH_NONE = 0,      // No special handling required.
  SH_EXTRACT = 1,
  SH_INSERT = 2,
  SH_NOSWAP_LD = 3,
  SH_NOSWAP_ST = 4,
  SH_SPLAT = 5,     // Splat whose source lane must be adjusted.
  SH_XXPERMDI = 6,  // XXPERMDI whose form must be adjusted.
  SH_COPYWIDEN = 7  // Scalar-to-vector SUBREG_TO_REG that needs a swap.
};
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric struct PPCVSXSwapRemoval : public MachineFunctionPass {
970b57cec5SDimitry Andric
980b57cec5SDimitry Andric static char ID;
990b57cec5SDimitry Andric const PPCInstrInfo *TII;
1000b57cec5SDimitry Andric MachineFunction *MF;
1010b57cec5SDimitry Andric MachineRegisterInfo *MRI;
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric // Swap entries are allocated in a vector for better performance.
1040b57cec5SDimitry Andric std::vector<PPCVSXSwapEntry> SwapVector;
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric // A mapping is maintained between machine instructions and
1070b57cec5SDimitry Andric // their swap entries. The key is the address of the MI.
1080b57cec5SDimitry Andric DenseMap<MachineInstr*, int> SwapMap;
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric // Equivalence classes are used to gather webs of related computation.
1110b57cec5SDimitry Andric // Swap entries are represented by their VSEId fields.
1120b57cec5SDimitry Andric EquivalenceClasses<int> *EC;
1130b57cec5SDimitry Andric
PPCVSXSwapRemoval__anon0761410a0111::PPCVSXSwapRemoval1140b57cec5SDimitry Andric PPCVSXSwapRemoval() : MachineFunctionPass(ID) {
1150b57cec5SDimitry Andric initializePPCVSXSwapRemovalPass(*PassRegistry::getPassRegistry());
1160b57cec5SDimitry Andric }
1170b57cec5SDimitry Andric
1180b57cec5SDimitry Andric private:
1190b57cec5SDimitry Andric // Initialize data structures.
1200b57cec5SDimitry Andric void initialize(MachineFunction &MFParm);
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric // Walk the machine instructions to gather vector usage information.
1230b57cec5SDimitry Andric // Return true iff vector mentions are present.
1240b57cec5SDimitry Andric bool gatherVectorInstructions();
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric // Add an entry to the swap vector and swap map.
1270b57cec5SDimitry Andric int addSwapEntry(MachineInstr *MI, PPCVSXSwapEntry &SwapEntry);
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric // Hunt backwards through COPY and SUBREG_TO_REG chains for a
1300b57cec5SDimitry Andric // source register. VecIdx indicates the swap vector entry to
1310b57cec5SDimitry Andric // mark as mentioning a physical register if the search leads
1320b57cec5SDimitry Andric // to one.
1330b57cec5SDimitry Andric unsigned lookThruCopyLike(unsigned SrcReg, unsigned VecIdx);
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric // Generate equivalence classes for related computations (webs).
1360b57cec5SDimitry Andric void formWebs();
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric // Analyze webs and determine those that cannot be optimized.
1390b57cec5SDimitry Andric void recordUnoptimizableWebs();
1400b57cec5SDimitry Andric
1410b57cec5SDimitry Andric // Record which swap instructions can be safely removed.
1420b57cec5SDimitry Andric void markSwapsForRemoval();
1430b57cec5SDimitry Andric
1440b57cec5SDimitry Andric // Remove swaps and update other instructions requiring special
1450b57cec5SDimitry Andric // handling. Return true iff any changes are made.
1460b57cec5SDimitry Andric bool removeSwaps();
1470b57cec5SDimitry Andric
1480b57cec5SDimitry Andric // Insert a swap instruction from SrcReg to DstReg at the given
1490b57cec5SDimitry Andric // InsertPoint.
1500b57cec5SDimitry Andric void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
1510b57cec5SDimitry Andric unsigned DstReg, unsigned SrcReg);
1520b57cec5SDimitry Andric
1530b57cec5SDimitry Andric // Update instructions requiring special handling.
1540b57cec5SDimitry Andric void handleSpecialSwappables(int EntryIdx);
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric // Dump a description of the entries in the swap vector.
1570b57cec5SDimitry Andric void dumpSwapVector();
1580b57cec5SDimitry Andric
1590b57cec5SDimitry Andric // Return true iff the given register is in the given class.
isRegInClass__anon0761410a0111::PPCVSXSwapRemoval1600b57cec5SDimitry Andric bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
1618bcb0991SDimitry Andric if (Register::isVirtualRegister(Reg))
1620b57cec5SDimitry Andric return RC->hasSubClassEq(MRI->getRegClass(Reg));
1630b57cec5SDimitry Andric return RC->contains(Reg);
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric
1660b57cec5SDimitry Andric // Return true iff the given register is a full vector register.
isVecReg__anon0761410a0111::PPCVSXSwapRemoval1670b57cec5SDimitry Andric bool isVecReg(unsigned Reg) {
1680b57cec5SDimitry Andric return (isRegInClass(Reg, &PPC::VSRCRegClass) ||
1690b57cec5SDimitry Andric isRegInClass(Reg, &PPC::VRRCRegClass));
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric // Return true iff the given register is a partial vector register.
isScalarVecReg__anon0761410a0111::PPCVSXSwapRemoval1730b57cec5SDimitry Andric bool isScalarVecReg(unsigned Reg) {
1740b57cec5SDimitry Andric return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
1750b57cec5SDimitry Andric isRegInClass(Reg, &PPC::VSSRCRegClass));
1760b57cec5SDimitry Andric }
1770b57cec5SDimitry Andric
1780b57cec5SDimitry Andric // Return true iff the given register mentions all or part of a
1790b57cec5SDimitry Andric // vector register. Also sets Partial to true if the mention
1800b57cec5SDimitry Andric // is for just the floating-point register overlap of the register.
isAnyVecReg__anon0761410a0111::PPCVSXSwapRemoval1810b57cec5SDimitry Andric bool isAnyVecReg(unsigned Reg, bool &Partial) {
1820b57cec5SDimitry Andric if (isScalarVecReg(Reg))
1830b57cec5SDimitry Andric Partial = true;
1840b57cec5SDimitry Andric return isScalarVecReg(Reg) || isVecReg(Reg);
1850b57cec5SDimitry Andric }
1860b57cec5SDimitry Andric
1870b57cec5SDimitry Andric public:
1880b57cec5SDimitry Andric // Main entry point for this pass.
runOnMachineFunction__anon0761410a0111::PPCVSXSwapRemoval1890b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override {
1900b57cec5SDimitry Andric if (skipFunction(MF.getFunction()))
1910b57cec5SDimitry Andric return false;
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric // If we don't have VSX on the subtarget, don't do anything.
1940b57cec5SDimitry Andric // Also, on Power 9 the load and store ops preserve element order and so
1950b57cec5SDimitry Andric // the swaps are not required.
1960b57cec5SDimitry Andric const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
1970b57cec5SDimitry Andric if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps())
1980b57cec5SDimitry Andric return false;
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric bool Changed = false;
2010b57cec5SDimitry Andric initialize(MF);
2020b57cec5SDimitry Andric
2030b57cec5SDimitry Andric if (gatherVectorInstructions()) {
2040b57cec5SDimitry Andric formWebs();
2050b57cec5SDimitry Andric recordUnoptimizableWebs();
2060b57cec5SDimitry Andric markSwapsForRemoval();
2070b57cec5SDimitry Andric Changed = removeSwaps();
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric
2100b57cec5SDimitry Andric // FIXME: See the allocation of EC in initialize().
2110b57cec5SDimitry Andric delete EC;
2120b57cec5SDimitry Andric return Changed;
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric };
2150b57cec5SDimitry Andric
2160b57cec5SDimitry Andric // Initialize data structures for this pass. In particular, clear the
2170b57cec5SDimitry Andric // swap vector and allocate the equivalence class mapping before
2180b57cec5SDimitry Andric // processing each function.
initialize(MachineFunction & MFParm)2190b57cec5SDimitry Andric void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
2200b57cec5SDimitry Andric MF = &MFParm;
2210b57cec5SDimitry Andric MRI = &MF->getRegInfo();
2220b57cec5SDimitry Andric TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2230b57cec5SDimitry Andric
2240b57cec5SDimitry Andric // An initial vector size of 256 appears to work well in practice.
2250b57cec5SDimitry Andric // Small/medium functions with vector content tend not to incur a
2260b57cec5SDimitry Andric // reallocation at this size. Three of the vector tests in
2270b57cec5SDimitry Andric // projects/test-suite reallocate, which seems like a reasonable rate.
2280b57cec5SDimitry Andric const int InitialVectorSize(256);
2290b57cec5SDimitry Andric SwapVector.clear();
2300b57cec5SDimitry Andric SwapVector.reserve(InitialVectorSize);
2310b57cec5SDimitry Andric
2320b57cec5SDimitry Andric // FIXME: Currently we allocate EC each time because we don't have
2330b57cec5SDimitry Andric // access to the set representation on which to call clear(). Should
2340b57cec5SDimitry Andric // consider adding a clear() method to the EquivalenceClasses class.
2350b57cec5SDimitry Andric EC = new EquivalenceClasses<int>;
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric // Create an entry in the swap vector for each instruction that mentions
2390b57cec5SDimitry Andric // a full vector register, recording various characteristics of the
2400b57cec5SDimitry Andric // instructions there.
gatherVectorInstructions()2410b57cec5SDimitry Andric bool PPCVSXSwapRemoval::gatherVectorInstructions() {
2420b57cec5SDimitry Andric bool RelevantFunction = false;
2430b57cec5SDimitry Andric
2440b57cec5SDimitry Andric for (MachineBasicBlock &MBB : *MF) {
2450b57cec5SDimitry Andric for (MachineInstr &MI : MBB) {
2460b57cec5SDimitry Andric
2470b57cec5SDimitry Andric if (MI.isDebugInstr())
2480b57cec5SDimitry Andric continue;
2490b57cec5SDimitry Andric
2500b57cec5SDimitry Andric bool RelevantInstr = false;
2510b57cec5SDimitry Andric bool Partial = false;
2520b57cec5SDimitry Andric
2530b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) {
2540b57cec5SDimitry Andric if (!MO.isReg())
2550b57cec5SDimitry Andric continue;
2568bcb0991SDimitry Andric Register Reg = MO.getReg();
257e8d8bef9SDimitry Andric // All operands need to be checked because there are instructions that
258e8d8bef9SDimitry Andric // operate on a partial register and produce a full register (such as
259e8d8bef9SDimitry Andric // XXPERMDIs).
260e8d8bef9SDimitry Andric if (isAnyVecReg(Reg, Partial))
2610b57cec5SDimitry Andric RelevantInstr = true;
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric
2640b57cec5SDimitry Andric if (!RelevantInstr)
2650b57cec5SDimitry Andric continue;
2660b57cec5SDimitry Andric
2670b57cec5SDimitry Andric RelevantFunction = true;
2680b57cec5SDimitry Andric
2690b57cec5SDimitry Andric // Create a SwapEntry initialized to zeros, then fill in the
2700b57cec5SDimitry Andric // instruction and ID fields before pushing it to the back
2710b57cec5SDimitry Andric // of the swap vector.
2720b57cec5SDimitry Andric PPCVSXSwapEntry SwapEntry{};
2730b57cec5SDimitry Andric int VecIdx = addSwapEntry(&MI, SwapEntry);
2740b57cec5SDimitry Andric
2750b57cec5SDimitry Andric switch(MI.getOpcode()) {
2760b57cec5SDimitry Andric default:
2770b57cec5SDimitry Andric // Unless noted otherwise, an instruction is considered
2780b57cec5SDimitry Andric // safe for the optimization. There are a large number of
2790b57cec5SDimitry Andric // such true-SIMD instructions (all vector math, logical,
2800b57cec5SDimitry Andric // select, compare, etc.). However, if the instruction
2810b57cec5SDimitry Andric // mentions a partial vector register and does not have
2820b57cec5SDimitry Andric // special handling defined, it is not swappable.
2830b57cec5SDimitry Andric if (Partial)
2840b57cec5SDimitry Andric SwapVector[VecIdx].MentionsPartialVR = 1;
2850b57cec5SDimitry Andric else
2860b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
2870b57cec5SDimitry Andric break;
2880b57cec5SDimitry Andric case PPC::XXPERMDI: {
2890b57cec5SDimitry Andric // This is a swap if it is of the form XXPERMDI t, s, s, 2.
2900b57cec5SDimitry Andric // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
2910b57cec5SDimitry Andric // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
2920b57cec5SDimitry Andric // for example. We have to look through chains of COPY and
2930b57cec5SDimitry Andric // SUBREG_TO_REG to find the real source value for comparison.
2940b57cec5SDimitry Andric // If the real source value is a physical register, then mark the
2950b57cec5SDimitry Andric // XXPERMDI as mentioning a physical register.
2960b57cec5SDimitry Andric int immed = MI.getOperand(3).getImm();
2970b57cec5SDimitry Andric if (immed == 2) {
2980b57cec5SDimitry Andric unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
2990b57cec5SDimitry Andric VecIdx);
3000b57cec5SDimitry Andric unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
3010b57cec5SDimitry Andric VecIdx);
3020b57cec5SDimitry Andric if (trueReg1 == trueReg2)
3030b57cec5SDimitry Andric SwapVector[VecIdx].IsSwap = 1;
3040b57cec5SDimitry Andric else {
3050b57cec5SDimitry Andric // We can still handle these if the two registers are not
3060b57cec5SDimitry Andric // identical, by adjusting the form of the XXPERMDI.
3070b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3080b57cec5SDimitry Andric SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
3090b57cec5SDimitry Andric }
3100b57cec5SDimitry Andric // This is a doubleword splat if it is of the form
3110b57cec5SDimitry Andric // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
3120b57cec5SDimitry Andric // must look through chains of copy-likes to find the source
3130b57cec5SDimitry Andric // register. We turn off the marking for mention of a physical
3140b57cec5SDimitry Andric // register, because splatting it is safe; the optimization
3150b57cec5SDimitry Andric // will not swap the value in the physical register. Whether
3160b57cec5SDimitry Andric // or not the two input registers are identical, we can handle
3170b57cec5SDimitry Andric // these by adjusting the form of the XXPERMDI.
3180b57cec5SDimitry Andric } else if (immed == 0 || immed == 3) {
3190b57cec5SDimitry Andric
3200b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3210b57cec5SDimitry Andric SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
3220b57cec5SDimitry Andric
3230b57cec5SDimitry Andric unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
3240b57cec5SDimitry Andric VecIdx);
3250b57cec5SDimitry Andric unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
3260b57cec5SDimitry Andric VecIdx);
3270b57cec5SDimitry Andric if (trueReg1 == trueReg2)
3280b57cec5SDimitry Andric SwapVector[VecIdx].MentionsPhysVR = 0;
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric } else {
3310b57cec5SDimitry Andric // We can still handle these by adjusting the form of the XXPERMDI.
3320b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3330b57cec5SDimitry Andric SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
3340b57cec5SDimitry Andric }
3350b57cec5SDimitry Andric break;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric case PPC::LVX:
3380b57cec5SDimitry Andric // Non-permuting loads are currently unsafe. We can use special
3390b57cec5SDimitry Andric // handling for this in the future. By not marking these as
3400b57cec5SDimitry Andric // IsSwap, we ensure computations containing them will be rejected
3410b57cec5SDimitry Andric // for now.
3420b57cec5SDimitry Andric SwapVector[VecIdx].IsLoad = 1;
3430b57cec5SDimitry Andric break;
3440b57cec5SDimitry Andric case PPC::LXVD2X:
3450b57cec5SDimitry Andric case PPC::LXVW4X:
3460b57cec5SDimitry Andric // Permuting loads are marked as both load and swap, and are
3470b57cec5SDimitry Andric // safe for optimization.
3480b57cec5SDimitry Andric SwapVector[VecIdx].IsLoad = 1;
3490b57cec5SDimitry Andric SwapVector[VecIdx].IsSwap = 1;
3500b57cec5SDimitry Andric break;
3510b57cec5SDimitry Andric case PPC::LXSDX:
3520b57cec5SDimitry Andric case PPC::LXSSPX:
3530b57cec5SDimitry Andric case PPC::XFLOADf64:
3540b57cec5SDimitry Andric case PPC::XFLOADf32:
3550b57cec5SDimitry Andric // A load of a floating-point value into the high-order half of
3560b57cec5SDimitry Andric // a vector register is safe, provided that we introduce a swap
3570b57cec5SDimitry Andric // following the load, which will be done by the SUBREG_TO_REG
3580b57cec5SDimitry Andric // support. So just mark these as safe.
3590b57cec5SDimitry Andric SwapVector[VecIdx].IsLoad = 1;
3600b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3610b57cec5SDimitry Andric break;
3620b57cec5SDimitry Andric case PPC::STVX:
3630b57cec5SDimitry Andric // Non-permuting stores are currently unsafe. We can use special
3640b57cec5SDimitry Andric // handling for this in the future. By not marking these as
3650b57cec5SDimitry Andric // IsSwap, we ensure computations containing them will be rejected
3660b57cec5SDimitry Andric // for now.
3670b57cec5SDimitry Andric SwapVector[VecIdx].IsStore = 1;
3680b57cec5SDimitry Andric break;
3690b57cec5SDimitry Andric case PPC::STXVD2X:
3700b57cec5SDimitry Andric case PPC::STXVW4X:
3710b57cec5SDimitry Andric // Permuting stores are marked as both store and swap, and are
3720b57cec5SDimitry Andric // safe for optimization.
3730b57cec5SDimitry Andric SwapVector[VecIdx].IsStore = 1;
3740b57cec5SDimitry Andric SwapVector[VecIdx].IsSwap = 1;
3750b57cec5SDimitry Andric break;
3760b57cec5SDimitry Andric case PPC::COPY:
3770b57cec5SDimitry Andric // These are fine provided they are moving between full vector
3780b57cec5SDimitry Andric // register classes.
3790b57cec5SDimitry Andric if (isVecReg(MI.getOperand(0).getReg()) &&
3800b57cec5SDimitry Andric isVecReg(MI.getOperand(1).getReg()))
3810b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3820b57cec5SDimitry Andric // If we have a copy from one scalar floating-point register
3830b57cec5SDimitry Andric // to another, we can accept this even if it is a physical
3840b57cec5SDimitry Andric // register. The only way this gets involved is if it feeds
3850b57cec5SDimitry Andric // a SUBREG_TO_REG, which is handled by introducing a swap.
3860b57cec5SDimitry Andric else if (isScalarVecReg(MI.getOperand(0).getReg()) &&
3870b57cec5SDimitry Andric isScalarVecReg(MI.getOperand(1).getReg()))
3880b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
3890b57cec5SDimitry Andric break;
3900b57cec5SDimitry Andric case PPC::SUBREG_TO_REG: {
3910b57cec5SDimitry Andric // These are fine provided they are moving between full vector
3920b57cec5SDimitry Andric // register classes. If they are moving from a scalar
3930b57cec5SDimitry Andric // floating-point class to a vector class, we can handle those
3940b57cec5SDimitry Andric // as well, provided we introduce a swap. It is generally the
3950b57cec5SDimitry Andric // case that we will introduce fewer swaps than we remove, but
3960b57cec5SDimitry Andric // (FIXME) a cost model could be used. However, introduced
3970b57cec5SDimitry Andric // swaps could potentially be CSEd, so this is not trivial.
3980b57cec5SDimitry Andric if (isVecReg(MI.getOperand(0).getReg()) &&
3990b57cec5SDimitry Andric isVecReg(MI.getOperand(2).getReg()))
4000b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
4010b57cec5SDimitry Andric else if (isVecReg(MI.getOperand(0).getReg()) &&
4020b57cec5SDimitry Andric isScalarVecReg(MI.getOperand(2).getReg())) {
4030b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
4040b57cec5SDimitry Andric SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
4050b57cec5SDimitry Andric }
4060b57cec5SDimitry Andric break;
4070b57cec5SDimitry Andric }
4080b57cec5SDimitry Andric case PPC::VSPLTB:
4090b57cec5SDimitry Andric case PPC::VSPLTH:
4100b57cec5SDimitry Andric case PPC::VSPLTW:
4110b57cec5SDimitry Andric case PPC::XXSPLTW:
4120b57cec5SDimitry Andric // Splats are lane-sensitive, but we can use special handling
4130b57cec5SDimitry Andric // to adjust the source lane for the splat.
4140b57cec5SDimitry Andric SwapVector[VecIdx].IsSwappable = 1;
4150b57cec5SDimitry Andric SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
4160b57cec5SDimitry Andric break;
4170b57cec5SDimitry Andric // The presence of the following lane-sensitive operations in a
4180b57cec5SDimitry Andric // web will kill the optimization, at least for now. For these
4190b57cec5SDimitry Andric // we do nothing, causing the optimization to fail.
4200b57cec5SDimitry Andric // FIXME: Some of these could be permitted with special handling,
4210b57cec5SDimitry Andric // and will be phased in as time permits.
4220b57cec5SDimitry Andric // FIXME: There is no simple and maintainable way to express a set
4230b57cec5SDimitry Andric // of opcodes having a common attribute in TableGen. Should this
4240b57cec5SDimitry Andric // change, this is a prime candidate to use such a mechanism.
4250b57cec5SDimitry Andric case PPC::INLINEASM:
4260b57cec5SDimitry Andric case PPC::INLINEASM_BR:
4270b57cec5SDimitry Andric case PPC::EXTRACT_SUBREG:
4280b57cec5SDimitry Andric case PPC::INSERT_SUBREG:
4290b57cec5SDimitry Andric case PPC::COPY_TO_REGCLASS:
4300b57cec5SDimitry Andric case PPC::LVEBX:
4310b57cec5SDimitry Andric case PPC::LVEHX:
4320b57cec5SDimitry Andric case PPC::LVEWX:
4330b57cec5SDimitry Andric case PPC::LVSL:
4340b57cec5SDimitry Andric case PPC::LVSR:
4350b57cec5SDimitry Andric case PPC::LVXL:
4360b57cec5SDimitry Andric case PPC::STVEBX:
4370b57cec5SDimitry Andric case PPC::STVEHX:
4380b57cec5SDimitry Andric case PPC::STVEWX:
4390b57cec5SDimitry Andric case PPC::STVXL:
4400b57cec5SDimitry Andric // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
4410b57cec5SDimitry Andric // by adding special handling for narrowing copies as well as
4420b57cec5SDimitry Andric // widening ones. However, I've experimented with this, and in
4430b57cec5SDimitry Andric // practice we currently do not appear to use STXSDX fed by
4440b57cec5SDimitry Andric // a narrowing copy from a full vector register. Since I can't
4450b57cec5SDimitry Andric // generate any useful test cases, I've left this alone for now.
4460b57cec5SDimitry Andric case PPC::STXSDX:
4470b57cec5SDimitry Andric case PPC::STXSSPX:
4480b57cec5SDimitry Andric case PPC::VCIPHER:
4490b57cec5SDimitry Andric case PPC::VCIPHERLAST:
4500b57cec5SDimitry Andric case PPC::VMRGHB:
4510b57cec5SDimitry Andric case PPC::VMRGHH:
4520b57cec5SDimitry Andric case PPC::VMRGHW:
4530b57cec5SDimitry Andric case PPC::VMRGLB:
4540b57cec5SDimitry Andric case PPC::VMRGLH:
4550b57cec5SDimitry Andric case PPC::VMRGLW:
4560b57cec5SDimitry Andric case PPC::VMULESB:
4570b57cec5SDimitry Andric case PPC::VMULESH:
4580b57cec5SDimitry Andric case PPC::VMULESW:
4590b57cec5SDimitry Andric case PPC::VMULEUB:
4600b57cec5SDimitry Andric case PPC::VMULEUH:
4610b57cec5SDimitry Andric case PPC::VMULEUW:
4620b57cec5SDimitry Andric case PPC::VMULOSB:
4630b57cec5SDimitry Andric case PPC::VMULOSH:
4640b57cec5SDimitry Andric case PPC::VMULOSW:
4650b57cec5SDimitry Andric case PPC::VMULOUB:
4660b57cec5SDimitry Andric case PPC::VMULOUH:
4670b57cec5SDimitry Andric case PPC::VMULOUW:
4680b57cec5SDimitry Andric case PPC::VNCIPHER:
4690b57cec5SDimitry Andric case PPC::VNCIPHERLAST:
4700b57cec5SDimitry Andric case PPC::VPERM:
4710b57cec5SDimitry Andric case PPC::VPERMXOR:
4720b57cec5SDimitry Andric case PPC::VPKPX:
4730b57cec5SDimitry Andric case PPC::VPKSHSS:
4740b57cec5SDimitry Andric case PPC::VPKSHUS:
4750b57cec5SDimitry Andric case PPC::VPKSDSS:
4760b57cec5SDimitry Andric case PPC::VPKSDUS:
4770b57cec5SDimitry Andric case PPC::VPKSWSS:
4780b57cec5SDimitry Andric case PPC::VPKSWUS:
4790b57cec5SDimitry Andric case PPC::VPKUDUM:
4800b57cec5SDimitry Andric case PPC::VPKUDUS:
4810b57cec5SDimitry Andric case PPC::VPKUHUM:
4820b57cec5SDimitry Andric case PPC::VPKUHUS:
4830b57cec5SDimitry Andric case PPC::VPKUWUM:
4840b57cec5SDimitry Andric case PPC::VPKUWUS:
4850b57cec5SDimitry Andric case PPC::VPMSUMB:
4860b57cec5SDimitry Andric case PPC::VPMSUMD:
4870b57cec5SDimitry Andric case PPC::VPMSUMH:
4880b57cec5SDimitry Andric case PPC::VPMSUMW:
4890b57cec5SDimitry Andric case PPC::VRLB:
4900b57cec5SDimitry Andric case PPC::VRLD:
4910b57cec5SDimitry Andric case PPC::VRLH:
4920b57cec5SDimitry Andric case PPC::VRLW:
4930b57cec5SDimitry Andric case PPC::VSBOX:
4940b57cec5SDimitry Andric case PPC::VSHASIGMAD:
4950b57cec5SDimitry Andric case PPC::VSHASIGMAW:
4960b57cec5SDimitry Andric case PPC::VSL:
4970b57cec5SDimitry Andric case PPC::VSLDOI:
4980b57cec5SDimitry Andric case PPC::VSLO:
4990b57cec5SDimitry Andric case PPC::VSR:
5000b57cec5SDimitry Andric case PPC::VSRO:
5010b57cec5SDimitry Andric case PPC::VSUM2SWS:
5020b57cec5SDimitry Andric case PPC::VSUM4SBS:
5030b57cec5SDimitry Andric case PPC::VSUM4SHS:
5040b57cec5SDimitry Andric case PPC::VSUM4UBS:
5050b57cec5SDimitry Andric case PPC::VSUMSWS:
5060b57cec5SDimitry Andric case PPC::VUPKHPX:
5070b57cec5SDimitry Andric case PPC::VUPKHSB:
5080b57cec5SDimitry Andric case PPC::VUPKHSH:
5090b57cec5SDimitry Andric case PPC::VUPKHSW:
5100b57cec5SDimitry Andric case PPC::VUPKLPX:
5110b57cec5SDimitry Andric case PPC::VUPKLSB:
5120b57cec5SDimitry Andric case PPC::VUPKLSH:
5130b57cec5SDimitry Andric case PPC::VUPKLSW:
5140b57cec5SDimitry Andric case PPC::XXMRGHW:
5150b57cec5SDimitry Andric case PPC::XXMRGLW:
5160b57cec5SDimitry Andric // XXSLDWI could be replaced by a general permute with one of three
5170b57cec5SDimitry Andric // permute control vectors (for shift values 1, 2, 3). However,
5180b57cec5SDimitry Andric // VPERM has a more restrictive register class.
5190b57cec5SDimitry Andric case PPC::XXSLDWI:
5200b57cec5SDimitry Andric case PPC::XSCVDPSPN:
5210b57cec5SDimitry Andric case PPC::XSCVSPDPN:
52281ad6265SDimitry Andric case PPC::MTVSCR:
52381ad6265SDimitry Andric case PPC::MFVSCR:
5240b57cec5SDimitry Andric break;
5250b57cec5SDimitry Andric }
5260b57cec5SDimitry Andric }
5270b57cec5SDimitry Andric }
5280b57cec5SDimitry Andric
5290b57cec5SDimitry Andric if (RelevantFunction) {
5300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Swap vector when first built\n\n");
5310b57cec5SDimitry Andric LLVM_DEBUG(dumpSwapVector());
5320b57cec5SDimitry Andric }
5330b57cec5SDimitry Andric
5340b57cec5SDimitry Andric return RelevantFunction;
5350b57cec5SDimitry Andric }
5360b57cec5SDimitry Andric
5370b57cec5SDimitry Andric // Add an entry to the swap vector and swap map, and make a
5380b57cec5SDimitry Andric // singleton equivalence class for the entry.
addSwapEntry(MachineInstr * MI,PPCVSXSwapEntry & SwapEntry)5390b57cec5SDimitry Andric int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
5400b57cec5SDimitry Andric PPCVSXSwapEntry& SwapEntry) {
5410b57cec5SDimitry Andric SwapEntry.VSEMI = MI;
5420b57cec5SDimitry Andric SwapEntry.VSEId = SwapVector.size();
5430b57cec5SDimitry Andric SwapVector.push_back(SwapEntry);
5440b57cec5SDimitry Andric EC->insert(SwapEntry.VSEId);
5450b57cec5SDimitry Andric SwapMap[MI] = SwapEntry.VSEId;
5460b57cec5SDimitry Andric return SwapEntry.VSEId;
5470b57cec5SDimitry Andric }
5480b57cec5SDimitry Andric
5490b57cec5SDimitry Andric // This is used to find the "true" source register for an
5500b57cec5SDimitry Andric // XXPERMDI instruction, since MachineCSE does not handle the
5510b57cec5SDimitry Andric // "copy-like" operations (Copy and SubregToReg). Returns
5520b57cec5SDimitry Andric // the original SrcReg unless it is the target of a copy-like
5530b57cec5SDimitry Andric // operation, in which case we chain backwards through all
5540b57cec5SDimitry Andric // such operations to the ultimate source register. If a
5550b57cec5SDimitry Andric // physical register is encountered, we stop the search and
5560b57cec5SDimitry Andric // flag the swap entry indicated by VecIdx (the original
5570b57cec5SDimitry Andric // XXPERMDI) as mentioning a physical register.
lookThruCopyLike(unsigned SrcReg,unsigned VecIdx)5580b57cec5SDimitry Andric unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
5590b57cec5SDimitry Andric unsigned VecIdx) {
5600b57cec5SDimitry Andric MachineInstr *MI = MRI->getVRegDef(SrcReg);
5610b57cec5SDimitry Andric if (!MI->isCopyLike())
5620b57cec5SDimitry Andric return SrcReg;
5630b57cec5SDimitry Andric
5640b57cec5SDimitry Andric unsigned CopySrcReg;
5650b57cec5SDimitry Andric if (MI->isCopy())
5660b57cec5SDimitry Andric CopySrcReg = MI->getOperand(1).getReg();
5670b57cec5SDimitry Andric else {
5680b57cec5SDimitry Andric assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
5690b57cec5SDimitry Andric CopySrcReg = MI->getOperand(2).getReg();
5700b57cec5SDimitry Andric }
5710b57cec5SDimitry Andric
5728bcb0991SDimitry Andric if (!Register::isVirtualRegister(CopySrcReg)) {
5730b57cec5SDimitry Andric if (!isScalarVecReg(CopySrcReg))
5740b57cec5SDimitry Andric SwapVector[VecIdx].MentionsPhysVR = 1;
5750b57cec5SDimitry Andric return CopySrcReg;
5760b57cec5SDimitry Andric }
5770b57cec5SDimitry Andric
5780b57cec5SDimitry Andric return lookThruCopyLike(CopySrcReg, VecIdx);
5790b57cec5SDimitry Andric }
5800b57cec5SDimitry Andric
5810b57cec5SDimitry Andric // Generate equivalence classes for related computations (webs) by
5820b57cec5SDimitry Andric // def-use relationships of virtual registers. Mention of a physical
5830b57cec5SDimitry Andric // register terminates the generation of equivalence classes as this
5840b57cec5SDimitry Andric // indicates a use of a parameter, definition of a return value, use
5850b57cec5SDimitry Andric // of a value returned from a call, or definition of a parameter to a
5860b57cec5SDimitry Andric // call. Computations with physical register mentions are flagged
5870b57cec5SDimitry Andric // as such so their containing webs will not be optimized.
formWebs()5880b57cec5SDimitry Andric void PPCVSXSwapRemoval::formWebs() {
5890b57cec5SDimitry Andric
5900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n*** Forming webs for swap removal ***\n\n");
5910b57cec5SDimitry Andric
5920b57cec5SDimitry Andric for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
5930b57cec5SDimitry Andric
5940b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
5950b57cec5SDimitry Andric
5960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n" << SwapVector[EntryIdx].VSEId << " ");
5970b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
5980b57cec5SDimitry Andric
5990b57cec5SDimitry Andric // It's sufficient to walk vector uses and join them to their unique
6000b57cec5SDimitry Andric // definitions. In addition, check full vector register operands
6010b57cec5SDimitry Andric // for physical regs. We exclude partial-vector register operands
6020b57cec5SDimitry Andric // because we can handle them if copied to a full vector.
6030b57cec5SDimitry Andric for (const MachineOperand &MO : MI->operands()) {
6040b57cec5SDimitry Andric if (!MO.isReg())
6050b57cec5SDimitry Andric continue;
6060b57cec5SDimitry Andric
6078bcb0991SDimitry Andric Register Reg = MO.getReg();
6080b57cec5SDimitry Andric if (!isVecReg(Reg) && !isScalarVecReg(Reg))
6090b57cec5SDimitry Andric continue;
6100b57cec5SDimitry Andric
611bdd1243dSDimitry Andric if (!Reg.isVirtual()) {
6120b57cec5SDimitry Andric if (!(MI->isCopy() && isScalarVecReg(Reg)))
6130b57cec5SDimitry Andric SwapVector[EntryIdx].MentionsPhysVR = 1;
6140b57cec5SDimitry Andric continue;
6150b57cec5SDimitry Andric }
6160b57cec5SDimitry Andric
6170b57cec5SDimitry Andric if (!MO.isUse())
6180b57cec5SDimitry Andric continue;
6190b57cec5SDimitry Andric
6200b57cec5SDimitry Andric MachineInstr* DefMI = MRI->getVRegDef(Reg);
621*06c3fb27SDimitry Andric assert(SwapMap.contains(DefMI) &&
6220b57cec5SDimitry Andric "Inconsistency: def of vector reg not found in swap map!");
6230b57cec5SDimitry Andric int DefIdx = SwapMap[DefMI];
6240b57cec5SDimitry Andric (void)EC->unionSets(SwapVector[DefIdx].VSEId,
6250b57cec5SDimitry Andric SwapVector[EntryIdx].VSEId);
6260b57cec5SDimitry Andric
6270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << format("Unioning %d with %d\n",
6280b57cec5SDimitry Andric SwapVector[DefIdx].VSEId,
6290b57cec5SDimitry Andric SwapVector[EntryIdx].VSEId));
6300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Def: ");
6310b57cec5SDimitry Andric LLVM_DEBUG(DefMI->dump());
6320b57cec5SDimitry Andric }
6330b57cec5SDimitry Andric }
6340b57cec5SDimitry Andric }
6350b57cec5SDimitry Andric
6360b57cec5SDimitry Andric // Walk the swap vector entries looking for conditions that prevent their
6370b57cec5SDimitry Andric // containing computations from being optimized. When such conditions are
6380b57cec5SDimitry Andric // found, mark the representative of the computation's equivalence class
6390b57cec5SDimitry Andric // as rejected.
recordUnoptimizableWebs()6400b57cec5SDimitry Andric void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
6410b57cec5SDimitry Andric
6420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n*** Rejecting webs for swap removal ***\n\n");
6430b57cec5SDimitry Andric
6440b57cec5SDimitry Andric for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
6450b57cec5SDimitry Andric int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
6460b57cec5SDimitry Andric
6470b57cec5SDimitry Andric // If representative is already rejected, don't waste further time.
6480b57cec5SDimitry Andric if (SwapVector[Repr].WebRejected)
6490b57cec5SDimitry Andric continue;
6500b57cec5SDimitry Andric
6510b57cec5SDimitry Andric // Reject webs containing mentions of physical or partial registers, or
6520b57cec5SDimitry Andric // containing operations that we don't know how to handle in a lane-
6530b57cec5SDimitry Andric // permuted region.
6540b57cec5SDimitry Andric if (SwapVector[EntryIdx].MentionsPhysVR ||
6550b57cec5SDimitry Andric SwapVector[EntryIdx].MentionsPartialVR ||
6560b57cec5SDimitry Andric !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
6570b57cec5SDimitry Andric
6580b57cec5SDimitry Andric SwapVector[Repr].WebRejected = 1;
6590b57cec5SDimitry Andric
6600b57cec5SDimitry Andric LLVM_DEBUG(
6610b57cec5SDimitry Andric dbgs() << format("Web %d rejected for physreg, partial reg, or not "
6620b57cec5SDimitry Andric "swap[pable]\n",
6630b57cec5SDimitry Andric Repr));
6640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " in " << EntryIdx << ": ");
6650b57cec5SDimitry Andric LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
6660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
6670b57cec5SDimitry Andric }
6680b57cec5SDimitry Andric
6690b57cec5SDimitry Andric // Reject webs than contain swapping loads that feed something other
6700b57cec5SDimitry Andric // than a swap instruction.
6710b57cec5SDimitry Andric else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
6720b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
6738bcb0991SDimitry Andric Register DefReg = MI->getOperand(0).getReg();
6740b57cec5SDimitry Andric
6750b57cec5SDimitry Andric // We skip debug instructions in the analysis. (Note that debug
6760b57cec5SDimitry Andric // location information is still maintained by this optimization
6770b57cec5SDimitry Andric // because it remains on the LXVD2X and STXVD2X instructions after
6780b57cec5SDimitry Andric // the XXPERMDIs are removed.)
6790b57cec5SDimitry Andric for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
6800b57cec5SDimitry Andric int UseIdx = SwapMap[&UseMI];
6810b57cec5SDimitry Andric
6820b57cec5SDimitry Andric if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
6830b57cec5SDimitry Andric SwapVector[UseIdx].IsStore) {
6840b57cec5SDimitry Andric
6850b57cec5SDimitry Andric SwapVector[Repr].WebRejected = 1;
6860b57cec5SDimitry Andric
6870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << format(
6880b57cec5SDimitry Andric "Web %d rejected for load not feeding swap\n", Repr));
6890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": ");
6900b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
6910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " use " << UseIdx << ": ");
6920b57cec5SDimitry Andric LLVM_DEBUG(UseMI.dump());
6930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
6940b57cec5SDimitry Andric }
695e8d8bef9SDimitry Andric
696e8d8bef9SDimitry Andric // It is possible that the load feeds a swap and that swap feeds a
697e8d8bef9SDimitry Andric // store. In such a case, the code is actually trying to store a swapped
698e8d8bef9SDimitry Andric // vector. We must reject such webs.
699e8d8bef9SDimitry Andric if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
700e8d8bef9SDimitry Andric !SwapVector[UseIdx].IsStore) {
701e8d8bef9SDimitry Andric Register SwapDefReg = UseMI.getOperand(0).getReg();
702e8d8bef9SDimitry Andric for (MachineInstr &UseOfUseMI :
703e8d8bef9SDimitry Andric MRI->use_nodbg_instructions(SwapDefReg)) {
704e8d8bef9SDimitry Andric int UseOfUseIdx = SwapMap[&UseOfUseMI];
705e8d8bef9SDimitry Andric if (SwapVector[UseOfUseIdx].IsStore) {
706e8d8bef9SDimitry Andric SwapVector[Repr].WebRejected = 1;
707e8d8bef9SDimitry Andric LLVM_DEBUG(
708e8d8bef9SDimitry Andric dbgs() << format(
709e8d8bef9SDimitry Andric "Web %d rejected for load/swap feeding a store\n", Repr));
710e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": ");
711e8d8bef9SDimitry Andric LLVM_DEBUG(MI->dump());
712e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " use " << UseIdx << ": ");
713e8d8bef9SDimitry Andric LLVM_DEBUG(UseMI.dump());
714e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
715e8d8bef9SDimitry Andric }
716e8d8bef9SDimitry Andric }
717e8d8bef9SDimitry Andric }
7180b57cec5SDimitry Andric }
7190b57cec5SDimitry Andric
7200b57cec5SDimitry Andric // Reject webs that contain swapping stores that are fed by something
7210b57cec5SDimitry Andric // other than a swap instruction.
7220b57cec5SDimitry Andric } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
7230b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
7248bcb0991SDimitry Andric Register UseReg = MI->getOperand(0).getReg();
7250b57cec5SDimitry Andric MachineInstr *DefMI = MRI->getVRegDef(UseReg);
7268bcb0991SDimitry Andric Register DefReg = DefMI->getOperand(0).getReg();
7270b57cec5SDimitry Andric int DefIdx = SwapMap[DefMI];
7280b57cec5SDimitry Andric
7290b57cec5SDimitry Andric if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
7300b57cec5SDimitry Andric SwapVector[DefIdx].IsStore) {
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric SwapVector[Repr].WebRejected = 1;
7330b57cec5SDimitry Andric
7340b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << format(
7350b57cec5SDimitry Andric "Web %d rejected for store not fed by swap\n", Repr));
7360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " def " << DefIdx << ": ");
7370b57cec5SDimitry Andric LLVM_DEBUG(DefMI->dump());
7380b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " use " << EntryIdx << ": ");
7390b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
7400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
7410b57cec5SDimitry Andric }
7420b57cec5SDimitry Andric
7430b57cec5SDimitry Andric // Ensure all uses of the register defined by DefMI feed store
7440b57cec5SDimitry Andric // instructions
7450b57cec5SDimitry Andric for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
7460b57cec5SDimitry Andric int UseIdx = SwapMap[&UseMI];
7470b57cec5SDimitry Andric
7480b57cec5SDimitry Andric if (SwapVector[UseIdx].VSEMI->getOpcode() != MI->getOpcode()) {
7490b57cec5SDimitry Andric SwapVector[Repr].WebRejected = 1;
7500b57cec5SDimitry Andric
7510b57cec5SDimitry Andric LLVM_DEBUG(
7520b57cec5SDimitry Andric dbgs() << format(
7530b57cec5SDimitry Andric "Web %d rejected for swap not feeding only stores\n", Repr));
7540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " def "
7550b57cec5SDimitry Andric << " : ");
7560b57cec5SDimitry Andric LLVM_DEBUG(DefMI->dump());
7570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " use " << UseIdx << ": ");
7580b57cec5SDimitry Andric LLVM_DEBUG(SwapVector[UseIdx].VSEMI->dump());
7590b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
7600b57cec5SDimitry Andric }
7610b57cec5SDimitry Andric }
7620b57cec5SDimitry Andric }
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric
7650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
7660b57cec5SDimitry Andric LLVM_DEBUG(dumpSwapVector());
7670b57cec5SDimitry Andric }
7680b57cec5SDimitry Andric
7690b57cec5SDimitry Andric // Walk the swap vector entries looking for swaps fed by permuting loads
7700b57cec5SDimitry Andric // and swaps that feed permuting stores. If the containing computation
7710b57cec5SDimitry Andric // has not been marked rejected, mark each such swap for removal.
7720b57cec5SDimitry Andric // (Removal is delayed in case optimization has disturbed the pattern,
7730b57cec5SDimitry Andric // such that multiple loads feed the same swap, etc.)
markSwapsForRemoval()7740b57cec5SDimitry Andric void PPCVSXSwapRemoval::markSwapsForRemoval() {
7750b57cec5SDimitry Andric
7760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n*** Marking swaps for removal ***\n\n");
7770b57cec5SDimitry Andric
7780b57cec5SDimitry Andric for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
7790b57cec5SDimitry Andric
7800b57cec5SDimitry Andric if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
7810b57cec5SDimitry Andric int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
7820b57cec5SDimitry Andric
7830b57cec5SDimitry Andric if (!SwapVector[Repr].WebRejected) {
7840b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
7858bcb0991SDimitry Andric Register DefReg = MI->getOperand(0).getReg();
7860b57cec5SDimitry Andric
7870b57cec5SDimitry Andric for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
7880b57cec5SDimitry Andric int UseIdx = SwapMap[&UseMI];
7890b57cec5SDimitry Andric SwapVector[UseIdx].WillRemove = 1;
7900b57cec5SDimitry Andric
7910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Marking swap fed by load for removal: ");
7920b57cec5SDimitry Andric LLVM_DEBUG(UseMI.dump());
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric }
7950b57cec5SDimitry Andric
7960b57cec5SDimitry Andric } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
7970b57cec5SDimitry Andric int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
7980b57cec5SDimitry Andric
7990b57cec5SDimitry Andric if (!SwapVector[Repr].WebRejected) {
8000b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
8018bcb0991SDimitry Andric Register UseReg = MI->getOperand(0).getReg();
8020b57cec5SDimitry Andric MachineInstr *DefMI = MRI->getVRegDef(UseReg);
8030b57cec5SDimitry Andric int DefIdx = SwapMap[DefMI];
8040b57cec5SDimitry Andric SwapVector[DefIdx].WillRemove = 1;
8050b57cec5SDimitry Andric
8060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Marking swap feeding store for removal: ");
8070b57cec5SDimitry Andric LLVM_DEBUG(DefMI->dump());
8080b57cec5SDimitry Andric }
8090b57cec5SDimitry Andric
8100b57cec5SDimitry Andric } else if (SwapVector[EntryIdx].IsSwappable &&
8110b57cec5SDimitry Andric SwapVector[EntryIdx].SpecialHandling != 0) {
8120b57cec5SDimitry Andric int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
8130b57cec5SDimitry Andric
8140b57cec5SDimitry Andric if (!SwapVector[Repr].WebRejected)
8150b57cec5SDimitry Andric handleSpecialSwappables(EntryIdx);
8160b57cec5SDimitry Andric }
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric }
8190b57cec5SDimitry Andric
8200b57cec5SDimitry Andric // Create an xxswapd instruction and insert it prior to the given point.
8210b57cec5SDimitry Andric // MI is used to determine basic block and debug loc information.
8220b57cec5SDimitry Andric // FIXME: When inserting a swap, we should check whether SrcReg is
8230b57cec5SDimitry Andric // defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
8240b57cec5SDimitry Andric // then instead we should generate a copy from Reg to DstReg.
insertSwap(MachineInstr * MI,MachineBasicBlock::iterator InsertPoint,unsigned DstReg,unsigned SrcReg)8250b57cec5SDimitry Andric void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
8260b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPoint,
8270b57cec5SDimitry Andric unsigned DstReg, unsigned SrcReg) {
8280b57cec5SDimitry Andric BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
8290b57cec5SDimitry Andric TII->get(PPC::XXPERMDI), DstReg)
8300b57cec5SDimitry Andric .addReg(SrcReg)
8310b57cec5SDimitry Andric .addReg(SrcReg)
8320b57cec5SDimitry Andric .addImm(2);
8330b57cec5SDimitry Andric }
8340b57cec5SDimitry Andric
8350b57cec5SDimitry Andric // The identified swap entry requires special handling to allow its
8360b57cec5SDimitry Andric // containing computation to be optimized. Perform that handling
8370b57cec5SDimitry Andric // here.
8380b57cec5SDimitry Andric // FIXME: Additional opportunities will be phased in with subsequent
8390b57cec5SDimitry Andric // patches.
handleSpecialSwappables(int EntryIdx)8400b57cec5SDimitry Andric void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
8410b57cec5SDimitry Andric switch (SwapVector[EntryIdx].SpecialHandling) {
8420b57cec5SDimitry Andric
8430b57cec5SDimitry Andric default:
8440b57cec5SDimitry Andric llvm_unreachable("Unexpected special handling type");
8450b57cec5SDimitry Andric
8460b57cec5SDimitry Andric // For splats based on an index into a vector, add N/2 modulo N
8470b57cec5SDimitry Andric // to the index, where N is the number of vector elements.
8480b57cec5SDimitry Andric case SHValues::SH_SPLAT: {
8490b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
8500b57cec5SDimitry Andric unsigned NElts;
8510b57cec5SDimitry Andric
8520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing splat: ");
8530b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
8540b57cec5SDimitry Andric
8550b57cec5SDimitry Andric switch (MI->getOpcode()) {
8560b57cec5SDimitry Andric default:
8570b57cec5SDimitry Andric llvm_unreachable("Unexpected splat opcode");
8580b57cec5SDimitry Andric case PPC::VSPLTB: NElts = 16; break;
8590b57cec5SDimitry Andric case PPC::VSPLTH: NElts = 8; break;
8600b57cec5SDimitry Andric case PPC::VSPLTW:
8610b57cec5SDimitry Andric case PPC::XXSPLTW: NElts = 4; break;
8620b57cec5SDimitry Andric }
8630b57cec5SDimitry Andric
8640b57cec5SDimitry Andric unsigned EltNo;
8650b57cec5SDimitry Andric if (MI->getOpcode() == PPC::XXSPLTW)
8660b57cec5SDimitry Andric EltNo = MI->getOperand(2).getImm();
8670b57cec5SDimitry Andric else
8680b57cec5SDimitry Andric EltNo = MI->getOperand(1).getImm();
8690b57cec5SDimitry Andric
8700b57cec5SDimitry Andric EltNo = (EltNo + NElts / 2) % NElts;
8710b57cec5SDimitry Andric if (MI->getOpcode() == PPC::XXSPLTW)
8720b57cec5SDimitry Andric MI->getOperand(2).setImm(EltNo);
8730b57cec5SDimitry Andric else
8740b57cec5SDimitry Andric MI->getOperand(1).setImm(EltNo);
8750b57cec5SDimitry Andric
8760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Into: ");
8770b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
8780b57cec5SDimitry Andric break;
8790b57cec5SDimitry Andric }
8800b57cec5SDimitry Andric
8810b57cec5SDimitry Andric // For an XXPERMDI that isn't handled otherwise, we need to
8820b57cec5SDimitry Andric // reverse the order of the operands. If the selector operand
8830b57cec5SDimitry Andric // has a value of 0 or 3, we need to change it to 3 or 0,
8840b57cec5SDimitry Andric // respectively. Otherwise we should leave it alone. (This
8850b57cec5SDimitry Andric // is equivalent to reversing the two bits of the selector
8860b57cec5SDimitry Andric // operand and complementing the result.)
8870b57cec5SDimitry Andric case SHValues::SH_XXPERMDI: {
8880b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
8890b57cec5SDimitry Andric
8900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing XXPERMDI: ");
8910b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
8920b57cec5SDimitry Andric
8930b57cec5SDimitry Andric unsigned Selector = MI->getOperand(3).getImm();
8940b57cec5SDimitry Andric if (Selector == 0 || Selector == 3)
8950b57cec5SDimitry Andric Selector = 3 - Selector;
8960b57cec5SDimitry Andric MI->getOperand(3).setImm(Selector);
8970b57cec5SDimitry Andric
8988bcb0991SDimitry Andric Register Reg1 = MI->getOperand(1).getReg();
8998bcb0991SDimitry Andric Register Reg2 = MI->getOperand(2).getReg();
9000b57cec5SDimitry Andric MI->getOperand(1).setReg(Reg2);
9010b57cec5SDimitry Andric MI->getOperand(2).setReg(Reg1);
9020b57cec5SDimitry Andric
9030b57cec5SDimitry Andric // We also need to swap kill flag associated with the register.
9040b57cec5SDimitry Andric bool IsKill1 = MI->getOperand(1).isKill();
9050b57cec5SDimitry Andric bool IsKill2 = MI->getOperand(2).isKill();
9060b57cec5SDimitry Andric MI->getOperand(1).setIsKill(IsKill2);
9070b57cec5SDimitry Andric MI->getOperand(2).setIsKill(IsKill1);
9080b57cec5SDimitry Andric
9090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Into: ");
9100b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
9110b57cec5SDimitry Andric break;
9120b57cec5SDimitry Andric }
9130b57cec5SDimitry Andric
9140b57cec5SDimitry Andric // For a copy from a scalar floating-point register to a vector
9150b57cec5SDimitry Andric // register, removing swaps will leave the copied value in the
9160b57cec5SDimitry Andric // wrong lane. Insert a swap following the copy to fix this.
9170b57cec5SDimitry Andric case SHValues::SH_COPYWIDEN: {
9180b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
9190b57cec5SDimitry Andric
9200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
9210b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
9220b57cec5SDimitry Andric
9238bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg();
9240b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
9258bcb0991SDimitry Andric Register NewVReg = MRI->createVirtualRegister(DstRC);
9260b57cec5SDimitry Andric
9270b57cec5SDimitry Andric MI->getOperand(0).setReg(NewVReg);
9280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Into: ");
9290b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
9300b57cec5SDimitry Andric
9310b57cec5SDimitry Andric auto InsertPoint = ++MachineBasicBlock::iterator(MI);
9320b57cec5SDimitry Andric
9330b57cec5SDimitry Andric // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
9340b57cec5SDimitry Andric // is copying to a VRRC, we need to be careful to avoid a register
9350b57cec5SDimitry Andric // assignment problem. In this case we must copy from VRRC to VSRC
9360b57cec5SDimitry Andric // prior to the swap, and from VSRC to VRRC following the swap.
9370b57cec5SDimitry Andric // Coalescing will usually remove all this mess.
9380b57cec5SDimitry Andric if (DstRC == &PPC::VRRCRegClass) {
9398bcb0991SDimitry Andric Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
9408bcb0991SDimitry Andric Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
9410b57cec5SDimitry Andric
9420b57cec5SDimitry Andric BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
9430b57cec5SDimitry Andric TII->get(PPC::COPY), VSRCTmp1)
9440b57cec5SDimitry Andric .addReg(NewVReg);
9450b57cec5SDimitry Andric LLVM_DEBUG(std::prev(InsertPoint)->dump());
9460b57cec5SDimitry Andric
9470b57cec5SDimitry Andric insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
9480b57cec5SDimitry Andric LLVM_DEBUG(std::prev(InsertPoint)->dump());
9490b57cec5SDimitry Andric
9500b57cec5SDimitry Andric BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
9510b57cec5SDimitry Andric TII->get(PPC::COPY), DstReg)
9520b57cec5SDimitry Andric .addReg(VSRCTmp2);
9530b57cec5SDimitry Andric LLVM_DEBUG(std::prev(InsertPoint)->dump());
9540b57cec5SDimitry Andric
9550b57cec5SDimitry Andric } else {
9560b57cec5SDimitry Andric insertSwap(MI, InsertPoint, DstReg, NewVReg);
9570b57cec5SDimitry Andric LLVM_DEBUG(std::prev(InsertPoint)->dump());
9580b57cec5SDimitry Andric }
9590b57cec5SDimitry Andric break;
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric }
9620b57cec5SDimitry Andric }
9630b57cec5SDimitry Andric
9640b57cec5SDimitry Andric // Walk the swap vector and replace each entry marked for removal with
9650b57cec5SDimitry Andric // a copy operation.
removeSwaps()9660b57cec5SDimitry Andric bool PPCVSXSwapRemoval::removeSwaps() {
9670b57cec5SDimitry Andric
9680b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n*** Removing swaps ***\n\n");
9690b57cec5SDimitry Andric
9700b57cec5SDimitry Andric bool Changed = false;
9710b57cec5SDimitry Andric
9720b57cec5SDimitry Andric for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
9730b57cec5SDimitry Andric if (SwapVector[EntryIdx].WillRemove) {
9740b57cec5SDimitry Andric Changed = true;
9750b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
9760b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent();
9770b57cec5SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
9780b57cec5SDimitry Andric MI->getOperand(0).getReg())
9790b57cec5SDimitry Andric .add(MI->getOperand(1));
9800b57cec5SDimitry Andric
9810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << format("Replaced %d with copy: ",
9820b57cec5SDimitry Andric SwapVector[EntryIdx].VSEId));
9830b57cec5SDimitry Andric LLVM_DEBUG(MI->dump());
9840b57cec5SDimitry Andric
9850b57cec5SDimitry Andric MI->eraseFromParent();
9860b57cec5SDimitry Andric }
9870b57cec5SDimitry Andric }
9880b57cec5SDimitry Andric
9890b57cec5SDimitry Andric return Changed;
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric
9920b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
9930b57cec5SDimitry Andric // For debug purposes, dump the contents of the swap vector.
dumpSwapVector()9940b57cec5SDimitry Andric LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() {
9950b57cec5SDimitry Andric
9960b57cec5SDimitry Andric for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
9970b57cec5SDimitry Andric
9980b57cec5SDimitry Andric MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
9990b57cec5SDimitry Andric int ID = SwapVector[EntryIdx].VSEId;
10000b57cec5SDimitry Andric
10010b57cec5SDimitry Andric dbgs() << format("%6d", ID);
10020b57cec5SDimitry Andric dbgs() << format("%6d", EC->getLeaderValue(ID));
10030b57cec5SDimitry Andric dbgs() << format(" %bb.%3d", MI->getParent()->getNumber());
10040b57cec5SDimitry Andric dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str());
10050b57cec5SDimitry Andric
10060b57cec5SDimitry Andric if (SwapVector[EntryIdx].IsLoad)
10070b57cec5SDimitry Andric dbgs() << "load ";
10080b57cec5SDimitry Andric if (SwapVector[EntryIdx].IsStore)
10090b57cec5SDimitry Andric dbgs() << "store ";
10100b57cec5SDimitry Andric if (SwapVector[EntryIdx].IsSwap)
10110b57cec5SDimitry Andric dbgs() << "swap ";
10120b57cec5SDimitry Andric if (SwapVector[EntryIdx].MentionsPhysVR)
10130b57cec5SDimitry Andric dbgs() << "physreg ";
10140b57cec5SDimitry Andric if (SwapVector[EntryIdx].MentionsPartialVR)
10150b57cec5SDimitry Andric dbgs() << "partialreg ";
10160b57cec5SDimitry Andric
10170b57cec5SDimitry Andric if (SwapVector[EntryIdx].IsSwappable) {
10180b57cec5SDimitry Andric dbgs() << "swappable ";
10190b57cec5SDimitry Andric switch(SwapVector[EntryIdx].SpecialHandling) {
10200b57cec5SDimitry Andric default:
10210b57cec5SDimitry Andric dbgs() << "special:**unknown**";
10220b57cec5SDimitry Andric break;
10230b57cec5SDimitry Andric case SH_NONE:
10240b57cec5SDimitry Andric break;
10250b57cec5SDimitry Andric case SH_EXTRACT:
10260b57cec5SDimitry Andric dbgs() << "special:extract ";
10270b57cec5SDimitry Andric break;
10280b57cec5SDimitry Andric case SH_INSERT:
10290b57cec5SDimitry Andric dbgs() << "special:insert ";
10300b57cec5SDimitry Andric break;
10310b57cec5SDimitry Andric case SH_NOSWAP_LD:
10320b57cec5SDimitry Andric dbgs() << "special:load ";
10330b57cec5SDimitry Andric break;
10340b57cec5SDimitry Andric case SH_NOSWAP_ST:
10350b57cec5SDimitry Andric dbgs() << "special:store ";
10360b57cec5SDimitry Andric break;
10370b57cec5SDimitry Andric case SH_SPLAT:
10380b57cec5SDimitry Andric dbgs() << "special:splat ";
10390b57cec5SDimitry Andric break;
10400b57cec5SDimitry Andric case SH_XXPERMDI:
10410b57cec5SDimitry Andric dbgs() << "special:xxpermdi ";
10420b57cec5SDimitry Andric break;
10430b57cec5SDimitry Andric case SH_COPYWIDEN:
10440b57cec5SDimitry Andric dbgs() << "special:copywiden ";
10450b57cec5SDimitry Andric break;
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric }
10480b57cec5SDimitry Andric
10490b57cec5SDimitry Andric if (SwapVector[EntryIdx].WebRejected)
10500b57cec5SDimitry Andric dbgs() << "rejected ";
10510b57cec5SDimitry Andric if (SwapVector[EntryIdx].WillRemove)
10520b57cec5SDimitry Andric dbgs() << "remove ";
10530b57cec5SDimitry Andric
10540b57cec5SDimitry Andric dbgs() << "\n";
10550b57cec5SDimitry Andric
10560b57cec5SDimitry Andric // For no-asserts builds.
10570b57cec5SDimitry Andric (void)MI;
10580b57cec5SDimitry Andric (void)ID;
10590b57cec5SDimitry Andric }
10600b57cec5SDimitry Andric
10610b57cec5SDimitry Andric dbgs() << "\n";
10620b57cec5SDimitry Andric }
10630b57cec5SDimitry Andric #endif
10640b57cec5SDimitry Andric
10650b57cec5SDimitry Andric } // end default namespace
10660b57cec5SDimitry Andric
10670b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
10680b57cec5SDimitry Andric "PowerPC VSX Swap Removal", false, false)
10690b57cec5SDimitry Andric INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE,
10700b57cec5SDimitry Andric "PowerPC VSX Swap Removal", false, false)
10710b57cec5SDimitry Andric
10720b57cec5SDimitry Andric char PPCVSXSwapRemoval::ID = 0;
10730b57cec5SDimitry Andric FunctionPass*
createPPCVSXSwapRemovalPass()10740b57cec5SDimitry Andric llvm::createPPCVSXSwapRemovalPass() { return new PPCVSXSwapRemoval(); }
1075