//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions, either
// because the sign extended bits aren't consumed or because the input was
// already sign extended by an earlier instruction.
//
// Then:
// 1. Unless explicitly disabled or the target prefers instructions with W
//    suffix, it removes the -w suffix from opw instructions whenever all users
//    are dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * addi.w because it helps reduce test differences between LA32 and LA64
//      w/o being a pessimization.
//
// 2. Or if explicitly enabled or the target prefers instructions with W
//    suffix, it adds the W suffix to the instruction whenever all users are
//    dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * add.d/addi.d/sub.d/mul.d.
//    * slli.d with imm < 32.
//    * ld.d/ld.wu.
//===---------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// DEBUG_TYPE doubles as the pass argument name handed to INITIALIZE_PASS below.
#define DEBUG_TYPE "loongarch-opt-w-instrs"
// Human-readable name, returned by getPassName() and used at registration.
#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"

STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
STATISTIC(NumTransformedToWInstrs,
          "Number of instructions transformed to W-ops");

// Hidden switch, off by default. Its consumer is outside this excerpt;
// presumably it gates removeSExtWInstrs() -- confirm in runOnMachineFunction.
static cl::opt<bool>
    DisableSExtWRemoval("loongarch-disable-sextw-removal",
                        cl::desc("Disable removal of sign-extend insn"),
                        cl::init(false), cl::Hidden);
// Hidden switch, off by default. Its consumer is outside this excerpt;
// presumably it gates convertToDSuffixes() -- confirm in runOnMachineFunction.
static cl::opt<bool>
    DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
                        cl::desc("Disable convert to D suffix"),
                        cl::init(false), cl::Hidden);

namespace {

// Machine-function pass implementing the W-instruction optimizations described
// in the file header. Only the declarations are visible here; the three worker
// methods are defined further down the file.
class LoongArchOptWInstrs : public MachineFunctionPass {
public:
  // Pass identification; the address of ID is what identifies the pass.
  static char ID;

  LoongArchOptWInstrs() : MachineFunctionPass(ID) {}

  // Pass entry point. Each worker below returns bool; by MachineFunctionPass
  // convention that presumably means "changed something" -- verify against the
  // definitions.
  bool runOnMachineFunction(MachineFunction &MF) override;
  bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
                         const LoongArchSubtarget &ST,
                         MachineRegisterInfo &MRI);
  bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
                          const LoongArchSubtarget &ST,
                          MachineRegisterInfo &MRI);
  bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
                          const LoongArchSubtarget &ST,
                          MachineRegisterInfo &MRI);

  // Declares that the pass preserves the CFG, then defers to the base class
  // for the remaining analysis requirements.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
};

} // end anonymous namespace

char LoongArchOptWInstrs::ID = 0;
INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
                false, false)

// Factory returning a newly allocated instance of the pass.
FunctionPass *llvm::createLoongArchOptWInstrsPass() {
  return new LoongArchOptWInstrs();
}

// Checks if all
users only demand the lower \p OrigBits of the original 96*0fca6ea1SDimitry Andric // instruction's result. 97*0fca6ea1SDimitry Andric // TODO: handle multiple interdependent transformations 98*0fca6ea1SDimitry Andric static bool hasAllNBitUsers(const MachineInstr &OrigMI, 99*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 100*0fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, unsigned OrigBits) { 101*0fca6ea1SDimitry Andric 102*0fca6ea1SDimitry Andric SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited; 103*0fca6ea1SDimitry Andric SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist; 104*0fca6ea1SDimitry Andric 105*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(&OrigMI, OrigBits)); 106*0fca6ea1SDimitry Andric 107*0fca6ea1SDimitry Andric while (!Worklist.empty()) { 108*0fca6ea1SDimitry Andric auto P = Worklist.pop_back_val(); 109*0fca6ea1SDimitry Andric const MachineInstr *MI = P.first; 110*0fca6ea1SDimitry Andric unsigned Bits = P.second; 111*0fca6ea1SDimitry Andric 112*0fca6ea1SDimitry Andric if (!Visited.insert(P).second) 113*0fca6ea1SDimitry Andric continue; 114*0fca6ea1SDimitry Andric 115*0fca6ea1SDimitry Andric // Only handle instructions with one def. 
116*0fca6ea1SDimitry Andric if (MI->getNumExplicitDefs() != 1) 117*0fca6ea1SDimitry Andric return false; 118*0fca6ea1SDimitry Andric 119*0fca6ea1SDimitry Andric Register DestReg = MI->getOperand(0).getReg(); 120*0fca6ea1SDimitry Andric if (!DestReg.isVirtual()) 121*0fca6ea1SDimitry Andric return false; 122*0fca6ea1SDimitry Andric 123*0fca6ea1SDimitry Andric for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) { 124*0fca6ea1SDimitry Andric const MachineInstr *UserMI = UserOp.getParent(); 125*0fca6ea1SDimitry Andric unsigned OpIdx = UserOp.getOperandNo(); 126*0fca6ea1SDimitry Andric 127*0fca6ea1SDimitry Andric switch (UserMI->getOpcode()) { 128*0fca6ea1SDimitry Andric default: 129*0fca6ea1SDimitry Andric // TODO: Add vector 130*0fca6ea1SDimitry Andric return false; 131*0fca6ea1SDimitry Andric 132*0fca6ea1SDimitry Andric case LoongArch::ADD_W: 133*0fca6ea1SDimitry Andric case LoongArch::ADDI_W: 134*0fca6ea1SDimitry Andric case LoongArch::SUB_W: 135*0fca6ea1SDimitry Andric case LoongArch::ALSL_W: 136*0fca6ea1SDimitry Andric case LoongArch::ALSL_WU: 137*0fca6ea1SDimitry Andric case LoongArch::MUL_W: 138*0fca6ea1SDimitry Andric case LoongArch::MULH_W: 139*0fca6ea1SDimitry Andric case LoongArch::MULH_WU: 140*0fca6ea1SDimitry Andric case LoongArch::MULW_D_W: 141*0fca6ea1SDimitry Andric case LoongArch::MULW_D_WU: 142*0fca6ea1SDimitry Andric // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+. 
143*0fca6ea1SDimitry Andric // case LoongArch::DIV_W: 144*0fca6ea1SDimitry Andric // case LoongArch::DIV_WU: 145*0fca6ea1SDimitry Andric // case LoongArch::MOD_W: 146*0fca6ea1SDimitry Andric // case LoongArch::MOD_WU: 147*0fca6ea1SDimitry Andric case LoongArch::SLL_W: 148*0fca6ea1SDimitry Andric case LoongArch::SLLI_W: 149*0fca6ea1SDimitry Andric case LoongArch::SRL_W: 150*0fca6ea1SDimitry Andric case LoongArch::SRLI_W: 151*0fca6ea1SDimitry Andric case LoongArch::SRA_W: 152*0fca6ea1SDimitry Andric case LoongArch::SRAI_W: 153*0fca6ea1SDimitry Andric case LoongArch::ROTR_W: 154*0fca6ea1SDimitry Andric case LoongArch::ROTRI_W: 155*0fca6ea1SDimitry Andric case LoongArch::CLO_W: 156*0fca6ea1SDimitry Andric case LoongArch::CLZ_W: 157*0fca6ea1SDimitry Andric case LoongArch::CTO_W: 158*0fca6ea1SDimitry Andric case LoongArch::CTZ_W: 159*0fca6ea1SDimitry Andric case LoongArch::BYTEPICK_W: 160*0fca6ea1SDimitry Andric case LoongArch::REVB_2H: 161*0fca6ea1SDimitry Andric case LoongArch::BITREV_4B: 162*0fca6ea1SDimitry Andric case LoongArch::BITREV_W: 163*0fca6ea1SDimitry Andric case LoongArch::BSTRINS_W: 164*0fca6ea1SDimitry Andric case LoongArch::BSTRPICK_W: 165*0fca6ea1SDimitry Andric case LoongArch::CRC_W_W_W: 166*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_W_W: 167*0fca6ea1SDimitry Andric case LoongArch::MOVGR2FCSR: 168*0fca6ea1SDimitry Andric case LoongArch::MOVGR2FRH_W: 169*0fca6ea1SDimitry Andric case LoongArch::MOVGR2FR_W_64: 170*0fca6ea1SDimitry Andric if (Bits >= 32) 171*0fca6ea1SDimitry Andric break; 172*0fca6ea1SDimitry Andric return false; 173*0fca6ea1SDimitry Andric case LoongArch::MOVGR2CF: 174*0fca6ea1SDimitry Andric if (Bits >= 1) 175*0fca6ea1SDimitry Andric break; 176*0fca6ea1SDimitry Andric return false; 177*0fca6ea1SDimitry Andric case LoongArch::EXT_W_B: 178*0fca6ea1SDimitry Andric if (Bits >= 8) 179*0fca6ea1SDimitry Andric break; 180*0fca6ea1SDimitry Andric return false; 181*0fca6ea1SDimitry Andric case LoongArch::EXT_W_H: 182*0fca6ea1SDimitry Andric if 
(Bits >= 16) 183*0fca6ea1SDimitry Andric break; 184*0fca6ea1SDimitry Andric return false; 185*0fca6ea1SDimitry Andric 186*0fca6ea1SDimitry Andric case LoongArch::SRLI_D: { 187*0fca6ea1SDimitry Andric // If we are shifting right by less than Bits, and users don't demand 188*0fca6ea1SDimitry Andric // any bits that were shifted into [Bits-1:0], then we can consider this 189*0fca6ea1SDimitry Andric // as an N-Bit user. 190*0fca6ea1SDimitry Andric unsigned ShAmt = UserMI->getOperand(2).getImm(); 191*0fca6ea1SDimitry Andric if (Bits > ShAmt) { 192*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt)); 193*0fca6ea1SDimitry Andric break; 194*0fca6ea1SDimitry Andric } 195*0fca6ea1SDimitry Andric return false; 196*0fca6ea1SDimitry Andric } 197*0fca6ea1SDimitry Andric 198*0fca6ea1SDimitry Andric // these overwrite higher input bits, otherwise the lower word of output 199*0fca6ea1SDimitry Andric // depends only on the lower word of input. So check their uses read W. 200*0fca6ea1SDimitry Andric case LoongArch::SLLI_D: 201*0fca6ea1SDimitry Andric if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm())) 202*0fca6ea1SDimitry Andric break; 203*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 204*0fca6ea1SDimitry Andric break; 205*0fca6ea1SDimitry Andric case LoongArch::ANDI: { 206*0fca6ea1SDimitry Andric uint64_t Imm = UserMI->getOperand(2).getImm(); 207*0fca6ea1SDimitry Andric if (Bits >= (unsigned)llvm::bit_width(Imm)) 208*0fca6ea1SDimitry Andric break; 209*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 210*0fca6ea1SDimitry Andric break; 211*0fca6ea1SDimitry Andric } 212*0fca6ea1SDimitry Andric case LoongArch::ORI: { 213*0fca6ea1SDimitry Andric uint64_t Imm = UserMI->getOperand(2).getImm(); 214*0fca6ea1SDimitry Andric if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 215*0fca6ea1SDimitry Andric break; 216*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 
217*0fca6ea1SDimitry Andric break; 218*0fca6ea1SDimitry Andric } 219*0fca6ea1SDimitry Andric 220*0fca6ea1SDimitry Andric case LoongArch::SLL_D: 221*0fca6ea1SDimitry Andric // Operand 2 is the shift amount which uses log2(grlen) bits. 222*0fca6ea1SDimitry Andric if (OpIdx == 2) { 223*0fca6ea1SDimitry Andric if (Bits >= Log2_32(ST.getGRLen())) 224*0fca6ea1SDimitry Andric break; 225*0fca6ea1SDimitry Andric return false; 226*0fca6ea1SDimitry Andric } 227*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 228*0fca6ea1SDimitry Andric break; 229*0fca6ea1SDimitry Andric 230*0fca6ea1SDimitry Andric case LoongArch::SRA_D: 231*0fca6ea1SDimitry Andric case LoongArch::SRL_D: 232*0fca6ea1SDimitry Andric case LoongArch::ROTR_D: 233*0fca6ea1SDimitry Andric // Operand 2 is the shift amount which uses 6 bits. 234*0fca6ea1SDimitry Andric if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen())) 235*0fca6ea1SDimitry Andric break; 236*0fca6ea1SDimitry Andric return false; 237*0fca6ea1SDimitry Andric 238*0fca6ea1SDimitry Andric case LoongArch::ST_B: 239*0fca6ea1SDimitry Andric case LoongArch::STX_B: 240*0fca6ea1SDimitry Andric case LoongArch::STGT_B: 241*0fca6ea1SDimitry Andric case LoongArch::STLE_B: 242*0fca6ea1SDimitry Andric case LoongArch::IOCSRWR_B: 243*0fca6ea1SDimitry Andric // The first argument is the value to store. 244*0fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 8) 245*0fca6ea1SDimitry Andric break; 246*0fca6ea1SDimitry Andric return false; 247*0fca6ea1SDimitry Andric case LoongArch::ST_H: 248*0fca6ea1SDimitry Andric case LoongArch::STX_H: 249*0fca6ea1SDimitry Andric case LoongArch::STGT_H: 250*0fca6ea1SDimitry Andric case LoongArch::STLE_H: 251*0fca6ea1SDimitry Andric case LoongArch::IOCSRWR_H: 252*0fca6ea1SDimitry Andric // The first argument is the value to store. 
253*0fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 16) 254*0fca6ea1SDimitry Andric break; 255*0fca6ea1SDimitry Andric return false; 256*0fca6ea1SDimitry Andric case LoongArch::ST_W: 257*0fca6ea1SDimitry Andric case LoongArch::STX_W: 258*0fca6ea1SDimitry Andric case LoongArch::SCREL_W: 259*0fca6ea1SDimitry Andric case LoongArch::STPTR_W: 260*0fca6ea1SDimitry Andric case LoongArch::STGT_W: 261*0fca6ea1SDimitry Andric case LoongArch::STLE_W: 262*0fca6ea1SDimitry Andric case LoongArch::IOCSRWR_W: 263*0fca6ea1SDimitry Andric // The first argument is the value to store. 264*0fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 32) 265*0fca6ea1SDimitry Andric break; 266*0fca6ea1SDimitry Andric return false; 267*0fca6ea1SDimitry Andric 268*0fca6ea1SDimitry Andric case LoongArch::CRC_W_B_W: 269*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_B_W: 270*0fca6ea1SDimitry Andric if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32)) 271*0fca6ea1SDimitry Andric break; 272*0fca6ea1SDimitry Andric return false; 273*0fca6ea1SDimitry Andric case LoongArch::CRC_W_H_W: 274*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_H_W: 275*0fca6ea1SDimitry Andric if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32)) 276*0fca6ea1SDimitry Andric break; 277*0fca6ea1SDimitry Andric return false; 278*0fca6ea1SDimitry Andric case LoongArch::CRC_W_D_W: 279*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_D_W: 280*0fca6ea1SDimitry Andric if (OpIdx == 2 && Bits >= 32) 281*0fca6ea1SDimitry Andric break; 282*0fca6ea1SDimitry Andric return false; 283*0fca6ea1SDimitry Andric 284*0fca6ea1SDimitry Andric // For these, lower word of output in these operations, depends only on 285*0fca6ea1SDimitry Andric // the lower word of input. So, we check all uses only read lower word. 
286*0fca6ea1SDimitry Andric case LoongArch::COPY: 287*0fca6ea1SDimitry Andric case LoongArch::PHI: 288*0fca6ea1SDimitry Andric case LoongArch::ADD_D: 289*0fca6ea1SDimitry Andric case LoongArch::ADDI_D: 290*0fca6ea1SDimitry Andric case LoongArch::SUB_D: 291*0fca6ea1SDimitry Andric case LoongArch::MUL_D: 292*0fca6ea1SDimitry Andric case LoongArch::AND: 293*0fca6ea1SDimitry Andric case LoongArch::OR: 294*0fca6ea1SDimitry Andric case LoongArch::NOR: 295*0fca6ea1SDimitry Andric case LoongArch::XOR: 296*0fca6ea1SDimitry Andric case LoongArch::XORI: 297*0fca6ea1SDimitry Andric case LoongArch::ANDN: 298*0fca6ea1SDimitry Andric case LoongArch::ORN: 299*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 300*0fca6ea1SDimitry Andric break; 301*0fca6ea1SDimitry Andric 302*0fca6ea1SDimitry Andric case LoongArch::MASKNEZ: 303*0fca6ea1SDimitry Andric case LoongArch::MASKEQZ: 304*0fca6ea1SDimitry Andric if (OpIdx != 1) 305*0fca6ea1SDimitry Andric return false; 306*0fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 307*0fca6ea1SDimitry Andric break; 308*0fca6ea1SDimitry Andric } 309*0fca6ea1SDimitry Andric } 310*0fca6ea1SDimitry Andric } 311*0fca6ea1SDimitry Andric 312*0fca6ea1SDimitry Andric return true; 313*0fca6ea1SDimitry Andric } 314*0fca6ea1SDimitry Andric 315*0fca6ea1SDimitry Andric static bool hasAllWUsers(const MachineInstr &OrigMI, 316*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 317*0fca6ea1SDimitry Andric const MachineRegisterInfo &MRI) { 318*0fca6ea1SDimitry Andric return hasAllNBitUsers(OrigMI, ST, MRI, 32); 319*0fca6ea1SDimitry Andric } 320*0fca6ea1SDimitry Andric 321*0fca6ea1SDimitry Andric // This function returns true if the machine instruction always outputs a value 322*0fca6ea1SDimitry Andric // where bits 63:32 match bit 31. 
323*0fca6ea1SDimitry Andric static bool isSignExtendingOpW(const MachineInstr &MI, 324*0fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, unsigned OpNo) { 325*0fca6ea1SDimitry Andric switch (MI.getOpcode()) { 326*0fca6ea1SDimitry Andric // Normal cases 327*0fca6ea1SDimitry Andric case LoongArch::ADD_W: 328*0fca6ea1SDimitry Andric case LoongArch::SUB_W: 329*0fca6ea1SDimitry Andric case LoongArch::ADDI_W: 330*0fca6ea1SDimitry Andric case LoongArch::ALSL_W: 331*0fca6ea1SDimitry Andric case LoongArch::LU12I_W: 332*0fca6ea1SDimitry Andric case LoongArch::SLT: 333*0fca6ea1SDimitry Andric case LoongArch::SLTU: 334*0fca6ea1SDimitry Andric case LoongArch::SLTI: 335*0fca6ea1SDimitry Andric case LoongArch::SLTUI: 336*0fca6ea1SDimitry Andric case LoongArch::ANDI: 337*0fca6ea1SDimitry Andric case LoongArch::MUL_W: 338*0fca6ea1SDimitry Andric case LoongArch::MULH_W: 339*0fca6ea1SDimitry Andric case LoongArch::MULH_WU: 340*0fca6ea1SDimitry Andric case LoongArch::DIV_W: 341*0fca6ea1SDimitry Andric case LoongArch::MOD_W: 342*0fca6ea1SDimitry Andric case LoongArch::DIV_WU: 343*0fca6ea1SDimitry Andric case LoongArch::MOD_WU: 344*0fca6ea1SDimitry Andric case LoongArch::SLL_W: 345*0fca6ea1SDimitry Andric case LoongArch::SRL_W: 346*0fca6ea1SDimitry Andric case LoongArch::SRA_W: 347*0fca6ea1SDimitry Andric case LoongArch::ROTR_W: 348*0fca6ea1SDimitry Andric case LoongArch::SLLI_W: 349*0fca6ea1SDimitry Andric case LoongArch::SRLI_W: 350*0fca6ea1SDimitry Andric case LoongArch::SRAI_W: 351*0fca6ea1SDimitry Andric case LoongArch::ROTRI_W: 352*0fca6ea1SDimitry Andric case LoongArch::EXT_W_B: 353*0fca6ea1SDimitry Andric case LoongArch::EXT_W_H: 354*0fca6ea1SDimitry Andric case LoongArch::CLO_W: 355*0fca6ea1SDimitry Andric case LoongArch::CLZ_W: 356*0fca6ea1SDimitry Andric case LoongArch::CTO_W: 357*0fca6ea1SDimitry Andric case LoongArch::CTZ_W: 358*0fca6ea1SDimitry Andric case LoongArch::BYTEPICK_W: 359*0fca6ea1SDimitry Andric case LoongArch::REVB_2H: 360*0fca6ea1SDimitry Andric case 
LoongArch::BITREV_4B: 361*0fca6ea1SDimitry Andric case LoongArch::BITREV_W: 362*0fca6ea1SDimitry Andric case LoongArch::BSTRINS_W: 363*0fca6ea1SDimitry Andric case LoongArch::BSTRPICK_W: 364*0fca6ea1SDimitry Andric case LoongArch::LD_B: 365*0fca6ea1SDimitry Andric case LoongArch::LD_H: 366*0fca6ea1SDimitry Andric case LoongArch::LD_W: 367*0fca6ea1SDimitry Andric case LoongArch::LD_BU: 368*0fca6ea1SDimitry Andric case LoongArch::LD_HU: 369*0fca6ea1SDimitry Andric case LoongArch::LL_W: 370*0fca6ea1SDimitry Andric case LoongArch::LLACQ_W: 371*0fca6ea1SDimitry Andric case LoongArch::RDTIMEL_W: 372*0fca6ea1SDimitry Andric case LoongArch::RDTIMEH_W: 373*0fca6ea1SDimitry Andric case LoongArch::CPUCFG: 374*0fca6ea1SDimitry Andric case LoongArch::LDX_B: 375*0fca6ea1SDimitry Andric case LoongArch::LDX_H: 376*0fca6ea1SDimitry Andric case LoongArch::LDX_W: 377*0fca6ea1SDimitry Andric case LoongArch::LDX_BU: 378*0fca6ea1SDimitry Andric case LoongArch::LDX_HU: 379*0fca6ea1SDimitry Andric case LoongArch::LDPTR_W: 380*0fca6ea1SDimitry Andric case LoongArch::LDGT_B: 381*0fca6ea1SDimitry Andric case LoongArch::LDGT_H: 382*0fca6ea1SDimitry Andric case LoongArch::LDGT_W: 383*0fca6ea1SDimitry Andric case LoongArch::LDLE_B: 384*0fca6ea1SDimitry Andric case LoongArch::LDLE_H: 385*0fca6ea1SDimitry Andric case LoongArch::LDLE_W: 386*0fca6ea1SDimitry Andric case LoongArch::AMSWAP_B: 387*0fca6ea1SDimitry Andric case LoongArch::AMSWAP_H: 388*0fca6ea1SDimitry Andric case LoongArch::AMSWAP_W: 389*0fca6ea1SDimitry Andric case LoongArch::AMADD_B: 390*0fca6ea1SDimitry Andric case LoongArch::AMADD_H: 391*0fca6ea1SDimitry Andric case LoongArch::AMADD_W: 392*0fca6ea1SDimitry Andric case LoongArch::AMAND_W: 393*0fca6ea1SDimitry Andric case LoongArch::AMOR_W: 394*0fca6ea1SDimitry Andric case LoongArch::AMXOR_W: 395*0fca6ea1SDimitry Andric case LoongArch::AMMAX_W: 396*0fca6ea1SDimitry Andric case LoongArch::AMMIN_W: 397*0fca6ea1SDimitry Andric case LoongArch::AMMAX_WU: 398*0fca6ea1SDimitry Andric case 
LoongArch::AMMIN_WU: 399*0fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_B: 400*0fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_H: 401*0fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_W: 402*0fca6ea1SDimitry Andric case LoongArch::AMADD__DB_B: 403*0fca6ea1SDimitry Andric case LoongArch::AMADD__DB_H: 404*0fca6ea1SDimitry Andric case LoongArch::AMADD__DB_W: 405*0fca6ea1SDimitry Andric case LoongArch::AMAND__DB_W: 406*0fca6ea1SDimitry Andric case LoongArch::AMOR__DB_W: 407*0fca6ea1SDimitry Andric case LoongArch::AMXOR__DB_W: 408*0fca6ea1SDimitry Andric case LoongArch::AMMAX__DB_W: 409*0fca6ea1SDimitry Andric case LoongArch::AMMIN__DB_W: 410*0fca6ea1SDimitry Andric case LoongArch::AMMAX__DB_WU: 411*0fca6ea1SDimitry Andric case LoongArch::AMMIN__DB_WU: 412*0fca6ea1SDimitry Andric case LoongArch::AMCAS_B: 413*0fca6ea1SDimitry Andric case LoongArch::AMCAS_H: 414*0fca6ea1SDimitry Andric case LoongArch::AMCAS_W: 415*0fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_B: 416*0fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_H: 417*0fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_W: 418*0fca6ea1SDimitry Andric case LoongArch::CRC_W_B_W: 419*0fca6ea1SDimitry Andric case LoongArch::CRC_W_H_W: 420*0fca6ea1SDimitry Andric case LoongArch::CRC_W_W_W: 421*0fca6ea1SDimitry Andric case LoongArch::CRC_W_D_W: 422*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_B_W: 423*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_H_W: 424*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_W_W: 425*0fca6ea1SDimitry Andric case LoongArch::CRCC_W_D_W: 426*0fca6ea1SDimitry Andric case LoongArch::IOCSRRD_B: 427*0fca6ea1SDimitry Andric case LoongArch::IOCSRRD_H: 428*0fca6ea1SDimitry Andric case LoongArch::IOCSRRD_W: 429*0fca6ea1SDimitry Andric case LoongArch::MOVFR2GR_S: 430*0fca6ea1SDimitry Andric case LoongArch::MOVFCSR2GR: 431*0fca6ea1SDimitry Andric case LoongArch::MOVCF2GR: 432*0fca6ea1SDimitry Andric case LoongArch::MOVFRH2GR_S: 433*0fca6ea1SDimitry Andric case LoongArch::MOVFR2GR_S_64: 
434*0fca6ea1SDimitry Andric // TODO: Add vector 435*0fca6ea1SDimitry Andric return true; 436*0fca6ea1SDimitry Andric // Special cases that require checking operands. 437*0fca6ea1SDimitry Andric // shifting right sufficiently makes the value 32-bit sign-extended 438*0fca6ea1SDimitry Andric case LoongArch::SRAI_D: 439*0fca6ea1SDimitry Andric return MI.getOperand(2).getImm() >= 32; 440*0fca6ea1SDimitry Andric case LoongArch::SRLI_D: 441*0fca6ea1SDimitry Andric return MI.getOperand(2).getImm() > 32; 442*0fca6ea1SDimitry Andric // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended. 443*0fca6ea1SDimitry Andric case LoongArch::ADDI_D: 444*0fca6ea1SDimitry Andric case LoongArch::ORI: 445*0fca6ea1SDimitry Andric return MI.getOperand(1).isReg() && 446*0fca6ea1SDimitry Andric MI.getOperand(1).getReg() == LoongArch::R0; 447*0fca6ea1SDimitry Andric // A bits extract is sign extended if the msb is less than 31. 448*0fca6ea1SDimitry Andric case LoongArch::BSTRPICK_D: 449*0fca6ea1SDimitry Andric return MI.getOperand(2).getImm() < 31; 450*0fca6ea1SDimitry Andric // Copying from R0 produces zero. 451*0fca6ea1SDimitry Andric case LoongArch::COPY: 452*0fca6ea1SDimitry Andric return MI.getOperand(1).getReg() == LoongArch::R0; 453*0fca6ea1SDimitry Andric // Ignore the scratch register destination. 
454*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicSwap32: 455*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicSwap32: 456*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadAdd32: 457*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadSub32: 458*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadNand32: 459*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadNand32: 460*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadAdd32: 461*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadSub32: 462*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadAnd32: 463*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadOr32: 464*0fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadXor32: 465*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadUMax32: 466*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadUMin32: 467*0fca6ea1SDimitry Andric case LoongArch::PseudoCmpXchg32: 468*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedCmpXchg32: 469*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadMax32: 470*0fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadMin32: 471*0fca6ea1SDimitry Andric return OpNo == 0; 472*0fca6ea1SDimitry Andric } 473*0fca6ea1SDimitry Andric 474*0fca6ea1SDimitry Andric return false; 475*0fca6ea1SDimitry Andric } 476*0fca6ea1SDimitry Andric 477*0fca6ea1SDimitry Andric static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, 478*0fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, 479*0fca6ea1SDimitry Andric SmallPtrSetImpl<MachineInstr *> &FixableDef) { 480*0fca6ea1SDimitry Andric SmallSet<Register, 4> Visited; 481*0fca6ea1SDimitry Andric SmallVector<Register, 4> Worklist; 482*0fca6ea1SDimitry Andric 483*0fca6ea1SDimitry Andric auto AddRegToWorkList = [&](Register SrcReg) { 484*0fca6ea1SDimitry Andric if (!SrcReg.isVirtual()) 485*0fca6ea1SDimitry Andric return false; 486*0fca6ea1SDimitry Andric Worklist.push_back(SrcReg); 
487*0fca6ea1SDimitry Andric return true; 488*0fca6ea1SDimitry Andric }; 489*0fca6ea1SDimitry Andric 490*0fca6ea1SDimitry Andric if (!AddRegToWorkList(SrcReg)) 491*0fca6ea1SDimitry Andric return false; 492*0fca6ea1SDimitry Andric 493*0fca6ea1SDimitry Andric while (!Worklist.empty()) { 494*0fca6ea1SDimitry Andric Register Reg = Worklist.pop_back_val(); 495*0fca6ea1SDimitry Andric 496*0fca6ea1SDimitry Andric // If we already visited this register, we don't need to check it again. 497*0fca6ea1SDimitry Andric if (!Visited.insert(Reg).second) 498*0fca6ea1SDimitry Andric continue; 499*0fca6ea1SDimitry Andric 500*0fca6ea1SDimitry Andric MachineInstr *MI = MRI.getVRegDef(Reg); 501*0fca6ea1SDimitry Andric if (!MI) 502*0fca6ea1SDimitry Andric continue; 503*0fca6ea1SDimitry Andric 504*0fca6ea1SDimitry Andric int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr); 505*0fca6ea1SDimitry Andric assert(OpNo != -1 && "Couldn't find register"); 506*0fca6ea1SDimitry Andric 507*0fca6ea1SDimitry Andric // If this is a sign extending operation we don't need to look any further. 508*0fca6ea1SDimitry Andric if (isSignExtendingOpW(*MI, MRI, OpNo)) 509*0fca6ea1SDimitry Andric continue; 510*0fca6ea1SDimitry Andric 511*0fca6ea1SDimitry Andric // Is this an instruction that propagates sign extend? 512*0fca6ea1SDimitry Andric switch (MI->getOpcode()) { 513*0fca6ea1SDimitry Andric default: 514*0fca6ea1SDimitry Andric // Unknown opcode, give up. 515*0fca6ea1SDimitry Andric return false; 516*0fca6ea1SDimitry Andric case LoongArch::COPY: { 517*0fca6ea1SDimitry Andric const MachineFunction *MF = MI->getMF(); 518*0fca6ea1SDimitry Andric const LoongArchMachineFunctionInfo *LAFI = 519*0fca6ea1SDimitry Andric MF->getInfo<LoongArchMachineFunctionInfo>(); 520*0fca6ea1SDimitry Andric 521*0fca6ea1SDimitry Andric // If this is the entry block and the register is livein, see if we know 522*0fca6ea1SDimitry Andric // it is sign extended. 
523*0fca6ea1SDimitry Andric if (MI->getParent() == &MF->front()) { 524*0fca6ea1SDimitry Andric Register VReg = MI->getOperand(0).getReg(); 525*0fca6ea1SDimitry Andric if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg)) 526*0fca6ea1SDimitry Andric continue; 527*0fca6ea1SDimitry Andric } 528*0fca6ea1SDimitry Andric 529*0fca6ea1SDimitry Andric Register CopySrcReg = MI->getOperand(1).getReg(); 530*0fca6ea1SDimitry Andric if (CopySrcReg == LoongArch::R4) { 531*0fca6ea1SDimitry Andric // For a method return value, we check the ZExt/SExt flags in attribute. 532*0fca6ea1SDimitry Andric // We assume the following code sequence for method call. 533*0fca6ea1SDimitry Andric // PseudoCALL @bar, ... 534*0fca6ea1SDimitry Andric // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 535*0fca6ea1SDimitry Andric // %0:gpr = COPY $r4 536*0fca6ea1SDimitry Andric // 537*0fca6ea1SDimitry Andric // We use the PseudoCall to look up the IR function being called to find 538*0fca6ea1SDimitry Andric // its return attributes. 
539*0fca6ea1SDimitry Andric const MachineBasicBlock *MBB = MI->getParent(); 540*0fca6ea1SDimitry Andric auto II = MI->getIterator(); 541*0fca6ea1SDimitry Andric if (II == MBB->instr_begin() || 542*0fca6ea1SDimitry Andric (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP) 543*0fca6ea1SDimitry Andric return false; 544*0fca6ea1SDimitry Andric 545*0fca6ea1SDimitry Andric const MachineInstr &CallMI = *(--II); 546*0fca6ea1SDimitry Andric if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal()) 547*0fca6ea1SDimitry Andric return false; 548*0fca6ea1SDimitry Andric 549*0fca6ea1SDimitry Andric auto *CalleeFn = 550*0fca6ea1SDimitry Andric dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal()); 551*0fca6ea1SDimitry Andric if (!CalleeFn) 552*0fca6ea1SDimitry Andric return false; 553*0fca6ea1SDimitry Andric 554*0fca6ea1SDimitry Andric auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType()); 555*0fca6ea1SDimitry Andric if (!IntTy) 556*0fca6ea1SDimitry Andric return false; 557*0fca6ea1SDimitry Andric 558*0fca6ea1SDimitry Andric const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs(); 559*0fca6ea1SDimitry Andric unsigned BitWidth = IntTy->getBitWidth(); 560*0fca6ea1SDimitry Andric if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) || 561*0fca6ea1SDimitry Andric (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt))) 562*0fca6ea1SDimitry Andric continue; 563*0fca6ea1SDimitry Andric } 564*0fca6ea1SDimitry Andric 565*0fca6ea1SDimitry Andric if (!AddRegToWorkList(CopySrcReg)) 566*0fca6ea1SDimitry Andric return false; 567*0fca6ea1SDimitry Andric 568*0fca6ea1SDimitry Andric break; 569*0fca6ea1SDimitry Andric } 570*0fca6ea1SDimitry Andric 571*0fca6ea1SDimitry Andric // For these, we just need to check if the 1st operand is sign extended. 
572*0fca6ea1SDimitry Andric case LoongArch::MOD_D: 573*0fca6ea1SDimitry Andric case LoongArch::ANDI: 574*0fca6ea1SDimitry Andric case LoongArch::ORI: 575*0fca6ea1SDimitry Andric case LoongArch::XORI: 576*0fca6ea1SDimitry Andric // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. 577*0fca6ea1SDimitry Andric // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 578*0fca6ea1SDimitry Andric // Logical operations use a sign extended 12-bit immediate. 579*0fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(1).getReg())) 580*0fca6ea1SDimitry Andric return false; 581*0fca6ea1SDimitry Andric 582*0fca6ea1SDimitry Andric break; 583*0fca6ea1SDimitry Andric case LoongArch::MOD_DU: 584*0fca6ea1SDimitry Andric case LoongArch::AND: 585*0fca6ea1SDimitry Andric case LoongArch::OR: 586*0fca6ea1SDimitry Andric case LoongArch::XOR: 587*0fca6ea1SDimitry Andric case LoongArch::ANDN: 588*0fca6ea1SDimitry Andric case LoongArch::ORN: 589*0fca6ea1SDimitry Andric case LoongArch::PHI: { 590*0fca6ea1SDimitry Andric // If all incoming values are sign-extended, the output of AND, OR, XOR, 591*0fca6ea1SDimitry Andric // or PHI is also sign-extended. 592*0fca6ea1SDimitry Andric 593*0fca6ea1SDimitry Andric // The input registers for PHI are operand 1, 3, ... 594*0fca6ea1SDimitry Andric // The input registers for others are operand 1 and 2. 
595*0fca6ea1SDimitry Andric unsigned B = 1, E = 3, D = 1; 596*0fca6ea1SDimitry Andric switch (MI->getOpcode()) { 597*0fca6ea1SDimitry Andric case LoongArch::PHI: 598*0fca6ea1SDimitry Andric E = MI->getNumOperands(); 599*0fca6ea1SDimitry Andric D = 2; 600*0fca6ea1SDimitry Andric break; 601*0fca6ea1SDimitry Andric } 602*0fca6ea1SDimitry Andric 603*0fca6ea1SDimitry Andric for (unsigned I = B; I != E; I += D) { 604*0fca6ea1SDimitry Andric if (!MI->getOperand(I).isReg()) 605*0fca6ea1SDimitry Andric return false; 606*0fca6ea1SDimitry Andric 607*0fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(I).getReg())) 608*0fca6ea1SDimitry Andric return false; 609*0fca6ea1SDimitry Andric } 610*0fca6ea1SDimitry Andric 611*0fca6ea1SDimitry Andric break; 612*0fca6ea1SDimitry Andric } 613*0fca6ea1SDimitry Andric 614*0fca6ea1SDimitry Andric case LoongArch::MASKEQZ: 615*0fca6ea1SDimitry Andric case LoongArch::MASKNEZ: 616*0fca6ea1SDimitry Andric // Instructions return zero or operand 1. Result is sign extended if 617*0fca6ea1SDimitry Andric // operand 1 is sign extended. 
618*0fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(1).getReg())) 619*0fca6ea1SDimitry Andric return false; 620*0fca6ea1SDimitry Andric break; 621*0fca6ea1SDimitry Andric 622*0fca6ea1SDimitry Andric // With these opcode, we can "fix" them with the W-version 623*0fca6ea1SDimitry Andric // if we know all users of the result only rely on bits 31:0 624*0fca6ea1SDimitry Andric case LoongArch::SLLI_D: 625*0fca6ea1SDimitry Andric // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits 626*0fca6ea1SDimitry Andric if (MI->getOperand(2).getImm() >= 32) 627*0fca6ea1SDimitry Andric return false; 628*0fca6ea1SDimitry Andric [[fallthrough]]; 629*0fca6ea1SDimitry Andric case LoongArch::ADDI_D: 630*0fca6ea1SDimitry Andric case LoongArch::ADD_D: 631*0fca6ea1SDimitry Andric case LoongArch::LD_D: 632*0fca6ea1SDimitry Andric case LoongArch::LD_WU: 633*0fca6ea1SDimitry Andric case LoongArch::MUL_D: 634*0fca6ea1SDimitry Andric case LoongArch::SUB_D: 635*0fca6ea1SDimitry Andric if (hasAllWUsers(*MI, ST, MRI)) { 636*0fca6ea1SDimitry Andric FixableDef.insert(MI); 637*0fca6ea1SDimitry Andric break; 638*0fca6ea1SDimitry Andric } 639*0fca6ea1SDimitry Andric return false; 640*0fca6ea1SDimitry Andric } 641*0fca6ea1SDimitry Andric } 642*0fca6ea1SDimitry Andric 643*0fca6ea1SDimitry Andric // If we get here, then every node we visited produces a sign extended value 644*0fca6ea1SDimitry Andric // or propagated sign extended values. So the result must be sign extended. 
645*0fca6ea1SDimitry Andric return true; 646*0fca6ea1SDimitry Andric } 647*0fca6ea1SDimitry Andric 648*0fca6ea1SDimitry Andric static unsigned getWOp(unsigned Opcode) { 649*0fca6ea1SDimitry Andric switch (Opcode) { 650*0fca6ea1SDimitry Andric case LoongArch::ADDI_D: 651*0fca6ea1SDimitry Andric return LoongArch::ADDI_W; 652*0fca6ea1SDimitry Andric case LoongArch::ADD_D: 653*0fca6ea1SDimitry Andric return LoongArch::ADD_W; 654*0fca6ea1SDimitry Andric case LoongArch::LD_D: 655*0fca6ea1SDimitry Andric case LoongArch::LD_WU: 656*0fca6ea1SDimitry Andric return LoongArch::LD_W; 657*0fca6ea1SDimitry Andric case LoongArch::MUL_D: 658*0fca6ea1SDimitry Andric return LoongArch::MUL_W; 659*0fca6ea1SDimitry Andric case LoongArch::SLLI_D: 660*0fca6ea1SDimitry Andric return LoongArch::SLLI_W; 661*0fca6ea1SDimitry Andric case LoongArch::SUB_D: 662*0fca6ea1SDimitry Andric return LoongArch::SUB_W; 663*0fca6ea1SDimitry Andric default: 664*0fca6ea1SDimitry Andric llvm_unreachable("Unexpected opcode for replacement with W variant"); 665*0fca6ea1SDimitry Andric } 666*0fca6ea1SDimitry Andric } 667*0fca6ea1SDimitry Andric 668*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF, 669*0fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 670*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 671*0fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 672*0fca6ea1SDimitry Andric if (DisableSExtWRemoval) 673*0fca6ea1SDimitry Andric return false; 674*0fca6ea1SDimitry Andric 675*0fca6ea1SDimitry Andric bool MadeChange = false; 676*0fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 677*0fca6ea1SDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 678*0fca6ea1SDimitry Andric // We're looking for the sext.w pattern ADDI.W rd, rs, 0. 
679*0fca6ea1SDimitry Andric if (!LoongArch::isSEXT_W(MI)) 680*0fca6ea1SDimitry Andric continue; 681*0fca6ea1SDimitry Andric 682*0fca6ea1SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 683*0fca6ea1SDimitry Andric 684*0fca6ea1SDimitry Andric SmallPtrSet<MachineInstr *, 4> FixableDefs; 685*0fca6ea1SDimitry Andric 686*0fca6ea1SDimitry Andric // If all users only use the lower bits, this sext.w is redundant. 687*0fca6ea1SDimitry Andric // Or if all definitions reaching MI sign-extend their output, 688*0fca6ea1SDimitry Andric // then sext.w is redundant. 689*0fca6ea1SDimitry Andric if (!hasAllWUsers(MI, ST, MRI) && 690*0fca6ea1SDimitry Andric !isSignExtendedW(SrcReg, ST, MRI, FixableDefs)) 691*0fca6ea1SDimitry Andric continue; 692*0fca6ea1SDimitry Andric 693*0fca6ea1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 694*0fca6ea1SDimitry Andric if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) 695*0fca6ea1SDimitry Andric continue; 696*0fca6ea1SDimitry Andric 697*0fca6ea1SDimitry Andric // Convert Fixable instructions to their W versions. 
698*0fca6ea1SDimitry Andric for (MachineInstr *Fixable : FixableDefs) { 699*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing " << *Fixable); 700*0fca6ea1SDimitry Andric Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode()))); 701*0fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap); 702*0fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap); 703*0fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::IsExact); 704*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << *Fixable); 705*0fca6ea1SDimitry Andric ++NumTransformedToWInstrs; 706*0fca6ea1SDimitry Andric } 707*0fca6ea1SDimitry Andric 708*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); 709*0fca6ea1SDimitry Andric MRI.replaceRegWith(DstReg, SrcReg); 710*0fca6ea1SDimitry Andric MRI.clearKillFlags(SrcReg); 711*0fca6ea1SDimitry Andric MI.eraseFromParent(); 712*0fca6ea1SDimitry Andric ++NumRemovedSExtW; 713*0fca6ea1SDimitry Andric MadeChange = true; 714*0fca6ea1SDimitry Andric } 715*0fca6ea1SDimitry Andric } 716*0fca6ea1SDimitry Andric 717*0fca6ea1SDimitry Andric return MadeChange; 718*0fca6ea1SDimitry Andric } 719*0fca6ea1SDimitry Andric 720*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF, 721*0fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 722*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 723*0fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 724*0fca6ea1SDimitry Andric bool MadeChange = false; 725*0fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 726*0fca6ea1SDimitry Andric for (MachineInstr &MI : MBB) { 727*0fca6ea1SDimitry Andric unsigned Opc; 728*0fca6ea1SDimitry Andric switch (MI.getOpcode()) { 729*0fca6ea1SDimitry Andric default: 730*0fca6ea1SDimitry Andric continue; 731*0fca6ea1SDimitry Andric case LoongArch::ADDI_W: 732*0fca6ea1SDimitry Andric Opc = LoongArch::ADDI_D; 733*0fca6ea1SDimitry Andric break; 734*0fca6ea1SDimitry Andric } 
735*0fca6ea1SDimitry Andric 736*0fca6ea1SDimitry Andric if (hasAllWUsers(MI, ST, MRI)) { 737*0fca6ea1SDimitry Andric MI.setDesc(TII.get(Opc)); 738*0fca6ea1SDimitry Andric MadeChange = true; 739*0fca6ea1SDimitry Andric } 740*0fca6ea1SDimitry Andric } 741*0fca6ea1SDimitry Andric } 742*0fca6ea1SDimitry Andric 743*0fca6ea1SDimitry Andric return MadeChange; 744*0fca6ea1SDimitry Andric } 745*0fca6ea1SDimitry Andric 746*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF, 747*0fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 748*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 749*0fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 750*0fca6ea1SDimitry Andric bool MadeChange = false; 751*0fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 752*0fca6ea1SDimitry Andric for (MachineInstr &MI : MBB) { 753*0fca6ea1SDimitry Andric unsigned WOpc; 754*0fca6ea1SDimitry Andric // TODO: Add more? 755*0fca6ea1SDimitry Andric switch (MI.getOpcode()) { 756*0fca6ea1SDimitry Andric default: 757*0fca6ea1SDimitry Andric continue; 758*0fca6ea1SDimitry Andric case LoongArch::ADD_D: 759*0fca6ea1SDimitry Andric WOpc = LoongArch::ADD_W; 760*0fca6ea1SDimitry Andric break; 761*0fca6ea1SDimitry Andric case LoongArch::ADDI_D: 762*0fca6ea1SDimitry Andric WOpc = LoongArch::ADDI_W; 763*0fca6ea1SDimitry Andric break; 764*0fca6ea1SDimitry Andric case LoongArch::SUB_D: 765*0fca6ea1SDimitry Andric WOpc = LoongArch::SUB_W; 766*0fca6ea1SDimitry Andric break; 767*0fca6ea1SDimitry Andric case LoongArch::MUL_D: 768*0fca6ea1SDimitry Andric WOpc = LoongArch::MUL_W; 769*0fca6ea1SDimitry Andric break; 770*0fca6ea1SDimitry Andric case LoongArch::SLLI_D: 771*0fca6ea1SDimitry Andric // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits 772*0fca6ea1SDimitry Andric if (MI.getOperand(2).getImm() >= 32) 773*0fca6ea1SDimitry Andric continue; 774*0fca6ea1SDimitry Andric WOpc = LoongArch::SLLI_W; 775*0fca6ea1SDimitry Andric break; 776*0fca6ea1SDimitry 
Andric case LoongArch::LD_D: 777*0fca6ea1SDimitry Andric case LoongArch::LD_WU: 778*0fca6ea1SDimitry Andric WOpc = LoongArch::LD_W; 779*0fca6ea1SDimitry Andric break; 780*0fca6ea1SDimitry Andric } 781*0fca6ea1SDimitry Andric 782*0fca6ea1SDimitry Andric if (hasAllWUsers(MI, ST, MRI)) { 783*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing " << MI); 784*0fca6ea1SDimitry Andric MI.setDesc(TII.get(WOpc)); 785*0fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::NoSWrap); 786*0fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::NoUWrap); 787*0fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::IsExact); 788*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << MI); 789*0fca6ea1SDimitry Andric ++NumTransformedToWInstrs; 790*0fca6ea1SDimitry Andric MadeChange = true; 791*0fca6ea1SDimitry Andric } 792*0fca6ea1SDimitry Andric } 793*0fca6ea1SDimitry Andric } 794*0fca6ea1SDimitry Andric 795*0fca6ea1SDimitry Andric return MadeChange; 796*0fca6ea1SDimitry Andric } 797*0fca6ea1SDimitry Andric 798*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) { 799*0fca6ea1SDimitry Andric if (skipFunction(MF.getFunction())) 800*0fca6ea1SDimitry Andric return false; 801*0fca6ea1SDimitry Andric 802*0fca6ea1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 803*0fca6ea1SDimitry Andric const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>(); 804*0fca6ea1SDimitry Andric const LoongArchInstrInfo &TII = *ST.getInstrInfo(); 805*0fca6ea1SDimitry Andric 806*0fca6ea1SDimitry Andric if (!ST.is64Bit()) 807*0fca6ea1SDimitry Andric return false; 808*0fca6ea1SDimitry Andric 809*0fca6ea1SDimitry Andric bool MadeChange = false; 810*0fca6ea1SDimitry Andric MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); 811*0fca6ea1SDimitry Andric 812*0fca6ea1SDimitry Andric if (!(DisableCvtToDSuffix || ST.preferWInst())) 813*0fca6ea1SDimitry Andric MadeChange |= convertToDSuffixes(MF, TII, ST, MRI); 814*0fca6ea1SDimitry Andric 
815*0fca6ea1SDimitry Andric if (ST.preferWInst()) 816*0fca6ea1SDimitry Andric MadeChange |= convertToWSuffixes(MF, TII, ST, MRI); 817*0fca6ea1SDimitry Andric 818*0fca6ea1SDimitry Andric return MadeChange; 819*0fca6ea1SDimitry Andric } 820