xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric //===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
2*0fca6ea1SDimitry Andric //
3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric //
7*0fca6ea1SDimitry Andric //===---------------------------------------------------------------------===//
8*0fca6ea1SDimitry Andric //
9*0fca6ea1SDimitry Andric // This pass does some optimizations for *W instructions at the MI level.
10*0fca6ea1SDimitry Andric //
11*0fca6ea1SDimitry Andric // First it removes unneeded sext(addi.w rd, rs, 0) instructions, either
12*0fca6ea1SDimitry Andric // because the sign-extended bits aren't consumed or because the input was
13*0fca6ea1SDimitry Andric // already sign-extended by an earlier instruction.
14*0fca6ea1SDimitry Andric //
15*0fca6ea1SDimitry Andric // Then:
16*0fca6ea1SDimitry Andric // 1. Unless explicitly disabled or the target prefers W-suffixed instructions,
17*0fca6ea1SDimitry Andric //    it removes the -w suffix from opw instructions whenever all users are
18*0fca6ea1SDimitry Andric //    dependent only on the lower word of the result of the instruction.
19*0fca6ea1SDimitry Andric //    The cases handled are:
20*0fca6ea1SDimitry Andric //    * addi.w because it helps reduce test differences between LA32 and LA64
21*0fca6ea1SDimitry Andric //      w/o being a pessimization.
22*0fca6ea1SDimitry Andric //
23*0fca6ea1SDimitry Andric // 2. Or, if explicitly enabled or the target prefers W-suffixed instructions,
24*0fca6ea1SDimitry Andric //    it adds the W suffix to the instruction whenever all users are dependent
25*0fca6ea1SDimitry Andric //    only on the lower word of the result of the instruction.
26*0fca6ea1SDimitry Andric //    The cases handled are:
27*0fca6ea1SDimitry Andric //    * add.d/addi.d/sub.d/mul.d.
28*0fca6ea1SDimitry Andric //    * slli.d with imm < 32.
29*0fca6ea1SDimitry Andric //    * ld.d/ld.wu.
30*0fca6ea1SDimitry Andric //===---------------------------------------------------------------------===//
31*0fca6ea1SDimitry Andric 
32*0fca6ea1SDimitry Andric #include "LoongArch.h"
33*0fca6ea1SDimitry Andric #include "LoongArchMachineFunctionInfo.h"
34*0fca6ea1SDimitry Andric #include "LoongArchSubtarget.h"
35*0fca6ea1SDimitry Andric #include "llvm/ADT/SmallSet.h"
36*0fca6ea1SDimitry Andric #include "llvm/ADT/Statistic.h"
37*0fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
38*0fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
39*0fca6ea1SDimitry Andric 
40*0fca6ea1SDimitry Andric using namespace llvm;
41*0fca6ea1SDimitry Andric 
42*0fca6ea1SDimitry Andric #define DEBUG_TYPE "loongarch-opt-w-instrs"
43*0fca6ea1SDimitry Andric #define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
44*0fca6ea1SDimitry Andric 
// Counters declared through the STATISTIC macro; the string literal is the
// description attached to each counter.
// NOTE(review): the increment sites are outside this excerpt. Presumably
// NumRemovedSExtW is bumped per deleted sign-extension -- confirm.
45*0fca6ea1SDimitry Andric STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
// NOTE(review): presumably bumped per instruction rewritten to its W form --
// confirm against the conversion routine.
46*0fca6ea1SDimitry Andric STATISTIC(NumTransformedToWInstrs,
47*0fca6ea1SDimitry Andric           "Number of instructions transformed to W-ops");
48*0fca6ea1SDimitry Andric 
49*0fca6ea1SDimitry Andric static cl::opt<bool>
50*0fca6ea1SDimitry Andric     DisableSExtWRemoval("loongarch-disable-sextw-removal",
51*0fca6ea1SDimitry Andric                         cl::desc("Disable removal of sign-extend insn"),
52*0fca6ea1SDimitry Andric                         cl::init(false), cl::Hidden);
53*0fca6ea1SDimitry Andric static cl::opt<bool>
54*0fca6ea1SDimitry Andric     DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55*0fca6ea1SDimitry Andric                         cl::desc("Disable convert to D suffix"),
56*0fca6ea1SDimitry Andric                         cl::init(false), cl::Hidden);
57*0fca6ea1SDimitry Andric 
58*0fca6ea1SDimitry Andric namespace {
59*0fca6ea1SDimitry Andric 
60*0fca6ea1SDimitry Andric class LoongArchOptWInstrs : public MachineFunctionPass {
61*0fca6ea1SDimitry Andric public:
62*0fca6ea1SDimitry Andric   static char ID;
63*0fca6ea1SDimitry Andric 
64*0fca6ea1SDimitry Andric   LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
65*0fca6ea1SDimitry Andric 
66*0fca6ea1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
67*0fca6ea1SDimitry Andric   bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
68*0fca6ea1SDimitry Andric                          const LoongArchSubtarget &ST,
69*0fca6ea1SDimitry Andric                          MachineRegisterInfo &MRI);
70*0fca6ea1SDimitry Andric   bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
71*0fca6ea1SDimitry Andric                           const LoongArchSubtarget &ST,
72*0fca6ea1SDimitry Andric                           MachineRegisterInfo &MRI);
73*0fca6ea1SDimitry Andric   bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
74*0fca6ea1SDimitry Andric                           const LoongArchSubtarget &ST,
75*0fca6ea1SDimitry Andric                           MachineRegisterInfo &MRI);
76*0fca6ea1SDimitry Andric 
77*0fca6ea1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
78*0fca6ea1SDimitry Andric     AU.setPreservesCFG();
79*0fca6ea1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
80*0fca6ea1SDimitry Andric   }
81*0fca6ea1SDimitry Andric 
82*0fca6ea1SDimitry Andric   StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
83*0fca6ea1SDimitry Andric };
84*0fca6ea1SDimitry Andric 
85*0fca6ea1SDimitry Andric } // end anonymous namespace
86*0fca6ea1SDimitry Andric 
// Out-of-class definition of the pass's static ID member, which the
// constructor hands to MachineFunctionPass.
87*0fca6ea1SDimitry Andric char LoongArchOptWInstrs::ID = 0;
// Registers the pass under the DEBUG_TYPE string with the human-readable name
// LOONGARCH_OPT_W_INSTRS_NAME; both trailing flags are false.
// NOTE(review): the two flags look like INITIALIZE_PASS's "CFG only" and
// "is analysis" arguments -- confirm against the macro definition.
88*0fca6ea1SDimitry Andric INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
89*0fca6ea1SDimitry Andric                 false, false)
90*0fca6ea1SDimitry Andric 
91*0fca6ea1SDimitry Andric FunctionPass *llvm::createLoongArchOptWInstrsPass() {
92*0fca6ea1SDimitry Andric   return new LoongArchOptWInstrs();
93*0fca6ea1SDimitry Andric }
94*0fca6ea1SDimitry Andric 
95*0fca6ea1SDimitry Andric // Checks if all users only demand the lower \p OrigBits of the original
96*0fca6ea1SDimitry Andric // instruction's result.
97*0fca6ea1SDimitry Andric // TODO: handle multiple interdependent transformations
98*0fca6ea1SDimitry Andric static bool hasAllNBitUsers(const MachineInstr &OrigMI,
99*0fca6ea1SDimitry Andric                             const LoongArchSubtarget &ST,
100*0fca6ea1SDimitry Andric                             const MachineRegisterInfo &MRI, unsigned OrigBits) {
101*0fca6ea1SDimitry Andric 
102*0fca6ea1SDimitry Andric   SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
103*0fca6ea1SDimitry Andric   SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
104*0fca6ea1SDimitry Andric 
105*0fca6ea1SDimitry Andric   Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
106*0fca6ea1SDimitry Andric 
107*0fca6ea1SDimitry Andric   while (!Worklist.empty()) {
108*0fca6ea1SDimitry Andric     auto P = Worklist.pop_back_val();
109*0fca6ea1SDimitry Andric     const MachineInstr *MI = P.first;
110*0fca6ea1SDimitry Andric     unsigned Bits = P.second;
111*0fca6ea1SDimitry Andric 
112*0fca6ea1SDimitry Andric     if (!Visited.insert(P).second)
113*0fca6ea1SDimitry Andric       continue;
114*0fca6ea1SDimitry Andric 
115*0fca6ea1SDimitry Andric     // Only handle instructions with one def.
116*0fca6ea1SDimitry Andric     if (MI->getNumExplicitDefs() != 1)
117*0fca6ea1SDimitry Andric       return false;
118*0fca6ea1SDimitry Andric 
119*0fca6ea1SDimitry Andric     Register DestReg = MI->getOperand(0).getReg();
120*0fca6ea1SDimitry Andric     if (!DestReg.isVirtual())
121*0fca6ea1SDimitry Andric       return false;
122*0fca6ea1SDimitry Andric 
123*0fca6ea1SDimitry Andric     for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
124*0fca6ea1SDimitry Andric       const MachineInstr *UserMI = UserOp.getParent();
125*0fca6ea1SDimitry Andric       unsigned OpIdx = UserOp.getOperandNo();
126*0fca6ea1SDimitry Andric 
127*0fca6ea1SDimitry Andric       switch (UserMI->getOpcode()) {
128*0fca6ea1SDimitry Andric       default:
129*0fca6ea1SDimitry Andric         // TODO: Add vector
130*0fca6ea1SDimitry Andric         return false;
131*0fca6ea1SDimitry Andric 
132*0fca6ea1SDimitry Andric       case LoongArch::ADD_W:
133*0fca6ea1SDimitry Andric       case LoongArch::ADDI_W:
134*0fca6ea1SDimitry Andric       case LoongArch::SUB_W:
135*0fca6ea1SDimitry Andric       case LoongArch::ALSL_W:
136*0fca6ea1SDimitry Andric       case LoongArch::ALSL_WU:
137*0fca6ea1SDimitry Andric       case LoongArch::MUL_W:
138*0fca6ea1SDimitry Andric       case LoongArch::MULH_W:
139*0fca6ea1SDimitry Andric       case LoongArch::MULH_WU:
140*0fca6ea1SDimitry Andric       case LoongArch::MULW_D_W:
141*0fca6ea1SDimitry Andric       case LoongArch::MULW_D_WU:
142*0fca6ea1SDimitry Andric       // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
143*0fca6ea1SDimitry Andric       // case LoongArch::DIV_W:
144*0fca6ea1SDimitry Andric       // case LoongArch::DIV_WU:
145*0fca6ea1SDimitry Andric       // case LoongArch::MOD_W:
146*0fca6ea1SDimitry Andric       // case LoongArch::MOD_WU:
147*0fca6ea1SDimitry Andric       case LoongArch::SLL_W:
148*0fca6ea1SDimitry Andric       case LoongArch::SLLI_W:
149*0fca6ea1SDimitry Andric       case LoongArch::SRL_W:
150*0fca6ea1SDimitry Andric       case LoongArch::SRLI_W:
151*0fca6ea1SDimitry Andric       case LoongArch::SRA_W:
152*0fca6ea1SDimitry Andric       case LoongArch::SRAI_W:
153*0fca6ea1SDimitry Andric       case LoongArch::ROTR_W:
154*0fca6ea1SDimitry Andric       case LoongArch::ROTRI_W:
155*0fca6ea1SDimitry Andric       case LoongArch::CLO_W:
156*0fca6ea1SDimitry Andric       case LoongArch::CLZ_W:
157*0fca6ea1SDimitry Andric       case LoongArch::CTO_W:
158*0fca6ea1SDimitry Andric       case LoongArch::CTZ_W:
159*0fca6ea1SDimitry Andric       case LoongArch::BYTEPICK_W:
160*0fca6ea1SDimitry Andric       case LoongArch::REVB_2H:
161*0fca6ea1SDimitry Andric       case LoongArch::BITREV_4B:
162*0fca6ea1SDimitry Andric       case LoongArch::BITREV_W:
163*0fca6ea1SDimitry Andric       case LoongArch::BSTRINS_W:
164*0fca6ea1SDimitry Andric       case LoongArch::BSTRPICK_W:
165*0fca6ea1SDimitry Andric       case LoongArch::CRC_W_W_W:
166*0fca6ea1SDimitry Andric       case LoongArch::CRCC_W_W_W:
167*0fca6ea1SDimitry Andric       case LoongArch::MOVGR2FCSR:
168*0fca6ea1SDimitry Andric       case LoongArch::MOVGR2FRH_W:
169*0fca6ea1SDimitry Andric       case LoongArch::MOVGR2FR_W_64:
170*0fca6ea1SDimitry Andric         if (Bits >= 32)
171*0fca6ea1SDimitry Andric           break;
172*0fca6ea1SDimitry Andric         return false;
173*0fca6ea1SDimitry Andric       case LoongArch::MOVGR2CF:
174*0fca6ea1SDimitry Andric         if (Bits >= 1)
175*0fca6ea1SDimitry Andric           break;
176*0fca6ea1SDimitry Andric         return false;
177*0fca6ea1SDimitry Andric       case LoongArch::EXT_W_B:
178*0fca6ea1SDimitry Andric         if (Bits >= 8)
179*0fca6ea1SDimitry Andric           break;
180*0fca6ea1SDimitry Andric         return false;
181*0fca6ea1SDimitry Andric       case LoongArch::EXT_W_H:
182*0fca6ea1SDimitry Andric         if (Bits >= 16)
183*0fca6ea1SDimitry Andric           break;
184*0fca6ea1SDimitry Andric         return false;
185*0fca6ea1SDimitry Andric 
186*0fca6ea1SDimitry Andric       case LoongArch::SRLI_D: {
187*0fca6ea1SDimitry Andric         // If we are shifting right by less than Bits, and users don't demand
188*0fca6ea1SDimitry Andric         // any bits that were shifted into [Bits-1:0], then we can consider this
189*0fca6ea1SDimitry Andric         // as an N-Bit user.
190*0fca6ea1SDimitry Andric         unsigned ShAmt = UserMI->getOperand(2).getImm();
191*0fca6ea1SDimitry Andric         if (Bits > ShAmt) {
192*0fca6ea1SDimitry Andric           Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
193*0fca6ea1SDimitry Andric           break;
194*0fca6ea1SDimitry Andric         }
195*0fca6ea1SDimitry Andric         return false;
196*0fca6ea1SDimitry Andric       }
197*0fca6ea1SDimitry Andric 
198*0fca6ea1SDimitry Andric       // these overwrite higher input bits, otherwise the lower word of output
199*0fca6ea1SDimitry Andric       // depends only on the lower word of input. So check their uses read W.
200*0fca6ea1SDimitry Andric       case LoongArch::SLLI_D:
201*0fca6ea1SDimitry Andric         if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))
202*0fca6ea1SDimitry Andric           break;
203*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
204*0fca6ea1SDimitry Andric         break;
205*0fca6ea1SDimitry Andric       case LoongArch::ANDI: {
206*0fca6ea1SDimitry Andric         uint64_t Imm = UserMI->getOperand(2).getImm();
207*0fca6ea1SDimitry Andric         if (Bits >= (unsigned)llvm::bit_width(Imm))
208*0fca6ea1SDimitry Andric           break;
209*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
210*0fca6ea1SDimitry Andric         break;
211*0fca6ea1SDimitry Andric       }
212*0fca6ea1SDimitry Andric       case LoongArch::ORI: {
213*0fca6ea1SDimitry Andric         uint64_t Imm = UserMI->getOperand(2).getImm();
214*0fca6ea1SDimitry Andric         if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
215*0fca6ea1SDimitry Andric           break;
216*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
217*0fca6ea1SDimitry Andric         break;
218*0fca6ea1SDimitry Andric       }
219*0fca6ea1SDimitry Andric 
220*0fca6ea1SDimitry Andric       case LoongArch::SLL_D:
221*0fca6ea1SDimitry Andric         // Operand 2 is the shift amount which uses log2(grlen) bits.
222*0fca6ea1SDimitry Andric         if (OpIdx == 2) {
223*0fca6ea1SDimitry Andric           if (Bits >= Log2_32(ST.getGRLen()))
224*0fca6ea1SDimitry Andric             break;
225*0fca6ea1SDimitry Andric           return false;
226*0fca6ea1SDimitry Andric         }
227*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
228*0fca6ea1SDimitry Andric         break;
229*0fca6ea1SDimitry Andric 
230*0fca6ea1SDimitry Andric       case LoongArch::SRA_D:
231*0fca6ea1SDimitry Andric       case LoongArch::SRL_D:
232*0fca6ea1SDimitry Andric       case LoongArch::ROTR_D:
233*0fca6ea1SDimitry Andric         // Operand 2 is the shift amount which uses 6 bits.
234*0fca6ea1SDimitry Andric         if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))
235*0fca6ea1SDimitry Andric           break;
236*0fca6ea1SDimitry Andric         return false;
237*0fca6ea1SDimitry Andric 
238*0fca6ea1SDimitry Andric       case LoongArch::ST_B:
239*0fca6ea1SDimitry Andric       case LoongArch::STX_B:
240*0fca6ea1SDimitry Andric       case LoongArch::STGT_B:
241*0fca6ea1SDimitry Andric       case LoongArch::STLE_B:
242*0fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_B:
243*0fca6ea1SDimitry Andric         // The first argument is the value to store.
244*0fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 8)
245*0fca6ea1SDimitry Andric           break;
246*0fca6ea1SDimitry Andric         return false;
247*0fca6ea1SDimitry Andric       case LoongArch::ST_H:
248*0fca6ea1SDimitry Andric       case LoongArch::STX_H:
249*0fca6ea1SDimitry Andric       case LoongArch::STGT_H:
250*0fca6ea1SDimitry Andric       case LoongArch::STLE_H:
251*0fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_H:
252*0fca6ea1SDimitry Andric         // The first argument is the value to store.
253*0fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 16)
254*0fca6ea1SDimitry Andric           break;
255*0fca6ea1SDimitry Andric         return false;
256*0fca6ea1SDimitry Andric       case LoongArch::ST_W:
257*0fca6ea1SDimitry Andric       case LoongArch::STX_W:
258*0fca6ea1SDimitry Andric       case LoongArch::SCREL_W:
259*0fca6ea1SDimitry Andric       case LoongArch::STPTR_W:
260*0fca6ea1SDimitry Andric       case LoongArch::STGT_W:
261*0fca6ea1SDimitry Andric       case LoongArch::STLE_W:
262*0fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_W:
263*0fca6ea1SDimitry Andric         // The first argument is the value to store.
264*0fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 32)
265*0fca6ea1SDimitry Andric           break;
266*0fca6ea1SDimitry Andric         return false;
267*0fca6ea1SDimitry Andric 
268*0fca6ea1SDimitry Andric       case LoongArch::CRC_W_B_W:
269*0fca6ea1SDimitry Andric       case LoongArch::CRCC_W_B_W:
270*0fca6ea1SDimitry Andric         if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
271*0fca6ea1SDimitry Andric           break;
272*0fca6ea1SDimitry Andric         return false;
273*0fca6ea1SDimitry Andric       case LoongArch::CRC_W_H_W:
274*0fca6ea1SDimitry Andric       case LoongArch::CRCC_W_H_W:
275*0fca6ea1SDimitry Andric         if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
276*0fca6ea1SDimitry Andric           break;
277*0fca6ea1SDimitry Andric         return false;
278*0fca6ea1SDimitry Andric       case LoongArch::CRC_W_D_W:
279*0fca6ea1SDimitry Andric       case LoongArch::CRCC_W_D_W:
280*0fca6ea1SDimitry Andric         if (OpIdx == 2 && Bits >= 32)
281*0fca6ea1SDimitry Andric           break;
282*0fca6ea1SDimitry Andric         return false;
283*0fca6ea1SDimitry Andric 
284*0fca6ea1SDimitry Andric       // For these, lower word of output in these operations, depends only on
285*0fca6ea1SDimitry Andric       // the lower word of input. So, we check all uses only read lower word.
286*0fca6ea1SDimitry Andric       case LoongArch::COPY:
287*0fca6ea1SDimitry Andric       case LoongArch::PHI:
288*0fca6ea1SDimitry Andric       case LoongArch::ADD_D:
289*0fca6ea1SDimitry Andric       case LoongArch::ADDI_D:
290*0fca6ea1SDimitry Andric       case LoongArch::SUB_D:
291*0fca6ea1SDimitry Andric       case LoongArch::MUL_D:
292*0fca6ea1SDimitry Andric       case LoongArch::AND:
293*0fca6ea1SDimitry Andric       case LoongArch::OR:
294*0fca6ea1SDimitry Andric       case LoongArch::NOR:
295*0fca6ea1SDimitry Andric       case LoongArch::XOR:
296*0fca6ea1SDimitry Andric       case LoongArch::XORI:
297*0fca6ea1SDimitry Andric       case LoongArch::ANDN:
298*0fca6ea1SDimitry Andric       case LoongArch::ORN:
299*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
300*0fca6ea1SDimitry Andric         break;
301*0fca6ea1SDimitry Andric 
302*0fca6ea1SDimitry Andric       case LoongArch::MASKNEZ:
303*0fca6ea1SDimitry Andric       case LoongArch::MASKEQZ:
304*0fca6ea1SDimitry Andric         if (OpIdx != 1)
305*0fca6ea1SDimitry Andric           return false;
306*0fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
307*0fca6ea1SDimitry Andric         break;
308*0fca6ea1SDimitry Andric       }
309*0fca6ea1SDimitry Andric     }
310*0fca6ea1SDimitry Andric   }
311*0fca6ea1SDimitry Andric 
312*0fca6ea1SDimitry Andric   return true;
313*0fca6ea1SDimitry Andric }
314*0fca6ea1SDimitry Andric 
315*0fca6ea1SDimitry Andric static bool hasAllWUsers(const MachineInstr &OrigMI,
316*0fca6ea1SDimitry Andric                          const LoongArchSubtarget &ST,
317*0fca6ea1SDimitry Andric                          const MachineRegisterInfo &MRI) {
318*0fca6ea1SDimitry Andric   return hasAllNBitUsers(OrigMI, ST, MRI, 32);
319*0fca6ea1SDimitry Andric }
320*0fca6ea1SDimitry Andric 
321*0fca6ea1SDimitry Andric // This function returns true if the machine instruction always outputs a value
322*0fca6ea1SDimitry Andric // where bits 63:32 match bit 31.
323*0fca6ea1SDimitry Andric static bool isSignExtendingOpW(const MachineInstr &MI,
324*0fca6ea1SDimitry Andric                                const MachineRegisterInfo &MRI, unsigned OpNo) {
325*0fca6ea1SDimitry Andric   switch (MI.getOpcode()) {
326*0fca6ea1SDimitry Andric   // Normal cases
327*0fca6ea1SDimitry Andric   case LoongArch::ADD_W:
328*0fca6ea1SDimitry Andric   case LoongArch::SUB_W:
329*0fca6ea1SDimitry Andric   case LoongArch::ADDI_W:
330*0fca6ea1SDimitry Andric   case LoongArch::ALSL_W:
331*0fca6ea1SDimitry Andric   case LoongArch::LU12I_W:
332*0fca6ea1SDimitry Andric   case LoongArch::SLT:
333*0fca6ea1SDimitry Andric   case LoongArch::SLTU:
334*0fca6ea1SDimitry Andric   case LoongArch::SLTI:
335*0fca6ea1SDimitry Andric   case LoongArch::SLTUI:
336*0fca6ea1SDimitry Andric   case LoongArch::ANDI:
337*0fca6ea1SDimitry Andric   case LoongArch::MUL_W:
338*0fca6ea1SDimitry Andric   case LoongArch::MULH_W:
339*0fca6ea1SDimitry Andric   case LoongArch::MULH_WU:
340*0fca6ea1SDimitry Andric   case LoongArch::DIV_W:
341*0fca6ea1SDimitry Andric   case LoongArch::MOD_W:
342*0fca6ea1SDimitry Andric   case LoongArch::DIV_WU:
343*0fca6ea1SDimitry Andric   case LoongArch::MOD_WU:
344*0fca6ea1SDimitry Andric   case LoongArch::SLL_W:
345*0fca6ea1SDimitry Andric   case LoongArch::SRL_W:
346*0fca6ea1SDimitry Andric   case LoongArch::SRA_W:
347*0fca6ea1SDimitry Andric   case LoongArch::ROTR_W:
348*0fca6ea1SDimitry Andric   case LoongArch::SLLI_W:
349*0fca6ea1SDimitry Andric   case LoongArch::SRLI_W:
350*0fca6ea1SDimitry Andric   case LoongArch::SRAI_W:
351*0fca6ea1SDimitry Andric   case LoongArch::ROTRI_W:
352*0fca6ea1SDimitry Andric   case LoongArch::EXT_W_B:
353*0fca6ea1SDimitry Andric   case LoongArch::EXT_W_H:
354*0fca6ea1SDimitry Andric   case LoongArch::CLO_W:
355*0fca6ea1SDimitry Andric   case LoongArch::CLZ_W:
356*0fca6ea1SDimitry Andric   case LoongArch::CTO_W:
357*0fca6ea1SDimitry Andric   case LoongArch::CTZ_W:
358*0fca6ea1SDimitry Andric   case LoongArch::BYTEPICK_W:
359*0fca6ea1SDimitry Andric   case LoongArch::REVB_2H:
360*0fca6ea1SDimitry Andric   case LoongArch::BITREV_4B:
361*0fca6ea1SDimitry Andric   case LoongArch::BITREV_W:
362*0fca6ea1SDimitry Andric   case LoongArch::BSTRINS_W:
363*0fca6ea1SDimitry Andric   case LoongArch::BSTRPICK_W:
364*0fca6ea1SDimitry Andric   case LoongArch::LD_B:
365*0fca6ea1SDimitry Andric   case LoongArch::LD_H:
366*0fca6ea1SDimitry Andric   case LoongArch::LD_W:
367*0fca6ea1SDimitry Andric   case LoongArch::LD_BU:
368*0fca6ea1SDimitry Andric   case LoongArch::LD_HU:
369*0fca6ea1SDimitry Andric   case LoongArch::LL_W:
370*0fca6ea1SDimitry Andric   case LoongArch::LLACQ_W:
371*0fca6ea1SDimitry Andric   case LoongArch::RDTIMEL_W:
372*0fca6ea1SDimitry Andric   case LoongArch::RDTIMEH_W:
373*0fca6ea1SDimitry Andric   case LoongArch::CPUCFG:
374*0fca6ea1SDimitry Andric   case LoongArch::LDX_B:
375*0fca6ea1SDimitry Andric   case LoongArch::LDX_H:
376*0fca6ea1SDimitry Andric   case LoongArch::LDX_W:
377*0fca6ea1SDimitry Andric   case LoongArch::LDX_BU:
378*0fca6ea1SDimitry Andric   case LoongArch::LDX_HU:
379*0fca6ea1SDimitry Andric   case LoongArch::LDPTR_W:
380*0fca6ea1SDimitry Andric   case LoongArch::LDGT_B:
381*0fca6ea1SDimitry Andric   case LoongArch::LDGT_H:
382*0fca6ea1SDimitry Andric   case LoongArch::LDGT_W:
383*0fca6ea1SDimitry Andric   case LoongArch::LDLE_B:
384*0fca6ea1SDimitry Andric   case LoongArch::LDLE_H:
385*0fca6ea1SDimitry Andric   case LoongArch::LDLE_W:
386*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP_B:
387*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP_H:
388*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP_W:
389*0fca6ea1SDimitry Andric   case LoongArch::AMADD_B:
390*0fca6ea1SDimitry Andric   case LoongArch::AMADD_H:
391*0fca6ea1SDimitry Andric   case LoongArch::AMADD_W:
392*0fca6ea1SDimitry Andric   case LoongArch::AMAND_W:
393*0fca6ea1SDimitry Andric   case LoongArch::AMOR_W:
394*0fca6ea1SDimitry Andric   case LoongArch::AMXOR_W:
395*0fca6ea1SDimitry Andric   case LoongArch::AMMAX_W:
396*0fca6ea1SDimitry Andric   case LoongArch::AMMIN_W:
397*0fca6ea1SDimitry Andric   case LoongArch::AMMAX_WU:
398*0fca6ea1SDimitry Andric   case LoongArch::AMMIN_WU:
399*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_B:
400*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_H:
401*0fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_W:
402*0fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_B:
403*0fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_H:
404*0fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_W:
405*0fca6ea1SDimitry Andric   case LoongArch::AMAND__DB_W:
406*0fca6ea1SDimitry Andric   case LoongArch::AMOR__DB_W:
407*0fca6ea1SDimitry Andric   case LoongArch::AMXOR__DB_W:
408*0fca6ea1SDimitry Andric   case LoongArch::AMMAX__DB_W:
409*0fca6ea1SDimitry Andric   case LoongArch::AMMIN__DB_W:
410*0fca6ea1SDimitry Andric   case LoongArch::AMMAX__DB_WU:
411*0fca6ea1SDimitry Andric   case LoongArch::AMMIN__DB_WU:
412*0fca6ea1SDimitry Andric   case LoongArch::AMCAS_B:
413*0fca6ea1SDimitry Andric   case LoongArch::AMCAS_H:
414*0fca6ea1SDimitry Andric   case LoongArch::AMCAS_W:
415*0fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_B:
416*0fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_H:
417*0fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_W:
418*0fca6ea1SDimitry Andric   case LoongArch::CRC_W_B_W:
419*0fca6ea1SDimitry Andric   case LoongArch::CRC_W_H_W:
420*0fca6ea1SDimitry Andric   case LoongArch::CRC_W_W_W:
421*0fca6ea1SDimitry Andric   case LoongArch::CRC_W_D_W:
422*0fca6ea1SDimitry Andric   case LoongArch::CRCC_W_B_W:
423*0fca6ea1SDimitry Andric   case LoongArch::CRCC_W_H_W:
424*0fca6ea1SDimitry Andric   case LoongArch::CRCC_W_W_W:
425*0fca6ea1SDimitry Andric   case LoongArch::CRCC_W_D_W:
426*0fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_B:
427*0fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_H:
428*0fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_W:
429*0fca6ea1SDimitry Andric   case LoongArch::MOVFR2GR_S:
430*0fca6ea1SDimitry Andric   case LoongArch::MOVFCSR2GR:
431*0fca6ea1SDimitry Andric   case LoongArch::MOVCF2GR:
432*0fca6ea1SDimitry Andric   case LoongArch::MOVFRH2GR_S:
433*0fca6ea1SDimitry Andric   case LoongArch::MOVFR2GR_S_64:
434*0fca6ea1SDimitry Andric     // TODO: Add vector
435*0fca6ea1SDimitry Andric     return true;
436*0fca6ea1SDimitry Andric   // Special cases that require checking operands.
437*0fca6ea1SDimitry Andric   // shifting right sufficiently makes the value 32-bit sign-extended
438*0fca6ea1SDimitry Andric   case LoongArch::SRAI_D:
439*0fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() >= 32;
440*0fca6ea1SDimitry Andric   case LoongArch::SRLI_D:
441*0fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() > 32;
442*0fca6ea1SDimitry Andric   // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
443*0fca6ea1SDimitry Andric   case LoongArch::ADDI_D:
444*0fca6ea1SDimitry Andric   case LoongArch::ORI:
445*0fca6ea1SDimitry Andric     return MI.getOperand(1).isReg() &&
446*0fca6ea1SDimitry Andric            MI.getOperand(1).getReg() == LoongArch::R0;
447*0fca6ea1SDimitry Andric   // A bits extract is sign extended if the msb is less than 31.
448*0fca6ea1SDimitry Andric   case LoongArch::BSTRPICK_D:
449*0fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() < 31;
450*0fca6ea1SDimitry Andric   // Copying from R0 produces zero.
451*0fca6ea1SDimitry Andric   case LoongArch::COPY:
452*0fca6ea1SDimitry Andric     return MI.getOperand(1).getReg() == LoongArch::R0;
453*0fca6ea1SDimitry Andric   // Ignore the scratch register destination.
454*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicSwap32:
455*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicSwap32:
456*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadAdd32:
457*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadSub32:
458*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadNand32:
459*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadNand32:
460*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadAdd32:
461*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadSub32:
462*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadAnd32:
463*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadOr32:
464*0fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadXor32:
465*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadUMax32:
466*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadUMin32:
467*0fca6ea1SDimitry Andric   case LoongArch::PseudoCmpXchg32:
468*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedCmpXchg32:
469*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadMax32:
470*0fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadMin32:
471*0fca6ea1SDimitry Andric     return OpNo == 0;
472*0fca6ea1SDimitry Andric   }
473*0fca6ea1SDimitry Andric 
474*0fca6ea1SDimitry Andric   return false;
475*0fca6ea1SDimitry Andric }
476*0fca6ea1SDimitry Andric 
477*0fca6ea1SDimitry Andric static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
478*0fca6ea1SDimitry Andric                             const MachineRegisterInfo &MRI,
479*0fca6ea1SDimitry Andric                             SmallPtrSetImpl<MachineInstr *> &FixableDef) {
480*0fca6ea1SDimitry Andric   SmallSet<Register, 4> Visited;
481*0fca6ea1SDimitry Andric   SmallVector<Register, 4> Worklist;
482*0fca6ea1SDimitry Andric 
483*0fca6ea1SDimitry Andric   auto AddRegToWorkList = [&](Register SrcReg) {
484*0fca6ea1SDimitry Andric     if (!SrcReg.isVirtual())
485*0fca6ea1SDimitry Andric       return false;
486*0fca6ea1SDimitry Andric     Worklist.push_back(SrcReg);
487*0fca6ea1SDimitry Andric     return true;
488*0fca6ea1SDimitry Andric   };
489*0fca6ea1SDimitry Andric 
490*0fca6ea1SDimitry Andric   if (!AddRegToWorkList(SrcReg))
491*0fca6ea1SDimitry Andric     return false;
492*0fca6ea1SDimitry Andric 
493*0fca6ea1SDimitry Andric   while (!Worklist.empty()) {
494*0fca6ea1SDimitry Andric     Register Reg = Worklist.pop_back_val();
495*0fca6ea1SDimitry Andric 
496*0fca6ea1SDimitry Andric     // If we already visited this register, we don't need to check it again.
497*0fca6ea1SDimitry Andric     if (!Visited.insert(Reg).second)
498*0fca6ea1SDimitry Andric       continue;
499*0fca6ea1SDimitry Andric 
500*0fca6ea1SDimitry Andric     MachineInstr *MI = MRI.getVRegDef(Reg);
501*0fca6ea1SDimitry Andric     if (!MI)
502*0fca6ea1SDimitry Andric       continue;
503*0fca6ea1SDimitry Andric 
504*0fca6ea1SDimitry Andric     int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
505*0fca6ea1SDimitry Andric     assert(OpNo != -1 && "Couldn't find register");
506*0fca6ea1SDimitry Andric 
507*0fca6ea1SDimitry Andric     // If this is a sign extending operation we don't need to look any further.
508*0fca6ea1SDimitry Andric     if (isSignExtendingOpW(*MI, MRI, OpNo))
509*0fca6ea1SDimitry Andric       continue;
510*0fca6ea1SDimitry Andric 
511*0fca6ea1SDimitry Andric     // Is this an instruction that propagates sign extend?
512*0fca6ea1SDimitry Andric     switch (MI->getOpcode()) {
513*0fca6ea1SDimitry Andric     default:
514*0fca6ea1SDimitry Andric       // Unknown opcode, give up.
515*0fca6ea1SDimitry Andric       return false;
516*0fca6ea1SDimitry Andric     case LoongArch::COPY: {
517*0fca6ea1SDimitry Andric       const MachineFunction *MF = MI->getMF();
518*0fca6ea1SDimitry Andric       const LoongArchMachineFunctionInfo *LAFI =
519*0fca6ea1SDimitry Andric           MF->getInfo<LoongArchMachineFunctionInfo>();
520*0fca6ea1SDimitry Andric 
521*0fca6ea1SDimitry Andric       // If this is the entry block and the register is livein, see if we know
522*0fca6ea1SDimitry Andric       // it is sign extended.
523*0fca6ea1SDimitry Andric       if (MI->getParent() == &MF->front()) {
524*0fca6ea1SDimitry Andric         Register VReg = MI->getOperand(0).getReg();
525*0fca6ea1SDimitry Andric         if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))
526*0fca6ea1SDimitry Andric           continue;
527*0fca6ea1SDimitry Andric       }
528*0fca6ea1SDimitry Andric 
529*0fca6ea1SDimitry Andric       Register CopySrcReg = MI->getOperand(1).getReg();
530*0fca6ea1SDimitry Andric       if (CopySrcReg == LoongArch::R4) {
531*0fca6ea1SDimitry Andric         // For a method return value, we check the ZExt/SExt flags in attribute.
532*0fca6ea1SDimitry Andric         // We assume the following code sequence for method call.
533*0fca6ea1SDimitry Andric         // PseudoCALL @bar, ...
534*0fca6ea1SDimitry Andric         // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
535*0fca6ea1SDimitry Andric         // %0:gpr = COPY $r4
536*0fca6ea1SDimitry Andric         //
537*0fca6ea1SDimitry Andric         // We use the PseudoCall to look up the IR function being called to find
538*0fca6ea1SDimitry Andric         // its return attributes.
539*0fca6ea1SDimitry Andric         const MachineBasicBlock *MBB = MI->getParent();
540*0fca6ea1SDimitry Andric         auto II = MI->getIterator();
541*0fca6ea1SDimitry Andric         if (II == MBB->instr_begin() ||
542*0fca6ea1SDimitry Andric             (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
543*0fca6ea1SDimitry Andric           return false;
544*0fca6ea1SDimitry Andric 
545*0fca6ea1SDimitry Andric         const MachineInstr &CallMI = *(--II);
546*0fca6ea1SDimitry Andric         if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
547*0fca6ea1SDimitry Andric           return false;
548*0fca6ea1SDimitry Andric 
549*0fca6ea1SDimitry Andric         auto *CalleeFn =
550*0fca6ea1SDimitry Andric             dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
551*0fca6ea1SDimitry Andric         if (!CalleeFn)
552*0fca6ea1SDimitry Andric           return false;
553*0fca6ea1SDimitry Andric 
554*0fca6ea1SDimitry Andric         auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
555*0fca6ea1SDimitry Andric         if (!IntTy)
556*0fca6ea1SDimitry Andric           return false;
557*0fca6ea1SDimitry Andric 
558*0fca6ea1SDimitry Andric         const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
559*0fca6ea1SDimitry Andric         unsigned BitWidth = IntTy->getBitWidth();
560*0fca6ea1SDimitry Andric         if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
561*0fca6ea1SDimitry Andric             (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
562*0fca6ea1SDimitry Andric           continue;
563*0fca6ea1SDimitry Andric       }
564*0fca6ea1SDimitry Andric 
565*0fca6ea1SDimitry Andric       if (!AddRegToWorkList(CopySrcReg))
566*0fca6ea1SDimitry Andric         return false;
567*0fca6ea1SDimitry Andric 
568*0fca6ea1SDimitry Andric       break;
569*0fca6ea1SDimitry Andric     }
570*0fca6ea1SDimitry Andric 
571*0fca6ea1SDimitry Andric     // For these, we just need to check if the 1st operand is sign extended.
572*0fca6ea1SDimitry Andric     case LoongArch::MOD_D:
573*0fca6ea1SDimitry Andric     case LoongArch::ANDI:
574*0fca6ea1SDimitry Andric     case LoongArch::ORI:
575*0fca6ea1SDimitry Andric     case LoongArch::XORI:
576*0fca6ea1SDimitry Andric       // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
577*0fca6ea1SDimitry Andric       // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
578*0fca6ea1SDimitry Andric       // Logical operations use a sign extended 12-bit immediate.
579*0fca6ea1SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(1).getReg()))
580*0fca6ea1SDimitry Andric         return false;
581*0fca6ea1SDimitry Andric 
582*0fca6ea1SDimitry Andric       break;
583*0fca6ea1SDimitry Andric     case LoongArch::MOD_DU:
584*0fca6ea1SDimitry Andric     case LoongArch::AND:
585*0fca6ea1SDimitry Andric     case LoongArch::OR:
586*0fca6ea1SDimitry Andric     case LoongArch::XOR:
587*0fca6ea1SDimitry Andric     case LoongArch::ANDN:
588*0fca6ea1SDimitry Andric     case LoongArch::ORN:
589*0fca6ea1SDimitry Andric     case LoongArch::PHI: {
590*0fca6ea1SDimitry Andric       // If all incoming values are sign-extended, the output of AND, OR, XOR,
591*0fca6ea1SDimitry Andric       // or PHI is also sign-extended.
592*0fca6ea1SDimitry Andric 
593*0fca6ea1SDimitry Andric       // The input registers for PHI are operand 1, 3, ...
594*0fca6ea1SDimitry Andric       // The input registers for others are operand 1 and 2.
595*0fca6ea1SDimitry Andric       unsigned B = 1, E = 3, D = 1;
596*0fca6ea1SDimitry Andric       switch (MI->getOpcode()) {
597*0fca6ea1SDimitry Andric       case LoongArch::PHI:
598*0fca6ea1SDimitry Andric         E = MI->getNumOperands();
599*0fca6ea1SDimitry Andric         D = 2;
600*0fca6ea1SDimitry Andric         break;
601*0fca6ea1SDimitry Andric       }
602*0fca6ea1SDimitry Andric 
603*0fca6ea1SDimitry Andric       for (unsigned I = B; I != E; I += D) {
604*0fca6ea1SDimitry Andric         if (!MI->getOperand(I).isReg())
605*0fca6ea1SDimitry Andric           return false;
606*0fca6ea1SDimitry Andric 
607*0fca6ea1SDimitry Andric         if (!AddRegToWorkList(MI->getOperand(I).getReg()))
608*0fca6ea1SDimitry Andric           return false;
609*0fca6ea1SDimitry Andric       }
610*0fca6ea1SDimitry Andric 
611*0fca6ea1SDimitry Andric       break;
612*0fca6ea1SDimitry Andric     }
613*0fca6ea1SDimitry Andric 
614*0fca6ea1SDimitry Andric     case LoongArch::MASKEQZ:
615*0fca6ea1SDimitry Andric     case LoongArch::MASKNEZ:
616*0fca6ea1SDimitry Andric       // Instructions return zero or operand 1. Result is sign extended if
617*0fca6ea1SDimitry Andric       // operand 1 is sign extended.
618*0fca6ea1SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(1).getReg()))
619*0fca6ea1SDimitry Andric         return false;
620*0fca6ea1SDimitry Andric       break;
621*0fca6ea1SDimitry Andric 
622*0fca6ea1SDimitry Andric     // With these opcode, we can "fix" them with the W-version
623*0fca6ea1SDimitry Andric     // if we know all users of the result only rely on bits 31:0
624*0fca6ea1SDimitry Andric     case LoongArch::SLLI_D:
625*0fca6ea1SDimitry Andric       // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
626*0fca6ea1SDimitry Andric       if (MI->getOperand(2).getImm() >= 32)
627*0fca6ea1SDimitry Andric         return false;
628*0fca6ea1SDimitry Andric       [[fallthrough]];
629*0fca6ea1SDimitry Andric     case LoongArch::ADDI_D:
630*0fca6ea1SDimitry Andric     case LoongArch::ADD_D:
631*0fca6ea1SDimitry Andric     case LoongArch::LD_D:
632*0fca6ea1SDimitry Andric     case LoongArch::LD_WU:
633*0fca6ea1SDimitry Andric     case LoongArch::MUL_D:
634*0fca6ea1SDimitry Andric     case LoongArch::SUB_D:
635*0fca6ea1SDimitry Andric       if (hasAllWUsers(*MI, ST, MRI)) {
636*0fca6ea1SDimitry Andric         FixableDef.insert(MI);
637*0fca6ea1SDimitry Andric         break;
638*0fca6ea1SDimitry Andric       }
639*0fca6ea1SDimitry Andric       return false;
640*0fca6ea1SDimitry Andric     }
641*0fca6ea1SDimitry Andric   }
642*0fca6ea1SDimitry Andric 
643*0fca6ea1SDimitry Andric   // If we get here, then every node we visited produces a sign extended value
644*0fca6ea1SDimitry Andric   // or propagated sign extended values. So the result must be sign extended.
645*0fca6ea1SDimitry Andric   return true;
646*0fca6ea1SDimitry Andric }
647*0fca6ea1SDimitry Andric 
648*0fca6ea1SDimitry Andric static unsigned getWOp(unsigned Opcode) {
649*0fca6ea1SDimitry Andric   switch (Opcode) {
650*0fca6ea1SDimitry Andric   case LoongArch::ADDI_D:
651*0fca6ea1SDimitry Andric     return LoongArch::ADDI_W;
652*0fca6ea1SDimitry Andric   case LoongArch::ADD_D:
653*0fca6ea1SDimitry Andric     return LoongArch::ADD_W;
654*0fca6ea1SDimitry Andric   case LoongArch::LD_D:
655*0fca6ea1SDimitry Andric   case LoongArch::LD_WU:
656*0fca6ea1SDimitry Andric     return LoongArch::LD_W;
657*0fca6ea1SDimitry Andric   case LoongArch::MUL_D:
658*0fca6ea1SDimitry Andric     return LoongArch::MUL_W;
659*0fca6ea1SDimitry Andric   case LoongArch::SLLI_D:
660*0fca6ea1SDimitry Andric     return LoongArch::SLLI_W;
661*0fca6ea1SDimitry Andric   case LoongArch::SUB_D:
662*0fca6ea1SDimitry Andric     return LoongArch::SUB_W;
663*0fca6ea1SDimitry Andric   default:
664*0fca6ea1SDimitry Andric     llvm_unreachable("Unexpected opcode for replacement with W variant");
665*0fca6ea1SDimitry Andric   }
666*0fca6ea1SDimitry Andric }
667*0fca6ea1SDimitry Andric 
668*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
669*0fca6ea1SDimitry Andric                                             const LoongArchInstrInfo &TII,
670*0fca6ea1SDimitry Andric                                             const LoongArchSubtarget &ST,
671*0fca6ea1SDimitry Andric                                             MachineRegisterInfo &MRI) {
672*0fca6ea1SDimitry Andric   if (DisableSExtWRemoval)
673*0fca6ea1SDimitry Andric     return false;
674*0fca6ea1SDimitry Andric 
675*0fca6ea1SDimitry Andric   bool MadeChange = false;
676*0fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
677*0fca6ea1SDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
678*0fca6ea1SDimitry Andric       // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
679*0fca6ea1SDimitry Andric       if (!LoongArch::isSEXT_W(MI))
680*0fca6ea1SDimitry Andric         continue;
681*0fca6ea1SDimitry Andric 
682*0fca6ea1SDimitry Andric       Register SrcReg = MI.getOperand(1).getReg();
683*0fca6ea1SDimitry Andric 
684*0fca6ea1SDimitry Andric       SmallPtrSet<MachineInstr *, 4> FixableDefs;
685*0fca6ea1SDimitry Andric 
686*0fca6ea1SDimitry Andric       // If all users only use the lower bits, this sext.w is redundant.
687*0fca6ea1SDimitry Andric       // Or if all definitions reaching MI sign-extend their output,
688*0fca6ea1SDimitry Andric       // then sext.w is redundant.
689*0fca6ea1SDimitry Andric       if (!hasAllWUsers(MI, ST, MRI) &&
690*0fca6ea1SDimitry Andric           !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
691*0fca6ea1SDimitry Andric         continue;
692*0fca6ea1SDimitry Andric 
693*0fca6ea1SDimitry Andric       Register DstReg = MI.getOperand(0).getReg();
694*0fca6ea1SDimitry Andric       if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
695*0fca6ea1SDimitry Andric         continue;
696*0fca6ea1SDimitry Andric 
697*0fca6ea1SDimitry Andric       // Convert Fixable instructions to their W versions.
698*0fca6ea1SDimitry Andric       for (MachineInstr *Fixable : FixableDefs) {
699*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
700*0fca6ea1SDimitry Andric         Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
701*0fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
702*0fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
703*0fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
704*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "     with " << *Fixable);
705*0fca6ea1SDimitry Andric         ++NumTransformedToWInstrs;
706*0fca6ea1SDimitry Andric       }
707*0fca6ea1SDimitry Andric 
708*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
709*0fca6ea1SDimitry Andric       MRI.replaceRegWith(DstReg, SrcReg);
710*0fca6ea1SDimitry Andric       MRI.clearKillFlags(SrcReg);
711*0fca6ea1SDimitry Andric       MI.eraseFromParent();
712*0fca6ea1SDimitry Andric       ++NumRemovedSExtW;
713*0fca6ea1SDimitry Andric       MadeChange = true;
714*0fca6ea1SDimitry Andric     }
715*0fca6ea1SDimitry Andric   }
716*0fca6ea1SDimitry Andric 
717*0fca6ea1SDimitry Andric   return MadeChange;
718*0fca6ea1SDimitry Andric }
719*0fca6ea1SDimitry Andric 
720*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
721*0fca6ea1SDimitry Andric                                              const LoongArchInstrInfo &TII,
722*0fca6ea1SDimitry Andric                                              const LoongArchSubtarget &ST,
723*0fca6ea1SDimitry Andric                                              MachineRegisterInfo &MRI) {
724*0fca6ea1SDimitry Andric   bool MadeChange = false;
725*0fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
726*0fca6ea1SDimitry Andric     for (MachineInstr &MI : MBB) {
727*0fca6ea1SDimitry Andric       unsigned Opc;
728*0fca6ea1SDimitry Andric       switch (MI.getOpcode()) {
729*0fca6ea1SDimitry Andric       default:
730*0fca6ea1SDimitry Andric         continue;
731*0fca6ea1SDimitry Andric       case LoongArch::ADDI_W:
732*0fca6ea1SDimitry Andric         Opc = LoongArch::ADDI_D;
733*0fca6ea1SDimitry Andric         break;
734*0fca6ea1SDimitry Andric       }
735*0fca6ea1SDimitry Andric 
736*0fca6ea1SDimitry Andric       if (hasAllWUsers(MI, ST, MRI)) {
737*0fca6ea1SDimitry Andric         MI.setDesc(TII.get(Opc));
738*0fca6ea1SDimitry Andric         MadeChange = true;
739*0fca6ea1SDimitry Andric       }
740*0fca6ea1SDimitry Andric     }
741*0fca6ea1SDimitry Andric   }
742*0fca6ea1SDimitry Andric 
743*0fca6ea1SDimitry Andric   return MadeChange;
744*0fca6ea1SDimitry Andric }
745*0fca6ea1SDimitry Andric 
746*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
747*0fca6ea1SDimitry Andric                                              const LoongArchInstrInfo &TII,
748*0fca6ea1SDimitry Andric                                              const LoongArchSubtarget &ST,
749*0fca6ea1SDimitry Andric                                              MachineRegisterInfo &MRI) {
750*0fca6ea1SDimitry Andric   bool MadeChange = false;
751*0fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
752*0fca6ea1SDimitry Andric     for (MachineInstr &MI : MBB) {
753*0fca6ea1SDimitry Andric       unsigned WOpc;
754*0fca6ea1SDimitry Andric       // TODO: Add more?
755*0fca6ea1SDimitry Andric       switch (MI.getOpcode()) {
756*0fca6ea1SDimitry Andric       default:
757*0fca6ea1SDimitry Andric         continue;
758*0fca6ea1SDimitry Andric       case LoongArch::ADD_D:
759*0fca6ea1SDimitry Andric         WOpc = LoongArch::ADD_W;
760*0fca6ea1SDimitry Andric         break;
761*0fca6ea1SDimitry Andric       case LoongArch::ADDI_D:
762*0fca6ea1SDimitry Andric         WOpc = LoongArch::ADDI_W;
763*0fca6ea1SDimitry Andric         break;
764*0fca6ea1SDimitry Andric       case LoongArch::SUB_D:
765*0fca6ea1SDimitry Andric         WOpc = LoongArch::SUB_W;
766*0fca6ea1SDimitry Andric         break;
767*0fca6ea1SDimitry Andric       case LoongArch::MUL_D:
768*0fca6ea1SDimitry Andric         WOpc = LoongArch::MUL_W;
769*0fca6ea1SDimitry Andric         break;
770*0fca6ea1SDimitry Andric       case LoongArch::SLLI_D:
771*0fca6ea1SDimitry Andric         // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
772*0fca6ea1SDimitry Andric         if (MI.getOperand(2).getImm() >= 32)
773*0fca6ea1SDimitry Andric           continue;
774*0fca6ea1SDimitry Andric         WOpc = LoongArch::SLLI_W;
775*0fca6ea1SDimitry Andric         break;
776*0fca6ea1SDimitry Andric       case LoongArch::LD_D:
777*0fca6ea1SDimitry Andric       case LoongArch::LD_WU:
778*0fca6ea1SDimitry Andric         WOpc = LoongArch::LD_W;
779*0fca6ea1SDimitry Andric         break;
780*0fca6ea1SDimitry Andric       }
781*0fca6ea1SDimitry Andric 
782*0fca6ea1SDimitry Andric       if (hasAllWUsers(MI, ST, MRI)) {
783*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Replacing " << MI);
784*0fca6ea1SDimitry Andric         MI.setDesc(TII.get(WOpc));
785*0fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
786*0fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
787*0fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::IsExact);
788*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "     with " << MI);
789*0fca6ea1SDimitry Andric         ++NumTransformedToWInstrs;
790*0fca6ea1SDimitry Andric         MadeChange = true;
791*0fca6ea1SDimitry Andric       }
792*0fca6ea1SDimitry Andric     }
793*0fca6ea1SDimitry Andric   }
794*0fca6ea1SDimitry Andric 
795*0fca6ea1SDimitry Andric   return MadeChange;
796*0fca6ea1SDimitry Andric }
797*0fca6ea1SDimitry Andric 
798*0fca6ea1SDimitry Andric bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
799*0fca6ea1SDimitry Andric   if (skipFunction(MF.getFunction()))
800*0fca6ea1SDimitry Andric     return false;
801*0fca6ea1SDimitry Andric 
802*0fca6ea1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
803*0fca6ea1SDimitry Andric   const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();
804*0fca6ea1SDimitry Andric   const LoongArchInstrInfo &TII = *ST.getInstrInfo();
805*0fca6ea1SDimitry Andric 
806*0fca6ea1SDimitry Andric   if (!ST.is64Bit())
807*0fca6ea1SDimitry Andric     return false;
808*0fca6ea1SDimitry Andric 
809*0fca6ea1SDimitry Andric   bool MadeChange = false;
810*0fca6ea1SDimitry Andric   MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
811*0fca6ea1SDimitry Andric 
812*0fca6ea1SDimitry Andric   if (!(DisableCvtToDSuffix || ST.preferWInst()))
813*0fca6ea1SDimitry Andric     MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);
814*0fca6ea1SDimitry Andric 
815*0fca6ea1SDimitry Andric   if (ST.preferWInst())
816*0fca6ea1SDimitry Andric     MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);
817*0fca6ea1SDimitry Andric 
818*0fca6ea1SDimitry Andric   return MadeChange;
819*0fca6ea1SDimitry Andric }
820