xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1700637cbSDimitry Andric //===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
2700637cbSDimitry Andric //
3700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6700637cbSDimitry Andric //
7700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8700637cbSDimitry Andric //
9700637cbSDimitry Andric // Merge the offset of address calculation into the offset field
10700637cbSDimitry Andric // of instructions in a global address lowering sequence.
11700637cbSDimitry Andric //
12700637cbSDimitry Andric //===----------------------------------------------------------------------===//
13700637cbSDimitry Andric 
14700637cbSDimitry Andric #include "LoongArch.h"
15700637cbSDimitry Andric #include "LoongArchTargetMachine.h"
16700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
17700637cbSDimitry Andric #include "llvm/CodeGen/Passes.h"
18700637cbSDimitry Andric #include "llvm/MC/TargetRegistry.h"
19700637cbSDimitry Andric #include "llvm/Support/Debug.h"
20700637cbSDimitry Andric #include "llvm/Target/TargetOptions.h"
21700637cbSDimitry Andric #include <optional>
22700637cbSDimitry Andric 
23700637cbSDimitry Andric using namespace llvm;
24700637cbSDimitry Andric 
25700637cbSDimitry Andric #define DEBUG_TYPE "loongarch-merge-base-offset"
26700637cbSDimitry Andric #define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27700637cbSDimitry Andric 
28700637cbSDimitry Andric namespace {
29700637cbSDimitry Andric 
30700637cbSDimitry Andric class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31700637cbSDimitry Andric   const LoongArchSubtarget *ST = nullptr;
32700637cbSDimitry Andric   MachineRegisterInfo *MRI;
33700637cbSDimitry Andric 
34700637cbSDimitry Andric public:
35700637cbSDimitry Andric   static char ID;
36700637cbSDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
37700637cbSDimitry Andric   bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38700637cbSDimitry Andric                       MachineInstr *&Lo20, MachineInstr *&Hi12,
39700637cbSDimitry Andric                       MachineInstr *&Last);
40700637cbSDimitry Andric   bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
41700637cbSDimitry Andric                       MachineInstr *&Lo12);
42700637cbSDimitry Andric 
43700637cbSDimitry Andric   bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
44700637cbSDimitry Andric                            MachineInstr *&Lo20, MachineInstr *&Hi12,
45700637cbSDimitry Andric                            MachineInstr *&Last);
46700637cbSDimitry Andric   void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
47700637cbSDimitry Andric                   MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
48700637cbSDimitry Andric                   int64_t Offset);
49700637cbSDimitry Andric   bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
50700637cbSDimitry Andric                        MachineInstr *&Lo20, MachineInstr *&Hi12,
51700637cbSDimitry Andric                        MachineInstr *&Last, MachineInstr &TailAdd,
52700637cbSDimitry Andric                        Register GAReg);
53700637cbSDimitry Andric 
54700637cbSDimitry Andric   bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
55700637cbSDimitry Andric                          MachineInstr *&Lo20, MachineInstr *&Hi12,
56700637cbSDimitry Andric                          MachineInstr *&Last);
57700637cbSDimitry Andric 
LoongArchMergeBaseOffsetOpt()58700637cbSDimitry Andric   LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
59700637cbSDimitry Andric 
getRequiredProperties() const60700637cbSDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
61700637cbSDimitry Andric     return MachineFunctionProperties().setIsSSA();
62700637cbSDimitry Andric   }
63700637cbSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const64700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
65700637cbSDimitry Andric     AU.setPreservesCFG();
66700637cbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
67700637cbSDimitry Andric   }
68700637cbSDimitry Andric 
getPassName() const69700637cbSDimitry Andric   StringRef getPassName() const override {
70700637cbSDimitry Andric     return LoongArch_MERGE_BASE_OFFSET_NAME;
71700637cbSDimitry Andric   }
72700637cbSDimitry Andric };
73700637cbSDimitry Andric } // end anonymous namespace
74700637cbSDimitry Andric 
// Definition of the pass's static ID member (the constructor hands it to
// MachineFunctionPass), followed by the INITIALIZE_PASS invocation that ties
// the pass class to DEBUG_TYPE and to its human-readable name.
75700637cbSDimitry Andric char LoongArchMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt,DEBUG_TYPE,LoongArch_MERGE_BASE_OFFSET_NAME,false,false)76700637cbSDimitry Andric INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
77700637cbSDimitry Andric                 LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
78700637cbSDimitry Andric 
79700637cbSDimitry Andric // Detect either of the patterns:
80700637cbSDimitry Andric //
81700637cbSDimitry Andric // 1. (small/medium):
82700637cbSDimitry Andric //   pcalau12i vreg1, %pc_hi20(s)
83700637cbSDimitry Andric //   addi.d    vreg2, vreg1, %pc_lo12(s)
84700637cbSDimitry Andric //
85700637cbSDimitry Andric // 2. (large):
86700637cbSDimitry Andric //   pcalau12i vreg1, %pc_hi20(s)
87700637cbSDimitry Andric //   addi.d    vreg2, $zero, %pc_lo12(s)
88700637cbSDimitry Andric //   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
89700637cbSDimitry Andric //   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
90700637cbSDimitry Andric //   add.d     vreg5, vreg4, vreg1
91700637cbSDimitry Andric 
92700637cbSDimitry Andric // The pattern is only accepted if:
93700637cbSDimitry Andric //    1) For small and medium pattern, the first instruction has only one use,
94700637cbSDimitry Andric //       which is the ADDI.
95700637cbSDimitry Andric //    2) For large pattern, the first four instructions each have only one use,
96700637cbSDimitry Andric //       and the user of the fourth instruction is ADD.
97700637cbSDimitry Andric //    3) The address operands have the appropriate type, reflecting the
98700637cbSDimitry Andric //       lowering of a global address or constant pool using the pattern.
99700637cbSDimitry Andric //    4) The offset value in the Global Address or Constant Pool is 0.
100700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
101700637cbSDimitry Andric                                                  MachineInstr *&Lo12,
102700637cbSDimitry Andric                                                  MachineInstr *&Lo20,
103700637cbSDimitry Andric                                                  MachineInstr *&Hi12,
104700637cbSDimitry Andric                                                  MachineInstr *&Last) {
105700637cbSDimitry Andric   if (Hi20.getOpcode() != LoongArch::PCALAU12I)
106700637cbSDimitry Andric     return false;
107700637cbSDimitry Andric 
108700637cbSDimitry Andric   const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
109700637cbSDimitry Andric   if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
110700637cbSDimitry Andric     return false;
111700637cbSDimitry Andric 
112700637cbSDimitry Andric   auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
113700637cbSDimitry Andric     return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
114700637cbSDimitry Andric   };
115700637cbSDimitry Andric 
116700637cbSDimitry Andric   if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
117700637cbSDimitry Andric     return false;
118700637cbSDimitry Andric 
119700637cbSDimitry Andric   Register HiDestReg = Hi20.getOperand(0).getReg();
120700637cbSDimitry Andric   if (!MRI->hasOneUse(HiDestReg))
121700637cbSDimitry Andric     return false;
122700637cbSDimitry Andric 
123700637cbSDimitry Andric   MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
124700637cbSDimitry Andric   if (UseInst->getOpcode() != LoongArch::ADD_D) {
125700637cbSDimitry Andric     Lo12 = UseInst;
126700637cbSDimitry Andric     if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
127700637cbSDimitry Andric         (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
128700637cbSDimitry Andric       return false;
129700637cbSDimitry Andric   } else {
130700637cbSDimitry Andric     assert(ST->is64Bit());
131700637cbSDimitry Andric     Last = UseInst;
132700637cbSDimitry Andric 
133700637cbSDimitry Andric     Register LastOp1Reg = Last->getOperand(1).getReg();
134700637cbSDimitry Andric     if (!LastOp1Reg.isVirtual())
135700637cbSDimitry Andric       return false;
136700637cbSDimitry Andric     Hi12 = MRI->getVRegDef(LastOp1Reg);
137700637cbSDimitry Andric     const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
138700637cbSDimitry Andric     if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
139700637cbSDimitry Andric       return false;
140700637cbSDimitry Andric     if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
141700637cbSDimitry Andric       return false;
142700637cbSDimitry Andric     if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
143700637cbSDimitry Andric       return false;
144700637cbSDimitry Andric 
145700637cbSDimitry Andric     Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
146700637cbSDimitry Andric     const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
147700637cbSDimitry Andric     if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
148700637cbSDimitry Andric       return false;
149700637cbSDimitry Andric     if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
150700637cbSDimitry Andric       return false;
151700637cbSDimitry Andric     if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
152700637cbSDimitry Andric       return false;
153700637cbSDimitry Andric 
154700637cbSDimitry Andric     Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
155700637cbSDimitry Andric     if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
156700637cbSDimitry Andric       return false;
157700637cbSDimitry Andric   }
158700637cbSDimitry Andric 
159700637cbSDimitry Andric   const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
160700637cbSDimitry Andric   assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
161700637cbSDimitry Andric   if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
162700637cbSDimitry Andric       !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
163700637cbSDimitry Andric       Lo12Op2.getOffset() != 0)
164700637cbSDimitry Andric     return false;
165700637cbSDimitry Andric 
166700637cbSDimitry Andric   if (Hi20Op1.isGlobal()) {
167700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Found lowered global address: "
168700637cbSDimitry Andric                       << *Hi20Op1.getGlobal() << "\n");
169700637cbSDimitry Andric   } else if (Hi20Op1.isBlockAddress()) {
170700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Found lowered basic address: "
171700637cbSDimitry Andric                       << *Hi20Op1.getBlockAddress() << "\n");
172700637cbSDimitry Andric   } else if (Hi20Op1.isCPI()) {
173700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
174700637cbSDimitry Andric                       << "\n");
175700637cbSDimitry Andric   }
176700637cbSDimitry Andric 
177700637cbSDimitry Andric   return true;
178700637cbSDimitry Andric }
179700637cbSDimitry Andric 
180700637cbSDimitry Andric // Detect the pattern:
181700637cbSDimitry Andric //
182700637cbSDimitry Andric // (small/medium):
183700637cbSDimitry Andric //   lu12i.w  vreg1, %le_hi20_r(s)
184700637cbSDimitry Andric //   add.w/d  vreg2, vreg1, r2, %le_add_r(s)
185700637cbSDimitry Andric //   addi.w/d vreg3, vreg2, %le_lo12_r(s)
186700637cbSDimitry Andric 
187700637cbSDimitry Andric // The pattern is only accepted if:
188700637cbSDimitry Andric //    1) The first instruction has only one use, which is the PseudoAddTPRel.
189700637cbSDimitry Andric //       The second instruction has only one use, which is the ADDI. The
190700637cbSDimitry Andric //       second instruction's last operand is the tp register.
191700637cbSDimitry Andric //    2) The address operands have the appropriate type, reflecting the
192700637cbSDimitry Andric //       lowering of a thread_local global address using the pattern.
193700637cbSDimitry Andric //    3) The offset value in the ThreadLocal Global Address is 0.
detectFoldable(MachineInstr & Hi20,MachineInstr * & Add,MachineInstr * & Lo12)194700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
195700637cbSDimitry Andric                                                  MachineInstr *&Add,
196700637cbSDimitry Andric                                                  MachineInstr *&Lo12) {
197700637cbSDimitry Andric   if (Hi20.getOpcode() != LoongArch::LU12I_W)
198700637cbSDimitry Andric     return false;
199700637cbSDimitry Andric 
200700637cbSDimitry Andric   auto isGlobalOrCPI = [](const MachineOperand &Op) {
201700637cbSDimitry Andric     return Op.isGlobal() || Op.isCPI();
202700637cbSDimitry Andric   };
203700637cbSDimitry Andric 
204700637cbSDimitry Andric   const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
205700637cbSDimitry Andric   if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
206700637cbSDimitry Andric       !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
207700637cbSDimitry Andric     return false;
208700637cbSDimitry Andric 
209700637cbSDimitry Andric   Register HiDestReg = Hi20.getOperand(0).getReg();
210700637cbSDimitry Andric   if (!MRI->hasOneUse(HiDestReg))
211700637cbSDimitry Andric     return false;
212700637cbSDimitry Andric 
213700637cbSDimitry Andric   Add = &*MRI->use_instr_begin(HiDestReg);
214700637cbSDimitry Andric   if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
215700637cbSDimitry Andric       (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
216700637cbSDimitry Andric     return false;
217700637cbSDimitry Andric 
218700637cbSDimitry Andric   if (Add->getOperand(2).getReg() != LoongArch::R2)
219700637cbSDimitry Andric     return false;
220700637cbSDimitry Andric 
221700637cbSDimitry Andric   const MachineOperand &AddOp3 = Add->getOperand(3);
222700637cbSDimitry Andric   if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
223700637cbSDimitry Andric       !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
224700637cbSDimitry Andric       AddOp3.getOffset() != 0)
225700637cbSDimitry Andric     return false;
226700637cbSDimitry Andric 
227700637cbSDimitry Andric   Register AddDestReg = Add->getOperand(0).getReg();
228700637cbSDimitry Andric   if (!MRI->hasOneUse(AddDestReg))
229700637cbSDimitry Andric     return false;
230700637cbSDimitry Andric 
231700637cbSDimitry Andric   Lo12 = &*MRI->use_instr_begin(AddDestReg);
232700637cbSDimitry Andric   if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
233700637cbSDimitry Andric       (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
234700637cbSDimitry Andric     return false;
235700637cbSDimitry Andric 
236700637cbSDimitry Andric   const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
237700637cbSDimitry Andric   if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
238700637cbSDimitry Andric       !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
239700637cbSDimitry Andric       Lo12Op2.getOffset() != 0)
240700637cbSDimitry Andric     return false;
241700637cbSDimitry Andric 
242700637cbSDimitry Andric   if (Hi20Op1.isGlobal()) {
243700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Found lowered global address: "
244700637cbSDimitry Andric                       << *Hi20Op1.getGlobal() << "\n");
245700637cbSDimitry Andric   } else if (Hi20Op1.isCPI()) {
246700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
247700637cbSDimitry Andric                       << "\n");
248700637cbSDimitry Andric   }
249700637cbSDimitry Andric 
250700637cbSDimitry Andric   return true;
251700637cbSDimitry Andric }
252700637cbSDimitry Andric 
253700637cbSDimitry Andric // Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
254700637cbSDimitry Andric // Delete the tail instruction and update all the uses to use the
255700637cbSDimitry Andric // output from Last.
foldOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last,MachineInstr & Tail,int64_t Offset)256700637cbSDimitry Andric void LoongArchMergeBaseOffsetOpt::foldOffset(
257700637cbSDimitry Andric     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
258700637cbSDimitry Andric     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
259700637cbSDimitry Andric     int64_t Offset) {
260700637cbSDimitry Andric   // Put the offset back in Hi and the Lo
261700637cbSDimitry Andric   Hi20.getOperand(1).setOffset(Offset);
262700637cbSDimitry Andric   Lo12.getOperand(2).setOffset(Offset);
263700637cbSDimitry Andric   if (Lo20 && Hi12) {
264700637cbSDimitry Andric     Lo20->getOperand(2).setOffset(Offset);
265700637cbSDimitry Andric     Hi12->getOperand(2).setOffset(Offset);
266700637cbSDimitry Andric   }
267700637cbSDimitry Andric 
268700637cbSDimitry Andric   // For tls-le, offset of the second PseudoAddTPRel instr should also be
269700637cbSDimitry Andric   // updated.
270700637cbSDimitry Andric   MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
271700637cbSDimitry Andric   if (Hi20.getOpcode() == LoongArch::LU12I_W)
272700637cbSDimitry Andric     Add->getOperand(3).setOffset(Offset);
273700637cbSDimitry Andric 
274700637cbSDimitry Andric   // Delete the tail instruction.
275700637cbSDimitry Andric   MachineInstr *Def = Last ? Last : &Lo12;
276700637cbSDimitry Andric   MRI->constrainRegClass(Def->getOperand(0).getReg(),
277700637cbSDimitry Andric                          MRI->getRegClass(Tail.getOperand(0).getReg()));
278700637cbSDimitry Andric   MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
279700637cbSDimitry Andric   Tail.eraseFromParent();
280700637cbSDimitry Andric 
281700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
282700637cbSDimitry Andric                     << "     " << Hi20;);
283700637cbSDimitry Andric   if (Hi20.getOpcode() == LoongArch::LU12I_W) {
284700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "     " << *Add;);
285700637cbSDimitry Andric   }
286700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "     " << Lo12;);
287700637cbSDimitry Andric   if (Lo20 && Hi12) {
288700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "     " << *Lo20 << "     " << *Hi12;);
289700637cbSDimitry Andric   }
290700637cbSDimitry Andric }
291700637cbSDimitry Andric 
292700637cbSDimitry Andric // Detect patterns for large offsets that are passed into an ADD instruction.
293700637cbSDimitry Andric // If the pattern is found, updates the offset in Hi20, (Add), Lo12,
294700637cbSDimitry Andric // (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
295700637cbSDimitry Andric // produced the offset.
296700637cbSDimitry Andric //
297700637cbSDimitry Andric //   (The instructions marked with "!" are not necessarily present)
298700637cbSDimitry Andric //
299700637cbSDimitry Andric //        Base address lowering is of the form:
300700637cbSDimitry Andric //           1) pcala:
301700637cbSDimitry Andric //             Hi20:  pcalau12i vreg1, %pc_hi20(s)
302700637cbSDimitry Andric //        +--- Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
303700637cbSDimitry Andric //        |    Lo20:  lu32i.d vreg2, %pc64_lo20(s) !
304700637cbSDimitry Andric //        +--- Hi12:  lu52i.d vreg2, vreg2, %pc64_hi12(s) !
305700637cbSDimitry Andric //        |
306700637cbSDimitry Andric //        |  2) tls-le:
307700637cbSDimitry Andric //        |    Hi20:  lu12i.w vreg1, %le_hi20_r(s)
308700637cbSDimitry Andric //        |    Add:   add.w/d vreg1, vreg1, r2, %le_add_r(s)
309700637cbSDimitry Andric //        +--- Lo12:  addi.w/d vreg2, vreg1, %le_lo12_r(s)
310700637cbSDimitry Andric //        |
311700637cbSDimitry Andric //        | The large offset can be one of the forms:
312700637cbSDimitry Andric //        |
313700637cbSDimitry Andric //        +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
314700637cbSDimitry Andric //        |     OffsetHi20: lu12i.w vreg3, 4
315700637cbSDimitry Andric //        |     OffsetLo12: ori voff, vreg3, 188    ------------------+
316700637cbSDimitry Andric //        |                                                           |
317700637cbSDimitry Andric //        +-> 2) Offset that has non zero bits in Hi20 bits only:     |
318700637cbSDimitry Andric //        |     OffsetHi20: lu12i.w voff, 128       ------------------+
319700637cbSDimitry Andric //        |                                                           |
320700637cbSDimitry Andric //        +-> 3) Offset that has non zero bits in Lo20 bits:          |
321700637cbSDimitry Andric //        |     OffsetHi20: lu12i.w vreg3, 121 !                      |
322700637cbSDimitry Andric //        |     OffsetLo12: ori voff, vreg3, 122 !                    |
323700637cbSDimitry Andric //        |     OffsetLo20: lu32i.d voff, 123       ------------------+
324700637cbSDimitry Andric //        +-> 4) Offset that has non zero bits in Hi12 bits:          |
325700637cbSDimitry Andric //              OffsetHi20: lu12i.w vreg3, 121 !                      |
326700637cbSDimitry Andric //              OffsetLo12: ori voff, vreg3, 122 !                    |
327700637cbSDimitry Andric //              OffsetLo20: lu32i.d vreg3, 123 !                      |
328700637cbSDimitry Andric //              OffsetHi12: lu52i.d voff, vrg3, 124 ------------------+
329700637cbSDimitry Andric //                                                                    |
330700637cbSDimitry Andric //        TailAdd: add.d  vreg4, vreg2, voff       <------------------+
331700637cbSDimitry Andric //
foldLargeOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last,MachineInstr & TailAdd,Register GAReg)332700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
333700637cbSDimitry Andric     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
334700637cbSDimitry Andric     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
335700637cbSDimitry Andric     Register GAReg) {
336700637cbSDimitry Andric   assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
337700637cbSDimitry Andric           TailAdd.getOpcode() == LoongArch::ADD_D) &&
338700637cbSDimitry Andric          "Expected ADD instruction!");
339700637cbSDimitry Andric   Register Rs = TailAdd.getOperand(1).getReg();
340700637cbSDimitry Andric   Register Rt = TailAdd.getOperand(2).getReg();
341700637cbSDimitry Andric   Register Reg = Rs == GAReg ? Rt : Rs;
342700637cbSDimitry Andric   SmallVector<MachineInstr *, 4> Instrs;
343700637cbSDimitry Andric   int64_t Offset = 0;
344700637cbSDimitry Andric   int64_t Mask = -1;
345700637cbSDimitry Andric 
346700637cbSDimitry Andric   // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
347700637cbSDimitry Andric   for (int i = 0; i < 4; i++) {
348700637cbSDimitry Andric     // Handle Reg is R0.
349700637cbSDimitry Andric     if (Reg == LoongArch::R0)
350700637cbSDimitry Andric       break;
351700637cbSDimitry Andric 
352700637cbSDimitry Andric     // Can't fold if the register has more than one use.
353700637cbSDimitry Andric     if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
354700637cbSDimitry Andric       return false;
355700637cbSDimitry Andric 
356700637cbSDimitry Andric     MachineInstr *Curr = MRI->getVRegDef(Reg);
357700637cbSDimitry Andric     if (!Curr)
358700637cbSDimitry Andric       break;
359700637cbSDimitry Andric 
360700637cbSDimitry Andric     switch (Curr->getOpcode()) {
361700637cbSDimitry Andric     default:
362700637cbSDimitry Andric       // Can't fold if the instruction opcode is unexpected.
363700637cbSDimitry Andric       return false;
364700637cbSDimitry Andric     case LoongArch::ORI: {
365700637cbSDimitry Andric       MachineOperand ImmOp = Curr->getOperand(2);
366700637cbSDimitry Andric       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
367700637cbSDimitry Andric         return false;
368700637cbSDimitry Andric       Offset += ImmOp.getImm();
369700637cbSDimitry Andric       Reg = Curr->getOperand(1).getReg();
370700637cbSDimitry Andric       Instrs.push_back(Curr);
371700637cbSDimitry Andric       break;
372700637cbSDimitry Andric     }
373700637cbSDimitry Andric     case LoongArch::LU12I_W: {
374700637cbSDimitry Andric       MachineOperand ImmOp = Curr->getOperand(1);
375700637cbSDimitry Andric       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
376700637cbSDimitry Andric         return false;
377700637cbSDimitry Andric       Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
378700637cbSDimitry Andric       Reg = LoongArch::R0;
379700637cbSDimitry Andric       Instrs.push_back(Curr);
380700637cbSDimitry Andric       break;
381700637cbSDimitry Andric     }
382700637cbSDimitry Andric     case LoongArch::LU32I_D: {
383700637cbSDimitry Andric       MachineOperand ImmOp = Curr->getOperand(2);
384700637cbSDimitry Andric       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
385700637cbSDimitry Andric         return false;
386700637cbSDimitry Andric       Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
387700637cbSDimitry Andric       Mask ^= 0x000FFFFF00000000ULL;
388700637cbSDimitry Andric       Reg = Curr->getOperand(1).getReg();
389700637cbSDimitry Andric       Instrs.push_back(Curr);
390700637cbSDimitry Andric       break;
391700637cbSDimitry Andric     }
392700637cbSDimitry Andric     case LoongArch::LU52I_D: {
393700637cbSDimitry Andric       MachineOperand ImmOp = Curr->getOperand(2);
394700637cbSDimitry Andric       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
395700637cbSDimitry Andric         return false;
396700637cbSDimitry Andric       Offset += ImmOp.getImm() << 52;
397700637cbSDimitry Andric       Mask ^= 0xFFF0000000000000ULL;
398700637cbSDimitry Andric       Reg = Curr->getOperand(1).getReg();
399700637cbSDimitry Andric       Instrs.push_back(Curr);
400700637cbSDimitry Andric       break;
401700637cbSDimitry Andric     }
402700637cbSDimitry Andric     }
403700637cbSDimitry Andric   }
404700637cbSDimitry Andric 
405700637cbSDimitry Andric   // Can't fold if the offset is not extracted.
406700637cbSDimitry Andric   if (!Offset)
407700637cbSDimitry Andric     return false;
408700637cbSDimitry Andric 
409700637cbSDimitry Andric   foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
410700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "  Offset Instrs:\n");
411700637cbSDimitry Andric   for (auto I : Instrs) {
412700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "                 " << *I);
413700637cbSDimitry Andric     I->eraseFromParent();
414700637cbSDimitry Andric   }
415700637cbSDimitry Andric 
416700637cbSDimitry Andric   return true;
417700637cbSDimitry Andric }
418700637cbSDimitry Andric 
detectAndFoldOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last)419700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
420700637cbSDimitry Andric                                                       MachineInstr &Lo12,
421700637cbSDimitry Andric                                                       MachineInstr *&Lo20,
422700637cbSDimitry Andric                                                       MachineInstr *&Hi12,
423700637cbSDimitry Andric                                                       MachineInstr *&Last) {
424700637cbSDimitry Andric   Register DestReg =
425700637cbSDimitry Andric       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
426700637cbSDimitry Andric 
427700637cbSDimitry Andric   // Look for arithmetic instructions we can get an offset from.
428700637cbSDimitry Andric   // We might be able to remove the arithmetic instructions by folding the
429700637cbSDimitry Andric   // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
430700637cbSDimitry Andric   // LU12I_W+PseudoAddTPRel+ADDI.
431700637cbSDimitry Andric   if (!MRI->hasOneUse(DestReg))
432700637cbSDimitry Andric     return false;
433700637cbSDimitry Andric 
434700637cbSDimitry Andric   // DestReg has only one use.
435700637cbSDimitry Andric   MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
436700637cbSDimitry Andric   switch (Tail.getOpcode()) {
437700637cbSDimitry Andric   default:
438700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
439700637cbSDimitry Andric                       << Tail);
440700637cbSDimitry Andric     break;
441700637cbSDimitry Andric   case LoongArch::ADDI_W:
442700637cbSDimitry Andric     if (ST->is64Bit())
443700637cbSDimitry Andric       return false;
444700637cbSDimitry Andric     [[fallthrough]];
445700637cbSDimitry Andric   case LoongArch::ADDI_D:
446700637cbSDimitry Andric   case LoongArch::ADDU16I_D: {
447700637cbSDimitry Andric     // Offset is simply an immediate operand.
448700637cbSDimitry Andric     int64_t Offset = Tail.getOperand(2).getImm();
449700637cbSDimitry Andric     if (Tail.getOpcode() == LoongArch::ADDU16I_D)
450700637cbSDimitry Andric       Offset = SignExtend64<32>(Offset << 16);
451700637cbSDimitry Andric 
452700637cbSDimitry Andric     // We might have two ADDIs in a row.
453700637cbSDimitry Andric     Register TailDestReg = Tail.getOperand(0).getReg();
454700637cbSDimitry Andric     if (MRI->hasOneUse(TailDestReg)) {
455700637cbSDimitry Andric       MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
456700637cbSDimitry Andric       if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
457700637cbSDimitry Andric         return false;
458700637cbSDimitry Andric       if (TailTail.getOpcode() == LoongArch::ADDI_W ||
459700637cbSDimitry Andric           TailTail.getOpcode() == LoongArch::ADDI_D) {
460700637cbSDimitry Andric         Offset += TailTail.getOperand(2).getImm();
461700637cbSDimitry Andric         LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
462700637cbSDimitry Andric         foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
463700637cbSDimitry Andric         Tail.eraseFromParent();
464700637cbSDimitry Andric         return true;
465700637cbSDimitry Andric       }
466700637cbSDimitry Andric     }
467700637cbSDimitry Andric 
468700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
469700637cbSDimitry Andric     foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
470700637cbSDimitry Andric     return true;
471700637cbSDimitry Andric   }
472700637cbSDimitry Andric   case LoongArch::ADD_W:
473700637cbSDimitry Andric     if (ST->is64Bit())
474700637cbSDimitry Andric       return false;
475700637cbSDimitry Andric     [[fallthrough]];
476700637cbSDimitry Andric   case LoongArch::ADD_D:
477700637cbSDimitry Andric     // The offset is too large to fit in the immediate field of ADDI.
478700637cbSDimitry Andric     return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
479700637cbSDimitry Andric     break;
480700637cbSDimitry Andric   }
481700637cbSDimitry Andric 
482700637cbSDimitry Andric   return false;
483700637cbSDimitry Andric }
484700637cbSDimitry Andric 
485700637cbSDimitry Andric // Memory access opcode mapping for transforms.
getNewOpc(unsigned Op,bool isLarge)486700637cbSDimitry Andric static unsigned getNewOpc(unsigned Op, bool isLarge) {
487700637cbSDimitry Andric   switch (Op) {
488700637cbSDimitry Andric   case LoongArch::LD_B:
489700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
490700637cbSDimitry Andric   case LoongArch::LD_H:
491700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
492700637cbSDimitry Andric   case LoongArch::LD_W:
493700637cbSDimitry Andric   case LoongArch::LDPTR_W:
494700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
495700637cbSDimitry Andric   case LoongArch::LD_D:
496700637cbSDimitry Andric   case LoongArch::LDPTR_D:
497700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
498700637cbSDimitry Andric   case LoongArch::LD_BU:
499700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
500700637cbSDimitry Andric   case LoongArch::LD_HU:
501700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
502700637cbSDimitry Andric   case LoongArch::LD_WU:
503700637cbSDimitry Andric     return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
504700637cbSDimitry Andric   case LoongArch::FLD_S:
505700637cbSDimitry Andric     return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
506700637cbSDimitry Andric   case LoongArch::FLD_D:
507700637cbSDimitry Andric     return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
508700637cbSDimitry Andric   case LoongArch::VLD:
509700637cbSDimitry Andric     return isLarge ? LoongArch::VLDX : LoongArch::VLD;
510700637cbSDimitry Andric   case LoongArch::XVLD:
511700637cbSDimitry Andric     return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
512700637cbSDimitry Andric   case LoongArch::VLDREPL_B:
513700637cbSDimitry Andric     return LoongArch::VLDREPL_B;
514700637cbSDimitry Andric   case LoongArch::XVLDREPL_B:
515700637cbSDimitry Andric     return LoongArch::XVLDREPL_B;
516700637cbSDimitry Andric   case LoongArch::ST_B:
517700637cbSDimitry Andric     return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
518700637cbSDimitry Andric   case LoongArch::ST_H:
519700637cbSDimitry Andric     return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
520700637cbSDimitry Andric   case LoongArch::ST_W:
521700637cbSDimitry Andric   case LoongArch::STPTR_W:
522700637cbSDimitry Andric     return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
523700637cbSDimitry Andric   case LoongArch::ST_D:
524700637cbSDimitry Andric   case LoongArch::STPTR_D:
525700637cbSDimitry Andric     return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
526700637cbSDimitry Andric   case LoongArch::FST_S:
527700637cbSDimitry Andric     return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
528700637cbSDimitry Andric   case LoongArch::FST_D:
529700637cbSDimitry Andric     return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
530700637cbSDimitry Andric   case LoongArch::VST:
531700637cbSDimitry Andric     return isLarge ? LoongArch::VSTX : LoongArch::VST;
532700637cbSDimitry Andric   case LoongArch::XVST:
533700637cbSDimitry Andric     return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
534700637cbSDimitry Andric   default:
535700637cbSDimitry Andric     llvm_unreachable("Unexpected opcode for replacement");
536700637cbSDimitry Andric   }
537700637cbSDimitry Andric }
538700637cbSDimitry Andric 
foldIntoMemoryOps(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last)539700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
540700637cbSDimitry Andric                                                     MachineInstr &Lo12,
541700637cbSDimitry Andric                                                     MachineInstr *&Lo20,
542700637cbSDimitry Andric                                                     MachineInstr *&Hi12,
543700637cbSDimitry Andric                                                     MachineInstr *&Last) {
544700637cbSDimitry Andric   Register DestReg =
545700637cbSDimitry Andric       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
546700637cbSDimitry Andric 
547700637cbSDimitry Andric   // If all the uses are memory ops with the same offset, we can transform:
548700637cbSDimitry Andric   //
549700637cbSDimitry Andric   // 1. (small/medium):
550700637cbSDimitry Andric   //  1.1. pcala
551700637cbSDimitry Andric   //   pcalau12i vreg1, %pc_hi20(s)
552700637cbSDimitry Andric   //   addi.d    vreg2, vreg1, %pc_lo12(s)
553700637cbSDimitry Andric   //   ld.w      vreg3, 8(vreg2)
554700637cbSDimitry Andric   //
555700637cbSDimitry Andric   //   =>
556700637cbSDimitry Andric   //
557700637cbSDimitry Andric   //   pcalau12i vreg1, %pc_hi20(s+8)
558700637cbSDimitry Andric   //   ld.w      vreg3, vreg1, %pc_lo12(s+8)(vreg1)
559700637cbSDimitry Andric   //
560700637cbSDimitry Andric   //  1.2. tls-le
561700637cbSDimitry Andric   //   lu12i.w  vreg1, %le_hi20_r(s)
562700637cbSDimitry Andric   //   add.w/d  vreg2, vreg1, r2, %le_add_r(s)
563700637cbSDimitry Andric   //   addi.w/d vreg3, vreg2, %le_lo12_r(s)
564700637cbSDimitry Andric   //   ld.w     vreg4, 8(vreg3)
565700637cbSDimitry Andric   //
566700637cbSDimitry Andric   //   =>
567700637cbSDimitry Andric   //
568700637cbSDimitry Andric   //   lu12i.w vreg1, %le_hi20_r(s+8)
569700637cbSDimitry Andric   //   add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
570700637cbSDimitry Andric   //   ld.w    vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
571700637cbSDimitry Andric   //
572700637cbSDimitry Andric   // 2. (large):
573700637cbSDimitry Andric   //   pcalau12i vreg1, %pc_hi20(s)
574700637cbSDimitry Andric   //   addi.d    vreg2, $zero, %pc_lo12(s)
575700637cbSDimitry Andric   //   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
576700637cbSDimitry Andric   //   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
577700637cbSDimitry Andric   //   add.d     vreg5, vreg4, vreg1
578700637cbSDimitry Andric   //   ld.w      vreg6, 8(vreg5)
579700637cbSDimitry Andric   //
580700637cbSDimitry Andric   //   =>
581700637cbSDimitry Andric   //
582700637cbSDimitry Andric   //   pcalau12i vreg1, %pc_hi20(s+8)
583700637cbSDimitry Andric   //   addi.d    vreg2, $zero, %pc_lo12(s+8)
584700637cbSDimitry Andric   //   lu32i.d   vreg3, vreg2, %pc64_lo20(s+8)
585700637cbSDimitry Andric   //   lu52i.d   vreg4, vreg3, %pc64_hi12(s+8)
586700637cbSDimitry Andric   //   ldx.w     vreg6, vreg4, vreg1
587700637cbSDimitry Andric 
588700637cbSDimitry Andric   std::optional<int64_t> CommonOffset;
589700637cbSDimitry Andric   DenseMap<const MachineInstr *, SmallVector<unsigned>>
590700637cbSDimitry Andric       InlineAsmMemoryOpIndexesMap;
591700637cbSDimitry Andric   for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
592700637cbSDimitry Andric     switch (UseMI.getOpcode()) {
593700637cbSDimitry Andric     default:
594700637cbSDimitry Andric       LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
595700637cbSDimitry Andric       return false;
596700637cbSDimitry Andric     case LoongArch::VLDREPL_B:
597700637cbSDimitry Andric     case LoongArch::XVLDREPL_B:
598700637cbSDimitry Andric       // We can't do this for large pattern.
599700637cbSDimitry Andric       if (Last)
600700637cbSDimitry Andric         return false;
601700637cbSDimitry Andric       [[fallthrough]];
602700637cbSDimitry Andric     case LoongArch::LD_B:
603700637cbSDimitry Andric     case LoongArch::LD_H:
604700637cbSDimitry Andric     case LoongArch::LD_W:
605700637cbSDimitry Andric     case LoongArch::LD_D:
606700637cbSDimitry Andric     case LoongArch::LD_BU:
607700637cbSDimitry Andric     case LoongArch::LD_HU:
608700637cbSDimitry Andric     case LoongArch::LD_WU:
609700637cbSDimitry Andric     case LoongArch::LDPTR_W:
610700637cbSDimitry Andric     case LoongArch::LDPTR_D:
611700637cbSDimitry Andric     case LoongArch::FLD_S:
612700637cbSDimitry Andric     case LoongArch::FLD_D:
613700637cbSDimitry Andric     case LoongArch::VLD:
614700637cbSDimitry Andric     case LoongArch::XVLD:
615700637cbSDimitry Andric     case LoongArch::ST_B:
616700637cbSDimitry Andric     case LoongArch::ST_H:
617700637cbSDimitry Andric     case LoongArch::ST_W:
618700637cbSDimitry Andric     case LoongArch::ST_D:
619700637cbSDimitry Andric     case LoongArch::STPTR_W:
620700637cbSDimitry Andric     case LoongArch::STPTR_D:
621700637cbSDimitry Andric     case LoongArch::FST_S:
622700637cbSDimitry Andric     case LoongArch::FST_D:
623700637cbSDimitry Andric     case LoongArch::VST:
624700637cbSDimitry Andric     case LoongArch::XVST: {
625700637cbSDimitry Andric       if (UseMI.getOperand(1).isFI())
626700637cbSDimitry Andric         return false;
627700637cbSDimitry Andric       // Register defined by Lo should not be the value register.
628700637cbSDimitry Andric       if (DestReg == UseMI.getOperand(0).getReg())
629700637cbSDimitry Andric         return false;
630700637cbSDimitry Andric       assert(DestReg == UseMI.getOperand(1).getReg() &&
631700637cbSDimitry Andric              "Expected base address use");
632700637cbSDimitry Andric       // All load/store instructions must use the same offset.
633700637cbSDimitry Andric       int64_t Offset = UseMI.getOperand(2).getImm();
634700637cbSDimitry Andric       if (CommonOffset && Offset != CommonOffset)
635700637cbSDimitry Andric         return false;
636700637cbSDimitry Andric       CommonOffset = Offset;
637700637cbSDimitry Andric       break;
638700637cbSDimitry Andric     }
639700637cbSDimitry Andric     case LoongArch::INLINEASM:
640700637cbSDimitry Andric     case LoongArch::INLINEASM_BR: {
641700637cbSDimitry Andric       // We can't do this for large pattern.
642700637cbSDimitry Andric       if (Last)
643700637cbSDimitry Andric         return false;
644700637cbSDimitry Andric       SmallVector<unsigned> InlineAsmMemoryOpIndexes;
645700637cbSDimitry Andric       unsigned NumOps = 0;
646700637cbSDimitry Andric       for (unsigned I = InlineAsm::MIOp_FirstOperand;
647700637cbSDimitry Andric            I < UseMI.getNumOperands(); I += 1 + NumOps) {
648700637cbSDimitry Andric         const MachineOperand &FlagsMO = UseMI.getOperand(I);
649700637cbSDimitry Andric         // Should be an imm.
650700637cbSDimitry Andric         if (!FlagsMO.isImm())
651700637cbSDimitry Andric           continue;
652700637cbSDimitry Andric 
653700637cbSDimitry Andric         const InlineAsm::Flag Flags(FlagsMO.getImm());
654700637cbSDimitry Andric         NumOps = Flags.getNumOperandRegisters();
655700637cbSDimitry Andric 
656700637cbSDimitry Andric         // Memory constraints have two operands.
657700637cbSDimitry Andric         if (NumOps != 2 || !Flags.isMemKind()) {
658700637cbSDimitry Andric           // If the register is used by something other than a memory contraint,
659700637cbSDimitry Andric           // we should not fold.
660700637cbSDimitry Andric           for (unsigned J = 0; J < NumOps; ++J) {
661700637cbSDimitry Andric             const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
662700637cbSDimitry Andric             if (MO.isReg() && MO.getReg() == DestReg)
663700637cbSDimitry Andric               return false;
664700637cbSDimitry Andric           }
665700637cbSDimitry Andric           continue;
666700637cbSDimitry Andric         }
667700637cbSDimitry Andric 
668700637cbSDimitry Andric         // We can only do this for constraint m.
669700637cbSDimitry Andric         if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
670700637cbSDimitry Andric           return false;
671700637cbSDimitry Andric 
672700637cbSDimitry Andric         const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
673700637cbSDimitry Andric         if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
674700637cbSDimitry Andric           continue;
675700637cbSDimitry Andric 
676700637cbSDimitry Andric         const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
677700637cbSDimitry Andric         if (!OffsetMO.isImm())
678700637cbSDimitry Andric           continue;
679700637cbSDimitry Andric 
680700637cbSDimitry Andric         // All inline asm memory operands must use the same offset.
681700637cbSDimitry Andric         int64_t Offset = OffsetMO.getImm();
682700637cbSDimitry Andric         if (CommonOffset && Offset != CommonOffset)
683700637cbSDimitry Andric           return false;
684700637cbSDimitry Andric         CommonOffset = Offset;
685700637cbSDimitry Andric         InlineAsmMemoryOpIndexes.push_back(I + 1);
686700637cbSDimitry Andric       }
687700637cbSDimitry Andric       InlineAsmMemoryOpIndexesMap.insert(
688700637cbSDimitry Andric           std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
689700637cbSDimitry Andric       break;
690700637cbSDimitry Andric     }
691700637cbSDimitry Andric     }
692700637cbSDimitry Andric   }
693700637cbSDimitry Andric 
694700637cbSDimitry Andric   // We found a common offset.
695700637cbSDimitry Andric   // Update the offsets in global address lowering.
696700637cbSDimitry Andric   // We may have already folded some arithmetic so we need to add to any
697700637cbSDimitry Andric   // existing offset.
698700637cbSDimitry Andric   int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
699700637cbSDimitry Andric   // LA32 ignores the upper 32 bits.
700700637cbSDimitry Andric   if (!ST->is64Bit())
701700637cbSDimitry Andric     NewOffset = SignExtend64<32>(NewOffset);
702700637cbSDimitry Andric   // We can only fold simm32 offsets.
703700637cbSDimitry Andric   if (!isInt<32>(NewOffset))
704700637cbSDimitry Andric     return false;
705700637cbSDimitry Andric 
706700637cbSDimitry Andric   // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
707700637cbSDimitry Andric   // be removed from the pcala code sequence. Code sequence of tls-le can still
708700637cbSDimitry Andric   // be relaxed after being optimized.
709700637cbSDimitry Andric   //
710700637cbSDimitry Andric   // For example:
711700637cbSDimitry Andric   //   pcalau12i $a0, %pc_hi20(symbol)
712700637cbSDimitry Andric   //   addi.d $a0, $a0, %pc_lo12(symbol)
713700637cbSDimitry Andric   //   ld.w $a0, $a0, 0
714700637cbSDimitry Andric   //
715700637cbSDimitry Andric   //   =>
716700637cbSDimitry Andric   //
717700637cbSDimitry Andric   //   pcalau12i $a0, %pc_hi20(symbol)
718700637cbSDimitry Andric   //   ld.w $a0, $a0, %pc_lo12(symbol)
719700637cbSDimitry Andric   //
720700637cbSDimitry Andric   // Code sequence optimized before can be relax by linker. But after being
721700637cbSDimitry Andric   // optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
722700637cbSDimitry Andric   // carried by them.
723700637cbSDimitry Andric   Hi20.getOperand(1).setOffset(NewOffset);
724700637cbSDimitry Andric   MachineOperand &ImmOp = Lo12.getOperand(2);
725700637cbSDimitry Andric   ImmOp.setOffset(NewOffset);
726700637cbSDimitry Andric   if (Lo20 && Hi12) {
727700637cbSDimitry Andric     Lo20->getOperand(2).setOffset(NewOffset);
728700637cbSDimitry Andric     Hi12->getOperand(2).setOffset(NewOffset);
729700637cbSDimitry Andric   }
730700637cbSDimitry Andric   if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
731700637cbSDimitry Andric     Hi20.getOperand(1).setTargetFlags(
732700637cbSDimitry Andric         LoongArchII::getDirectFlags(Hi20.getOperand(1)));
733700637cbSDimitry Andric     ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
734700637cbSDimitry Andric   } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
735700637cbSDimitry Andric     MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
736700637cbSDimitry Andric     Add->getOperand(3).setOffset(NewOffset);
737700637cbSDimitry Andric   }
738700637cbSDimitry Andric 
739700637cbSDimitry Andric   // Update the immediate in the load/store instructions to add the offset.
740700637cbSDimitry Andric   const LoongArchInstrInfo &TII = *ST->getInstrInfo();
741700637cbSDimitry Andric   for (MachineInstr &UseMI :
742700637cbSDimitry Andric        llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
743700637cbSDimitry Andric     if (UseMI.getOpcode() == LoongArch::INLINEASM ||
744700637cbSDimitry Andric         UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
745700637cbSDimitry Andric       auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
746700637cbSDimitry Andric       for (unsigned I : InlineAsmMemoryOpIndexes) {
747700637cbSDimitry Andric         MachineOperand &MO = UseMI.getOperand(I + 1);
748700637cbSDimitry Andric         switch (ImmOp.getType()) {
749700637cbSDimitry Andric         case MachineOperand::MO_GlobalAddress:
750700637cbSDimitry Andric           MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
751700637cbSDimitry Andric                         LoongArchII::getDirectFlags(ImmOp));
752700637cbSDimitry Andric           break;
753700637cbSDimitry Andric         case MachineOperand::MO_MCSymbol:
754700637cbSDimitry Andric           MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
755700637cbSDimitry Andric                               LoongArchII::getDirectFlags(ImmOp));
756700637cbSDimitry Andric           MO.setOffset(ImmOp.getOffset());
757700637cbSDimitry Andric           break;
758700637cbSDimitry Andric         case MachineOperand::MO_BlockAddress:
759700637cbSDimitry Andric           MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
760700637cbSDimitry Andric                         LoongArchII::getDirectFlags(ImmOp));
761700637cbSDimitry Andric           break;
762*e64bea71SDimitry Andric         case MachineOperand::MO_ConstantPoolIndex:
763*e64bea71SDimitry Andric           MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(),
764*e64bea71SDimitry Andric                          LoongArchII::getDirectFlags(ImmOp));
765*e64bea71SDimitry Andric           break;
766700637cbSDimitry Andric         default:
767700637cbSDimitry Andric           report_fatal_error("unsupported machine operand type");
768700637cbSDimitry Andric           break;
769700637cbSDimitry Andric         }
770700637cbSDimitry Andric       }
771700637cbSDimitry Andric     } else {
772700637cbSDimitry Andric       UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
773700637cbSDimitry Andric       if (Last) {
774700637cbSDimitry Andric         UseMI.removeOperand(2);
775700637cbSDimitry Andric         UseMI.removeOperand(1);
776700637cbSDimitry Andric         UseMI.addOperand(Last->getOperand(1));
777700637cbSDimitry Andric         UseMI.addOperand(Last->getOperand(2));
778700637cbSDimitry Andric         UseMI.getOperand(1).setIsKill(false);
779700637cbSDimitry Andric         UseMI.getOperand(2).setIsKill(false);
780700637cbSDimitry Andric       } else {
781700637cbSDimitry Andric         UseMI.removeOperand(2);
782700637cbSDimitry Andric         UseMI.addOperand(ImmOp);
783700637cbSDimitry Andric       }
784700637cbSDimitry Andric     }
785700637cbSDimitry Andric   }
786700637cbSDimitry Andric 
787700637cbSDimitry Andric   if (Last) {
788700637cbSDimitry Andric     Last->eraseFromParent();
789700637cbSDimitry Andric     return true;
790700637cbSDimitry Andric   }
791700637cbSDimitry Andric 
792700637cbSDimitry Andric   if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
793700637cbSDimitry Andric     MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
794700637cbSDimitry Andric                         Hi20.getOperand(0).getReg());
795700637cbSDimitry Andric   } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
796700637cbSDimitry Andric     MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
797700637cbSDimitry Andric     MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
798700637cbSDimitry Andric                         Add->getOperand(0).getReg());
799700637cbSDimitry Andric   }
800700637cbSDimitry Andric   Lo12.eraseFromParent();
801700637cbSDimitry Andric   return true;
802700637cbSDimitry Andric }
803700637cbSDimitry Andric 
runOnMachineFunction(MachineFunction & Fn)804700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
805700637cbSDimitry Andric   if (skipFunction(Fn.getFunction()))
806700637cbSDimitry Andric     return false;
807700637cbSDimitry Andric 
808700637cbSDimitry Andric   ST = &Fn.getSubtarget<LoongArchSubtarget>();
809700637cbSDimitry Andric 
810700637cbSDimitry Andric   bool MadeChange = false;
811700637cbSDimitry Andric   MRI = &Fn.getRegInfo();
812700637cbSDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
813700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
814700637cbSDimitry Andric     for (MachineInstr &Hi20 : MBB) {
815700637cbSDimitry Andric       MachineInstr *Lo12 = nullptr;
816700637cbSDimitry Andric       MachineInstr *Lo20 = nullptr;
817700637cbSDimitry Andric       MachineInstr *Hi12 = nullptr;
818700637cbSDimitry Andric       MachineInstr *Last = nullptr;
819700637cbSDimitry Andric       if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
820700637cbSDimitry Andric         // Detect foldable pcala code sequence in small/medium/large code model.
821700637cbSDimitry Andric         if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
822700637cbSDimitry Andric           continue;
823700637cbSDimitry Andric       } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
824700637cbSDimitry Andric         MachineInstr *Add = nullptr;
825700637cbSDimitry Andric         // Detect foldable tls-le code sequence in small/medium code model.
826700637cbSDimitry Andric         if (!detectFoldable(Hi20, Add, Lo12))
827700637cbSDimitry Andric           continue;
828700637cbSDimitry Andric       } else {
829700637cbSDimitry Andric         continue;
830700637cbSDimitry Andric       }
831700637cbSDimitry Andric       // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
832700637cbSDimitry Andric       // reuse the existing hooks and the last three paramaters should always be
833700637cbSDimitry Andric       // nullptr.
834700637cbSDimitry Andric       MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
835700637cbSDimitry Andric       MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
836700637cbSDimitry Andric     }
837700637cbSDimitry Andric   }
838700637cbSDimitry Andric 
839700637cbSDimitry Andric   return MadeChange;
840700637cbSDimitry Andric }
841700637cbSDimitry Andric 
842700637cbSDimitry Andric /// Returns an instance of the Merge Base Offset Optimization pass.
createLoongArchMergeBaseOffsetOptPass()843700637cbSDimitry Andric FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
844700637cbSDimitry Andric   return new LoongArchMergeBaseOffsetOpt();
845700637cbSDimitry Andric }
846