//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//
13700637cbSDimitry Andric
14700637cbSDimitry Andric #include "LoongArch.h"
15700637cbSDimitry Andric #include "LoongArchTargetMachine.h"
16700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
17700637cbSDimitry Andric #include "llvm/CodeGen/Passes.h"
18700637cbSDimitry Andric #include "llvm/MC/TargetRegistry.h"
19700637cbSDimitry Andric #include "llvm/Support/Debug.h"
20700637cbSDimitry Andric #include "llvm/Target/TargetOptions.h"
21700637cbSDimitry Andric #include <optional>
22700637cbSDimitry Andric
23700637cbSDimitry Andric using namespace llvm;
24700637cbSDimitry Andric
25700637cbSDimitry Andric #define DEBUG_TYPE "loongarch-merge-base-offset"
26700637cbSDimitry Andric #define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27700637cbSDimitry Andric
28700637cbSDimitry Andric namespace {
29700637cbSDimitry Andric
30700637cbSDimitry Andric class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31700637cbSDimitry Andric const LoongArchSubtarget *ST = nullptr;
32700637cbSDimitry Andric MachineRegisterInfo *MRI;
33700637cbSDimitry Andric
34700637cbSDimitry Andric public:
35700637cbSDimitry Andric static char ID;
36700637cbSDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override;
37700637cbSDimitry Andric bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38700637cbSDimitry Andric MachineInstr *&Lo20, MachineInstr *&Hi12,
39700637cbSDimitry Andric MachineInstr *&Last);
40700637cbSDimitry Andric bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
41700637cbSDimitry Andric MachineInstr *&Lo12);
42700637cbSDimitry Andric
43700637cbSDimitry Andric bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
44700637cbSDimitry Andric MachineInstr *&Lo20, MachineInstr *&Hi12,
45700637cbSDimitry Andric MachineInstr *&Last);
46700637cbSDimitry Andric void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
47700637cbSDimitry Andric MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
48700637cbSDimitry Andric int64_t Offset);
49700637cbSDimitry Andric bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
50700637cbSDimitry Andric MachineInstr *&Lo20, MachineInstr *&Hi12,
51700637cbSDimitry Andric MachineInstr *&Last, MachineInstr &TailAdd,
52700637cbSDimitry Andric Register GAReg);
53700637cbSDimitry Andric
54700637cbSDimitry Andric bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
55700637cbSDimitry Andric MachineInstr *&Lo20, MachineInstr *&Hi12,
56700637cbSDimitry Andric MachineInstr *&Last);
57700637cbSDimitry Andric
LoongArchMergeBaseOffsetOpt()58700637cbSDimitry Andric LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
59700637cbSDimitry Andric
getRequiredProperties() const60700637cbSDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
61700637cbSDimitry Andric return MachineFunctionProperties().setIsSSA();
62700637cbSDimitry Andric }
63700637cbSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const64700637cbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
65700637cbSDimitry Andric AU.setPreservesCFG();
66700637cbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
67700637cbSDimitry Andric }
68700637cbSDimitry Andric
getPassName() const69700637cbSDimitry Andric StringRef getPassName() const override {
70700637cbSDimitry Andric return LoongArch_MERGE_BASE_OFFSET_NAME;
71700637cbSDimitry Andric }
72700637cbSDimitry Andric };
73700637cbSDimitry Andric } // end anonymous namespace
74700637cbSDimitry Andric
75700637cbSDimitry Andric char LoongArchMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt,DEBUG_TYPE,LoongArch_MERGE_BASE_OFFSET_NAME,false,false)76700637cbSDimitry Andric INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
77700637cbSDimitry Andric LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
78700637cbSDimitry Andric
79700637cbSDimitry Andric // Detect either of the patterns:
80700637cbSDimitry Andric //
81700637cbSDimitry Andric // 1. (small/medium):
82700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s)
83700637cbSDimitry Andric // addi.d vreg2, vreg1, %pc_lo12(s)
84700637cbSDimitry Andric //
85700637cbSDimitry Andric // 2. (large):
86700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s)
87700637cbSDimitry Andric // addi.d vreg2, $zero, %pc_lo12(s)
88700637cbSDimitry Andric // lu32i.d vreg3, vreg2, %pc64_lo20(s)
89700637cbSDimitry Andric // lu52i.d vreg4, vreg3, %pc64_hi12(s)
90700637cbSDimitry Andric // add.d vreg5, vreg4, vreg1
91700637cbSDimitry Andric
92700637cbSDimitry Andric // The pattern is only accepted if:
93700637cbSDimitry Andric // 1) For small and medium pattern, the first instruction has only one use,
94700637cbSDimitry Andric // which is the ADDI.
95700637cbSDimitry Andric // 2) For large pattern, the first four instructions each have only one use,
96700637cbSDimitry Andric // and the user of the fourth instruction is ADD.
97700637cbSDimitry Andric // 3) The address operands have the appropriate type, reflecting the
98700637cbSDimitry Andric // lowering of a global address or constant pool using the pattern.
99700637cbSDimitry Andric // 4) The offset value in the Global Address or Constant Pool is 0.
100700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
101700637cbSDimitry Andric MachineInstr *&Lo12,
102700637cbSDimitry Andric MachineInstr *&Lo20,
103700637cbSDimitry Andric MachineInstr *&Hi12,
104700637cbSDimitry Andric MachineInstr *&Last) {
105700637cbSDimitry Andric if (Hi20.getOpcode() != LoongArch::PCALAU12I)
106700637cbSDimitry Andric return false;
107700637cbSDimitry Andric
108700637cbSDimitry Andric const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
109700637cbSDimitry Andric if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
110700637cbSDimitry Andric return false;
111700637cbSDimitry Andric
112700637cbSDimitry Andric auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
113700637cbSDimitry Andric return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
114700637cbSDimitry Andric };
115700637cbSDimitry Andric
116700637cbSDimitry Andric if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
117700637cbSDimitry Andric return false;
118700637cbSDimitry Andric
119700637cbSDimitry Andric Register HiDestReg = Hi20.getOperand(0).getReg();
120700637cbSDimitry Andric if (!MRI->hasOneUse(HiDestReg))
121700637cbSDimitry Andric return false;
122700637cbSDimitry Andric
123700637cbSDimitry Andric MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
124700637cbSDimitry Andric if (UseInst->getOpcode() != LoongArch::ADD_D) {
125700637cbSDimitry Andric Lo12 = UseInst;
126700637cbSDimitry Andric if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
127700637cbSDimitry Andric (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
128700637cbSDimitry Andric return false;
129700637cbSDimitry Andric } else {
130700637cbSDimitry Andric assert(ST->is64Bit());
131700637cbSDimitry Andric Last = UseInst;
132700637cbSDimitry Andric
133700637cbSDimitry Andric Register LastOp1Reg = Last->getOperand(1).getReg();
134700637cbSDimitry Andric if (!LastOp1Reg.isVirtual())
135700637cbSDimitry Andric return false;
136700637cbSDimitry Andric Hi12 = MRI->getVRegDef(LastOp1Reg);
137700637cbSDimitry Andric const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
138700637cbSDimitry Andric if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
139700637cbSDimitry Andric return false;
140700637cbSDimitry Andric if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
141700637cbSDimitry Andric return false;
142700637cbSDimitry Andric if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
143700637cbSDimitry Andric return false;
144700637cbSDimitry Andric
145700637cbSDimitry Andric Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
146700637cbSDimitry Andric const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
147700637cbSDimitry Andric if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
148700637cbSDimitry Andric return false;
149700637cbSDimitry Andric if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
150700637cbSDimitry Andric return false;
151700637cbSDimitry Andric if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
152700637cbSDimitry Andric return false;
153700637cbSDimitry Andric
154700637cbSDimitry Andric Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
155700637cbSDimitry Andric if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
156700637cbSDimitry Andric return false;
157700637cbSDimitry Andric }
158700637cbSDimitry Andric
159700637cbSDimitry Andric const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
160700637cbSDimitry Andric assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
161700637cbSDimitry Andric if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
162700637cbSDimitry Andric !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
163700637cbSDimitry Andric Lo12Op2.getOffset() != 0)
164700637cbSDimitry Andric return false;
165700637cbSDimitry Andric
166700637cbSDimitry Andric if (Hi20Op1.isGlobal()) {
167700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered global address: "
168700637cbSDimitry Andric << *Hi20Op1.getGlobal() << "\n");
169700637cbSDimitry Andric } else if (Hi20Op1.isBlockAddress()) {
170700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered basic address: "
171700637cbSDimitry Andric << *Hi20Op1.getBlockAddress() << "\n");
172700637cbSDimitry Andric } else if (Hi20Op1.isCPI()) {
173700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
174700637cbSDimitry Andric << "\n");
175700637cbSDimitry Andric }
176700637cbSDimitry Andric
177700637cbSDimitry Andric return true;
178700637cbSDimitry Andric }
179700637cbSDimitry Andric
180700637cbSDimitry Andric // Detect the pattern:
181700637cbSDimitry Andric //
182700637cbSDimitry Andric // (small/medium):
183700637cbSDimitry Andric // lu12i.w vreg1, %le_hi20_r(s)
184700637cbSDimitry Andric // add.w/d vreg2, vreg1, r2, %le_add_r(s)
185700637cbSDimitry Andric // addi.w/d vreg3, vreg2, %le_lo12_r(s)
186700637cbSDimitry Andric
187700637cbSDimitry Andric // The pattern is only accepted if:
188700637cbSDimitry Andric // 1) The first instruction has only one use, which is the PseudoAddTPRel.
189700637cbSDimitry Andric // The second instruction has only one use, which is the ADDI. The
190700637cbSDimitry Andric // second instruction's last operand is the tp register.
191700637cbSDimitry Andric // 2) The address operands have the appropriate type, reflecting the
192700637cbSDimitry Andric // lowering of a thread_local global address using the pattern.
193700637cbSDimitry Andric // 3) The offset value in the ThreadLocal Global Address is 0.
detectFoldable(MachineInstr & Hi20,MachineInstr * & Add,MachineInstr * & Lo12)194700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
195700637cbSDimitry Andric MachineInstr *&Add,
196700637cbSDimitry Andric MachineInstr *&Lo12) {
197700637cbSDimitry Andric if (Hi20.getOpcode() != LoongArch::LU12I_W)
198700637cbSDimitry Andric return false;
199700637cbSDimitry Andric
200700637cbSDimitry Andric auto isGlobalOrCPI = [](const MachineOperand &Op) {
201700637cbSDimitry Andric return Op.isGlobal() || Op.isCPI();
202700637cbSDimitry Andric };
203700637cbSDimitry Andric
204700637cbSDimitry Andric const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
205700637cbSDimitry Andric if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
206700637cbSDimitry Andric !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
207700637cbSDimitry Andric return false;
208700637cbSDimitry Andric
209700637cbSDimitry Andric Register HiDestReg = Hi20.getOperand(0).getReg();
210700637cbSDimitry Andric if (!MRI->hasOneUse(HiDestReg))
211700637cbSDimitry Andric return false;
212700637cbSDimitry Andric
213700637cbSDimitry Andric Add = &*MRI->use_instr_begin(HiDestReg);
214700637cbSDimitry Andric if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
215700637cbSDimitry Andric (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
216700637cbSDimitry Andric return false;
217700637cbSDimitry Andric
218700637cbSDimitry Andric if (Add->getOperand(2).getReg() != LoongArch::R2)
219700637cbSDimitry Andric return false;
220700637cbSDimitry Andric
221700637cbSDimitry Andric const MachineOperand &AddOp3 = Add->getOperand(3);
222700637cbSDimitry Andric if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
223700637cbSDimitry Andric !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
224700637cbSDimitry Andric AddOp3.getOffset() != 0)
225700637cbSDimitry Andric return false;
226700637cbSDimitry Andric
227700637cbSDimitry Andric Register AddDestReg = Add->getOperand(0).getReg();
228700637cbSDimitry Andric if (!MRI->hasOneUse(AddDestReg))
229700637cbSDimitry Andric return false;
230700637cbSDimitry Andric
231700637cbSDimitry Andric Lo12 = &*MRI->use_instr_begin(AddDestReg);
232700637cbSDimitry Andric if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
233700637cbSDimitry Andric (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
234700637cbSDimitry Andric return false;
235700637cbSDimitry Andric
236700637cbSDimitry Andric const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
237700637cbSDimitry Andric if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
238700637cbSDimitry Andric !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
239700637cbSDimitry Andric Lo12Op2.getOffset() != 0)
240700637cbSDimitry Andric return false;
241700637cbSDimitry Andric
242700637cbSDimitry Andric if (Hi20Op1.isGlobal()) {
243700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered global address: "
244700637cbSDimitry Andric << *Hi20Op1.getGlobal() << "\n");
245700637cbSDimitry Andric } else if (Hi20Op1.isCPI()) {
246700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
247700637cbSDimitry Andric << "\n");
248700637cbSDimitry Andric }
249700637cbSDimitry Andric
250700637cbSDimitry Andric return true;
251700637cbSDimitry Andric }
252700637cbSDimitry Andric
253700637cbSDimitry Andric // Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
254700637cbSDimitry Andric // Delete the tail instruction and update all the uses to use the
255700637cbSDimitry Andric // output from Last.
foldOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last,MachineInstr & Tail,int64_t Offset)256700637cbSDimitry Andric void LoongArchMergeBaseOffsetOpt::foldOffset(
257700637cbSDimitry Andric MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
258700637cbSDimitry Andric MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
259700637cbSDimitry Andric int64_t Offset) {
260700637cbSDimitry Andric // Put the offset back in Hi and the Lo
261700637cbSDimitry Andric Hi20.getOperand(1).setOffset(Offset);
262700637cbSDimitry Andric Lo12.getOperand(2).setOffset(Offset);
263700637cbSDimitry Andric if (Lo20 && Hi12) {
264700637cbSDimitry Andric Lo20->getOperand(2).setOffset(Offset);
265700637cbSDimitry Andric Hi12->getOperand(2).setOffset(Offset);
266700637cbSDimitry Andric }
267700637cbSDimitry Andric
268700637cbSDimitry Andric // For tls-le, offset of the second PseudoAddTPRel instr should also be
269700637cbSDimitry Andric // updated.
270700637cbSDimitry Andric MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
271700637cbSDimitry Andric if (Hi20.getOpcode() == LoongArch::LU12I_W)
272700637cbSDimitry Andric Add->getOperand(3).setOffset(Offset);
273700637cbSDimitry Andric
274700637cbSDimitry Andric // Delete the tail instruction.
275700637cbSDimitry Andric MachineInstr *Def = Last ? Last : &Lo12;
276700637cbSDimitry Andric MRI->constrainRegClass(Def->getOperand(0).getReg(),
277700637cbSDimitry Andric MRI->getRegClass(Tail.getOperand(0).getReg()));
278700637cbSDimitry Andric MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
279700637cbSDimitry Andric Tail.eraseFromParent();
280700637cbSDimitry Andric
281700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
282700637cbSDimitry Andric << " " << Hi20;);
283700637cbSDimitry Andric if (Hi20.getOpcode() == LoongArch::LU12I_W) {
284700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " " << *Add;);
285700637cbSDimitry Andric }
286700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " " << Lo12;);
287700637cbSDimitry Andric if (Lo20 && Hi12) {
288700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
289700637cbSDimitry Andric }
290700637cbSDimitry Andric }
291700637cbSDimitry Andric
292700637cbSDimitry Andric // Detect patterns for large offsets that are passed into an ADD instruction.
293700637cbSDimitry Andric // If the pattern is found, updates the offset in Hi20, (Add), Lo12,
294700637cbSDimitry Andric // (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
295700637cbSDimitry Andric // produced the offset.
296700637cbSDimitry Andric //
297700637cbSDimitry Andric // (The instructions marked with "!" are not necessarily present)
298700637cbSDimitry Andric //
299700637cbSDimitry Andric // Base address lowering is of the form:
300700637cbSDimitry Andric // 1) pcala:
301700637cbSDimitry Andric // Hi20: pcalau12i vreg1, %pc_hi20(s)
302700637cbSDimitry Andric // +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
303700637cbSDimitry Andric // | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
304700637cbSDimitry Andric // +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
305700637cbSDimitry Andric // |
306700637cbSDimitry Andric // | 2) tls-le:
307700637cbSDimitry Andric // | Hi20: lu12i.w vreg1, %le_hi20_r(s)
308700637cbSDimitry Andric // | Add: add.w/d vreg1, vreg1, r2, %le_add_r(s)
309700637cbSDimitry Andric // +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
310700637cbSDimitry Andric // |
311700637cbSDimitry Andric // | The large offset can be one of the forms:
312700637cbSDimitry Andric // |
313700637cbSDimitry Andric // +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
314700637cbSDimitry Andric // | OffsetHi20: lu12i.w vreg3, 4
315700637cbSDimitry Andric // | OffsetLo12: ori voff, vreg3, 188 ------------------+
316700637cbSDimitry Andric // | |
317700637cbSDimitry Andric // +-> 2) Offset that has non zero bits in Hi20 bits only: |
318700637cbSDimitry Andric // | OffsetHi20: lu12i.w voff, 128 ------------------+
319700637cbSDimitry Andric // | |
320700637cbSDimitry Andric // +-> 3) Offset that has non zero bits in Lo20 bits: |
321700637cbSDimitry Andric // | OffsetHi20: lu12i.w vreg3, 121 ! |
322700637cbSDimitry Andric // | OffsetLo12: ori voff, vreg3, 122 ! |
323700637cbSDimitry Andric // | OffsetLo20: lu32i.d voff, 123 ------------------+
324700637cbSDimitry Andric // +-> 4) Offset that has non zero bits in Hi12 bits: |
325700637cbSDimitry Andric // OffsetHi20: lu12i.w vreg3, 121 ! |
326700637cbSDimitry Andric // OffsetLo12: ori voff, vreg3, 122 ! |
327700637cbSDimitry Andric // OffsetLo20: lu32i.d vreg3, 123 ! |
328700637cbSDimitry Andric // OffsetHi12: lu52i.d voff, vrg3, 124 ------------------+
329700637cbSDimitry Andric // |
330700637cbSDimitry Andric // TailAdd: add.d vreg4, vreg2, voff <------------------+
331700637cbSDimitry Andric //
foldLargeOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last,MachineInstr & TailAdd,Register GAReg)332700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
333700637cbSDimitry Andric MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
334700637cbSDimitry Andric MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
335700637cbSDimitry Andric Register GAReg) {
336700637cbSDimitry Andric assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
337700637cbSDimitry Andric TailAdd.getOpcode() == LoongArch::ADD_D) &&
338700637cbSDimitry Andric "Expected ADD instruction!");
339700637cbSDimitry Andric Register Rs = TailAdd.getOperand(1).getReg();
340700637cbSDimitry Andric Register Rt = TailAdd.getOperand(2).getReg();
341700637cbSDimitry Andric Register Reg = Rs == GAReg ? Rt : Rs;
342700637cbSDimitry Andric SmallVector<MachineInstr *, 4> Instrs;
343700637cbSDimitry Andric int64_t Offset = 0;
344700637cbSDimitry Andric int64_t Mask = -1;
345700637cbSDimitry Andric
346700637cbSDimitry Andric // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
347700637cbSDimitry Andric for (int i = 0; i < 4; i++) {
348700637cbSDimitry Andric // Handle Reg is R0.
349700637cbSDimitry Andric if (Reg == LoongArch::R0)
350700637cbSDimitry Andric break;
351700637cbSDimitry Andric
352700637cbSDimitry Andric // Can't fold if the register has more than one use.
353700637cbSDimitry Andric if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
354700637cbSDimitry Andric return false;
355700637cbSDimitry Andric
356700637cbSDimitry Andric MachineInstr *Curr = MRI->getVRegDef(Reg);
357700637cbSDimitry Andric if (!Curr)
358700637cbSDimitry Andric break;
359700637cbSDimitry Andric
360700637cbSDimitry Andric switch (Curr->getOpcode()) {
361700637cbSDimitry Andric default:
362700637cbSDimitry Andric // Can't fold if the instruction opcode is unexpected.
363700637cbSDimitry Andric return false;
364700637cbSDimitry Andric case LoongArch::ORI: {
365700637cbSDimitry Andric MachineOperand ImmOp = Curr->getOperand(2);
366700637cbSDimitry Andric if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
367700637cbSDimitry Andric return false;
368700637cbSDimitry Andric Offset += ImmOp.getImm();
369700637cbSDimitry Andric Reg = Curr->getOperand(1).getReg();
370700637cbSDimitry Andric Instrs.push_back(Curr);
371700637cbSDimitry Andric break;
372700637cbSDimitry Andric }
373700637cbSDimitry Andric case LoongArch::LU12I_W: {
374700637cbSDimitry Andric MachineOperand ImmOp = Curr->getOperand(1);
375700637cbSDimitry Andric if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
376700637cbSDimitry Andric return false;
377700637cbSDimitry Andric Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
378700637cbSDimitry Andric Reg = LoongArch::R0;
379700637cbSDimitry Andric Instrs.push_back(Curr);
380700637cbSDimitry Andric break;
381700637cbSDimitry Andric }
382700637cbSDimitry Andric case LoongArch::LU32I_D: {
383700637cbSDimitry Andric MachineOperand ImmOp = Curr->getOperand(2);
384700637cbSDimitry Andric if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
385700637cbSDimitry Andric return false;
386700637cbSDimitry Andric Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
387700637cbSDimitry Andric Mask ^= 0x000FFFFF00000000ULL;
388700637cbSDimitry Andric Reg = Curr->getOperand(1).getReg();
389700637cbSDimitry Andric Instrs.push_back(Curr);
390700637cbSDimitry Andric break;
391700637cbSDimitry Andric }
392700637cbSDimitry Andric case LoongArch::LU52I_D: {
393700637cbSDimitry Andric MachineOperand ImmOp = Curr->getOperand(2);
394700637cbSDimitry Andric if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
395700637cbSDimitry Andric return false;
396700637cbSDimitry Andric Offset += ImmOp.getImm() << 52;
397700637cbSDimitry Andric Mask ^= 0xFFF0000000000000ULL;
398700637cbSDimitry Andric Reg = Curr->getOperand(1).getReg();
399700637cbSDimitry Andric Instrs.push_back(Curr);
400700637cbSDimitry Andric break;
401700637cbSDimitry Andric }
402700637cbSDimitry Andric }
403700637cbSDimitry Andric }
404700637cbSDimitry Andric
405700637cbSDimitry Andric // Can't fold if the offset is not extracted.
406700637cbSDimitry Andric if (!Offset)
407700637cbSDimitry Andric return false;
408700637cbSDimitry Andric
409700637cbSDimitry Andric foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
410700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
411700637cbSDimitry Andric for (auto I : Instrs) {
412700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " " << *I);
413700637cbSDimitry Andric I->eraseFromParent();
414700637cbSDimitry Andric }
415700637cbSDimitry Andric
416700637cbSDimitry Andric return true;
417700637cbSDimitry Andric }
418700637cbSDimitry Andric
detectAndFoldOffset(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last)419700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
420700637cbSDimitry Andric MachineInstr &Lo12,
421700637cbSDimitry Andric MachineInstr *&Lo20,
422700637cbSDimitry Andric MachineInstr *&Hi12,
423700637cbSDimitry Andric MachineInstr *&Last) {
424700637cbSDimitry Andric Register DestReg =
425700637cbSDimitry Andric Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
426700637cbSDimitry Andric
427700637cbSDimitry Andric // Look for arithmetic instructions we can get an offset from.
428700637cbSDimitry Andric // We might be able to remove the arithmetic instructions by folding the
429700637cbSDimitry Andric // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
430700637cbSDimitry Andric // LU12I_W+PseudoAddTPRel+ADDI.
431700637cbSDimitry Andric if (!MRI->hasOneUse(DestReg))
432700637cbSDimitry Andric return false;
433700637cbSDimitry Andric
434700637cbSDimitry Andric // DestReg has only one use.
435700637cbSDimitry Andric MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
436700637cbSDimitry Andric switch (Tail.getOpcode()) {
437700637cbSDimitry Andric default:
438700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
439700637cbSDimitry Andric << Tail);
440700637cbSDimitry Andric break;
441700637cbSDimitry Andric case LoongArch::ADDI_W:
442700637cbSDimitry Andric if (ST->is64Bit())
443700637cbSDimitry Andric return false;
444700637cbSDimitry Andric [[fallthrough]];
445700637cbSDimitry Andric case LoongArch::ADDI_D:
446700637cbSDimitry Andric case LoongArch::ADDU16I_D: {
447700637cbSDimitry Andric // Offset is simply an immediate operand.
448700637cbSDimitry Andric int64_t Offset = Tail.getOperand(2).getImm();
449700637cbSDimitry Andric if (Tail.getOpcode() == LoongArch::ADDU16I_D)
450700637cbSDimitry Andric Offset = SignExtend64<32>(Offset << 16);
451700637cbSDimitry Andric
452700637cbSDimitry Andric // We might have two ADDIs in a row.
453700637cbSDimitry Andric Register TailDestReg = Tail.getOperand(0).getReg();
454700637cbSDimitry Andric if (MRI->hasOneUse(TailDestReg)) {
455700637cbSDimitry Andric MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
456700637cbSDimitry Andric if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
457700637cbSDimitry Andric return false;
458700637cbSDimitry Andric if (TailTail.getOpcode() == LoongArch::ADDI_W ||
459700637cbSDimitry Andric TailTail.getOpcode() == LoongArch::ADDI_D) {
460700637cbSDimitry Andric Offset += TailTail.getOperand(2).getImm();
461700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
462700637cbSDimitry Andric foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
463700637cbSDimitry Andric Tail.eraseFromParent();
464700637cbSDimitry Andric return true;
465700637cbSDimitry Andric }
466700637cbSDimitry Andric }
467700637cbSDimitry Andric
468700637cbSDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
469700637cbSDimitry Andric foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
470700637cbSDimitry Andric return true;
471700637cbSDimitry Andric }
472700637cbSDimitry Andric case LoongArch::ADD_W:
473700637cbSDimitry Andric if (ST->is64Bit())
474700637cbSDimitry Andric return false;
475700637cbSDimitry Andric [[fallthrough]];
476700637cbSDimitry Andric case LoongArch::ADD_D:
477700637cbSDimitry Andric // The offset is too large to fit in the immediate field of ADDI.
478700637cbSDimitry Andric return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
479700637cbSDimitry Andric break;
480700637cbSDimitry Andric }
481700637cbSDimitry Andric
482700637cbSDimitry Andric return false;
483700637cbSDimitry Andric }
484700637cbSDimitry Andric
485700637cbSDimitry Andric // Memory access opcode mapping for transforms.
getNewOpc(unsigned Op,bool isLarge)486700637cbSDimitry Andric static unsigned getNewOpc(unsigned Op, bool isLarge) {
487700637cbSDimitry Andric switch (Op) {
488700637cbSDimitry Andric case LoongArch::LD_B:
489700637cbSDimitry Andric return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
490700637cbSDimitry Andric case LoongArch::LD_H:
491700637cbSDimitry Andric return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
492700637cbSDimitry Andric case LoongArch::LD_W:
493700637cbSDimitry Andric case LoongArch::LDPTR_W:
494700637cbSDimitry Andric return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
495700637cbSDimitry Andric case LoongArch::LD_D:
496700637cbSDimitry Andric case LoongArch::LDPTR_D:
497700637cbSDimitry Andric return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
498700637cbSDimitry Andric case LoongArch::LD_BU:
499700637cbSDimitry Andric return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
500700637cbSDimitry Andric case LoongArch::LD_HU:
501700637cbSDimitry Andric return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
502700637cbSDimitry Andric case LoongArch::LD_WU:
503700637cbSDimitry Andric return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
504700637cbSDimitry Andric case LoongArch::FLD_S:
505700637cbSDimitry Andric return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
506700637cbSDimitry Andric case LoongArch::FLD_D:
507700637cbSDimitry Andric return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
508700637cbSDimitry Andric case LoongArch::VLD:
509700637cbSDimitry Andric return isLarge ? LoongArch::VLDX : LoongArch::VLD;
510700637cbSDimitry Andric case LoongArch::XVLD:
511700637cbSDimitry Andric return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
512700637cbSDimitry Andric case LoongArch::VLDREPL_B:
513700637cbSDimitry Andric return LoongArch::VLDREPL_B;
514700637cbSDimitry Andric case LoongArch::XVLDREPL_B:
515700637cbSDimitry Andric return LoongArch::XVLDREPL_B;
516700637cbSDimitry Andric case LoongArch::ST_B:
517700637cbSDimitry Andric return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
518700637cbSDimitry Andric case LoongArch::ST_H:
519700637cbSDimitry Andric return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
520700637cbSDimitry Andric case LoongArch::ST_W:
521700637cbSDimitry Andric case LoongArch::STPTR_W:
522700637cbSDimitry Andric return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
523700637cbSDimitry Andric case LoongArch::ST_D:
524700637cbSDimitry Andric case LoongArch::STPTR_D:
525700637cbSDimitry Andric return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
526700637cbSDimitry Andric case LoongArch::FST_S:
527700637cbSDimitry Andric return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
528700637cbSDimitry Andric case LoongArch::FST_D:
529700637cbSDimitry Andric return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
530700637cbSDimitry Andric case LoongArch::VST:
531700637cbSDimitry Andric return isLarge ? LoongArch::VSTX : LoongArch::VST;
532700637cbSDimitry Andric case LoongArch::XVST:
533700637cbSDimitry Andric return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
534700637cbSDimitry Andric default:
535700637cbSDimitry Andric llvm_unreachable("Unexpected opcode for replacement");
536700637cbSDimitry Andric }
537700637cbSDimitry Andric }
538700637cbSDimitry Andric
foldIntoMemoryOps(MachineInstr & Hi20,MachineInstr & Lo12,MachineInstr * & Lo20,MachineInstr * & Hi12,MachineInstr * & Last)539700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
540700637cbSDimitry Andric MachineInstr &Lo12,
541700637cbSDimitry Andric MachineInstr *&Lo20,
542700637cbSDimitry Andric MachineInstr *&Hi12,
543700637cbSDimitry Andric MachineInstr *&Last) {
544700637cbSDimitry Andric Register DestReg =
545700637cbSDimitry Andric Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
546700637cbSDimitry Andric
547700637cbSDimitry Andric // If all the uses are memory ops with the same offset, we can transform:
548700637cbSDimitry Andric //
549700637cbSDimitry Andric // 1. (small/medium):
550700637cbSDimitry Andric // 1.1. pcala
551700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s)
552700637cbSDimitry Andric // addi.d vreg2, vreg1, %pc_lo12(s)
553700637cbSDimitry Andric // ld.w vreg3, 8(vreg2)
554700637cbSDimitry Andric //
555700637cbSDimitry Andric // =>
556700637cbSDimitry Andric //
557700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s+8)
558700637cbSDimitry Andric // ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
559700637cbSDimitry Andric //
560700637cbSDimitry Andric // 1.2. tls-le
561700637cbSDimitry Andric // lu12i.w vreg1, %le_hi20_r(s)
562700637cbSDimitry Andric // add.w/d vreg2, vreg1, r2, %le_add_r(s)
563700637cbSDimitry Andric // addi.w/d vreg3, vreg2, %le_lo12_r(s)
564700637cbSDimitry Andric // ld.w vreg4, 8(vreg3)
565700637cbSDimitry Andric //
566700637cbSDimitry Andric // =>
567700637cbSDimitry Andric //
568700637cbSDimitry Andric // lu12i.w vreg1, %le_hi20_r(s+8)
569700637cbSDimitry Andric // add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
570700637cbSDimitry Andric // ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
571700637cbSDimitry Andric //
572700637cbSDimitry Andric // 2. (large):
573700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s)
574700637cbSDimitry Andric // addi.d vreg2, $zero, %pc_lo12(s)
575700637cbSDimitry Andric // lu32i.d vreg3, vreg2, %pc64_lo20(s)
576700637cbSDimitry Andric // lu52i.d vreg4, vreg3, %pc64_hi12(s)
577700637cbSDimitry Andric // add.d vreg5, vreg4, vreg1
578700637cbSDimitry Andric // ld.w vreg6, 8(vreg5)
579700637cbSDimitry Andric //
580700637cbSDimitry Andric // =>
581700637cbSDimitry Andric //
582700637cbSDimitry Andric // pcalau12i vreg1, %pc_hi20(s+8)
583700637cbSDimitry Andric // addi.d vreg2, $zero, %pc_lo12(s+8)
584700637cbSDimitry Andric // lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
585700637cbSDimitry Andric // lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
586700637cbSDimitry Andric // ldx.w vreg6, vreg4, vreg1
587700637cbSDimitry Andric
588700637cbSDimitry Andric std::optional<int64_t> CommonOffset;
589700637cbSDimitry Andric DenseMap<const MachineInstr *, SmallVector<unsigned>>
590700637cbSDimitry Andric InlineAsmMemoryOpIndexesMap;
591700637cbSDimitry Andric for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
592700637cbSDimitry Andric switch (UseMI.getOpcode()) {
593700637cbSDimitry Andric default:
594700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
595700637cbSDimitry Andric return false;
596700637cbSDimitry Andric case LoongArch::VLDREPL_B:
597700637cbSDimitry Andric case LoongArch::XVLDREPL_B:
598700637cbSDimitry Andric // We can't do this for large pattern.
599700637cbSDimitry Andric if (Last)
600700637cbSDimitry Andric return false;
601700637cbSDimitry Andric [[fallthrough]];
602700637cbSDimitry Andric case LoongArch::LD_B:
603700637cbSDimitry Andric case LoongArch::LD_H:
604700637cbSDimitry Andric case LoongArch::LD_W:
605700637cbSDimitry Andric case LoongArch::LD_D:
606700637cbSDimitry Andric case LoongArch::LD_BU:
607700637cbSDimitry Andric case LoongArch::LD_HU:
608700637cbSDimitry Andric case LoongArch::LD_WU:
609700637cbSDimitry Andric case LoongArch::LDPTR_W:
610700637cbSDimitry Andric case LoongArch::LDPTR_D:
611700637cbSDimitry Andric case LoongArch::FLD_S:
612700637cbSDimitry Andric case LoongArch::FLD_D:
613700637cbSDimitry Andric case LoongArch::VLD:
614700637cbSDimitry Andric case LoongArch::XVLD:
615700637cbSDimitry Andric case LoongArch::ST_B:
616700637cbSDimitry Andric case LoongArch::ST_H:
617700637cbSDimitry Andric case LoongArch::ST_W:
618700637cbSDimitry Andric case LoongArch::ST_D:
619700637cbSDimitry Andric case LoongArch::STPTR_W:
620700637cbSDimitry Andric case LoongArch::STPTR_D:
621700637cbSDimitry Andric case LoongArch::FST_S:
622700637cbSDimitry Andric case LoongArch::FST_D:
623700637cbSDimitry Andric case LoongArch::VST:
624700637cbSDimitry Andric case LoongArch::XVST: {
625700637cbSDimitry Andric if (UseMI.getOperand(1).isFI())
626700637cbSDimitry Andric return false;
627700637cbSDimitry Andric // Register defined by Lo should not be the value register.
628700637cbSDimitry Andric if (DestReg == UseMI.getOperand(0).getReg())
629700637cbSDimitry Andric return false;
630700637cbSDimitry Andric assert(DestReg == UseMI.getOperand(1).getReg() &&
631700637cbSDimitry Andric "Expected base address use");
632700637cbSDimitry Andric // All load/store instructions must use the same offset.
633700637cbSDimitry Andric int64_t Offset = UseMI.getOperand(2).getImm();
634700637cbSDimitry Andric if (CommonOffset && Offset != CommonOffset)
635700637cbSDimitry Andric return false;
636700637cbSDimitry Andric CommonOffset = Offset;
637700637cbSDimitry Andric break;
638700637cbSDimitry Andric }
639700637cbSDimitry Andric case LoongArch::INLINEASM:
640700637cbSDimitry Andric case LoongArch::INLINEASM_BR: {
641700637cbSDimitry Andric // We can't do this for large pattern.
642700637cbSDimitry Andric if (Last)
643700637cbSDimitry Andric return false;
644700637cbSDimitry Andric SmallVector<unsigned> InlineAsmMemoryOpIndexes;
645700637cbSDimitry Andric unsigned NumOps = 0;
646700637cbSDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand;
647700637cbSDimitry Andric I < UseMI.getNumOperands(); I += 1 + NumOps) {
648700637cbSDimitry Andric const MachineOperand &FlagsMO = UseMI.getOperand(I);
649700637cbSDimitry Andric // Should be an imm.
650700637cbSDimitry Andric if (!FlagsMO.isImm())
651700637cbSDimitry Andric continue;
652700637cbSDimitry Andric
653700637cbSDimitry Andric const InlineAsm::Flag Flags(FlagsMO.getImm());
654700637cbSDimitry Andric NumOps = Flags.getNumOperandRegisters();
655700637cbSDimitry Andric
656700637cbSDimitry Andric // Memory constraints have two operands.
657700637cbSDimitry Andric if (NumOps != 2 || !Flags.isMemKind()) {
658700637cbSDimitry Andric // If the register is used by something other than a memory contraint,
659700637cbSDimitry Andric // we should not fold.
660700637cbSDimitry Andric for (unsigned J = 0; J < NumOps; ++J) {
661700637cbSDimitry Andric const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
662700637cbSDimitry Andric if (MO.isReg() && MO.getReg() == DestReg)
663700637cbSDimitry Andric return false;
664700637cbSDimitry Andric }
665700637cbSDimitry Andric continue;
666700637cbSDimitry Andric }
667700637cbSDimitry Andric
668700637cbSDimitry Andric // We can only do this for constraint m.
669700637cbSDimitry Andric if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
670700637cbSDimitry Andric return false;
671700637cbSDimitry Andric
672700637cbSDimitry Andric const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
673700637cbSDimitry Andric if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
674700637cbSDimitry Andric continue;
675700637cbSDimitry Andric
676700637cbSDimitry Andric const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
677700637cbSDimitry Andric if (!OffsetMO.isImm())
678700637cbSDimitry Andric continue;
679700637cbSDimitry Andric
680700637cbSDimitry Andric // All inline asm memory operands must use the same offset.
681700637cbSDimitry Andric int64_t Offset = OffsetMO.getImm();
682700637cbSDimitry Andric if (CommonOffset && Offset != CommonOffset)
683700637cbSDimitry Andric return false;
684700637cbSDimitry Andric CommonOffset = Offset;
685700637cbSDimitry Andric InlineAsmMemoryOpIndexes.push_back(I + 1);
686700637cbSDimitry Andric }
687700637cbSDimitry Andric InlineAsmMemoryOpIndexesMap.insert(
688700637cbSDimitry Andric std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
689700637cbSDimitry Andric break;
690700637cbSDimitry Andric }
691700637cbSDimitry Andric }
692700637cbSDimitry Andric }
693700637cbSDimitry Andric
694700637cbSDimitry Andric // We found a common offset.
695700637cbSDimitry Andric // Update the offsets in global address lowering.
696700637cbSDimitry Andric // We may have already folded some arithmetic so we need to add to any
697700637cbSDimitry Andric // existing offset.
698700637cbSDimitry Andric int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
699700637cbSDimitry Andric // LA32 ignores the upper 32 bits.
700700637cbSDimitry Andric if (!ST->is64Bit())
701700637cbSDimitry Andric NewOffset = SignExtend64<32>(NewOffset);
702700637cbSDimitry Andric // We can only fold simm32 offsets.
703700637cbSDimitry Andric if (!isInt<32>(NewOffset))
704700637cbSDimitry Andric return false;
705700637cbSDimitry Andric
706700637cbSDimitry Andric // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
707700637cbSDimitry Andric // be removed from the pcala code sequence. Code sequence of tls-le can still
708700637cbSDimitry Andric // be relaxed after being optimized.
709700637cbSDimitry Andric //
710700637cbSDimitry Andric // For example:
711700637cbSDimitry Andric // pcalau12i $a0, %pc_hi20(symbol)
712700637cbSDimitry Andric // addi.d $a0, $a0, %pc_lo12(symbol)
713700637cbSDimitry Andric // ld.w $a0, $a0, 0
714700637cbSDimitry Andric //
715700637cbSDimitry Andric // =>
716700637cbSDimitry Andric //
717700637cbSDimitry Andric // pcalau12i $a0, %pc_hi20(symbol)
718700637cbSDimitry Andric // ld.w $a0, $a0, %pc_lo12(symbol)
719700637cbSDimitry Andric //
720700637cbSDimitry Andric // Code sequence optimized before can be relax by linker. But after being
721700637cbSDimitry Andric // optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
722700637cbSDimitry Andric // carried by them.
723700637cbSDimitry Andric Hi20.getOperand(1).setOffset(NewOffset);
724700637cbSDimitry Andric MachineOperand &ImmOp = Lo12.getOperand(2);
725700637cbSDimitry Andric ImmOp.setOffset(NewOffset);
726700637cbSDimitry Andric if (Lo20 && Hi12) {
727700637cbSDimitry Andric Lo20->getOperand(2).setOffset(NewOffset);
728700637cbSDimitry Andric Hi12->getOperand(2).setOffset(NewOffset);
729700637cbSDimitry Andric }
730700637cbSDimitry Andric if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
731700637cbSDimitry Andric Hi20.getOperand(1).setTargetFlags(
732700637cbSDimitry Andric LoongArchII::getDirectFlags(Hi20.getOperand(1)));
733700637cbSDimitry Andric ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
734700637cbSDimitry Andric } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
735700637cbSDimitry Andric MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
736700637cbSDimitry Andric Add->getOperand(3).setOffset(NewOffset);
737700637cbSDimitry Andric }
738700637cbSDimitry Andric
739700637cbSDimitry Andric // Update the immediate in the load/store instructions to add the offset.
740700637cbSDimitry Andric const LoongArchInstrInfo &TII = *ST->getInstrInfo();
741700637cbSDimitry Andric for (MachineInstr &UseMI :
742700637cbSDimitry Andric llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
743700637cbSDimitry Andric if (UseMI.getOpcode() == LoongArch::INLINEASM ||
744700637cbSDimitry Andric UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
745700637cbSDimitry Andric auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
746700637cbSDimitry Andric for (unsigned I : InlineAsmMemoryOpIndexes) {
747700637cbSDimitry Andric MachineOperand &MO = UseMI.getOperand(I + 1);
748700637cbSDimitry Andric switch (ImmOp.getType()) {
749700637cbSDimitry Andric case MachineOperand::MO_GlobalAddress:
750700637cbSDimitry Andric MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
751700637cbSDimitry Andric LoongArchII::getDirectFlags(ImmOp));
752700637cbSDimitry Andric break;
753700637cbSDimitry Andric case MachineOperand::MO_MCSymbol:
754700637cbSDimitry Andric MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
755700637cbSDimitry Andric LoongArchII::getDirectFlags(ImmOp));
756700637cbSDimitry Andric MO.setOffset(ImmOp.getOffset());
757700637cbSDimitry Andric break;
758700637cbSDimitry Andric case MachineOperand::MO_BlockAddress:
759700637cbSDimitry Andric MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
760700637cbSDimitry Andric LoongArchII::getDirectFlags(ImmOp));
761700637cbSDimitry Andric break;
762*e64bea71SDimitry Andric case MachineOperand::MO_ConstantPoolIndex:
763*e64bea71SDimitry Andric MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(),
764*e64bea71SDimitry Andric LoongArchII::getDirectFlags(ImmOp));
765*e64bea71SDimitry Andric break;
766700637cbSDimitry Andric default:
767700637cbSDimitry Andric report_fatal_error("unsupported machine operand type");
768700637cbSDimitry Andric break;
769700637cbSDimitry Andric }
770700637cbSDimitry Andric }
771700637cbSDimitry Andric } else {
772700637cbSDimitry Andric UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
773700637cbSDimitry Andric if (Last) {
774700637cbSDimitry Andric UseMI.removeOperand(2);
775700637cbSDimitry Andric UseMI.removeOperand(1);
776700637cbSDimitry Andric UseMI.addOperand(Last->getOperand(1));
777700637cbSDimitry Andric UseMI.addOperand(Last->getOperand(2));
778700637cbSDimitry Andric UseMI.getOperand(1).setIsKill(false);
779700637cbSDimitry Andric UseMI.getOperand(2).setIsKill(false);
780700637cbSDimitry Andric } else {
781700637cbSDimitry Andric UseMI.removeOperand(2);
782700637cbSDimitry Andric UseMI.addOperand(ImmOp);
783700637cbSDimitry Andric }
784700637cbSDimitry Andric }
785700637cbSDimitry Andric }
786700637cbSDimitry Andric
787700637cbSDimitry Andric if (Last) {
788700637cbSDimitry Andric Last->eraseFromParent();
789700637cbSDimitry Andric return true;
790700637cbSDimitry Andric }
791700637cbSDimitry Andric
792700637cbSDimitry Andric if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
793700637cbSDimitry Andric MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
794700637cbSDimitry Andric Hi20.getOperand(0).getReg());
795700637cbSDimitry Andric } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
796700637cbSDimitry Andric MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
797700637cbSDimitry Andric MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
798700637cbSDimitry Andric Add->getOperand(0).getReg());
799700637cbSDimitry Andric }
800700637cbSDimitry Andric Lo12.eraseFromParent();
801700637cbSDimitry Andric return true;
802700637cbSDimitry Andric }
803700637cbSDimitry Andric
runOnMachineFunction(MachineFunction & Fn)804700637cbSDimitry Andric bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
805700637cbSDimitry Andric if (skipFunction(Fn.getFunction()))
806700637cbSDimitry Andric return false;
807700637cbSDimitry Andric
808700637cbSDimitry Andric ST = &Fn.getSubtarget<LoongArchSubtarget>();
809700637cbSDimitry Andric
810700637cbSDimitry Andric bool MadeChange = false;
811700637cbSDimitry Andric MRI = &Fn.getRegInfo();
812700637cbSDimitry Andric for (MachineBasicBlock &MBB : Fn) {
813700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
814700637cbSDimitry Andric for (MachineInstr &Hi20 : MBB) {
815700637cbSDimitry Andric MachineInstr *Lo12 = nullptr;
816700637cbSDimitry Andric MachineInstr *Lo20 = nullptr;
817700637cbSDimitry Andric MachineInstr *Hi12 = nullptr;
818700637cbSDimitry Andric MachineInstr *Last = nullptr;
819700637cbSDimitry Andric if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
820700637cbSDimitry Andric // Detect foldable pcala code sequence in small/medium/large code model.
821700637cbSDimitry Andric if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
822700637cbSDimitry Andric continue;
823700637cbSDimitry Andric } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
824700637cbSDimitry Andric MachineInstr *Add = nullptr;
825700637cbSDimitry Andric // Detect foldable tls-le code sequence in small/medium code model.
826700637cbSDimitry Andric if (!detectFoldable(Hi20, Add, Lo12))
827700637cbSDimitry Andric continue;
828700637cbSDimitry Andric } else {
829700637cbSDimitry Andric continue;
830700637cbSDimitry Andric }
831700637cbSDimitry Andric // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
832700637cbSDimitry Andric // reuse the existing hooks and the last three paramaters should always be
833700637cbSDimitry Andric // nullptr.
834700637cbSDimitry Andric MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
835700637cbSDimitry Andric MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
836700637cbSDimitry Andric }
837700637cbSDimitry Andric }
838700637cbSDimitry Andric
839700637cbSDimitry Andric return MadeChange;
840700637cbSDimitry Andric }
841700637cbSDimitry Andric
842700637cbSDimitry Andric /// Returns an instance of the Merge Base Offset Optimization pass.
createLoongArchMergeBaseOffsetOptPass()843700637cbSDimitry Andric FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
844700637cbSDimitry Andric return new LoongArchMergeBaseOffsetOpt();
845700637cbSDimitry Andric }
846