//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
//   1. The use of a single uncompressed register.
//   2. A base register + offset where the offset is too large to be compressed
//      and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the incompressible zero register:
//   sw zero, 0(a0)   # if zero
//   sw zero, 8(a0)   # if zero
//   sw zero, 4(a0)   # if zero
//   sw zero, 24(a0)  # if zero
//
// If a compressed register (e.g. a1) is available, the above can be transformed
// to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi  a3, a2, -256
//   c.sw  a1, 20(a3)
//   c.sw  a1, 16(a3)
//   c.sw  a1, 12(a3)
//   c.sw  a1, 8(a3)
//   c.sw  a1, 4(a3)
//   c.sw  a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)

// Return log2(widthInBytes) of load/store done by Opcode.
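// (0 for a byte access, 1 for halfword, 2 for word, 3 for doubleword.)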
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return 0;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return 1;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
    return 2;
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return a mask for the immediate bit field (in units of the access width) of
// the compressed form of Opcode.
static unsigned offsetMask(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return maskTrailingOnes<unsigned>(2U);
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return maskTrailingOnes<unsigned>(1U);
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FLD:
  case RISCV::FSD:
    return maskTrailingOnes<unsigned>(5U);
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
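// For example, c.lw/c.sw encode a 5-bit offset field scaled by the 4-byte
// access width, so the mask is 0b1111100 (0x7c) and the compressible offsets
// are 0, 4, ..., 124.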
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return offsetMask(Opcode) << log2LdstWidth(Opcode);
}

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
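// (The sp-relative forms c.lwsp/c.swsp and c.ldsp/c.sdsp have a wider 6-bit
// offset field, scaled by 4 and 8 bytes respectively.)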
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  // Compressed sp-based loads and stores only work for 32/64 bits.
  switch (log2LdstWidth(Opcode)) {
  case 2:
    return isShiftedUInt<6, 2>(Offset);
  case 3:
    return isShiftedUInt<6, 3>(Offset);
  }
  return false;
}

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
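// For example, for a word store (offset mask 0x7c) with an offset of -240, the
// adjustment is -256, leaving a compressible remainder of 16 (matching the
// example in the file header).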
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}

// Return true if Reg is in a compressed register class.
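// (i.e. one of the register classes restricted to the registers encodable in
// the 3-bit register fields of compressed instructions: x8-x15 and f8-f15.)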
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::GPRF16CRegClass.contains(Reg) ||
         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg) ||
         RISCV::GPRPairCRegClass.contains(Reg);
}

// Return true if MI is a load for which there exists a compressed version.
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
    return STI.hasStdExtZcb();
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LD:
    return STI.hasStdExtZca();
  case RISCV::LD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FLW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FLD:
    return STI.hasStdExtCOrZcd();
  }
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
    return STI.hasStdExtZcb();
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
    return STI.hasStdExtZca();
  case RISCV::SD_RV32:
    return STI.hasStdExtZclsd();
  case RISCV::FSW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FSD:
    return STI.hasStdExtCOrZcd();
  }
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a compressed
//                            register.
//   {Reg, N}               - Reg needs replacing with a compressed register and
//                            N needs adding to the new register. (Reg may be
//                            compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(RISCV::NoRegister, 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an incompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(RISCV::NoRegister, 0);
}

// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
      MIs.push_back(&MI);

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it may
    // be possible to optimize a large offset in the current instruction - this
    // will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs one new uncompressed addi and therefore three uses
  // are required for a code size reduction. If no base adjustment is required,
  // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
  // the zero register) and therefore two uses are required for a code size
  // reduction. For GPR pairs, we need 2 ADDIs to copy so we need three users.
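  // For example, with a base adjustment each of three rewritten accesses saves
  // 2 bytes (6 bytes in total) while the new uncompressed addi costs 4 bytes,
  // a net saving of 2 bytes; with only two uses the saving and cost cancel.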
  unsigned CopyCost = RISCV::GPRPairRegClass.contains(RegImm.Reg) ? 2 : 1;
  assert((RegImm.Imm == 0 || CopyCost == 1) && "GPRPair should have zero imm");
  if (MIs.size() <= CopyCost || (RegImm.Imm != 0 && MIs.size() <= 2))
    return Register();

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF16CRegClass;
  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF32CRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else if (RISCV::GPRPairRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRPairCRegClass;
  else
    return Register();

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MIs.back()->getIterator()));
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}

// Update uses of the old register in the given instruction to the new register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand of a store instruction (unless the store
  // offset is zero), so that only the base register operand is rewritten.
  // Otherwise an incorrect transformation could result, e.g. rewriting
  //   sd a0, 808(a0)  to  addi a2, a0, 768; sd a2, 40(a2)
  // where the stored value must remain a0.
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}

bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  if (!STI.hasStdExtZca())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not for
      // an incompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        assert(isInt<12>(RegImm.Imm));
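        // Note that when Imm is zero this ADDI is a plain register move and is
        // itself expected to be compressed (to c.mv, or to c.li when copying
        // the zero register), matching the cost model in
        // analyzeCompressibleUses.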
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
            .addReg(RegImm.Reg)
            .addImm(RegImm.Imm);
      } else {
        assert(RegImm.Imm == 0);
        TII.copyPhysReg(MBB, MI, MI.getDebugLoc(), NewReg, RegImm.Reg,
                        /*KillSrc*/ false);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}