xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass searches for instructions that are prevented from being compressed
10 // by one of the following:
11 //
12 //   1. The use of a single uncompressed register.
13 //   2. A base register + offset where the offset is too large to be compressed
14 //   and the base register may or may not be compressed.
15 //
16 //
17 // For case 1, if a compressed register is available, then the uncompressed
18 // register is copied to the compressed register and its uses are replaced.
19 //
20 // For example, storing zero uses the uncompressible zero register:
21 //   sw zero, 0(a0)   # if zero
22 //   sw zero, 8(a0)   # if zero
23 //   sw zero, 4(a0)   # if zero
24 //   sw zero, 24(a0)   # if zero
25 //
26 // If a compressed register (e.g. a1) is available, the above can be transformed
27 // to the following to improve code size:
28 //   li a1, 0
29 //   c.sw a1, 0(a0)
30 //   c.sw a1, 8(a0)
31 //   c.sw a1, 4(a0)
32 //   c.sw a1, 24(a0)
33 //
34 //
35 // For case 2, if a compressed register is available, then the original base
36 // is copied and adjusted such that:
37 //
38 //   new_base_register = base_register + adjustment
39 //   base_register + large_offset = new_base_register + small_offset
40 //
41 // For example, the following offsets are too large for c.sw:
42 //   lui a2, 983065
43 //   sw  a1, -236(a2)
44 //   sw  a1, -240(a2)
45 //   sw  a1, -244(a2)
46 //   sw  a1, -248(a2)
47 //   sw  a1, -252(a2)
48 //   sw  a0, -256(a2)
49 //
50 // If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
52 // improving code size:
53 //   lui a2, 983065
54 //   addi  a3, a2, -256
55 //   c.sw  a1, 20(a3)
56 //   c.sw  a1, 16(a3)
57 //   c.sw  a1, 12(a3)
58 //   c.sw  a1, 8(a3)
59 //   c.sw  a1, 4(a3)
60 //   c.sw  a0, 0(a3)
61 //
62 //
63 // This optimization is only applied if there are enough uses of the copied
64 // register for code size to be reduced.
65 //
66 //===----------------------------------------------------------------------===//
67 
68 #include "RISCV.h"
69 #include "RISCVSubtarget.h"
70 #include "llvm/CodeGen/Passes.h"
71 #include "llvm/CodeGen/RegisterScavenging.h"
72 #include "llvm/MC/TargetRegistry.h"
73 #include "llvm/Support/Debug.h"
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "riscv-make-compressible"
78 #define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"
79 
80 namespace {
81 
82 struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
83   static char ID;
84 
85   bool runOnMachineFunction(MachineFunction &Fn) override;
86 
87   RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}
88 
89   StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
90 };
91 } // namespace
92 
93 char RISCVMakeCompressibleOpt::ID = 0;
94 INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
95                 RISCV_COMPRESS_INSTRS_NAME, false, false)
96 
97 // Return log2(widthInBytes) of load/store done by Opcode.
98 static unsigned log2LdstWidth(unsigned Opcode) {
99   switch (Opcode) {
100   default:
101     llvm_unreachable("Unexpected opcode");
102   case RISCV::LBU:
103   case RISCV::SB:
104     return 0;
105   case RISCV::LH:
106   case RISCV::LHU:
107   case RISCV::SH:
108     return 1;
109   case RISCV::LW:
110   case RISCV::SW:
111   case RISCV::FLW:
112   case RISCV::FSW:
113     return 2;
114   case RISCV::LD:
115   case RISCV::SD:
116   case RISCV::FLD:
117   case RISCV::FSD:
118     return 3;
119   }
120 }
121 
122 // Return bit field size of immediate operand of Opcode.
123 static unsigned offsetMask(unsigned Opcode) {
124   switch (Opcode) {
125   default:
126     llvm_unreachable("Unexpected opcode");
127   case RISCV::LBU:
128   case RISCV::SB:
129     return maskTrailingOnes<unsigned>(2U);
130   case RISCV::LH:
131   case RISCV::LHU:
132   case RISCV::SH:
133     return maskTrailingOnes<unsigned>(1U);
134   case RISCV::LW:
135   case RISCV::SW:
136   case RISCV::FLW:
137   case RISCV::FSW:
138   case RISCV::LD:
139   case RISCV::SD:
140   case RISCV::FLD:
141   case RISCV::FSD:
142     return maskTrailingOnes<unsigned>(5U);
143   }
144 }
145 
146 // Return a mask for the offset bits of a non-stack-pointer based compressed
147 // load/store.
148 static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
149   return offsetMask(Opcode) << log2LdstWidth(Opcode);
150 }
151 
152 // Return true if Offset fits within a compressed stack-pointer based
153 // load/store.
154 static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
155   // Compressed sp-based loads and stores only work for 32/64 bits.
156   switch (log2LdstWidth(Opcode)) {
157   case 2:
158     return isShiftedUInt<6, 2>(Offset);
159   case 3:
160     return isShiftedUInt<6, 3>(Offset);
161   }
162   return false;
163 }
164 
165 // Given an offset for a load/store, return the adjustment required to the base
166 // register such that the address can be accessed with a compressible offset.
167 // This will return 0 if the offset is already compressible.
168 static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
169   // Return the excess bits that do not fit in a compressible offset.
170   return Offset & ~compressedLDSTOffsetMask(Opcode);
171 }
172 
173 // Return true if Reg is in a compressed register class.
174 static bool isCompressedReg(Register Reg) {
175   return RISCV::GPRCRegClass.contains(Reg) ||
176          RISCV::FPR32CRegClass.contains(Reg) ||
177          RISCV::FPR64CRegClass.contains(Reg);
178 }
179 
180 // Return true if MI is a load for which there exists a compressed version.
181 static bool isCompressibleLoad(const MachineInstr &MI) {
182   const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
183 
184   switch (MI.getOpcode()) {
185   default:
186     return false;
187   case RISCV::LBU:
188   case RISCV::LH:
189   case RISCV::LHU:
190     return STI.hasStdExtZcb();
191   case RISCV::LW:
192   case RISCV::LD:
193     return STI.hasStdExtCOrZca();
194   case RISCV::FLW:
195     return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
196   case RISCV::FLD:
197     return STI.hasStdExtCOrZcd();
198   }
199 }
200 
201 // Return true if MI is a store for which there exists a compressed version.
202 static bool isCompressibleStore(const MachineInstr &MI) {
203   const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
204 
205   switch (MI.getOpcode()) {
206   default:
207     return false;
208   case RISCV::SB:
209   case RISCV::SH:
210     return STI.hasStdExtZcb();
211   case RISCV::SW:
212   case RISCV::SD:
213     return STI.hasStdExtCOrZca();
214   case RISCV::FSW:
215     return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
216   case RISCV::FSD:
217     return STI.hasStdExtCOrZcd();
218   }
219 }
220 
221 // Find a single register and/or large offset which, if compressible, would
222 // allow the given instruction to be compressed.
223 //
224 // Possible return values:
225 //
226 //   {Reg, 0}               - Uncompressed Reg needs replacing with a compressed
227 //                            register.
228 //   {Reg, N}               - Reg needs replacing with a compressed register and
229 //                            N needs adding to the new register. (Reg may be
230 //                            compressed or uncompressed).
231 //   {RISCV::NoRegister, 0} - No suitable optimization found for this
232 //   instruction.
233 static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
234   const unsigned Opcode = MI.getOpcode();
235 
236   if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
237     const MachineOperand &MOImm = MI.getOperand(2);
238     if (!MOImm.isImm())
239       return RegImmPair(RISCV::NoRegister, 0);
240 
241     int64_t Offset = MOImm.getImm();
242     int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
243     Register Base = MI.getOperand(1).getReg();
244 
245     // Memory accesses via the stack pointer do not have a requirement for
246     // either of the registers to be compressible and can take a larger offset.
247     if (RISCV::SPRegClass.contains(Base)) {
248       if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
249         return RegImmPair(Base, NewBaseAdjust);
250     } else {
251       Register SrcDest = MI.getOperand(0).getReg();
252       bool SrcDestCompressed = isCompressedReg(SrcDest);
253       bool BaseCompressed = isCompressedReg(Base);
254 
255       // If only Base and/or offset prevent compression, then return Base and
256       // any adjustment required to make the offset compressible.
257       if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
258         return RegImmPair(Base, NewBaseAdjust);
259 
260       // For loads, we can only change the base register since dest is defined
261       // rather than used.
262       //
263       // For stores, we can change SrcDest (and Base if SrcDest == Base) but
264       // cannot resolve an uncompressible offset in this case.
265       if (isCompressibleStore(MI)) {
266         if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
267             !NewBaseAdjust)
268           return RegImmPair(SrcDest, NewBaseAdjust);
269       }
270     }
271   }
272   return RegImmPair(RISCV::NoRegister, 0);
273 }
274 
275 // Check all uses after FirstMI of the given register, keeping a vector of
276 // instructions that would be compressible if the given register (and offset if
277 // applicable) were compressible.
278 //
279 // If there are enough uses for this optimization to improve code size and a
280 // compressed register is available, return that compressed register.
281 static Register analyzeCompressibleUses(MachineInstr &FirstMI,
282                                         RegImmPair RegImm,
283                                         SmallVectorImpl<MachineInstr *> &MIs) {
284   MachineBasicBlock &MBB = *FirstMI.getParent();
285   const TargetRegisterInfo *TRI =
286       MBB.getParent()->getSubtarget().getRegisterInfo();
287 
288   for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
289                                          E = MBB.instr_end();
290        I != E; ++I) {
291     MachineInstr &MI = *I;
292 
293     // Determine if this is an instruction which would benefit from using the
294     // new register.
295     RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
296     if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
297       MIs.push_back(&MI);
298 
299     // If RegImm.Reg is modified by this instruction, then we cannot optimize
300     // past this instruction. If the register is already compressed, then it may
301     // possible to optimize a large offset in the current instruction - this
302     // will have been detected by the preceeding call to
303     // getRegImmPairPreventingCompression.
304     if (MI.modifiesRegister(RegImm.Reg, TRI))
305       break;
306   }
307 
308   // Adjusting the base costs one new uncompressed addi and therefore three uses
309   // are required for a code size reduction. If no base adjustment is required,
310   // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
311   // the zero register) and therefore two uses are required for a code size
312   // reduction.
313   if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
314     return RISCV::NoRegister;
315 
316   // Find a compressible register which will be available from the first
317   // instruction we care about to the last.
318   const TargetRegisterClass *RCToScavenge;
319 
320   // Work out the compressed register class from which to scavenge.
321   if (RISCV::GPRRegClass.contains(RegImm.Reg))
322     RCToScavenge = &RISCV::GPRCRegClass;
323   else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
324     RCToScavenge = &RISCV::FPR32CRegClass;
325   else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
326     RCToScavenge = &RISCV::FPR64CRegClass;
327   else
328     return RISCV::NoRegister;
329 
330   RegScavenger RS;
331   RS.enterBasicBlockEnd(MBB);
332   RS.backward(std::next(MIs.back()->getIterator()));
333   return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
334                                       /*RestoreAfter=*/false, /*SPAdj=*/0,
335                                       /*AllowSpill=*/false);
336 }
337 
338 // Update uses of the old register in the given instruction to the new register.
339 static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
340                            Register NewReg) {
341   unsigned Opcode = MI.getOpcode();
342 
343   // If this pass is extended to support more instructions, the check for
344   // definedness may need to be strengthened.
345   assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
346          "Unsupported instruction for this optimization.");
347 
348   int SkipN = 0;
349 
350   // Skip the first (value) operand to a store instruction (except if the store
351   // offset is zero) in order to avoid an incorrect transformation.
352   // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
353   if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
354     SkipN = 1;
355 
356   // Update registers
357   for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
358     if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
359       // Do not update operands that define the old register.
360       //
361       // The new register was scavenged for the range of instructions that are
362       // being updated, therefore it should not be defined within this range
363       // except possibly in the final instruction.
364       if (MO.isDef()) {
365         assert(isCompressibleLoad(MI));
366         continue;
367       }
368       // Update reg
369       MO.setReg(NewReg);
370     }
371 
372   // Update offset
373   MachineOperand &MOImm = MI.getOperand(2);
374   int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
375   MOImm.setImm(NewOffset);
376 }
377 
378 bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
379   // This is a size optimization.
380   if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
381     return false;
382 
383   const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
384   const RISCVInstrInfo &TII = *STI.getInstrInfo();
385 
386   // This optimization only makes sense if compressed instructions are emitted.
387   if (!STI.hasStdExtCOrZca())
388     return false;
389 
390   for (MachineBasicBlock &MBB : Fn) {
391     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
392     for (MachineInstr &MI : MBB) {
393       // Determine if this instruction would otherwise be compressed if not for
394       // an uncompressible register or offset.
395       RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
396       if (!RegImm.Reg && RegImm.Imm == 0)
397         continue;
398 
399       // Determine if there is a set of instructions for which replacing this
400       // register with a compressed register (and compressible offset if
401       // applicable) is possible and will allow compression.
402       SmallVector<MachineInstr *, 8> MIs;
403       Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
404       if (!NewReg)
405         continue;
406 
407       // Create the appropriate copy and/or offset.
408       if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
409         assert(isInt<12>(RegImm.Imm));
410         BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
411             .addReg(RegImm.Reg)
412             .addImm(RegImm.Imm);
413       } else {
414         // If we are looking at replacing an FPR register we don't expect to
415         // have any offset. The only compressible FP instructions with an offset
416         // are loads and stores, for which the offset applies to the GPR operand
417         // not the FPR operand.
418         assert(RegImm.Imm == 0);
419         unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg)
420                               ? RISCV::FSGNJ_S
421                               : RISCV::FSGNJ_D;
422         BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg)
423             .addReg(RegImm.Reg)
424             .addReg(RegImm.Reg);
425       }
426 
427       // Update the set of instructions to use the compressed register and
428       // compressible offset instead. These instructions should now be
429       // compressible.
430       // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
431       // expected to become compressible.
432       for (MachineInstr *UpdateMI : MIs)
433         updateOperands(*UpdateMI, RegImm, NewReg);
434     }
435   }
436   return true;
437 }
438 
439 /// Returns an instance of the Make Compressible Optimization pass.
440 FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
441   return new RISCVMakeCompressibleOpt();
442 }
443