1 //===- RISCVOptWInstrs.cpp - MI W instruction optimizations ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass does some optimizations for *W instructions at the MI level.
10 //
11 // First it removes unneeded sext.w instructions. Either because the sign
12 // extended bits aren't consumed or because the input was already sign extended
13 // by an earlier instruction.
14 //
15 // Then:
// 1. Unless explicitly disabled or the target prefers instructions with W
//    suffix, it removes the -w suffix from opw instructions whenever all users
//    are dependent only on the lower word of the result of the instruction.
19 // The cases handled are:
20 // * addw because c.add has a larger register encoding than c.addw.
21 // * addiw because it helps reduce test differences between RV32 and RV64
22 // w/o being a pessimization.
23 // * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
24 // * slliw because c.slliw doesn't exist and c.slli does
25 //
// 2. Or if explicitly enabled or the target prefers instructions with W
//    suffix, it adds the W suffix to the instruction whenever all users are
//    dependent only on the lower word of the result of the instruction.
29 // The cases handled are:
30 // * add/addi/sub/mul.
31 // * slli with imm < 32.
32 // * ld/lwu.
33 //===---------------------------------------------------------------------===//
34
35 #include "RISCV.h"
36 #include "RISCVMachineFunctionInfo.h"
37 #include "RISCVSubtarget.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/CodeGen/MachineFunctionPass.h"
41 #include "llvm/CodeGen/TargetInstrInfo.h"
42
43 using namespace llvm;
44
// Identifier of this pass; it is also handed to INITIALIZE_PASS as the pass
// argument string.
#define DEBUG_TYPE "riscv-opt-w-instrs"
// Human-readable pass name, returned by RISCVOptWInstrs::getPassName().
#define RISCV_OPT_W_INSTRS_NAME "RISC-V Optimize W Instructions"

// Incremented once for every sext.w erased by removeSExtWInstrs().
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
// Incremented for every instruction rewritten to its W form, both while
// removing sext.w and in appendWSuffixes().
STATISTIC(NumTransformedToWInstrs,
          "Number of instructions transformed to W-ops");

// When set, removeSExtWInstrs() returns without touching the function.
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
                                         cl::desc("Disable removal of sext.w"),
                                         cl::init(false), cl::Hidden);
// When set, runOnMachineFunction() never calls stripWSuffixes().
static cl::opt<bool> DisableStripWSuffix("riscv-disable-strip-w-suffix",
                                         cl::desc("Disable strip W suffix"),
                                         cl::init(false), cl::Hidden);
58
59 namespace {
60
61 class RISCVOptWInstrs : public MachineFunctionPass {
62 public:
63 static char ID;
64
RISCVOptWInstrs()65 RISCVOptWInstrs() : MachineFunctionPass(ID) {}
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68 bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
69 const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
70 bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
71 const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
72 bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
73 const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
74
getAnalysisUsage(AnalysisUsage & AU) const75 void getAnalysisUsage(AnalysisUsage &AU) const override {
76 AU.setPreservesCFG();
77 MachineFunctionPass::getAnalysisUsage(AU);
78 }
79
getPassName() const80 StringRef getPassName() const override { return RISCV_OPT_W_INSTRS_NAME; }
81 };
82
83 } // end anonymous namespace
84
// Definition of the pass identification member that the constructor hands to
// MachineFunctionPass.
char RISCVOptWInstrs::ID = 0;
// Registers the pass with DEBUG_TYPE as its argument string and
// RISCV_OPT_W_INSTRS_NAME as its display name.
INITIALIZE_PASS(RISCVOptWInstrs, DEBUG_TYPE, RISCV_OPT_W_INSTRS_NAME, false,
                false)
88
89 FunctionPass *llvm::createRISCVOptWInstrsPass() {
90 return new RISCVOptWInstrs();
91 }
92
vectorPseudoHasAllNBitUsers(const MachineOperand & UserOp,unsigned Bits)93 static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp,
94 unsigned Bits) {
95 const MachineInstr &MI = *UserOp.getParent();
96 unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode());
97
98 if (!MCOpcode)
99 return false;
100
101 const MCInstrDesc &MCID = MI.getDesc();
102 const uint64_t TSFlags = MCID.TSFlags;
103 if (!RISCVII::hasSEWOp(TSFlags))
104 return false;
105 assert(RISCVII::hasVLOp(TSFlags));
106 const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
107
108 if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
109 return false;
110
111 auto NumDemandedBits =
112 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
113 return NumDemandedBits && Bits >= *NumDemandedBits;
114 }
115
116 // Checks if all users only demand the lower \p OrigBits of the original
117 // instruction's result.
118 // TODO: handle multiple interdependent transformations
hasAllNBitUsers(const MachineInstr & OrigMI,const RISCVSubtarget & ST,const MachineRegisterInfo & MRI,unsigned OrigBits)119 static bool hasAllNBitUsers(const MachineInstr &OrigMI,
120 const RISCVSubtarget &ST,
121 const MachineRegisterInfo &MRI, unsigned OrigBits) {
122
123 SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
124 SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
125
126 Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
127
128 while (!Worklist.empty()) {
129 auto P = Worklist.pop_back_val();
130 const MachineInstr *MI = P.first;
131 unsigned Bits = P.second;
132
133 if (!Visited.insert(P).second)
134 continue;
135
136 // Only handle instructions with one def.
137 if (MI->getNumExplicitDefs() != 1)
138 return false;
139
140 Register DestReg = MI->getOperand(0).getReg();
141 if (!DestReg.isVirtual())
142 return false;
143
144 for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
145 const MachineInstr *UserMI = UserOp.getParent();
146 unsigned OpIdx = UserOp.getOperandNo();
147
148 switch (UserMI->getOpcode()) {
149 default:
150 if (vectorPseudoHasAllNBitUsers(UserOp, Bits))
151 break;
152 return false;
153
154 case RISCV::ADDIW:
155 case RISCV::ADDW:
156 case RISCV::DIVUW:
157 case RISCV::DIVW:
158 case RISCV::MULW:
159 case RISCV::REMUW:
160 case RISCV::REMW:
161 case RISCV::SLLIW:
162 case RISCV::SLLW:
163 case RISCV::SRAIW:
164 case RISCV::SRAW:
165 case RISCV::SRLIW:
166 case RISCV::SRLW:
167 case RISCV::SUBW:
168 case RISCV::ROLW:
169 case RISCV::RORW:
170 case RISCV::RORIW:
171 case RISCV::CLZW:
172 case RISCV::CTZW:
173 case RISCV::CPOPW:
174 case RISCV::SLLI_UW:
175 case RISCV::FMV_W_X:
176 case RISCV::FCVT_H_W:
177 case RISCV::FCVT_H_WU:
178 case RISCV::FCVT_S_W:
179 case RISCV::FCVT_S_WU:
180 case RISCV::FCVT_D_W:
181 case RISCV::FCVT_D_WU:
182 if (Bits >= 32)
183 break;
184 return false;
185 case RISCV::SEXT_B:
186 case RISCV::PACKH:
187 if (Bits >= 8)
188 break;
189 return false;
190 case RISCV::SEXT_H:
191 case RISCV::FMV_H_X:
192 case RISCV::ZEXT_H_RV32:
193 case RISCV::ZEXT_H_RV64:
194 case RISCV::PACKW:
195 if (Bits >= 16)
196 break;
197 return false;
198
199 case RISCV::PACK:
200 if (Bits >= (ST.getXLen() / 2))
201 break;
202 return false;
203
204 case RISCV::SRLI: {
205 // If we are shifting right by less than Bits, and users don't demand
206 // any bits that were shifted into [Bits-1:0], then we can consider this
207 // as an N-Bit user.
208 unsigned ShAmt = UserMI->getOperand(2).getImm();
209 if (Bits > ShAmt) {
210 Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
211 break;
212 }
213 return false;
214 }
215
216 // these overwrite higher input bits, otherwise the lower word of output
217 // depends only on the lower word of input. So check their uses read W.
218 case RISCV::SLLI:
219 if (Bits >= (ST.getXLen() - UserMI->getOperand(2).getImm()))
220 break;
221 Worklist.push_back(std::make_pair(UserMI, Bits));
222 break;
223 case RISCV::ANDI: {
224 uint64_t Imm = UserMI->getOperand(2).getImm();
225 if (Bits >= (unsigned)llvm::bit_width(Imm))
226 break;
227 Worklist.push_back(std::make_pair(UserMI, Bits));
228 break;
229 }
230 case RISCV::ORI: {
231 uint64_t Imm = UserMI->getOperand(2).getImm();
232 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
233 break;
234 Worklist.push_back(std::make_pair(UserMI, Bits));
235 break;
236 }
237
238 case RISCV::SLL:
239 case RISCV::BSET:
240 case RISCV::BCLR:
241 case RISCV::BINV:
242 // Operand 2 is the shift amount which uses log2(xlen) bits.
243 if (OpIdx == 2) {
244 if (Bits >= Log2_32(ST.getXLen()))
245 break;
246 return false;
247 }
248 Worklist.push_back(std::make_pair(UserMI, Bits));
249 break;
250
251 case RISCV::SRA:
252 case RISCV::SRL:
253 case RISCV::ROL:
254 case RISCV::ROR:
255 // Operand 2 is the shift amount which uses 6 bits.
256 if (OpIdx == 2 && Bits >= Log2_32(ST.getXLen()))
257 break;
258 return false;
259
260 case RISCV::ADD_UW:
261 case RISCV::SH1ADD_UW:
262 case RISCV::SH2ADD_UW:
263 case RISCV::SH3ADD_UW:
264 // Operand 1 is implicitly zero extended.
265 if (OpIdx == 1 && Bits >= 32)
266 break;
267 Worklist.push_back(std::make_pair(UserMI, Bits));
268 break;
269
270 case RISCV::BEXTI:
271 if (UserMI->getOperand(2).getImm() >= Bits)
272 return false;
273 break;
274
275 case RISCV::SB:
276 // The first argument is the value to store.
277 if (OpIdx == 0 && Bits >= 8)
278 break;
279 return false;
280 case RISCV::SH:
281 // The first argument is the value to store.
282 if (OpIdx == 0 && Bits >= 16)
283 break;
284 return false;
285 case RISCV::SW:
286 // The first argument is the value to store.
287 if (OpIdx == 0 && Bits >= 32)
288 break;
289 return false;
290
291 // For these, lower word of output in these operations, depends only on
292 // the lower word of input. So, we check all uses only read lower word.
293 case RISCV::COPY:
294 case RISCV::PHI:
295
296 case RISCV::ADD:
297 case RISCV::ADDI:
298 case RISCV::AND:
299 case RISCV::MUL:
300 case RISCV::OR:
301 case RISCV::SUB:
302 case RISCV::XOR:
303 case RISCV::XORI:
304
305 case RISCV::ANDN:
306 case RISCV::BREV8:
307 case RISCV::CLMUL:
308 case RISCV::ORC_B:
309 case RISCV::ORN:
310 case RISCV::SH1ADD:
311 case RISCV::SH2ADD:
312 case RISCV::SH3ADD:
313 case RISCV::XNOR:
314 case RISCV::BSETI:
315 case RISCV::BCLRI:
316 case RISCV::BINVI:
317 Worklist.push_back(std::make_pair(UserMI, Bits));
318 break;
319
320 case RISCV::PseudoCCMOVGPR:
321 // Either operand 4 or operand 5 is returned by this instruction. If
322 // only the lower word of the result is used, then only the lower word
323 // of operand 4 and 5 is used.
324 if (OpIdx != 4 && OpIdx != 5)
325 return false;
326 Worklist.push_back(std::make_pair(UserMI, Bits));
327 break;
328
329 case RISCV::CZERO_EQZ:
330 case RISCV::CZERO_NEZ:
331 case RISCV::VT_MASKC:
332 case RISCV::VT_MASKCN:
333 if (OpIdx != 1)
334 return false;
335 Worklist.push_back(std::make_pair(UserMI, Bits));
336 break;
337 }
338 }
339 }
340
341 return true;
342 }
343
hasAllWUsers(const MachineInstr & OrigMI,const RISCVSubtarget & ST,const MachineRegisterInfo & MRI)344 static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST,
345 const MachineRegisterInfo &MRI) {
346 return hasAllNBitUsers(OrigMI, ST, MRI, 32);
347 }
348
349 // This function returns true if the machine instruction always outputs a value
350 // where bits 63:32 match bit 31.
isSignExtendingOpW(const MachineInstr & MI,const MachineRegisterInfo & MRI,unsigned OpNo)351 static bool isSignExtendingOpW(const MachineInstr &MI,
352 const MachineRegisterInfo &MRI, unsigned OpNo) {
353 uint64_t TSFlags = MI.getDesc().TSFlags;
354
355 // Instructions that can be determined from opcode are marked in tablegen.
356 if (TSFlags & RISCVII::IsSignExtendingOpWMask)
357 return true;
358
359 // Special cases that require checking operands.
360 switch (MI.getOpcode()) {
361 // shifting right sufficiently makes the value 32-bit sign-extended
362 case RISCV::SRAI:
363 return MI.getOperand(2).getImm() >= 32;
364 case RISCV::SRLI:
365 return MI.getOperand(2).getImm() > 32;
366 // The LI pattern ADDI rd, X0, imm is sign extended.
367 case RISCV::ADDI:
368 return MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0;
369 // An ANDI with an 11 bit immediate will zero bits 63:11.
370 case RISCV::ANDI:
371 return isUInt<11>(MI.getOperand(2).getImm());
372 // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
373 case RISCV::ORI:
374 return !isUInt<11>(MI.getOperand(2).getImm());
375 // A bseti with X0 is sign extended if the immediate is less than 31.
376 case RISCV::BSETI:
377 return MI.getOperand(2).getImm() < 31 &&
378 MI.getOperand(1).getReg() == RISCV::X0;
379 // Copying from X0 produces zero.
380 case RISCV::COPY:
381 return MI.getOperand(1).getReg() == RISCV::X0;
382 // Ignore the scratch register destination.
383 case RISCV::PseudoAtomicLoadNand32:
384 return OpNo == 0;
385 case RISCV::PseudoVMV_X_S: {
386 // vmv.x.s has at least 33 sign bits if log2(sew) <= 5.
387 int64_t Log2SEW = MI.getOperand(2).getImm();
388 assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW");
389 return Log2SEW <= 5;
390 }
391 }
392
393 return false;
394 }
395
isSignExtendedW(Register SrcReg,const RISCVSubtarget & ST,const MachineRegisterInfo & MRI,SmallPtrSetImpl<MachineInstr * > & FixableDef)396 static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
397 const MachineRegisterInfo &MRI,
398 SmallPtrSetImpl<MachineInstr *> &FixableDef) {
399 SmallSet<Register, 4> Visited;
400 SmallVector<Register, 4> Worklist;
401
402 auto AddRegToWorkList = [&](Register SrcReg) {
403 if (!SrcReg.isVirtual())
404 return false;
405 Worklist.push_back(SrcReg);
406 return true;
407 };
408
409 if (!AddRegToWorkList(SrcReg))
410 return false;
411
412 while (!Worklist.empty()) {
413 Register Reg = Worklist.pop_back_val();
414
415 // If we already visited this register, we don't need to check it again.
416 if (!Visited.insert(Reg).second)
417 continue;
418
419 MachineInstr *MI = MRI.getVRegDef(Reg);
420 if (!MI)
421 continue;
422
423 int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
424 assert(OpNo != -1 && "Couldn't find register");
425
426 // If this is a sign extending operation we don't need to look any further.
427 if (isSignExtendingOpW(*MI, MRI, OpNo))
428 continue;
429
430 // Is this an instruction that propagates sign extend?
431 switch (MI->getOpcode()) {
432 default:
433 // Unknown opcode, give up.
434 return false;
435 case RISCV::COPY: {
436 const MachineFunction *MF = MI->getMF();
437 const RISCVMachineFunctionInfo *RVFI =
438 MF->getInfo<RISCVMachineFunctionInfo>();
439
440 // If this is the entry block and the register is livein, see if we know
441 // it is sign extended.
442 if (MI->getParent() == &MF->front()) {
443 Register VReg = MI->getOperand(0).getReg();
444 if (MF->getRegInfo().isLiveIn(VReg) && RVFI->isSExt32Register(VReg))
445 continue;
446 }
447
448 Register CopySrcReg = MI->getOperand(1).getReg();
449 if (CopySrcReg == RISCV::X10) {
450 // For a method return value, we check the ZExt/SExt flags in attribute.
451 // We assume the following code sequence for method call.
452 // PseudoCALL @bar, ...
453 // ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
454 // %0:gpr = COPY $x10
455 //
456 // We use the PseudoCall to look up the IR function being called to find
457 // its return attributes.
458 const MachineBasicBlock *MBB = MI->getParent();
459 auto II = MI->getIterator();
460 if (II == MBB->instr_begin() ||
461 (--II)->getOpcode() != RISCV::ADJCALLSTACKUP)
462 return false;
463
464 const MachineInstr &CallMI = *(--II);
465 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
466 return false;
467
468 auto *CalleeFn =
469 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
470 if (!CalleeFn)
471 return false;
472
473 auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
474 if (!IntTy)
475 return false;
476
477 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
478 unsigned BitWidth = IntTy->getBitWidth();
479 if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
480 (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
481 continue;
482 }
483
484 if (!AddRegToWorkList(CopySrcReg))
485 return false;
486
487 break;
488 }
489
490 // For these, we just need to check if the 1st operand is sign extended.
491 case RISCV::BCLRI:
492 case RISCV::BINVI:
493 case RISCV::BSETI:
494 if (MI->getOperand(2).getImm() >= 31)
495 return false;
496 [[fallthrough]];
497 case RISCV::REM:
498 case RISCV::ANDI:
499 case RISCV::ORI:
500 case RISCV::XORI:
501 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
502 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
503 // Logical operations use a sign extended 12-bit immediate.
504 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
505 return false;
506
507 break;
508 case RISCV::PseudoCCADDW:
509 case RISCV::PseudoCCADDIW:
510 case RISCV::PseudoCCSUBW:
511 case RISCV::PseudoCCSLLW:
512 case RISCV::PseudoCCSRLW:
513 case RISCV::PseudoCCSRAW:
514 case RISCV::PseudoCCSLLIW:
515 case RISCV::PseudoCCSRLIW:
516 case RISCV::PseudoCCSRAIW:
517 // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only
518 // need to check if operand 4 is sign extended.
519 if (!AddRegToWorkList(MI->getOperand(4).getReg()))
520 return false;
521 break;
522 case RISCV::REMU:
523 case RISCV::AND:
524 case RISCV::OR:
525 case RISCV::XOR:
526 case RISCV::ANDN:
527 case RISCV::ORN:
528 case RISCV::XNOR:
529 case RISCV::MAX:
530 case RISCV::MAXU:
531 case RISCV::MIN:
532 case RISCV::MINU:
533 case RISCV::PseudoCCMOVGPR:
534 case RISCV::PseudoCCAND:
535 case RISCV::PseudoCCOR:
536 case RISCV::PseudoCCXOR:
537 case RISCV::PHI: {
538 // If all incoming values are sign-extended, the output of AND, OR, XOR,
539 // MIN, MAX, or PHI is also sign-extended.
540
541 // The input registers for PHI are operand 1, 3, ...
542 // The input registers for PseudoCCMOVGPR are 4 and 5.
543 // The input registers for PseudoCCAND/OR/XOR are 4, 5, and 6.
544 // The input registers for others are operand 1 and 2.
545 unsigned B = 1, E = 3, D = 1;
546 switch (MI->getOpcode()) {
547 case RISCV::PHI:
548 E = MI->getNumOperands();
549 D = 2;
550 break;
551 case RISCV::PseudoCCMOVGPR:
552 B = 4;
553 E = 6;
554 break;
555 case RISCV::PseudoCCAND:
556 case RISCV::PseudoCCOR:
557 case RISCV::PseudoCCXOR:
558 B = 4;
559 E = 7;
560 break;
561 }
562
563 for (unsigned I = B; I != E; I += D) {
564 if (!MI->getOperand(I).isReg())
565 return false;
566
567 if (!AddRegToWorkList(MI->getOperand(I).getReg()))
568 return false;
569 }
570
571 break;
572 }
573
574 case RISCV::CZERO_EQZ:
575 case RISCV::CZERO_NEZ:
576 case RISCV::VT_MASKC:
577 case RISCV::VT_MASKCN:
578 // Instructions return zero or operand 1. Result is sign extended if
579 // operand 1 is sign extended.
580 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
581 return false;
582 break;
583
584 // With these opcode, we can "fix" them with the W-version
585 // if we know all users of the result only rely on bits 31:0
586 case RISCV::SLLI:
587 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
588 if (MI->getOperand(2).getImm() >= 32)
589 return false;
590 [[fallthrough]];
591 case RISCV::ADDI:
592 case RISCV::ADD:
593 case RISCV::LD:
594 case RISCV::LWU:
595 case RISCV::MUL:
596 case RISCV::SUB:
597 if (hasAllWUsers(*MI, ST, MRI)) {
598 FixableDef.insert(MI);
599 break;
600 }
601 return false;
602 }
603 }
604
605 // If we get here, then every node we visited produces a sign extended value
606 // or propagated sign extended values. So the result must be sign extended.
607 return true;
608 }
609
getWOp(unsigned Opcode)610 static unsigned getWOp(unsigned Opcode) {
611 switch (Opcode) {
612 case RISCV::ADDI:
613 return RISCV::ADDIW;
614 case RISCV::ADD:
615 return RISCV::ADDW;
616 case RISCV::LD:
617 case RISCV::LWU:
618 return RISCV::LW;
619 case RISCV::MUL:
620 return RISCV::MULW;
621 case RISCV::SLLI:
622 return RISCV::SLLIW;
623 case RISCV::SUB:
624 return RISCV::SUBW;
625 default:
626 llvm_unreachable("Unexpected opcode for replacement with W variant");
627 }
628 }
629
removeSExtWInstrs(MachineFunction & MF,const RISCVInstrInfo & TII,const RISCVSubtarget & ST,MachineRegisterInfo & MRI)630 bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
631 const RISCVInstrInfo &TII,
632 const RISCVSubtarget &ST,
633 MachineRegisterInfo &MRI) {
634 if (DisableSExtWRemoval)
635 return false;
636
637 bool MadeChange = false;
638 for (MachineBasicBlock &MBB : MF) {
639 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
640 // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
641 if (!RISCV::isSEXT_W(MI))
642 continue;
643
644 Register SrcReg = MI.getOperand(1).getReg();
645
646 SmallPtrSet<MachineInstr *, 4> FixableDefs;
647
648 // If all users only use the lower bits, this sext.w is redundant.
649 // Or if all definitions reaching MI sign-extend their output,
650 // then sext.w is redundant.
651 if (!hasAllWUsers(MI, ST, MRI) &&
652 !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
653 continue;
654
655 Register DstReg = MI.getOperand(0).getReg();
656 if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
657 continue;
658
659 // Convert Fixable instructions to their W versions.
660 for (MachineInstr *Fixable : FixableDefs) {
661 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
662 Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
663 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
664 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
665 Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
666 LLVM_DEBUG(dbgs() << " with " << *Fixable);
667 ++NumTransformedToWInstrs;
668 }
669
670 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
671 MRI.replaceRegWith(DstReg, SrcReg);
672 MRI.clearKillFlags(SrcReg);
673 MI.eraseFromParent();
674 ++NumRemovedSExtW;
675 MadeChange = true;
676 }
677 }
678
679 return MadeChange;
680 }
681
stripWSuffixes(MachineFunction & MF,const RISCVInstrInfo & TII,const RISCVSubtarget & ST,MachineRegisterInfo & MRI)682 bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
683 const RISCVInstrInfo &TII,
684 const RISCVSubtarget &ST,
685 MachineRegisterInfo &MRI) {
686 bool MadeChange = false;
687 for (MachineBasicBlock &MBB : MF) {
688 for (MachineInstr &MI : MBB) {
689 unsigned Opc;
690 switch (MI.getOpcode()) {
691 default:
692 continue;
693 case RISCV::ADDW: Opc = RISCV::ADD; break;
694 case RISCV::ADDIW: Opc = RISCV::ADDI; break;
695 case RISCV::MULW: Opc = RISCV::MUL; break;
696 case RISCV::SLLIW: Opc = RISCV::SLLI; break;
697 }
698
699 if (hasAllWUsers(MI, ST, MRI)) {
700 MI.setDesc(TII.get(Opc));
701 MadeChange = true;
702 }
703 }
704 }
705
706 return MadeChange;
707 }
708
appendWSuffixes(MachineFunction & MF,const RISCVInstrInfo & TII,const RISCVSubtarget & ST,MachineRegisterInfo & MRI)709 bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
710 const RISCVInstrInfo &TII,
711 const RISCVSubtarget &ST,
712 MachineRegisterInfo &MRI) {
713 bool MadeChange = false;
714 for (MachineBasicBlock &MBB : MF) {
715 for (MachineInstr &MI : MBB) {
716 unsigned WOpc;
717 // TODO: Add more?
718 switch (MI.getOpcode()) {
719 default:
720 continue;
721 case RISCV::ADD:
722 WOpc = RISCV::ADDW;
723 break;
724 case RISCV::ADDI:
725 WOpc = RISCV::ADDIW;
726 break;
727 case RISCV::SUB:
728 WOpc = RISCV::SUBW;
729 break;
730 case RISCV::MUL:
731 WOpc = RISCV::MULW;
732 break;
733 case RISCV::SLLI:
734 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
735 if (MI.getOperand(2).getImm() >= 32)
736 continue;
737 WOpc = RISCV::SLLIW;
738 break;
739 case RISCV::LD:
740 case RISCV::LWU:
741 WOpc = RISCV::LW;
742 break;
743 }
744
745 if (hasAllWUsers(MI, ST, MRI)) {
746 LLVM_DEBUG(dbgs() << "Replacing " << MI);
747 MI.setDesc(TII.get(WOpc));
748 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
749 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
750 MI.clearFlag(MachineInstr::MIFlag::IsExact);
751 LLVM_DEBUG(dbgs() << " with " << MI);
752 ++NumTransformedToWInstrs;
753 MadeChange = true;
754 }
755 }
756 }
757
758 return MadeChange;
759 }
760
runOnMachineFunction(MachineFunction & MF)761 bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
762 if (skipFunction(MF.getFunction()))
763 return false;
764
765 MachineRegisterInfo &MRI = MF.getRegInfo();
766 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
767 const RISCVInstrInfo &TII = *ST.getInstrInfo();
768
769 if (!ST.is64Bit())
770 return false;
771
772 bool MadeChange = false;
773 MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
774
775 if (!(DisableStripWSuffix || ST.preferWInst()))
776 MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
777
778 if (ST.preferWInst())
779 MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
780
781 return MadeChange;
782 }
783