//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions, either
// because the sign-extended bits aren't consumed or because the input was
// already sign-extended by an earlier instruction.
//
// Then:
// 1. Unless explicitly disabled or the target prefers instructions with the W
//    suffix, it removes the -w suffix from opw instructions whenever all users
//    are dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * addi.w because it helps reduce test differences between LA32 and LA64
//      without being a pessimization.
//
// 2. Or, if explicitly enabled or the target prefers instructions with the W
//    suffix, it adds the W suffix to the instruction whenever all users are
//    dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * add.d/addi.d/sub.d/mul.d.
//    * slli.d with imm < 32.
//    * ld.d/ld.wu.
//===---------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// DEBUG_TYPE doubles as the pass argument name passed to INITIALIZE_PASS;
// LOONGARCH_OPT_W_INSTRS_NAME is the human-readable name returned by
// getPassName().
#define DEBUG_TYPE "loongarch-opt-w-instrs"
#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"

// Bumped once for every sext.w (ADDI.W rd, rs, 0) erased by
// removeSExtWInstrs().
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
// Bumped once for every instruction whose opcode is switched to its W form,
// either while fixing up definitions in removeSExtWInstrs() or in
// convertToWSuffixes().
STATISTIC(NumTransformedToWInstrs,
          "Number of instructions transformed to W-ops");

// When set, removeSExtWInstrs() returns immediately without touching the
// function.
static cl::opt<bool>
    DisableSExtWRemoval("loongarch-disable-sextw-removal",
                        cl::desc("Disable removal of sign-extend insn"),
                        cl::init(false), cl::Hidden);
// When set, runOnMachineFunction() skips convertToDSuffixes(). The same step
// is also skipped when the subtarget reports preferWInst().
static cl::opt<bool>
    DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
                        cl::desc("Disable convert to D suffix"),
                        cl::init(false), cl::Hidden);

namespace {

// Machine function pass that removes redundant 32-bit sign extensions and
// converts instructions between their .w and .d forms when every user only
// reads the low 32 bits of the result.
class LoongArchOptWInstrs : public MachineFunctionPass {
public:
  // Pass identification; only its address is meaningful.
  static char ID;

  LoongArchOptWInstrs() : MachineFunctionPass(ID) {}

  // Pass entry point. Returns true if any instruction was changed or erased.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Erases sext.w instructions that are provably redundant, converting
  // "fixable" defining instructions to their W form where that is what makes
  // the extension redundant. Returns true on change.
  bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
                         const LoongArchSubtarget &ST,
                         MachineRegisterInfo &MRI);
  // Rewrites ADDI.W to ADDI.D when all users only read the low word.
  // Returns true on change.
  bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
                          const LoongArchSubtarget &ST,
                          MachineRegisterInfo &MRI);
  // Rewrites selected .d instructions (and LD.WU) to their .w form when all
  // users only read the low word. Returns true on change.
  bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
                          const LoongArchSubtarget &ST,
                          MachineRegisterInfo &MRI);

  // The pass only rewrites or erases individual instructions, so it declares
  // the CFG as preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
};

} // end anonymous namespace

char LoongArchOptWInstrs::ID = 0;
INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME, 89 false, false) 90 91 FunctionPass *llvm::createLoongArchOptWInstrsPass() { 92 return new LoongArchOptWInstrs(); 93 } 94 95 // Checks if all users only demand the lower \p OrigBits of the original 96 // instruction's result. 97 // TODO: handle multiple interdependent transformations 98 static bool hasAllNBitUsers(const MachineInstr &OrigMI, 99 const LoongArchSubtarget &ST, 100 const MachineRegisterInfo &MRI, unsigned OrigBits) { 101 102 SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited; 103 SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist; 104 105 Worklist.push_back(std::make_pair(&OrigMI, OrigBits)); 106 107 while (!Worklist.empty()) { 108 auto P = Worklist.pop_back_val(); 109 const MachineInstr *MI = P.first; 110 unsigned Bits = P.second; 111 112 if (!Visited.insert(P).second) 113 continue; 114 115 // Only handle instructions with one def. 116 if (MI->getNumExplicitDefs() != 1) 117 return false; 118 119 Register DestReg = MI->getOperand(0).getReg(); 120 if (!DestReg.isVirtual()) 121 return false; 122 123 for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) { 124 const MachineInstr *UserMI = UserOp.getParent(); 125 unsigned OpIdx = UserOp.getOperandNo(); 126 127 switch (UserMI->getOpcode()) { 128 default: 129 // TODO: Add vector 130 return false; 131 132 case LoongArch::ADD_W: 133 case LoongArch::ADDI_W: 134 case LoongArch::SUB_W: 135 case LoongArch::ALSL_W: 136 case LoongArch::ALSL_WU: 137 case LoongArch::MUL_W: 138 case LoongArch::MULH_W: 139 case LoongArch::MULH_WU: 140 case LoongArch::MULW_D_W: 141 case LoongArch::MULW_D_WU: 142 // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+. 
143 // case LoongArch::DIV_W: 144 // case LoongArch::DIV_WU: 145 // case LoongArch::MOD_W: 146 // case LoongArch::MOD_WU: 147 case LoongArch::SLL_W: 148 case LoongArch::SLLI_W: 149 case LoongArch::SRL_W: 150 case LoongArch::SRLI_W: 151 case LoongArch::SRA_W: 152 case LoongArch::SRAI_W: 153 case LoongArch::ROTR_W: 154 case LoongArch::ROTRI_W: 155 case LoongArch::CLO_W: 156 case LoongArch::CLZ_W: 157 case LoongArch::CTO_W: 158 case LoongArch::CTZ_W: 159 case LoongArch::BYTEPICK_W: 160 case LoongArch::REVB_2H: 161 case LoongArch::BITREV_4B: 162 case LoongArch::BITREV_W: 163 case LoongArch::BSTRINS_W: 164 case LoongArch::BSTRPICK_W: 165 case LoongArch::CRC_W_W_W: 166 case LoongArch::CRCC_W_W_W: 167 case LoongArch::MOVGR2FCSR: 168 case LoongArch::MOVGR2FRH_W: 169 case LoongArch::MOVGR2FR_W_64: 170 if (Bits >= 32) 171 break; 172 return false; 173 case LoongArch::MOVGR2CF: 174 if (Bits >= 1) 175 break; 176 return false; 177 case LoongArch::EXT_W_B: 178 if (Bits >= 8) 179 break; 180 return false; 181 case LoongArch::EXT_W_H: 182 if (Bits >= 16) 183 break; 184 return false; 185 186 case LoongArch::SRLI_D: { 187 // If we are shifting right by less than Bits, and users don't demand 188 // any bits that were shifted into [Bits-1:0], then we can consider this 189 // as an N-Bit user. 190 unsigned ShAmt = UserMI->getOperand(2).getImm(); 191 if (Bits > ShAmt) { 192 Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt)); 193 break; 194 } 195 return false; 196 } 197 198 // these overwrite higher input bits, otherwise the lower word of output 199 // depends only on the lower word of input. So check their uses read W. 
200 case LoongArch::SLLI_D: 201 if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm())) 202 break; 203 Worklist.push_back(std::make_pair(UserMI, Bits)); 204 break; 205 case LoongArch::ANDI: { 206 uint64_t Imm = UserMI->getOperand(2).getImm(); 207 if (Bits >= (unsigned)llvm::bit_width(Imm)) 208 break; 209 Worklist.push_back(std::make_pair(UserMI, Bits)); 210 break; 211 } 212 case LoongArch::ORI: { 213 uint64_t Imm = UserMI->getOperand(2).getImm(); 214 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 215 break; 216 Worklist.push_back(std::make_pair(UserMI, Bits)); 217 break; 218 } 219 220 case LoongArch::SLL_D: 221 // Operand 2 is the shift amount which uses log2(grlen) bits. 222 if (OpIdx == 2) { 223 if (Bits >= Log2_32(ST.getGRLen())) 224 break; 225 return false; 226 } 227 Worklist.push_back(std::make_pair(UserMI, Bits)); 228 break; 229 230 case LoongArch::SRA_D: 231 case LoongArch::SRL_D: 232 case LoongArch::ROTR_D: 233 // Operand 2 is the shift amount which uses 6 bits. 234 if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen())) 235 break; 236 return false; 237 238 case LoongArch::ST_B: 239 case LoongArch::STX_B: 240 case LoongArch::STGT_B: 241 case LoongArch::STLE_B: 242 case LoongArch::IOCSRWR_B: 243 // The first argument is the value to store. 244 if (OpIdx == 0 && Bits >= 8) 245 break; 246 return false; 247 case LoongArch::ST_H: 248 case LoongArch::STX_H: 249 case LoongArch::STGT_H: 250 case LoongArch::STLE_H: 251 case LoongArch::IOCSRWR_H: 252 // The first argument is the value to store. 253 if (OpIdx == 0 && Bits >= 16) 254 break; 255 return false; 256 case LoongArch::ST_W: 257 case LoongArch::STX_W: 258 case LoongArch::SCREL_W: 259 case LoongArch::STPTR_W: 260 case LoongArch::STGT_W: 261 case LoongArch::STLE_W: 262 case LoongArch::IOCSRWR_W: 263 // The first argument is the value to store. 
264 if (OpIdx == 0 && Bits >= 32) 265 break; 266 return false; 267 268 case LoongArch::CRC_W_B_W: 269 case LoongArch::CRCC_W_B_W: 270 if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32)) 271 break; 272 return false; 273 case LoongArch::CRC_W_H_W: 274 case LoongArch::CRCC_W_H_W: 275 if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32)) 276 break; 277 return false; 278 case LoongArch::CRC_W_D_W: 279 case LoongArch::CRCC_W_D_W: 280 if (OpIdx == 2 && Bits >= 32) 281 break; 282 return false; 283 284 // For these, lower word of output in these operations, depends only on 285 // the lower word of input. So, we check all uses only read lower word. 286 case LoongArch::COPY: 287 case LoongArch::PHI: 288 case LoongArch::ADD_D: 289 case LoongArch::ADDI_D: 290 case LoongArch::SUB_D: 291 case LoongArch::MUL_D: 292 case LoongArch::AND: 293 case LoongArch::OR: 294 case LoongArch::NOR: 295 case LoongArch::XOR: 296 case LoongArch::XORI: 297 case LoongArch::ANDN: 298 case LoongArch::ORN: 299 Worklist.push_back(std::make_pair(UserMI, Bits)); 300 break; 301 302 case LoongArch::MASKNEZ: 303 case LoongArch::MASKEQZ: 304 if (OpIdx != 1) 305 return false; 306 Worklist.push_back(std::make_pair(UserMI, Bits)); 307 break; 308 } 309 } 310 } 311 312 return true; 313 } 314 315 static bool hasAllWUsers(const MachineInstr &OrigMI, 316 const LoongArchSubtarget &ST, 317 const MachineRegisterInfo &MRI) { 318 return hasAllNBitUsers(OrigMI, ST, MRI, 32); 319 } 320 321 // This function returns true if the machine instruction always outputs a value 322 // where bits 63:32 match bit 31. 
323 static bool isSignExtendingOpW(const MachineInstr &MI, 324 const MachineRegisterInfo &MRI, unsigned OpNo) { 325 switch (MI.getOpcode()) { 326 // Normal cases 327 case LoongArch::ADD_W: 328 case LoongArch::SUB_W: 329 case LoongArch::ADDI_W: 330 case LoongArch::ALSL_W: 331 case LoongArch::LU12I_W: 332 case LoongArch::SLT: 333 case LoongArch::SLTU: 334 case LoongArch::SLTI: 335 case LoongArch::SLTUI: 336 case LoongArch::ANDI: 337 case LoongArch::MUL_W: 338 case LoongArch::MULH_W: 339 case LoongArch::MULH_WU: 340 case LoongArch::DIV_W: 341 case LoongArch::MOD_W: 342 case LoongArch::DIV_WU: 343 case LoongArch::MOD_WU: 344 case LoongArch::SLL_W: 345 case LoongArch::SRL_W: 346 case LoongArch::SRA_W: 347 case LoongArch::ROTR_W: 348 case LoongArch::SLLI_W: 349 case LoongArch::SRLI_W: 350 case LoongArch::SRAI_W: 351 case LoongArch::ROTRI_W: 352 case LoongArch::EXT_W_B: 353 case LoongArch::EXT_W_H: 354 case LoongArch::CLO_W: 355 case LoongArch::CLZ_W: 356 case LoongArch::CTO_W: 357 case LoongArch::CTZ_W: 358 case LoongArch::BYTEPICK_W: 359 case LoongArch::REVB_2H: 360 case LoongArch::BITREV_4B: 361 case LoongArch::BITREV_W: 362 case LoongArch::BSTRINS_W: 363 case LoongArch::BSTRPICK_W: 364 case LoongArch::LD_B: 365 case LoongArch::LD_H: 366 case LoongArch::LD_W: 367 case LoongArch::LD_BU: 368 case LoongArch::LD_HU: 369 case LoongArch::LL_W: 370 case LoongArch::LLACQ_W: 371 case LoongArch::RDTIMEL_W: 372 case LoongArch::RDTIMEH_W: 373 case LoongArch::CPUCFG: 374 case LoongArch::LDX_B: 375 case LoongArch::LDX_H: 376 case LoongArch::LDX_W: 377 case LoongArch::LDX_BU: 378 case LoongArch::LDX_HU: 379 case LoongArch::LDPTR_W: 380 case LoongArch::LDGT_B: 381 case LoongArch::LDGT_H: 382 case LoongArch::LDGT_W: 383 case LoongArch::LDLE_B: 384 case LoongArch::LDLE_H: 385 case LoongArch::LDLE_W: 386 case LoongArch::AMSWAP_B: 387 case LoongArch::AMSWAP_H: 388 case LoongArch::AMSWAP_W: 389 case LoongArch::AMADD_B: 390 case LoongArch::AMADD_H: 391 case LoongArch::AMADD_W: 392 case 
LoongArch::AMAND_W: 393 case LoongArch::AMOR_W: 394 case LoongArch::AMXOR_W: 395 case LoongArch::AMMAX_W: 396 case LoongArch::AMMIN_W: 397 case LoongArch::AMMAX_WU: 398 case LoongArch::AMMIN_WU: 399 case LoongArch::AMSWAP__DB_B: 400 case LoongArch::AMSWAP__DB_H: 401 case LoongArch::AMSWAP__DB_W: 402 case LoongArch::AMADD__DB_B: 403 case LoongArch::AMADD__DB_H: 404 case LoongArch::AMADD__DB_W: 405 case LoongArch::AMAND__DB_W: 406 case LoongArch::AMOR__DB_W: 407 case LoongArch::AMXOR__DB_W: 408 case LoongArch::AMMAX__DB_W: 409 case LoongArch::AMMIN__DB_W: 410 case LoongArch::AMMAX__DB_WU: 411 case LoongArch::AMMIN__DB_WU: 412 case LoongArch::AMCAS_B: 413 case LoongArch::AMCAS_H: 414 case LoongArch::AMCAS_W: 415 case LoongArch::AMCAS__DB_B: 416 case LoongArch::AMCAS__DB_H: 417 case LoongArch::AMCAS__DB_W: 418 case LoongArch::CRC_W_B_W: 419 case LoongArch::CRC_W_H_W: 420 case LoongArch::CRC_W_W_W: 421 case LoongArch::CRC_W_D_W: 422 case LoongArch::CRCC_W_B_W: 423 case LoongArch::CRCC_W_H_W: 424 case LoongArch::CRCC_W_W_W: 425 case LoongArch::CRCC_W_D_W: 426 case LoongArch::IOCSRRD_B: 427 case LoongArch::IOCSRRD_H: 428 case LoongArch::IOCSRRD_W: 429 case LoongArch::MOVFR2GR_S: 430 case LoongArch::MOVFCSR2GR: 431 case LoongArch::MOVCF2GR: 432 case LoongArch::MOVFRH2GR_S: 433 case LoongArch::MOVFR2GR_S_64: 434 // TODO: Add vector 435 return true; 436 // Special cases that require checking operands. 437 // shifting right sufficiently makes the value 32-bit sign-extended 438 case LoongArch::SRAI_D: 439 return MI.getOperand(2).getImm() >= 32; 440 case LoongArch::SRLI_D: 441 return MI.getOperand(2).getImm() > 32; 442 // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended. 443 case LoongArch::ADDI_D: 444 case LoongArch::ORI: 445 return MI.getOperand(1).isReg() && 446 MI.getOperand(1).getReg() == LoongArch::R0; 447 // A bits extract is sign extended if the msb is less than 31. 
448 case LoongArch::BSTRPICK_D: 449 return MI.getOperand(2).getImm() < 31; 450 // Copying from R0 produces zero. 451 case LoongArch::COPY: 452 return MI.getOperand(1).getReg() == LoongArch::R0; 453 // Ignore the scratch register destination. 454 case LoongArch::PseudoMaskedAtomicSwap32: 455 case LoongArch::PseudoAtomicSwap32: 456 case LoongArch::PseudoMaskedAtomicLoadAdd32: 457 case LoongArch::PseudoMaskedAtomicLoadSub32: 458 case LoongArch::PseudoAtomicLoadNand32: 459 case LoongArch::PseudoMaskedAtomicLoadNand32: 460 case LoongArch::PseudoAtomicLoadAdd32: 461 case LoongArch::PseudoAtomicLoadSub32: 462 case LoongArch::PseudoAtomicLoadAnd32: 463 case LoongArch::PseudoAtomicLoadOr32: 464 case LoongArch::PseudoAtomicLoadXor32: 465 case LoongArch::PseudoMaskedAtomicLoadUMax32: 466 case LoongArch::PseudoMaskedAtomicLoadUMin32: 467 case LoongArch::PseudoCmpXchg32: 468 case LoongArch::PseudoMaskedCmpXchg32: 469 case LoongArch::PseudoMaskedAtomicLoadMax32: 470 case LoongArch::PseudoMaskedAtomicLoadMin32: 471 return OpNo == 0; 472 } 473 474 return false; 475 } 476 477 static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, 478 const MachineRegisterInfo &MRI, 479 SmallPtrSetImpl<MachineInstr *> &FixableDef) { 480 SmallSet<Register, 4> Visited; 481 SmallVector<Register, 4> Worklist; 482 483 auto AddRegToWorkList = [&](Register SrcReg) { 484 if (!SrcReg.isVirtual()) 485 return false; 486 Worklist.push_back(SrcReg); 487 return true; 488 }; 489 490 if (!AddRegToWorkList(SrcReg)) 491 return false; 492 493 while (!Worklist.empty()) { 494 Register Reg = Worklist.pop_back_val(); 495 496 // If we already visited this register, we don't need to check it again. 
497 if (!Visited.insert(Reg).second) 498 continue; 499 500 MachineInstr *MI = MRI.getVRegDef(Reg); 501 if (!MI) 502 continue; 503 504 int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr); 505 assert(OpNo != -1 && "Couldn't find register"); 506 507 // If this is a sign extending operation we don't need to look any further. 508 if (isSignExtendingOpW(*MI, MRI, OpNo)) 509 continue; 510 511 // Is this an instruction that propagates sign extend? 512 switch (MI->getOpcode()) { 513 default: 514 // Unknown opcode, give up. 515 return false; 516 case LoongArch::COPY: { 517 const MachineFunction *MF = MI->getMF(); 518 const LoongArchMachineFunctionInfo *LAFI = 519 MF->getInfo<LoongArchMachineFunctionInfo>(); 520 521 // If this is the entry block and the register is livein, see if we know 522 // it is sign extended. 523 if (MI->getParent() == &MF->front()) { 524 Register VReg = MI->getOperand(0).getReg(); 525 if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg)) 526 continue; 527 } 528 529 Register CopySrcReg = MI->getOperand(1).getReg(); 530 if (CopySrcReg == LoongArch::R4) { 531 // For a method return value, we check the ZExt/SExt flags in attribute. 532 // We assume the following code sequence for method call. 533 // PseudoCALL @bar, ... 534 // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 535 // %0:gpr = COPY $r4 536 // 537 // We use the PseudoCall to look up the IR function being called to find 538 // its return attributes. 
539 const MachineBasicBlock *MBB = MI->getParent(); 540 auto II = MI->getIterator(); 541 if (II == MBB->instr_begin() || 542 (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP) 543 return false; 544 545 const MachineInstr &CallMI = *(--II); 546 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal()) 547 return false; 548 549 auto *CalleeFn = 550 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal()); 551 if (!CalleeFn) 552 return false; 553 554 auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType()); 555 if (!IntTy) 556 return false; 557 558 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs(); 559 unsigned BitWidth = IntTy->getBitWidth(); 560 if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) || 561 (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt))) 562 continue; 563 } 564 565 if (!AddRegToWorkList(CopySrcReg)) 566 return false; 567 568 break; 569 } 570 571 // For these, we just need to check if the 1st operand is sign extended. 572 case LoongArch::MOD_D: 573 case LoongArch::ANDI: 574 case LoongArch::ORI: 575 case LoongArch::XORI: 576 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. 577 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 578 // Logical operations use a sign extended 12-bit immediate. 579 if (!AddRegToWorkList(MI->getOperand(1).getReg())) 580 return false; 581 582 break; 583 case LoongArch::MOD_DU: 584 case LoongArch::AND: 585 case LoongArch::OR: 586 case LoongArch::XOR: 587 case LoongArch::ANDN: 588 case LoongArch::ORN: 589 case LoongArch::PHI: { 590 // If all incoming values are sign-extended, the output of AND, OR, XOR, 591 // or PHI is also sign-extended. 592 593 // The input registers for PHI are operand 1, 3, ... 594 // The input registers for others are operand 1 and 2. 
595 unsigned B = 1, E = 3, D = 1; 596 switch (MI->getOpcode()) { 597 case LoongArch::PHI: 598 E = MI->getNumOperands(); 599 D = 2; 600 break; 601 } 602 603 for (unsigned I = B; I != E; I += D) { 604 if (!MI->getOperand(I).isReg()) 605 return false; 606 607 if (!AddRegToWorkList(MI->getOperand(I).getReg())) 608 return false; 609 } 610 611 break; 612 } 613 614 case LoongArch::MASKEQZ: 615 case LoongArch::MASKNEZ: 616 // Instructions return zero or operand 1. Result is sign extended if 617 // operand 1 is sign extended. 618 if (!AddRegToWorkList(MI->getOperand(1).getReg())) 619 return false; 620 break; 621 622 // With these opcode, we can "fix" them with the W-version 623 // if we know all users of the result only rely on bits 31:0 624 case LoongArch::SLLI_D: 625 // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits 626 if (MI->getOperand(2).getImm() >= 32) 627 return false; 628 [[fallthrough]]; 629 case LoongArch::ADDI_D: 630 case LoongArch::ADD_D: 631 case LoongArch::LD_D: 632 case LoongArch::LD_WU: 633 case LoongArch::MUL_D: 634 case LoongArch::SUB_D: 635 if (hasAllWUsers(*MI, ST, MRI)) { 636 FixableDef.insert(MI); 637 break; 638 } 639 return false; 640 // If all incoming values are sign-extended and all users only use 641 // the lower 32 bits, then convert them to W versions. 642 case LoongArch::DIV_D: { 643 if (!AddRegToWorkList(MI->getOperand(1).getReg())) 644 return false; 645 if (!AddRegToWorkList(MI->getOperand(2).getReg())) 646 return false; 647 if (hasAllWUsers(*MI, ST, MRI)) { 648 FixableDef.insert(MI); 649 break; 650 } 651 return false; 652 } 653 } 654 } 655 656 // If we get here, then every node we visited produces a sign extended value 657 // or propagated sign extended values. So the result must be sign extended. 
658 return true; 659 } 660 661 static unsigned getWOp(unsigned Opcode) { 662 switch (Opcode) { 663 case LoongArch::ADDI_D: 664 return LoongArch::ADDI_W; 665 case LoongArch::ADD_D: 666 return LoongArch::ADD_W; 667 case LoongArch::DIV_D: 668 return LoongArch::DIV_W; 669 case LoongArch::LD_D: 670 case LoongArch::LD_WU: 671 return LoongArch::LD_W; 672 case LoongArch::MUL_D: 673 return LoongArch::MUL_W; 674 case LoongArch::SLLI_D: 675 return LoongArch::SLLI_W; 676 case LoongArch::SUB_D: 677 return LoongArch::SUB_W; 678 default: 679 llvm_unreachable("Unexpected opcode for replacement with W variant"); 680 } 681 } 682 683 bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF, 684 const LoongArchInstrInfo &TII, 685 const LoongArchSubtarget &ST, 686 MachineRegisterInfo &MRI) { 687 if (DisableSExtWRemoval) 688 return false; 689 690 bool MadeChange = false; 691 for (MachineBasicBlock &MBB : MF) { 692 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 693 // We're looking for the sext.w pattern ADDI.W rd, rs, 0. 694 if (!LoongArch::isSEXT_W(MI)) 695 continue; 696 697 Register SrcReg = MI.getOperand(1).getReg(); 698 699 SmallPtrSet<MachineInstr *, 4> FixableDefs; 700 701 // If all users only use the lower bits, this sext.w is redundant. 702 // Or if all definitions reaching MI sign-extend their output, 703 // then sext.w is redundant. 704 if (!hasAllWUsers(MI, ST, MRI) && 705 !isSignExtendedW(SrcReg, ST, MRI, FixableDefs)) 706 continue; 707 708 Register DstReg = MI.getOperand(0).getReg(); 709 if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) 710 continue; 711 712 // Convert Fixable instructions to their W versions. 
713 for (MachineInstr *Fixable : FixableDefs) { 714 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable); 715 Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode()))); 716 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap); 717 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap); 718 Fixable->clearFlag(MachineInstr::MIFlag::IsExact); 719 LLVM_DEBUG(dbgs() << " with " << *Fixable); 720 ++NumTransformedToWInstrs; 721 } 722 723 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); 724 MRI.replaceRegWith(DstReg, SrcReg); 725 MRI.clearKillFlags(SrcReg); 726 MI.eraseFromParent(); 727 ++NumRemovedSExtW; 728 MadeChange = true; 729 } 730 } 731 732 return MadeChange; 733 } 734 735 bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF, 736 const LoongArchInstrInfo &TII, 737 const LoongArchSubtarget &ST, 738 MachineRegisterInfo &MRI) { 739 bool MadeChange = false; 740 for (MachineBasicBlock &MBB : MF) { 741 for (MachineInstr &MI : MBB) { 742 unsigned Opc; 743 switch (MI.getOpcode()) { 744 default: 745 continue; 746 case LoongArch::ADDI_W: 747 Opc = LoongArch::ADDI_D; 748 break; 749 } 750 751 if (hasAllWUsers(MI, ST, MRI)) { 752 MI.setDesc(TII.get(Opc)); 753 MadeChange = true; 754 } 755 } 756 } 757 758 return MadeChange; 759 } 760 761 bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF, 762 const LoongArchInstrInfo &TII, 763 const LoongArchSubtarget &ST, 764 MachineRegisterInfo &MRI) { 765 bool MadeChange = false; 766 for (MachineBasicBlock &MBB : MF) { 767 for (MachineInstr &MI : MBB) { 768 unsigned WOpc; 769 // TODO: Add more? 
770 switch (MI.getOpcode()) { 771 default: 772 continue; 773 case LoongArch::ADD_D: 774 WOpc = LoongArch::ADD_W; 775 break; 776 case LoongArch::ADDI_D: 777 WOpc = LoongArch::ADDI_W; 778 break; 779 case LoongArch::SUB_D: 780 WOpc = LoongArch::SUB_W; 781 break; 782 case LoongArch::MUL_D: 783 WOpc = LoongArch::MUL_W; 784 break; 785 case LoongArch::SLLI_D: 786 // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits 787 if (MI.getOperand(2).getImm() >= 32) 788 continue; 789 WOpc = LoongArch::SLLI_W; 790 break; 791 case LoongArch::LD_D: 792 case LoongArch::LD_WU: 793 WOpc = LoongArch::LD_W; 794 break; 795 } 796 797 if (hasAllWUsers(MI, ST, MRI)) { 798 LLVM_DEBUG(dbgs() << "Replacing " << MI); 799 MI.setDesc(TII.get(WOpc)); 800 MI.clearFlag(MachineInstr::MIFlag::NoSWrap); 801 MI.clearFlag(MachineInstr::MIFlag::NoUWrap); 802 MI.clearFlag(MachineInstr::MIFlag::IsExact); 803 LLVM_DEBUG(dbgs() << " with " << MI); 804 ++NumTransformedToWInstrs; 805 MadeChange = true; 806 } 807 } 808 } 809 810 return MadeChange; 811 } 812 813 bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) { 814 if (skipFunction(MF.getFunction())) 815 return false; 816 817 MachineRegisterInfo &MRI = MF.getRegInfo(); 818 const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>(); 819 const LoongArchInstrInfo &TII = *ST.getInstrInfo(); 820 821 if (!ST.is64Bit()) 822 return false; 823 824 bool MadeChange = false; 825 MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); 826 827 if (!(DisableCvtToDSuffix || ST.preferWInst())) 828 MadeChange |= convertToDSuffixes(MF, TII, ST, MRI); 829 830 if (ST.preferWInst()) 831 MadeChange |= convertToWSuffixes(MF, TII, ST, MRI); 832 833 return MadeChange; 834 } 835