1 //===- HexagonBitSimplify.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "BitTracker.h" 10 #include "Hexagon.h" 11 #include "HexagonBitTracker.h" 12 #include "HexagonInstrInfo.h" 13 #include "HexagonRegisterInfo.h" 14 #include "HexagonSubtarget.h" 15 #include "llvm/ADT/BitVector.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineDominators.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstr.h" 23 #include "llvm/CodeGen/MachineOperand.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/TargetRegisterInfo.h" 26 #include "llvm/IR/DebugLoc.h" 27 #include "llvm/InitializePasses.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/Pass.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 #define DEBUG_TYPE "hexbit" 34 35 using namespace llvm; 36 37 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden, 38 cl::init(true), cl::desc("Preserve subregisters in tied operands")); 39 static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden, 40 cl::init(true), cl::desc("Generate extract instructions")); 41 static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden, 42 cl::init(true), cl::desc("Generate bitsplit instructions")); 43 44 static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden, 45 cl::init(std::numeric_limits<unsigned>::max())); 46 static unsigned CountExtract = 0; 47 static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, 48 
cl::init(std::numeric_limits<unsigned>::max())); 49 static unsigned CountBitSplit = 0; 50 51 static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit", 52 cl::Hidden, cl::init(1000)); 53 54 namespace { 55 56 // Set of virtual registers, based on BitVector. 57 struct RegisterSet { 58 RegisterSet() = default; 59 explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {} 60 RegisterSet(const RegisterSet &RS) = default; 61 62 void clear() { 63 Bits.clear(); 64 LRU.clear(); 65 } 66 67 unsigned count() const { 68 return Bits.count(); 69 } 70 71 unsigned find_first() const { 72 int First = Bits.find_first(); 73 if (First < 0) 74 return 0; 75 return x2v(First); 76 } 77 78 unsigned find_next(unsigned Prev) const { 79 int Next = Bits.find_next(v2x(Prev)); 80 if (Next < 0) 81 return 0; 82 return x2v(Next); 83 } 84 85 RegisterSet &insert(unsigned R) { 86 unsigned Idx = v2x(R); 87 ensure(Idx); 88 bool Exists = Bits.test(Idx); 89 Bits.set(Idx); 90 if (!Exists) { 91 LRU.push_back(Idx); 92 if (LRU.size() > RegisterSetLimit) { 93 unsigned T = LRU.front(); 94 Bits.reset(T); 95 LRU.pop_front(); 96 } 97 } 98 return *this; 99 } 100 RegisterSet &remove(unsigned R) { 101 unsigned Idx = v2x(R); 102 if (Idx < Bits.size()) { 103 bool Exists = Bits.test(Idx); 104 Bits.reset(Idx); 105 if (Exists) { 106 auto F = llvm::find(LRU, Idx); 107 assert(F != LRU.end()); 108 LRU.erase(F); 109 } 110 } 111 return *this; 112 } 113 114 RegisterSet &insert(const RegisterSet &Rs) { 115 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) 116 insert(R); 117 return *this; 118 } 119 RegisterSet &remove(const RegisterSet &Rs) { 120 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) 121 remove(R); 122 return *this; 123 } 124 125 bool operator[](unsigned R) const { 126 unsigned Idx = v2x(R); 127 return Idx < Bits.size() ? 
Bits[Idx] : false; 128 } 129 bool has(unsigned R) const { 130 unsigned Idx = v2x(R); 131 if (Idx >= Bits.size()) 132 return false; 133 return Bits.test(Idx); 134 } 135 136 bool empty() const { 137 return !Bits.any(); 138 } 139 bool includes(const RegisterSet &Rs) const { 140 // A.test(B) <=> A-B != {} 141 return !Rs.Bits.test(Bits); 142 } 143 bool intersects(const RegisterSet &Rs) const { 144 return Bits.anyCommon(Rs.Bits); 145 } 146 147 private: 148 BitVector Bits; 149 std::deque<unsigned> LRU; 150 151 void ensure(unsigned Idx) { 152 if (Bits.size() <= Idx) 153 Bits.resize(std::max(Idx+1, 32U)); 154 } 155 156 static inline unsigned v2x(unsigned v) { 157 return Register(v).virtRegIndex(); 158 } 159 160 static inline unsigned x2v(unsigned x) { 161 return Register::index2VirtReg(x); 162 } 163 }; 164 165 struct PrintRegSet { 166 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 167 : RS(S), TRI(RI) {} 168 169 friend raw_ostream &operator<< (raw_ostream &OS, 170 const PrintRegSet &P); 171 172 private: 173 const RegisterSet &RS; 174 const TargetRegisterInfo *TRI; 175 }; 176 177 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) 178 LLVM_ATTRIBUTE_UNUSED; 179 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 180 OS << '{'; 181 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 182 OS << ' ' << printReg(R, P.TRI); 183 OS << " }"; 184 return OS; 185 } 186 187 class Transformation; 188 189 class HexagonBitSimplify : public MachineFunctionPass { 190 public: 191 static char ID; 192 193 HexagonBitSimplify() : MachineFunctionPass(ID) {} 194 195 StringRef getPassName() const override { 196 return "Hexagon bit simplification"; 197 } 198 199 void getAnalysisUsage(AnalysisUsage &AU) const override { 200 AU.addRequired<MachineDominatorTreeWrapperPass>(); 201 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 202 MachineFunctionPass::getAnalysisUsage(AU); 203 } 204 205 bool runOnMachineFunction(MachineFunction &MF) override; 206 
207 static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); 208 static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); 209 static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, 210 const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); 211 static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, 212 uint16_t W); 213 static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, 214 uint16_t W, uint64_t &U); 215 static bool replaceReg(Register OldR, Register NewR, 216 MachineRegisterInfo &MRI); 217 static bool getSubregMask(const BitTracker::RegisterRef &RR, 218 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); 219 static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR, 220 MachineRegisterInfo &MRI); 221 static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR, 222 unsigned NewSR, MachineRegisterInfo &MRI); 223 static bool parseRegSequence(const MachineInstr &I, 224 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, 225 const MachineRegisterInfo &MRI); 226 227 static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, 228 uint16_t Begin); 229 static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, 230 uint16_t Begin, const HexagonInstrInfo &HII); 231 232 static const TargetRegisterClass *getFinalVRegClass( 233 const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); 234 static bool isTransparentCopy(const BitTracker::RegisterRef &RD, 235 const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); 236 237 private: 238 MachineDominatorTree *MDT = nullptr; 239 240 bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); 241 static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, 242 unsigned NewSub = Hexagon::NoSubRegister); 243 }; 244 245 using HBS = HexagonBitSimplify; 246 247 // The purpose of this class is to provide a common facility to traverse 248 // the function top-down or bottom-up via the 
dominator tree, and keep 249 // track of the available registers. 250 class Transformation { 251 public: 252 bool TopDown; 253 254 Transformation(bool TD) : TopDown(TD) {} 255 virtual ~Transformation() = default; 256 257 virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; 258 }; 259 260 } // end anonymous namespace 261 262 char HexagonBitSimplify::ID = 0; 263 264 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify", 265 "Hexagon bit simplification", false, false) 266 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 267 INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify", 268 "Hexagon bit simplification", false, false) 269 270 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, 271 RegisterSet &AVs) { 272 bool Changed = false; 273 274 if (T.TopDown) 275 Changed = T.processBlock(B, AVs); 276 277 RegisterSet Defs; 278 for (auto &I : B) 279 getInstrDefs(I, Defs); 280 RegisterSet NewAVs = AVs; 281 NewAVs.insert(Defs); 282 283 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B))) 284 Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs); 285 286 if (!T.TopDown) 287 Changed |= T.processBlock(B, AVs); 288 289 return Changed; 290 } 291 292 // 293 // Utility functions: 294 // 295 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, 296 RegisterSet &Defs) { 297 for (auto &Op : MI.operands()) { 298 if (!Op.isReg() || !Op.isDef()) 299 continue; 300 Register R = Op.getReg(); 301 if (!R.isVirtual()) 302 continue; 303 Defs.insert(R); 304 } 305 } 306 307 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, 308 RegisterSet &Uses) { 309 for (auto &Op : MI.operands()) { 310 if (!Op.isReg() || !Op.isUse()) 311 continue; 312 Register R = Op.getReg(); 313 if (!R.isVirtual()) 314 continue; 315 Uses.insert(R); 316 } 317 } 318 319 // Check if all the bits in range [B, E) in both cells are equal. 
320 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, 321 uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, 322 uint16_t W) { 323 for (uint16_t i = 0; i < W; ++i) { 324 // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. 325 if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) 326 return false; 327 // Same for RC2[i]. 328 if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) 329 return false; 330 if (RC1[B1+i] != RC2[B2+i]) 331 return false; 332 } 333 return true; 334 } 335 336 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, 337 uint16_t B, uint16_t W) { 338 assert(B < RC.width() && B+W <= RC.width()); 339 for (uint16_t i = B; i < B+W; ++i) 340 if (!RC[i].is(0)) 341 return false; 342 return true; 343 } 344 345 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, 346 uint16_t B, uint16_t W, uint64_t &U) { 347 assert(B < RC.width() && B+W <= RC.width()); 348 int64_t T = 0; 349 for (uint16_t i = B+W; i > B; --i) { 350 const BitTracker::BitValue &BV = RC[i-1]; 351 T <<= 1; 352 if (BV.is(1)) 353 T |= 1; 354 else if (!BV.is(0)) 355 return false; 356 } 357 U = T; 358 return true; 359 } 360 361 bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR, 362 MachineRegisterInfo &MRI) { 363 if (!OldR.isVirtual() || !NewR.isVirtual()) 364 return false; 365 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 366 decltype(End) NextI; 367 for (auto I = Begin; I != End; I = NextI) { 368 NextI = std::next(I); 369 I->setReg(NewR); 370 } 371 return Begin != End; 372 } 373 374 bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR, 375 unsigned NewSR, 376 MachineRegisterInfo &MRI) { 377 if (!OldR.isVirtual() || !NewR.isVirtual()) 378 return false; 379 if (hasTiedUse(OldR, MRI, NewSR)) 380 return false; 381 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 382 decltype(End) NextI; 383 for (auto I = Begin; I != End; I = NextI) { 
384 NextI = std::next(I); 385 I->setReg(NewR); 386 I->setSubReg(NewSR); 387 } 388 return Begin != End; 389 } 390 391 bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR, 392 Register NewR, unsigned NewSR, 393 MachineRegisterInfo &MRI) { 394 if (!OldR.isVirtual() || !NewR.isVirtual()) 395 return false; 396 if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR)) 397 return false; 398 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 399 decltype(End) NextI; 400 for (auto I = Begin; I != End; I = NextI) { 401 NextI = std::next(I); 402 if (I->getSubReg() != OldSR) 403 continue; 404 I->setReg(NewR); 405 I->setSubReg(NewSR); 406 } 407 return Begin != End; 408 } 409 410 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB 411 // of Sub in Reg, and set Width to the size of Sub in bits. Return true, 412 // if this succeeded, otherwise return false. 413 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, 414 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { 415 const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); 416 if (RR.Sub == 0) { 417 Begin = 0; 418 Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); 419 return true; 420 } 421 422 Begin = 0; 423 424 switch (RC->getID()) { 425 case Hexagon::DoubleRegsRegClassID: 426 case Hexagon::HvxWRRegClassID: 427 Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; 428 if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) 429 Begin = Width; 430 break; 431 default: 432 return false; 433 } 434 return true; 435 } 436 437 438 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high 439 // subregister. 
440 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, 441 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, 442 const MachineRegisterInfo &MRI) { 443 assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); 444 unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); 445 auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg()); 446 auto &HRI = static_cast<const HexagonRegisterInfo&>( 447 *MRI.getTargetRegisterInfo()); 448 unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo); 449 unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi); 450 assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); 451 if (Sub1 == SubLo && Sub2 == SubHi) { 452 SL = I.getOperand(1); 453 SH = I.getOperand(3); 454 return true; 455 } 456 if (Sub1 == SubHi && Sub2 == SubLo) { 457 SH = I.getOperand(1); 458 SL = I.getOperand(3); 459 return true; 460 } 461 return false; 462 } 463 464 // All stores (except 64-bit stores) take a 32-bit register as the source 465 // of the value to be stored. If the instruction stores into a location 466 // that is shorter than 32 bits, some bits of the source register are not 467 // used. For each store instruction, calculate the set of used bits in 468 // the source register, and set appropriate bits in Bits. Return true if 469 // the bits are calculated, false otherwise. 
470 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, 471 uint16_t Begin) { 472 using namespace Hexagon; 473 474 switch (Opc) { 475 // Store byte 476 case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 477 case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new 478 case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 479 case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 480 case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 481 case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 482 case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new 483 case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new 484 case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new 485 case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new 486 case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 487 case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new 488 case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 489 case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 490 case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 491 case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 492 case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new 493 case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new 494 case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new 495 case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new 496 case S4_storerb_ap: // memb(Re32=#U6)=Rt32 497 case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new 498 case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 499 case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new 500 case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 501 case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new 502 case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 503 case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new 504 case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 505 case S2_storerbnew_pci: // 
memb(Rx32++#s4:0:circ(Mu2))=Nt8.new 506 case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 507 case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new 508 case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 509 case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new 510 case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 511 case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 512 case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 513 case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 514 case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new 515 case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new 516 case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new 517 case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new 518 case S2_storerbgp: // memb(gp+#u16:0)=Rt32 519 case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new 520 case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 521 case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 522 case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 523 case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 524 case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new 525 case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new 526 case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new 527 case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new 528 Bits.set(Begin, Begin+8); 529 return true; 530 531 // Store low half 532 case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 533 case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new 534 case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 535 case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 536 case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 537 case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 538 case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new 539 case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new 540 
case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new 541 case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new 542 case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 543 case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new 544 case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 545 case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 546 case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 547 case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 548 case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new 549 case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new 550 case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new 551 case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new 552 case S4_storerh_ap: // memh(Re32=#U6)=Rt32 553 case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new 554 case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 555 case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new 556 case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 557 case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new 558 case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 559 case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new 560 case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 561 case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new 562 case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 563 case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new 564 case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 565 case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 566 case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 567 case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 568 case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 569 case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new 570 case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new 571 case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new 572 
case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new 573 case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new 574 case S2_storerhgp: // memh(gp+#u16:1)=Rt32 575 case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new 576 case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 577 case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 578 case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 579 case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 580 case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new 581 case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new 582 case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new 583 case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new 584 Bits.set(Begin, Begin+16); 585 return true; 586 587 // Store high half 588 case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 589 case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 590 case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 591 case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 592 case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 593 case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 594 case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 595 case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 596 case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 597 case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 598 case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 599 case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 600 case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 601 case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 602 case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 603 case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 604 case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 605 case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 606 case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 607 case S4_pstorerftnew_rr: 
// if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 608 case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 609 case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 610 case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 611 case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 612 case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 613 case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 614 Bits.set(Begin+16, Begin+32); 615 return true; 616 } 617 618 return false; 619 } 620 621 // For an instruction with opcode Opc, calculate the set of bits that it 622 // uses in a register in operand OpN. This only calculates the set of used 623 // bits for cases where it does not depend on any operands (as is the case 624 // in shifts, for example). For concrete instructions from a program, the 625 // operand may be a subregister of a larger register, while Bits would 626 // correspond to the larger register in its entirety. Because of that, 627 // the parameter Begin can be used to indicate which bit of Bits should be 628 // considered the LSB of the operand. 629 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, 630 BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { 631 using namespace Hexagon; 632 633 const MCInstrDesc &D = HII.get(Opc); 634 if (D.mayStore()) { 635 if (OpN == D.getNumOperands()-1) 636 return getUsedBitsInStore(Opc, Bits, Begin); 637 return false; 638 } 639 640 switch (Opc) { 641 // One register source. Used bits: R1[0-7]. 642 case A2_sxtb: 643 case A2_zxtb: 644 case A4_cmpbeqi: 645 case A4_cmpbgti: 646 case A4_cmpbgtui: 647 if (OpN == 1) { 648 Bits.set(Begin, Begin+8); 649 return true; 650 } 651 break; 652 653 // One register source. Used bits: R1[0-15]. 654 case A2_aslh: 655 case A2_sxth: 656 case A2_zxth: 657 case A4_cmpheqi: 658 case A4_cmphgti: 659 case A4_cmphgtui: 660 if (OpN == 1) { 661 Bits.set(Begin, Begin+16); 662 return true; 663 } 664 break; 665 666 // One register source. Used bits: R1[16-31]. 
667 case A2_asrh: 668 if (OpN == 1) { 669 Bits.set(Begin+16, Begin+32); 670 return true; 671 } 672 break; 673 674 // Two register sources. Used bits: R1[0-7], R2[0-7]. 675 case A4_cmpbeq: 676 case A4_cmpbgt: 677 case A4_cmpbgtu: 678 if (OpN == 1) { 679 Bits.set(Begin, Begin+8); 680 return true; 681 } 682 break; 683 684 // Two register sources. Used bits: R1[0-15], R2[0-15]. 685 case A4_cmpheq: 686 case A4_cmphgt: 687 case A4_cmphgtu: 688 case A2_addh_h16_ll: 689 case A2_addh_h16_sat_ll: 690 case A2_addh_l16_ll: 691 case A2_addh_l16_sat_ll: 692 case A2_combine_ll: 693 case A2_subh_h16_ll: 694 case A2_subh_h16_sat_ll: 695 case A2_subh_l16_ll: 696 case A2_subh_l16_sat_ll: 697 case M2_mpy_acc_ll_s0: 698 case M2_mpy_acc_ll_s1: 699 case M2_mpy_acc_sat_ll_s0: 700 case M2_mpy_acc_sat_ll_s1: 701 case M2_mpy_ll_s0: 702 case M2_mpy_ll_s1: 703 case M2_mpy_nac_ll_s0: 704 case M2_mpy_nac_ll_s1: 705 case M2_mpy_nac_sat_ll_s0: 706 case M2_mpy_nac_sat_ll_s1: 707 case M2_mpy_rnd_ll_s0: 708 case M2_mpy_rnd_ll_s1: 709 case M2_mpy_sat_ll_s0: 710 case M2_mpy_sat_ll_s1: 711 case M2_mpy_sat_rnd_ll_s0: 712 case M2_mpy_sat_rnd_ll_s1: 713 case M2_mpyd_acc_ll_s0: 714 case M2_mpyd_acc_ll_s1: 715 case M2_mpyd_ll_s0: 716 case M2_mpyd_ll_s1: 717 case M2_mpyd_nac_ll_s0: 718 case M2_mpyd_nac_ll_s1: 719 case M2_mpyd_rnd_ll_s0: 720 case M2_mpyd_rnd_ll_s1: 721 case M2_mpyu_acc_ll_s0: 722 case M2_mpyu_acc_ll_s1: 723 case M2_mpyu_ll_s0: 724 case M2_mpyu_ll_s1: 725 case M2_mpyu_nac_ll_s0: 726 case M2_mpyu_nac_ll_s1: 727 case M2_mpyud_acc_ll_s0: 728 case M2_mpyud_acc_ll_s1: 729 case M2_mpyud_ll_s0: 730 case M2_mpyud_ll_s1: 731 case M2_mpyud_nac_ll_s0: 732 case M2_mpyud_nac_ll_s1: 733 if (OpN == 1 || OpN == 2) { 734 Bits.set(Begin, Begin+16); 735 return true; 736 } 737 break; 738 739 // Two register sources. Used bits: R1[0-15], R2[16-31]. 
740 case A2_addh_h16_lh: 741 case A2_addh_h16_sat_lh: 742 case A2_combine_lh: 743 case A2_subh_h16_lh: 744 case A2_subh_h16_sat_lh: 745 case M2_mpy_acc_lh_s0: 746 case M2_mpy_acc_lh_s1: 747 case M2_mpy_acc_sat_lh_s0: 748 case M2_mpy_acc_sat_lh_s1: 749 case M2_mpy_lh_s0: 750 case M2_mpy_lh_s1: 751 case M2_mpy_nac_lh_s0: 752 case M2_mpy_nac_lh_s1: 753 case M2_mpy_nac_sat_lh_s0: 754 case M2_mpy_nac_sat_lh_s1: 755 case M2_mpy_rnd_lh_s0: 756 case M2_mpy_rnd_lh_s1: 757 case M2_mpy_sat_lh_s0: 758 case M2_mpy_sat_lh_s1: 759 case M2_mpy_sat_rnd_lh_s0: 760 case M2_mpy_sat_rnd_lh_s1: 761 case M2_mpyd_acc_lh_s0: 762 case M2_mpyd_acc_lh_s1: 763 case M2_mpyd_lh_s0: 764 case M2_mpyd_lh_s1: 765 case M2_mpyd_nac_lh_s0: 766 case M2_mpyd_nac_lh_s1: 767 case M2_mpyd_rnd_lh_s0: 768 case M2_mpyd_rnd_lh_s1: 769 case M2_mpyu_acc_lh_s0: 770 case M2_mpyu_acc_lh_s1: 771 case M2_mpyu_lh_s0: 772 case M2_mpyu_lh_s1: 773 case M2_mpyu_nac_lh_s0: 774 case M2_mpyu_nac_lh_s1: 775 case M2_mpyud_acc_lh_s0: 776 case M2_mpyud_acc_lh_s1: 777 case M2_mpyud_lh_s0: 778 case M2_mpyud_lh_s1: 779 case M2_mpyud_nac_lh_s0: 780 case M2_mpyud_nac_lh_s1: 781 // These four are actually LH. 782 case A2_addh_l16_hl: 783 case A2_addh_l16_sat_hl: 784 case A2_subh_l16_hl: 785 case A2_subh_l16_sat_hl: 786 if (OpN == 1) { 787 Bits.set(Begin, Begin+16); 788 return true; 789 } 790 if (OpN == 2) { 791 Bits.set(Begin+16, Begin+32); 792 return true; 793 } 794 break; 795 796 // Two register sources, used bits: R1[16-31], R2[0-15]. 
797 case A2_addh_h16_hl: 798 case A2_addh_h16_sat_hl: 799 case A2_combine_hl: 800 case A2_subh_h16_hl: 801 case A2_subh_h16_sat_hl: 802 case M2_mpy_acc_hl_s0: 803 case M2_mpy_acc_hl_s1: 804 case M2_mpy_acc_sat_hl_s0: 805 case M2_mpy_acc_sat_hl_s1: 806 case M2_mpy_hl_s0: 807 case M2_mpy_hl_s1: 808 case M2_mpy_nac_hl_s0: 809 case M2_mpy_nac_hl_s1: 810 case M2_mpy_nac_sat_hl_s0: 811 case M2_mpy_nac_sat_hl_s1: 812 case M2_mpy_rnd_hl_s0: 813 case M2_mpy_rnd_hl_s1: 814 case M2_mpy_sat_hl_s0: 815 case M2_mpy_sat_hl_s1: 816 case M2_mpy_sat_rnd_hl_s0: 817 case M2_mpy_sat_rnd_hl_s1: 818 case M2_mpyd_acc_hl_s0: 819 case M2_mpyd_acc_hl_s1: 820 case M2_mpyd_hl_s0: 821 case M2_mpyd_hl_s1: 822 case M2_mpyd_nac_hl_s0: 823 case M2_mpyd_nac_hl_s1: 824 case M2_mpyd_rnd_hl_s0: 825 case M2_mpyd_rnd_hl_s1: 826 case M2_mpyu_acc_hl_s0: 827 case M2_mpyu_acc_hl_s1: 828 case M2_mpyu_hl_s0: 829 case M2_mpyu_hl_s1: 830 case M2_mpyu_nac_hl_s0: 831 case M2_mpyu_nac_hl_s1: 832 case M2_mpyud_acc_hl_s0: 833 case M2_mpyud_acc_hl_s1: 834 case M2_mpyud_hl_s0: 835 case M2_mpyud_hl_s1: 836 case M2_mpyud_nac_hl_s0: 837 case M2_mpyud_nac_hl_s1: 838 if (OpN == 1) { 839 Bits.set(Begin+16, Begin+32); 840 return true; 841 } 842 if (OpN == 2) { 843 Bits.set(Begin, Begin+16); 844 return true; 845 } 846 break; 847 848 // Two register sources, used bits: R1[16-31], R2[16-31]. 
849 case A2_addh_h16_hh: 850 case A2_addh_h16_sat_hh: 851 case A2_combine_hh: 852 case A2_subh_h16_hh: 853 case A2_subh_h16_sat_hh: 854 case M2_mpy_acc_hh_s0: 855 case M2_mpy_acc_hh_s1: 856 case M2_mpy_acc_sat_hh_s0: 857 case M2_mpy_acc_sat_hh_s1: 858 case M2_mpy_hh_s0: 859 case M2_mpy_hh_s1: 860 case M2_mpy_nac_hh_s0: 861 case M2_mpy_nac_hh_s1: 862 case M2_mpy_nac_sat_hh_s0: 863 case M2_mpy_nac_sat_hh_s1: 864 case M2_mpy_rnd_hh_s0: 865 case M2_mpy_rnd_hh_s1: 866 case M2_mpy_sat_hh_s0: 867 case M2_mpy_sat_hh_s1: 868 case M2_mpy_sat_rnd_hh_s0: 869 case M2_mpy_sat_rnd_hh_s1: 870 case M2_mpyd_acc_hh_s0: 871 case M2_mpyd_acc_hh_s1: 872 case M2_mpyd_hh_s0: 873 case M2_mpyd_hh_s1: 874 case M2_mpyd_nac_hh_s0: 875 case M2_mpyd_nac_hh_s1: 876 case M2_mpyd_rnd_hh_s0: 877 case M2_mpyd_rnd_hh_s1: 878 case M2_mpyu_acc_hh_s0: 879 case M2_mpyu_acc_hh_s1: 880 case M2_mpyu_hh_s0: 881 case M2_mpyu_hh_s1: 882 case M2_mpyu_nac_hh_s0: 883 case M2_mpyu_nac_hh_s1: 884 case M2_mpyud_acc_hh_s0: 885 case M2_mpyud_acc_hh_s1: 886 case M2_mpyud_hh_s0: 887 case M2_mpyud_hh_s1: 888 case M2_mpyud_nac_hh_s0: 889 case M2_mpyud_nac_hh_s1: 890 if (OpN == 1 || OpN == 2) { 891 Bits.set(Begin+16, Begin+32); 892 return true; 893 } 894 break; 895 } 896 897 return false; 898 } 899 900 // Calculate the register class that matches Reg:Sub. For example, if 901 // %1 is a double register, then %1:isub_hi would match the "int" 902 // register class. 
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
      const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
  if (!RR.Reg.isVirtual())
    return nullptr;
  auto *RC = MRI.getRegClass(RR.Reg);
  // No subregister: the class of the register itself is the answer.
  if (RR.Sub == 0)
    return RC;
  auto &HRI = static_cast<const HexagonRegisterInfo&>(
                  *MRI.getTargetRegisterInfo());

  // Assert-only check: the subregister index must be the low or the high
  // part of the given register class.
  auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
    (void)HRI;
    assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
           Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
  };

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::IntRegsRegClass;
    case Hexagon::HvxWRRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::HvxVRRegClass;
  }
  // Unhandled class/subregister combination.
  return nullptr;
}

// Check if RD could be replaced with RS at any possible use of RD.
// For example a predicate register cannot be replaced with an integer
// register, but a 64-bit register with a subregister can be replaced
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
  if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
    return false;
  // Return false if one (or both) classes are nullptr.
  auto *DRC = getFinalVRegClass(RD, MRI);
  if (!DRC)
    return false;

  return DRC == getFinalVRegClass(RS, MRI);
}

// Return true if some use of Reg is tied to another operand and uses a
// subregister index other than NewSub: rewriting such a use could break
// the tie constraint. Controlled by the hexbit-keep-tied option.
bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
      unsigned NewSub) {
  if (!PreserveTiedOps)
    return false;
  return llvm::any_of(MRI.use_operands(Reg),
                      [NewSub] (const MachineOperand &Op) -> bool {
                        return Op.getSubReg() != NewSub && Op.isTied();
                      });
}

namespace {

// Dead-code elimination that can run while lifetime markers are still
// present (see runOnNode for why the generic DCE is not used).
class DeadCodeElimination {
public:
  DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
    : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
      MDT(mdt), MRI(mf.getRegInfo()) {}

  bool run() {
    return runOnNode(MDT.getRootNode());
  }

private:
  bool isDead(unsigned R) const;
  bool runOnNode(MachineDomTreeNode *N);

  MachineFunction &MF;
  const HexagonInstrInfo &HII;
  MachineDominatorTree &MDT;
  MachineRegisterInfo &MRI;
};

} // end anonymous namespace

// A register is considered dead if its only non-debug uses are PHIs whose
// result is the register itself (a self-feeding cycle).
bool DeadCodeElimination::isDead(unsigned R) const {
  for (const MachineOperand &MO : MRI.use_operands(R)) {
    const MachineInstr *UseI = MO.getParent();
    if (UseI->isDebugInstr())
      continue;
    if (UseI->isPHI()) {
      assert(!UseI->getOperand(0).getSubReg());
      Register DR = UseI->getOperand(0).getReg();
      // A PHI that defines R and uses R does not keep R alive.
      if (DR == R)
        continue;
    }
    return false;
  }
  return true;
}

bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
  bool Changed = false;

  // Visit dominated blocks first, so uses are deleted before the
  // dominating definitions are examined.
  for (auto *DTN : children<MachineDomTreeNode*>(N))
    Changed |= runOnNode(DTN);

  MachineBasicBlock *B = N->getBlock();
  // Snapshot the instructions (bottom-up): erasing while iterating over
  // the block would invalidate the iterators.
  std::vector<MachineInstr*> Instrs;
  for (MachineInstr &MI : llvm::reverse(*B))
    Instrs.push_back(&MI);

  for (auto *MI : Instrs) {
    unsigned Opc = MI->getOpcode();
    // Do not touch lifetime markers. This is why the target-independent DCE
    // cannot be used.
    if (Opc == TargetOpcode::LIFETIME_START ||
        Opc == TargetOpcode::LIFETIME_END)
      continue;
    bool Store = false;
    if (MI->isInlineAsm())
      continue;
    // Delete PHIs if possible.
    if (!MI->isPHI() && !MI->isSafeToMove(Store))
      continue;

    // Erase only if every def is a dead virtual register.
    bool AllDead = true;
    SmallVector<unsigned,2> Regs;
    for (auto &Op : MI->operands()) {
      if (!Op.isReg() || !Op.isDef())
        continue;
      Register R = Op.getReg();
      if (!R.isVirtual() || !isDead(R)) {
        AllDead = false;
        break;
      }
      Regs.push_back(R);
    }
    if (!AllDead)
      continue;

    B->erase(MI);
    for (unsigned Reg : Regs)
      MRI.markUsesInDebugValueAsUndef(Reg);
    Changed = true;
  }

  return Changed;
}

namespace {

// Eliminate redundant instructions
//
// This transformation will identify instructions where the output register
// is the same as one of its input registers. This only works on instructions
// that define a single register (unlike post-increment loads, for example).
// The equality check is actually more detailed: the code calculates which
// bits of the output are used, and only compares these bits with the input
// registers.
// If the output matches an input, the instruction is replaced with COPY.
// The copies will be removed by another transformation.
class RedundantInstrElimination : public Transformation {
public:
  RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
        const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
    : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

private:
  bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
        unsigned &LostB, unsigned &LostE);
  bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
        unsigned &LostB, unsigned &LostE);
  bool computeUsedBits(unsigned Reg, BitVector &Bits);
  bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
        uint16_t Begin);
  bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);

  const HexagonInstrInfo &HII;
  const HexagonRegisterInfo &HRI;
  MachineRegisterInfo &MRI;
  BitTracker &BT;
};

} // end anonymous namespace

// Check if the instruction is a lossy shift left, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
1086 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, 1087 unsigned OpN, unsigned &LostB, unsigned &LostE) { 1088 using namespace Hexagon; 1089 1090 unsigned Opc = MI.getOpcode(); 1091 unsigned ImN, RegN, Width; 1092 switch (Opc) { 1093 case S2_asl_i_p: 1094 ImN = 2; 1095 RegN = 1; 1096 Width = 64; 1097 break; 1098 case S2_asl_i_p_acc: 1099 case S2_asl_i_p_and: 1100 case S2_asl_i_p_nac: 1101 case S2_asl_i_p_or: 1102 case S2_asl_i_p_xacc: 1103 ImN = 3; 1104 RegN = 2; 1105 Width = 64; 1106 break; 1107 case S2_asl_i_r: 1108 ImN = 2; 1109 RegN = 1; 1110 Width = 32; 1111 break; 1112 case S2_addasl_rrri: 1113 case S4_andi_asl_ri: 1114 case S4_ori_asl_ri: 1115 case S4_addi_asl_ri: 1116 case S4_subi_asl_ri: 1117 case S2_asl_i_r_acc: 1118 case S2_asl_i_r_and: 1119 case S2_asl_i_r_nac: 1120 case S2_asl_i_r_or: 1121 case S2_asl_i_r_sat: 1122 case S2_asl_i_r_xacc: 1123 ImN = 3; 1124 RegN = 2; 1125 Width = 32; 1126 break; 1127 default: 1128 return false; 1129 } 1130 1131 if (RegN != OpN) 1132 return false; 1133 1134 assert(MI.getOperand(ImN).isImm()); 1135 unsigned S = MI.getOperand(ImN).getImm(); 1136 if (S == 0) 1137 return false; 1138 LostB = Width-S; 1139 LostE = Width; 1140 return true; 1141 } 1142 1143 // Check if the instruction is a lossy shift right, where the input being 1144 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range 1145 // of bit indices that are lost. 
1146 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, 1147 unsigned OpN, unsigned &LostB, unsigned &LostE) { 1148 using namespace Hexagon; 1149 1150 unsigned Opc = MI.getOpcode(); 1151 unsigned ImN, RegN; 1152 switch (Opc) { 1153 case S2_asr_i_p: 1154 case S2_lsr_i_p: 1155 ImN = 2; 1156 RegN = 1; 1157 break; 1158 case S2_asr_i_p_acc: 1159 case S2_asr_i_p_and: 1160 case S2_asr_i_p_nac: 1161 case S2_asr_i_p_or: 1162 case S2_lsr_i_p_acc: 1163 case S2_lsr_i_p_and: 1164 case S2_lsr_i_p_nac: 1165 case S2_lsr_i_p_or: 1166 case S2_lsr_i_p_xacc: 1167 ImN = 3; 1168 RegN = 2; 1169 break; 1170 case S2_asr_i_r: 1171 case S2_lsr_i_r: 1172 ImN = 2; 1173 RegN = 1; 1174 break; 1175 case S4_andi_lsr_ri: 1176 case S4_ori_lsr_ri: 1177 case S4_addi_lsr_ri: 1178 case S4_subi_lsr_ri: 1179 case S2_asr_i_r_acc: 1180 case S2_asr_i_r_and: 1181 case S2_asr_i_r_nac: 1182 case S2_asr_i_r_or: 1183 case S2_lsr_i_r_acc: 1184 case S2_lsr_i_r_and: 1185 case S2_lsr_i_r_nac: 1186 case S2_lsr_i_r_or: 1187 case S2_lsr_i_r_xacc: 1188 ImN = 3; 1189 RegN = 2; 1190 break; 1191 1192 default: 1193 return false; 1194 } 1195 1196 if (RegN != OpN) 1197 return false; 1198 1199 assert(MI.getOperand(ImN).isImm()); 1200 unsigned S = MI.getOperand(ImN).getImm(); 1201 LostB = 0; 1202 LostE = S; 1203 return true; 1204 } 1205 1206 // Calculate the bit vector that corresponds to the used bits of register Reg. 1207 // The vector Bits has the same size, as the size of Reg in bits. If the cal- 1208 // culation fails (i.e. the used bits are unknown), it returns false. Other- 1209 // wise, it returns true and sets the corresponding bits in Bits. 
1210 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { 1211 BitVector Used(Bits.size()); 1212 RegisterSet Visited; 1213 std::vector<unsigned> Pending; 1214 Pending.push_back(Reg); 1215 1216 for (unsigned i = 0; i < Pending.size(); ++i) { 1217 unsigned R = Pending[i]; 1218 if (Visited.has(R)) 1219 continue; 1220 Visited.insert(R); 1221 for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { 1222 BitTracker::RegisterRef UR = *I; 1223 unsigned B, W; 1224 if (!HBS::getSubregMask(UR, B, W, MRI)) 1225 return false; 1226 MachineInstr &UseI = *I->getParent(); 1227 if (UseI.isPHI() || UseI.isCopy()) { 1228 Register DefR = UseI.getOperand(0).getReg(); 1229 if (!DefR.isVirtual()) 1230 return false; 1231 Pending.push_back(DefR); 1232 } else { 1233 if (!computeUsedBits(UseI, I.getOperandNo(), Used, B)) 1234 return false; 1235 } 1236 } 1237 } 1238 Bits |= Used; 1239 return true; 1240 } 1241 1242 // Calculate the bits used by instruction MI in a register in operand OpN. 1243 // Return true/false if the calculation succeeds/fails. If is succeeds, set 1244 // used bits in Bits. This function does not reset any bits in Bits, so 1245 // subsequent calls over different instructions will result in the union 1246 // of the used bits in all these instructions. 1247 // The register in question may be used with a sub-register, whereas Bits 1248 // holds the bits for the entire register. To keep track of that, the 1249 // argument Begin indicates where in Bits is the lowest-significant bit 1250 // of the register used in operand OpN. For example, in instruction: 1251 // %1 = S2_lsr_i_r %2:isub_hi, 10 1252 // the operand 1 is a 32-bit register, which happens to be a subregister 1253 // of the 64-bit register %2, and that subregister starts at position 32. 1254 // In this case Begin=32, since Bits[32] would be the lowest-significant bit 1255 // of %2:isub_hi. 
1256 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, 1257 unsigned OpN, BitVector &Bits, uint16_t Begin) { 1258 unsigned Opc = MI.getOpcode(); 1259 BitVector T(Bits.size()); 1260 bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII); 1261 // Even if we don't have bits yet, we could still provide some information 1262 // if the instruction is a lossy shift: the lost bits will be marked as 1263 // not used. 1264 unsigned LB, LE; 1265 if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) { 1266 assert(MI.getOperand(OpN).isReg()); 1267 BitTracker::RegisterRef RR = MI.getOperand(OpN); 1268 const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); 1269 uint16_t Width = HRI.getRegSizeInBits(*RC); 1270 1271 if (!GotBits) 1272 T.set(Begin, Begin+Width); 1273 assert(LB <= LE && LB < Width && LE <= Width); 1274 T.reset(Begin+LB, Begin+LE); 1275 GotBits = true; 1276 } 1277 if (GotBits) 1278 Bits |= T; 1279 return GotBits; 1280 } 1281 1282 // Calculates the used bits in RD ("defined register"), and checks if these 1283 // bits in RS ("used register") and RD are identical. 
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
      BitTracker::RegisterRef RS) {
  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);

  unsigned DB, DW;
  if (!HBS::getSubregMask(RD, DB, DW, MRI))
    return false;
  unsigned SB, SW;
  if (!HBS::getSubregMask(RS, SB, SW, MRI))
    return false;
  // The compared (sub)registers must have the same width.
  if (SW != DW)
    return false;

  BitVector Used(DC.width());
  if (!computeUsedBits(RD.Reg, Used))
    return false;

  // Only the bits that some user actually consumes need to match.
  for (unsigned i = 0; i != DW; ++i)
    if (Used[i+DB] && DC[DB+i] != SC[SB+i])
      return false;
  return true;
}

bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
      const RegisterSet&) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
    MachineInstr *MI = &*I;

    // COPYs are the copy-propagation transformation's job.
    if (MI->getOpcode() == TargetOpcode::COPY)
      continue;
    if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
      continue;
    // Only single-def instructions are candidates.
    unsigned NumD = MI->getDesc().getNumDefs();
    if (NumD != 1)
      continue;

    BitTracker::RegisterRef RD = MI->getOperand(0);
    if (!BT.has(RD.Reg))
      continue;
    const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
    auto At = MachineBasicBlock::iterator(MI);

    // Find a source operand that is equal to the result.
    for (auto &Op : MI->uses()) {
      if (!Op.isReg())
        continue;
      BitTracker::RegisterRef RS = Op;
      if (!BT.has(RS.Reg))
        continue;
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        continue;

      unsigned BN, BW;
      if (!HBS::getSubregMask(RS, BN, BW, MRI))
        continue;

      // Accept either a match restricted to the bits of RD that are
      // actually used, or a full bit-for-bit match.
      const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
      if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
        continue;

      // If found, replace the instruction with a COPY.
      const DebugLoc &DL = MI->getDebugLoc();
      const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
      Register NewR = MRI.createVirtualRegister(FRC);
      MachineInstr *CopyI =
          BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
            .addReg(RS.Reg, 0, RS.Sub);
      HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
      // This pass can create copies between registers that don't have the
      // exact same values. Updating the tracker has to involve updating
      // all dependent cells. Example:
      //   %1  = inst %2      ; %1 != %2, but used bits are equal
      //
      //   %3  = copy %2      ; <- inserted
      //    ... = %3          ; <- replaced from %2
      // Indirectly, we can create a "copy" between %1 and %2 even
      // though their exact values do not match.
      BT.visit(*CopyI);
      Changed = true;
      break;
    }
  }

  return Changed;
}

namespace {

// Recognize instructions that produce constant values known at compile-time.
// Replace them with register definitions that load these constants directly.
1379 class ConstGeneration : public Transformation { 1380 public: 1381 ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, 1382 MachineRegisterInfo &mri) 1383 : Transformation(true), HII(hii), MRI(mri), BT(bt) {} 1384 1385 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1386 static bool isTfrConst(const MachineInstr &MI); 1387 1388 private: 1389 Register genTfrConst(const TargetRegisterClass *RC, int64_t C, 1390 MachineBasicBlock &B, MachineBasicBlock::iterator At, 1391 DebugLoc &DL); 1392 1393 const HexagonInstrInfo &HII; 1394 MachineRegisterInfo &MRI; 1395 BitTracker &BT; 1396 }; 1397 1398 } // end anonymous namespace 1399 1400 bool ConstGeneration::isTfrConst(const MachineInstr &MI) { 1401 unsigned Opc = MI.getOpcode(); 1402 switch (Opc) { 1403 case Hexagon::A2_combineii: 1404 case Hexagon::A4_combineii: 1405 case Hexagon::A2_tfrsi: 1406 case Hexagon::A2_tfrpi: 1407 case Hexagon::PS_true: 1408 case Hexagon::PS_false: 1409 case Hexagon::CONST32: 1410 case Hexagon::CONST64: 1411 return true; 1412 } 1413 return false; 1414 } 1415 1416 // Generate a transfer-immediate instruction that is appropriate for the 1417 // register class and the actual value being transferred. 1418 Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, 1419 MachineBasicBlock &B, 1420 MachineBasicBlock::iterator At, 1421 DebugLoc &DL) { 1422 Register Reg = MRI.createVirtualRegister(RC); 1423 if (RC == &Hexagon::IntRegsRegClass) { 1424 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) 1425 .addImm(int32_t(C)); 1426 return Reg; 1427 } 1428 1429 if (RC == &Hexagon::DoubleRegsRegClass) { 1430 if (isInt<8>(C)) { 1431 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) 1432 .addImm(C); 1433 return Reg; 1434 } 1435 1436 unsigned Lo = Lo_32(C), Hi = Hi_32(C); 1437 if (isInt<8>(Lo) || isInt<8>(Hi)) { 1438 unsigned Opc = isInt<8>(Lo) ? 
Hexagon::A2_combineii 1439 : Hexagon::A4_combineii; 1440 BuildMI(B, At, DL, HII.get(Opc), Reg) 1441 .addImm(int32_t(Hi)) 1442 .addImm(int32_t(Lo)); 1443 return Reg; 1444 } 1445 MachineFunction *MF = B.getParent(); 1446 auto &HST = MF->getSubtarget<HexagonSubtarget>(); 1447 1448 // Disable CONST64 for tiny core since it takes a LD resource. 1449 if (!HST.isTinyCore() || 1450 MF->getFunction().hasOptSize()) { 1451 BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) 1452 .addImm(C); 1453 return Reg; 1454 } 1455 } 1456 1457 if (RC == &Hexagon::PredRegsRegClass) { 1458 unsigned Opc; 1459 if (C == 0) 1460 Opc = Hexagon::PS_false; 1461 else if ((C & 0xFF) == 0xFF) 1462 Opc = Hexagon::PS_true; 1463 else 1464 return 0; 1465 BuildMI(B, At, DL, HII.get(Opc), Reg); 1466 return Reg; 1467 } 1468 1469 return 0; 1470 } 1471 1472 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { 1473 if (!BT.reached(&B)) 1474 return false; 1475 bool Changed = false; 1476 RegisterSet Defs; 1477 1478 for (auto I = B.begin(), E = B.end(); I != E; ++I) { 1479 if (isTfrConst(*I)) 1480 continue; 1481 Defs.clear(); 1482 HBS::getInstrDefs(*I, Defs); 1483 if (Defs.count() != 1) 1484 continue; 1485 Register DR = Defs.find_first(); 1486 if (!DR.isVirtual()) 1487 continue; 1488 uint64_t U; 1489 const BitTracker::RegisterCell &DRC = BT.lookup(DR); 1490 if (HBS::getConst(DRC, 0, DRC.width(), U)) { 1491 int64_t C = U; 1492 DebugLoc DL = I->getDebugLoc(); 1493 auto At = I->isPHI() ? B.getFirstNonPHI() : I; 1494 Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); 1495 if (ImmReg) { 1496 HBS::replaceReg(DR, ImmReg, MRI); 1497 BT.put(ImmReg, DRC); 1498 Changed = true; 1499 } 1500 } 1501 } 1502 return Changed; 1503 } 1504 1505 namespace { 1506 1507 // Identify pairs of available registers which hold identical values. 1508 // In such cases, only one of them needs to be calculated, the other one 1509 // will be defined as a copy of the first. 
class CopyGeneration : public Transformation {
public:
  CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
      const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
    : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

private:
  // Find a register in AVs whose value matches Inp; the match may be a
  // subregister of a wider register (Out.Sub is set accordingly).
  bool findMatch(const BitTracker::RegisterRef &Inp,
        BitTracker::RegisterRef &Out, const RegisterSet &AVs);

  const HexagonInstrInfo &HII;
  const HexagonRegisterInfo &HRI;
  MachineRegisterInfo &MRI;
  BitTracker &BT;
  // Registers already replaced by a generated copy; excluded from further
  // matching.
  RegisterSet Forbidden;
};

// Eliminate register copies RD = RS, by replacing the uses of RD
// with uses of RS.
class CopyPropagation : public Transformation {
public:
  CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(false), HRI(hri), MRI(mri) {}

  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

  // Check if Opc is a copy-like instruction; with NoConv set, also accept
  // the transfer/combine forms that would need conversion.
  static bool isCopyReg(unsigned Opc, bool NoConv);

private:
  bool propagateRegCopy(MachineInstr &MI);

  const HexagonRegisterInfo &HRI;
  MachineRegisterInfo &MRI;
};

} // end anonymous namespace

/// Check if there is a register in AVs that is identical to Inp. If so,
/// set Out to the found register. The output may be a pair Reg:Sub.
1551 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, 1552 BitTracker::RegisterRef &Out, const RegisterSet &AVs) { 1553 if (!BT.has(Inp.Reg)) 1554 return false; 1555 const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); 1556 auto *FRC = HBS::getFinalVRegClass(Inp, MRI); 1557 unsigned B, W; 1558 if (!HBS::getSubregMask(Inp, B, W, MRI)) 1559 return false; 1560 1561 for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) { 1562 if (!BT.has(R) || Forbidden[R]) 1563 continue; 1564 const BitTracker::RegisterCell &RC = BT.lookup(R); 1565 unsigned RW = RC.width(); 1566 if (W == RW) { 1567 if (FRC != MRI.getRegClass(R)) 1568 continue; 1569 if (!HBS::isTransparentCopy(R, Inp, MRI)) 1570 continue; 1571 if (!HBS::isEqual(InpRC, B, RC, 0, W)) 1572 continue; 1573 Out.Reg = R; 1574 Out.Sub = 0; 1575 return true; 1576 } 1577 // Check if there is a super-register, whose part (with a subregister) 1578 // is equal to the input. 1579 // Only do double registers for now. 1580 if (W*2 != RW) 1581 continue; 1582 if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass) 1583 continue; 1584 1585 if (HBS::isEqual(InpRC, B, RC, 0, W)) 1586 Out.Sub = Hexagon::isub_lo; 1587 else if (HBS::isEqual(InpRC, B, RC, W, W)) 1588 Out.Sub = Hexagon::isub_hi; 1589 else 1590 continue; 1591 Out.Reg = R; 1592 if (HBS::isTransparentCopy(Out, Inp, MRI)) 1593 return true; 1594 } 1595 return false; 1596 } 1597 1598 bool CopyGeneration::processBlock(MachineBasicBlock &B, 1599 const RegisterSet &AVs) { 1600 if (!BT.reached(&B)) 1601 return false; 1602 RegisterSet AVB(AVs); 1603 bool Changed = false; 1604 RegisterSet Defs; 1605 1606 for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) { 1607 Defs.clear(); 1608 HBS::getInstrDefs(*I, Defs); 1609 1610 unsigned Opc = I->getOpcode(); 1611 if (CopyPropagation::isCopyReg(Opc, false) || 1612 ConstGeneration::isTfrConst(*I)) 1613 continue; 1614 1615 DebugLoc DL = I->getDebugLoc(); 1616 auto At = I->isPHI() ? 
B.getFirstNonPHI() : I; 1617 1618 for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) { 1619 BitTracker::RegisterRef MR; 1620 auto *FRC = HBS::getFinalVRegClass(R, MRI); 1621 1622 if (findMatch(R, MR, AVB)) { 1623 Register NewR = MRI.createVirtualRegister(FRC); 1624 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) 1625 .addReg(MR.Reg, 0, MR.Sub); 1626 BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); 1627 HBS::replaceReg(R, NewR, MRI); 1628 Forbidden.insert(R); 1629 continue; 1630 } 1631 1632 if (FRC == &Hexagon::DoubleRegsRegClass || 1633 FRC == &Hexagon::HvxWRRegClass) { 1634 // Try to generate REG_SEQUENCE. 1635 unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo); 1636 unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi); 1637 BitTracker::RegisterRef TL = { R, SubLo }; 1638 BitTracker::RegisterRef TH = { R, SubHi }; 1639 BitTracker::RegisterRef ML, MH; 1640 if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { 1641 auto *FRC = HBS::getFinalVRegClass(R, MRI); 1642 Register NewR = MRI.createVirtualRegister(FRC); 1643 BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) 1644 .addReg(ML.Reg, 0, ML.Sub) 1645 .addImm(SubLo) 1646 .addReg(MH.Reg, 0, MH.Sub) 1647 .addImm(SubHi); 1648 BT.put(BitTracker::RegisterRef(NewR), BT.get(R)); 1649 HBS::replaceReg(R, NewR, MRI); 1650 Forbidden.insert(R); 1651 } 1652 } 1653 } 1654 } 1655 1656 return Changed; 1657 } 1658 1659 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { 1660 switch (Opc) { 1661 case TargetOpcode::COPY: 1662 case TargetOpcode::REG_SEQUENCE: 1663 case Hexagon::A4_combineir: 1664 case Hexagon::A4_combineri: 1665 return true; 1666 case Hexagon::A2_tfr: 1667 case Hexagon::A2_tfrp: 1668 case Hexagon::A2_combinew: 1669 case Hexagon::V6_vcombine: 1670 return NoConv; 1671 default: 1672 break; 1673 } 1674 return false; 1675 } 1676 1677 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { 1678 bool Changed = false; 1679 unsigned Opc = 
MI.getOpcode(); 1680 BitTracker::RegisterRef RD = MI.getOperand(0); 1681 assert(MI.getOperand(0).getSubReg() == 0); 1682 1683 switch (Opc) { 1684 case TargetOpcode::COPY: 1685 case Hexagon::A2_tfr: 1686 case Hexagon::A2_tfrp: { 1687 BitTracker::RegisterRef RS = MI.getOperand(1); 1688 if (!HBS::isTransparentCopy(RD, RS, MRI)) 1689 break; 1690 if (RS.Sub != 0) 1691 Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); 1692 else 1693 Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); 1694 break; 1695 } 1696 case TargetOpcode::REG_SEQUENCE: { 1697 BitTracker::RegisterRef SL, SH; 1698 if (HBS::parseRegSequence(MI, SL, SH, MRI)) { 1699 const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg); 1700 unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); 1701 unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); 1702 Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI); 1703 Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI); 1704 } 1705 break; 1706 } 1707 case Hexagon::A2_combinew: 1708 case Hexagon::V6_vcombine: { 1709 const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg); 1710 unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); 1711 unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); 1712 BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); 1713 Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI); 1714 Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI); 1715 break; 1716 } 1717 case Hexagon::A4_combineir: 1718 case Hexagon::A4_combineri: { 1719 unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; 1720 unsigned Sub = (Opc == Hexagon::A4_combineir) ? 
Hexagon::isub_lo 1721 : Hexagon::isub_hi; 1722 BitTracker::RegisterRef RS = MI.getOperand(SrcX); 1723 Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); 1724 break; 1725 } 1726 } 1727 return Changed; 1728 } 1729 1730 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { 1731 std::vector<MachineInstr*> Instrs; 1732 for (MachineInstr &MI : llvm::reverse(B)) 1733 Instrs.push_back(&MI); 1734 1735 bool Changed = false; 1736 for (auto *I : Instrs) { 1737 unsigned Opc = I->getOpcode(); 1738 if (!CopyPropagation::isCopyReg(Opc, true)) 1739 continue; 1740 Changed |= propagateRegCopy(*I); 1741 } 1742 1743 return Changed; 1744 } 1745 1746 namespace { 1747 1748 // Recognize patterns that can be simplified and replace them with the 1749 // simpler forms. 1750 // This is by no means complete 1751 class BitSimplification : public Transformation { 1752 public: 1753 BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, 1754 const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, 1755 MachineRegisterInfo &mri, MachineFunction &mf) 1756 : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), 1757 MF(mf), BT(bt) {} 1758 1759 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1760 1761 private: 1762 struct RegHalf : public BitTracker::RegisterRef { 1763 bool Low; // Low/High halfword. 
1764 }; 1765 1766 bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, 1767 unsigned B, RegHalf &RH); 1768 bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); 1769 1770 bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, 1771 BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); 1772 unsigned getCombineOpcode(bool HLow, bool LLow); 1773 1774 bool genStoreUpperHalf(MachineInstr *MI); 1775 bool genStoreImmediate(MachineInstr *MI); 1776 bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, 1777 const BitTracker::RegisterCell &RC); 1778 bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, 1779 const BitTracker::RegisterCell &RC); 1780 bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, 1781 const BitTracker::RegisterCell &RC); 1782 bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, 1783 const BitTracker::RegisterCell &RC); 1784 bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD, 1785 const BitTracker::RegisterCell &RC, const RegisterSet &AVs); 1786 bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, 1787 const BitTracker::RegisterCell &RC); 1788 bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, 1789 const BitTracker::RegisterCell &RC, const RegisterSet &AVs); 1790 bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD); 1791 1792 // Cache of created instructions to avoid creating duplicates. 1793 // XXX Currently only used by genBitSplit. 1794 std::vector<MachineInstr*> NewMIs; 1795 1796 const MachineDominatorTree &MDT; 1797 const HexagonInstrInfo &HII; 1798 const HexagonRegisterInfo &HRI; 1799 MachineRegisterInfo &MRI; 1800 MachineFunction &MF; 1801 BitTracker &BT; 1802 }; 1803 1804 } // end anonymous namespace 1805 1806 // Check if the bits [B..B+16) in register cell RC form a valid halfword, 1807 // i.e. [0..16), [16..32), etc. of some register. 
// If so, return true and
// set the information about the found register in RH.
bool BitSimplification::matchHalf(unsigned SelfR,
      const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
  // XXX This could be searching in the set of available registers, in case
  // the match is not exact.

  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
  // register and all the bits B..B+15 match between RC and the register.
  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
  bool Low = false;
  // Find the first bit in the chunk that is a reference (not a constant).
  unsigned I = B;
  while (I < B+16 && RC[I].num())
    I++;
  if (I == B+16)
    return false;

  Register Reg = RC[I].RefI.Reg;
  unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
  if (P < I-B)
    return false;
  // Pos is the position in Reg that corresponds to bit B of RC.
  unsigned Pos = P - (I-B);

  if (Reg == 0 || Reg == SelfR)    // Don't match "self".
    return false;
  if (!Reg.isVirtual())
    return false;
  if (!BT.has(Reg))
    return false;

  const BitTracker::RegisterCell &SC = BT.lookup(Reg);
  if (Pos+16 > SC.width())
    return false;

  // Verify that every bit of the chunk matches Reg[Pos..Pos+15]: either as
  // a direct reference, or by having an identical tracked value.
  for (unsigned i = 0; i < 16; ++i) {
    const BitTracker::BitValue &RV = RC[i+B];
    if (RV.Type == BitTracker::BitValue::Ref) {
      if (RV.RefI.Reg != Reg)
        return false;
      if (RV.RefI.Pos != i+Pos)
        return false;
      continue;
    }
    if (RC[i+B] != SC[i+Pos])
      return false;
  }

  // Only halfword-aligned positions within a 32/64-bit register qualify.
  unsigned Sub = 0;
  switch (Pos) {
    case 0:
      Sub = Hexagon::isub_lo;
      Low = true;
      break;
    case 16:
      Sub = Hexagon::isub_lo;
      Low = false;
      break;
    case 32:
      Sub = Hexagon::isub_hi;
      Low = true;
      break;
    case 48:
      Sub = Hexagon::isub_hi;
      Low = false;
      break;
    default:
      return false;
  }

  RH.Reg = Reg;
  RH.Sub = Sub;
  RH.Low = Low;
  // If the subregister is not valid with the register, set it to 0.
  if (!HBS::getFinalVRegClass(RH, MRI))
    RH.Sub = 0;

  return true;
}

// Check that the register R (with its subregister resolved) is acceptable
// as operand OpNum of an instruction with opcode Opc.
bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
      unsigned OpNum) {
  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
  auto *RRC = HBS::getFinalVRegClass(R, MRI);
  return OpRC->hasSubClassEq(RRC);
}

// Check if RC matches the pattern of a S2_packhl. If so, return true and
// set the inputs Rs and Rt.
bool BitSimplification::matchPackhl(unsigned SelfR,
      const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
      BitTracker::RegisterRef &Rt) {
  RegHalf L1, H1, L2, H2;

  // The result must be four matchable halfwords.
  if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
    return false;
  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
    return false;

  // Rs = H1.L1, Rt = H2.L2
  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
    return false;
  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
    return false;

  Rs = H1;
  Rt = H2;
  return true;
}

// Select the A2_combine variant that takes the requested (high, low)
// halfwords of its two inputs.
unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
  return HLow ? LLow ? Hexagon::A2_combine_ll
                     : Hexagon::A2_combine_lh
              : LLow ? Hexagon::A2_combine_hl
                     : Hexagon::A2_combine_hh;
}

// If MI stores the upper halfword of a register (potentially obtained via
// shifts or extracts), replace it with a storerf instruction. This could
// cause the "extraction" code to become dead.
1927 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { 1928 unsigned Opc = MI->getOpcode(); 1929 if (Opc != Hexagon::S2_storerh_io) 1930 return false; 1931 1932 MachineOperand &ValOp = MI->getOperand(2); 1933 BitTracker::RegisterRef RS = ValOp; 1934 if (!BT.has(RS.Reg)) 1935 return false; 1936 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1937 RegHalf H; 1938 unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0; 1939 if (!matchHalf(0, RC, B, H)) 1940 return false; 1941 if (H.Low) 1942 return false; 1943 MI->setDesc(HII.get(Hexagon::S2_storerf_io)); 1944 ValOp.setReg(H.Reg); 1945 ValOp.setSubReg(H.Sub); 1946 return true; 1947 } 1948 1949 // If MI stores a value known at compile-time, and the value is within a range 1950 // that avoids using constant-extenders, replace it with a store-immediate. 1951 bool BitSimplification::genStoreImmediate(MachineInstr *MI) { 1952 unsigned Opc = MI->getOpcode(); 1953 unsigned Align = 0; 1954 switch (Opc) { 1955 case Hexagon::S2_storeri_io: 1956 Align++; 1957 [[fallthrough]]; 1958 case Hexagon::S2_storerh_io: 1959 Align++; 1960 [[fallthrough]]; 1961 case Hexagon::S2_storerb_io: 1962 break; 1963 default: 1964 return false; 1965 } 1966 1967 // Avoid stores to frame-indices (due to an unknown offset). 1968 if (!MI->getOperand(0).isReg()) 1969 return false; 1970 MachineOperand &OffOp = MI->getOperand(1); 1971 if (!OffOp.isImm()) 1972 return false; 1973 1974 int64_t Off = OffOp.getImm(); 1975 // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). 1976 if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1))) 1977 return false; 1978 // Source register: 1979 BitTracker::RegisterRef RS = MI->getOperand(2); 1980 if (!BT.has(RS.Reg)) 1981 return false; 1982 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1983 uint64_t U; 1984 if (!HBS::getConst(RC, 0, RC.width(), U)) 1985 return false; 1986 1987 // Only consider 8-bit values to avoid constant-extenders. 
  int V;
  // Truncate the known constant to the access size before range-checking.
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      V = int8_t(U);
      break;
    case Hexagon::S2_storerh_io:
      V = int16_t(U);
      break;
    case Hexagon::S2_storeri_io:
      V = int32_t(U);
      break;
    default:
      // Opc is already checked above to be one of the three store instructions.
      // This silences a -Wuninitialized false positive on GCC 5.4.
      llvm_unreachable("Unexpected store opcode");
  }
  if (!isInt<8>(V))
    return false;

  // Drop the register operand and switch MI to the corresponding
  // store-immediate opcode, appending the immediate value.
  MI->removeOperand(2);
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
      break;
    case Hexagon::S2_storerh_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
      break;
    case Hexagon::S2_storeri_io:
      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
      break;
  }
  MI->addOperand(MachineOperand::CreateImm(V));
  return true;
}

// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
// dead.
bool BitSimplification::genPackhl(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc == Hexagon::S2_packhl)
    return false;
  BitTracker::RegisterRef Rs, Rt;
  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
    return false;
  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
      !validateReg(Rt, Hexagon::S2_packhl, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  DebugLoc DL = MI->getDebugLoc();
  // New instructions cannot be interleaved with PHIs.
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
      .addReg(Rs.Reg, 0, Rs.Sub)
      .addReg(Rt.Reg, 0, Rt.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}

// If MI produces halfword of the input in the low half of the output,
// replace it with zero-extend or extractu.
bool BitSimplification::genExtractHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L;
  // Check for halfword in low 16 bits, zeros elsewhere.
  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
    return false;

  unsigned Opc = MI->getOpcode();
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Prefer zxth, since zxth can go in any slot, while extractu only in
  // slots 2 and 3.
  unsigned NewR = 0;
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  if (L.Low && Opc != Hexagon::A2_zxth) {
    if (validateReg(L, Hexagon::A2_zxth, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
          .addReg(L.Reg, 0, L.Sub);
    }
  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
    if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      // NOTE(review): this branch inserts at MI rather than At; if MI is a
      // PHI this places the new instruction before the PHI, unlike the
      // branch above. Verify whether that is intentional.
      BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
          .addReg(L.Reg, 0, L.Sub)
          .addImm(16);
    }
  }
  if (NewR == 0)
    return false;
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}

// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// combine.
bool BitSimplification::genCombineHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L, H;
  // Check for combine h/l
  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
    return false;
  // Do nothing if this is just a reg copy.
  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
    return false;

  unsigned Opc = MI->getOpcode();
  unsigned COpc = getCombineOpcode(H.Low, L.Low);
  // Avoid replacing an instruction with itself.
  if (COpc == Opc)
    return false;
  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(COpc), NewR)
      .addReg(H.Reg, 0, H.Sub)
      .addReg(L.Reg, 0, L.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}

// If MI resets high bits of a register and keeps the lower ones, replace it
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
bool BitSimplification::genExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  // These opcodes are already the canonical forms; nothing to gain.
  switch (Opc) {
    case Hexagon::A2_zxtb:
    case Hexagon::A2_zxth:
    case Hexagon::S2_extractu:
      return false;
  }
  // A small and-immediate (fits in s10, no constant-extender) is already
  // as good as any replacement.
  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
    int32_t Imm = MI->getOperand(2).getImm();
    if (isInt<10>(Imm))
      return false;
  }

  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
    return false;
  // W becomes the number of significant (non-zero-constant) low bits.
  unsigned W = RC.width();
  while (W > 0 && RC[W-1].is(0))
    W--;
  if (W == 0 || W == RC.width())
    return false;
  unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
                  : (W == 16) ? Hexagon::A2_zxth
                  : (W < 10)  ? Hexagon::A2_andir
                  : Hexagon::S2_extractu;
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Look for a use operand whose tracked low W bits equal RD's low W bits;
  // extract from the first suitable one.
  for (auto &Op : MI->uses()) {
    if (!Op.isReg())
      continue;
    BitTracker::RegisterRef RS = Op;
    if (!BT.has(RS.Reg))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
    unsigned BN, BW;
    if (!HBS::getSubregMask(RS, BN, BW, MRI))
      continue;
    if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
      continue;
    if (!validateReg(RS, NewOpc, 1))
      continue;

    Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
                  .addReg(RS.Reg, 0, RS.Sub);
    if (NewOpc == Hexagon::A2_andir)
      MIB.addImm((1 << W) - 1);
    else if (NewOpc == Hexagon::S2_extractu)
      MIB.addImm(W).addImm(0);
    HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    return true;
  }
  return false;
}

// If the values of RD and of some available register S together form a
// split of a third register, replace their definitions with a single
// A4_bitspliti of that register.
bool BitSimplification::genBitSplit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenBitSplit)
    return false;
  // Honor the debugging counter limit, if one was given on the command line.
  if (MaxBitSplit.getNumOccurrences()) {
    if (CountBitSplit >= MaxBitSplit)
      return false;
  }

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A4_bitsplit:
    case Hexagon::A4_bitspliti:
      return false;
  }

  unsigned W = RC.width();
  if (W != 32)
    return false;

  // Count of leading zero (constant) bits in a cell.
  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
    unsigned Z = C.width();
    while (Z > 0 && C[Z-1].is(0))
      --Z;
    return C.width() - Z;
  };

  // Count the number of leading zeros in the target RC.
  unsigned Z = ctlz(RC);
  if (Z == 0 || Z == W)
    return false;

  // A simplistic analysis: assume the source register (the one being split)
  // is fully unknown, and that all its bits are self-references.
  const BitTracker::BitValue &B0 = RC[0];
  if (B0.Type != BitTracker::BitValue::Ref)
    return false;

  unsigned SrcR = B0.RefI.Reg;
  unsigned SrcSR = 0;
  unsigned Pos = B0.RefI.Pos;

  // All the non-zero bits should be consecutive bits from the same register.
  for (unsigned i = 1; i < W-Z; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      return false;
    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
      return false;
  }

  // Now, find the other bitfield among AVs.
  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
    // The number of leading zeros here should be the number of trailing
    // non-zeros in RC.
    unsigned SRC = MRI.getRegClass(S)->getID();
    if (SRC != Hexagon::IntRegsRegClassID &&
        SRC != Hexagon::DoubleRegsRegClassID)
      continue;
    if (!BT.has(S))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(S);
    if (SC.width() != W || ctlz(SC) != W-Z)
      continue;
    // The Z lower bits should now match SrcR.
    const BitTracker::BitValue &S0 = SC[0];
    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
      continue;
    unsigned P = S0.RefI.Pos;

    // The two fields must be adjacent in SrcR: one starts exactly where
    // the other ends.
    if (Pos <= P && (Pos + W-Z) != P)
      continue;
    if (P < Pos && (P + Z) != Pos)
      continue;
    // The starting bitfield position must be at a subregister boundary.
    if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
      continue;

    // Verify that all Z low bits of SC are consecutive bits of SrcR
    // starting at P.
    unsigned I;
    for (I = 1; I < Z; ++I) {
      const BitTracker::BitValue &V = SC[I];
      if (V.Type != BitTracker::BitValue::Ref)
        break;
      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
        break;
    }
    if (I != Z)
      continue;

    // Generate bitsplit where S is defined.
    if (MaxBitSplit.getNumOccurrences())
      CountBitSplit++;
    MachineInstr *DefS = MRI.getVRegDef(S);
    assert(DefS != nullptr);
    DebugLoc DL = DefS->getDebugLoc();
    MachineBasicBlock &B = *DefS->getParent();
    auto At = DefS->isPHI() ? B.getFirstNonPHI()
                            : MachineBasicBlock::iterator(DefS);
    // If the source is a double register, split the 32-bit subregister
    // containing both fields.
    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
      SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
    if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
      continue;
    unsigned ImmOp = Pos <= P ? W-Z : Z;

    // Find an existing bitsplit instruction if one already exists.
    unsigned NewR = 0;
    for (MachineInstr *In : NewMIs) {
      if (In->getOpcode() != Hexagon::A4_bitspliti)
        continue;
      MachineOperand &Op1 = In->getOperand(1);
      if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
        continue;
      if (In->getOperand(2).getImm() != ImmOp)
        continue;
      // Check if the target register is available here.
      MachineOperand &Op0 = In->getOperand(0);
      MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
      assert(DefI != nullptr);
      if (!MDT.dominates(DefI, &*At))
        continue;

      // Found one that can be reused.
      assert(Op0.getSubReg() == 0);
      NewR = Op0.getReg();
      break;
    }
    if (!NewR) {
      NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
      auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
                      .addReg(SrcR, 0, SrcSR)
                      .addImm(ImmOp);
      // Remember it so later calls can reuse it instead of emitting a
      // duplicate.
      NewMIs.push_back(NewBS);
    }
    // Route RD and S to the two halves of the bitsplit result, depending
    // on which field came first in SrcR.
    if (Pos <= P) {
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
    } else {
      HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
    }
    return true;
  }

  return false;
}

// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
//   %2 = S2_lsr_i_r %1, 5
//   %3 = S2_tstbit_i %2, 0
// =>
//   %3 = S2_tstbit_i %1, 5
bool BitSimplification::simplifyTstbit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_tstbit_i)
    return false;

  unsigned BN = MI->getOperand(2).getImm();
  BitTracker::RegisterRef RS = MI->getOperand(1);
  unsigned F, W;
  DebugLoc DL = MI->getDebugLoc();
  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
    return false;
  MachineBasicBlock &B = *MI->getParent();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);

  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
  // The tracked value of the tested bit.
  const BitTracker::BitValue &V = SC[F+BN];
  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
    const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
    // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
    // a double register, need to use a subregister and adjust bit
    // number.
    // P stays at "max" if the referenced register class is unsupported.
    unsigned P = std::numeric_limits<unsigned>::max();
    BitTracker::RegisterRef RR(V.RefI.Reg, 0);
    if (TC == &Hexagon::DoubleRegsRegClass) {
      P = V.RefI.Pos;
      RR.Sub = Hexagon::isub_lo;
      if (P >= 32) {
        P -= 32;
        RR.Sub = Hexagon::isub_hi;
      }
    } else if (TC == &Hexagon::IntRegsRegClass) {
      P = V.RefI.Pos;
    }
    if (P != std::numeric_limits<unsigned>::max()) {
      // Test the originating bit directly.
      Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
          .addReg(RR.Reg, 0, RR.Sub)
          .addImm(P);
      HBS::replaceReg(RD.Reg, NewR, MRI);
      BT.put(NewR, RC);
      return true;
    }
  } else if (V.is(0) || V.is(1)) {
    // The tested bit is a known constant: the predicate result is known.
    Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
    BuildMI(B, At, DL, HII.get(NewOpc), NewR);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    return true;
  }

  return false;
}

// Detect whether RD is a bitfield extract (sign- or zero-extended) of
// some register from the AVs set. Create a new corresponding instruction
// at the location of MI. The intent is to recognize situations where
// a sequence of instructions performs an operation that is equivalent to
// an extract operation, such as a shift left followed by a shift right.
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenExtract)
    return false;
  // Honor the debugging counter limit, if one was given on the command line.
  if (MaxExtract.getNumOccurrences()) {
    if (CountExtract >= MaxExtract)
      return false;
    CountExtract++;
  }

  unsigned W = RC.width();
  unsigned RW = W;
  unsigned Len;
  bool Signed;

  // The code is mostly class-independent, except for the part that generates
  // the extract instruction, and establishes the source register (in case it
  // needs to use a subregister).
  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  // Observation:
  // If the cell has a form of 00..0xx..x with k zeros and n remaining
  // bits, this could be an extractu of the n bits, but it could also be
  // an extractu of a longer field which happens to have 0s in the top
  // bit positions.
  // The same logic applies to sign-extended fields.
  //
  // Do not check for the extended extracts, since it would expand the
  // search space quite a bit. The search may be expensive as it is.

  const BitTracker::BitValue &TopV = RC[W-1];

  // Eliminate candidates that have self-referential bits, since they
  // cannot be extracts from other registers. Also, skip registers that
  // have compile-time constant values.
  bool IsConst = true;
  for (unsigned I = 0; I != W; ++I) {
    const BitTracker::BitValue &V = RC[I];
    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
      return false;
    IsConst = IsConst && (V.is(0) || V.is(1));
  }
  if (IsConst)
    return false;

  // Determine the length (Len) and signedness of the expected bitfield.
  if (TopV.is(0) || TopV.is(1)) {
    bool S = TopV.is(1);
    // Skip the run of extension bits at the top.
    for (--W; W > 0 && RC[W-1].is(S); --W)
      ;
    Len = W;
    Signed = S;
    // The sign bit must be a part of the field being extended.
    if (Signed)
      ++Len;
  } else {
    // This could still be a sign-extended extract.
    assert(TopV.Type == BitTracker::BitValue::Ref);
    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
      return false;
    for (--W; W > 0 && RC[W-1] == TopV; --W)
      ;
    // The top bits of RC are copies of TopV. One occurrence of TopV will
    // be a part of the field.
    Len = W + 1;
    Signed = true;
  }

  // This would be just a copy. It should be handled elsewhere.
  if (Len == RW)
    return false;

  LLVM_DEBUG({
    dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
           << ", MI: " << *MI;
    dbgs() << "Cell: " << RC << '\n';
    dbgs() << "Expected bitfield size: " << Len << " bits, "
           << (Signed ? "sign" : "zero") << "-extended\n";
  });

  bool Changed = false;

  // Search the available registers for one containing the field.
  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
    if (!BT.has(R))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(R);
    unsigned SW = SC.width();

    // The source can be longer than the destination, as long as its size is
    // a multiple of the size of the destination. Also, we would need to be
    // able to refer to the subregister in the source that would be of the
    // same size as the destination, but only check the sizes here.
    if (SW < RW || (SW % RW) != 0)
      continue;

    // The field can start at any offset in SC as long as it contains Len
    // bits and does not cross subregister boundary (if the source register
    // is longer than the destination).
    unsigned Off = 0;
    while (Off <= SW-Len) {
      unsigned OE = (Off+Len)/RW;
      if (OE != Off/RW) {
        // The assumption here is that if the source (R) is longer than the
        // destination, then the destination is a sequence of words of
        // size RW, and each such word in R can be accessed via a subregister.
        //
        // If the beginning and the end of the field cross the subregister
        // boundary, advance to the next subregister.
        Off = OE*RW;
        continue;
      }
      if (HBS::isEqual(RC, 0, SC, Off, Len))
        break;
      ++Off;
    }

    if (Off > SW-Len)
      continue;

    // Found match.
    // Prefer the short zero/sign-extend forms when the field starts at 0.
    unsigned ExtOpc = 0;
    if (Off == 0) {
      if (Len == 8)
        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
      else if (Len == 16)
        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
      else if (Len < 10 && !Signed)
        ExtOpc = Hexagon::A2_andir;
    }
    if (ExtOpc == 0) {
      ExtOpc =
        Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
               : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
    }
    unsigned SR = 0;
    // This only recognizes isub_lo and isub_hi.
    if (RW != SW && RW*2 != SW)
      continue;
    if (RW != SW)
      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
    // Make the offset relative to the chosen subregister.
    Off = Off % RW;

    if (!validateReg({R,SR}, ExtOpc, 1))
      continue;

    // Don't generate the same instruction as the one being optimized.
    if (MI->getOpcode() == ExtOpc) {
      // All possible ExtOpc's have the source in operand(1).
      const MachineOperand &SrcOp = MI->getOperand(1);
      if (SrcOp.getReg() == R)
        continue;
    }

    DebugLoc DL = MI->getDebugLoc();
    MachineBasicBlock &B = *MI->getParent();
    Register NewR = MRI.createVirtualRegister(FRC);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
                  .addReg(R, 0, SR);
    // Append the extra operands required by the chosen opcode.
    switch (ExtOpc) {
      case Hexagon::A2_sxtb:
      case Hexagon::A2_zxtb:
      case Hexagon::A2_sxth:
      case Hexagon::A2_zxth:
        break;
      case Hexagon::A2_andir:
        MIB.addImm((1u << Len) - 1);
        break;
      case Hexagon::S4_extract:
      case Hexagon::S2_extractu:
      case Hexagon::S4_extractp:
      case Hexagon::S2_extractup:
        MIB.addImm(Len)
           .addImm(Off);
        break;
      default:
        llvm_unreachable("Unexpected opcode");
    }

    HBS::replaceReg(RD.Reg, NewR, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    Changed = true;
    break;
  }

  return Changed;
}

// Simplify A4_rcmpeqi/A4_rcmpneqi comparisons against 0 when the source
// value can be proven to be 0 or non-0 (or, via C2_muxii, one of two
// known constants).
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
      BitTracker::RegisterRef RD) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
    return false;
  MachineOperand &CmpOp = MI->getOperand(2);
  if (!CmpOp.isImm() || CmpOp.getImm() != 0)
    return false;

  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  MachineBasicBlock &B = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  bool KnownZ = true;
  bool KnownNZ = false;

  BitTracker::RegisterRef SR = MI->getOperand(1);
  if (!BT.has(SR.Reg))
    return false;
  const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
  unsigned F, W;
  if (!HBS::getSubregMask(SR, F, W, MRI))
    return false;

  // KnownZ: all bits known 0. KnownNZ: at least one bit known 1.
  for (uint16_t I = F; I != F+W; ++I) {
    const BitTracker::BitValue &V = SC[I];
    if (!V.is(0))
      KnownZ = false;
    if (V.is(1))
      KnownNZ = true;
  }

  // Replace the compare result with the constant C (0 or 1), updating the
  // bit-tracker cell for the new register.
  auto ReplaceWithConst = [&](int C) {
    Register NewR = MRI.createVirtualRegister(FRC);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
        .addImm(C);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    BitTracker::RegisterCell NewRC(W);
    for (uint16_t I = 0; I != W; ++I) {
      NewRC[I] = BitTracker::BitValue(C & 1);
      C = unsigned(C) >> 1;
    }
    BT.put(BitTracker::RegisterRef(NewR), NewRC);
    return true;
  };

  // Operand is provably non-zero (globals/block addresses are non-null).
  auto IsNonZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return true;
    if (Op.isImm())
      return Op.getImm() != 0;
    if (Op.isCImm())
      return !Op.getCImm()->isZero();
    if (Op.isFPImm())
      return !Op.getFPImm()->isZero();
    return false;
  };

  // Operand is provably zero.
  auto IsZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return false;
    if (Op.isImm())
      return Op.getImm() == 0;
    if (Op.isCImm())
      return Op.getCImm()->isZero();
    if (Op.isFPImm())
      return Op.getFPImm()->isZero();
    return false;
  };

  // If the source register is known to be 0 or non-0, the comparison can
  // be folded to a load of a constant.
  if (KnownZ || KnownNZ) {
    assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
    // rcmpeqi(x,0) is 1 iff x==0; rcmpneqi(x,0) is 1 iff x!=0.
    return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
  }

  // Special case: if the compare comes from a C2_muxii, then we know the
  // two possible constants that can be the source value.
  MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
  if (!InpDef)
    return false;
  if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
    MachineOperand &Src1 = InpDef->getOperand(2);
    MachineOperand &Src2 = InpDef->getOperand(3);
    // Check if both are non-zero.
    bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
    if (KnownNZ1 && KnownNZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
    // Check if both are zero.
    bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
    if (KnownZ1 && KnownZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);

    // If for both operands we know that they are either 0 or non-0,
    // replace the comparison with a C2_muxii, using the same predicate
    // register, but with operands substituted with 0/1 accordingly.
    if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
      Register NewR = MRI.createVirtualRegister(FRC);
      BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
          .addReg(InpDef->getOperand(1).getReg())
          .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
          .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
      HBS::replaceReg(RD.Reg, NewR, MRI);
      // Create a new cell with only the least significant bit unknown.
      BitTracker::RegisterCell NewRC(W);
      NewRC[0] = BitTracker::BitValue::self();
      NewRC.fill(1, W, BitTracker::BitValue::Zero);
      BT.put(BitTracker::RegisterRef(NewR), NewRC);
      return true;
    }
  }

  return false;
}

// Apply the individual simplifications to each instruction in block B,
// with AVs being the set of registers available on entry to B.
bool BitSimplification::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;
  RegisterSet AVB = AVs;
  RegisterSet Defs;

  // After each instruction is visited, its definitions become available
  // to the remainder of the block (AVB.insert(Defs) in the increment).
  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
    MachineInstr *MI = &*I;
    Defs.clear();
    HBS::getInstrDefs(*MI, Defs);

    unsigned Opc = MI->getOpcode();
    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
      continue;

    if (MI->mayStore()) {
      bool T = genStoreUpperHalf(MI);
      T = T || genStoreImmediate(MI);
      Changed |= T;
      continue;
    }

    // The remaining simplifications apply to single-def instructions only.
    if (Defs.count() != 1)
      continue;
    const MachineOperand &Op0 = MI->getOperand(0);
    if (!Op0.isReg() || !Op0.isDef())
      continue;
    BitTracker::RegisterRef RD = Op0;
    if (!BT.has(RD.Reg))
      continue;
    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);

    // Dispatch on the defined register's class; each simplification is
    // tried until one succeeds.
    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
      bool T = genPackhl(MI, RD, RC);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
      bool T = genBitSplit(MI, RD, RC, AVB);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      T = T || genExtractHalf(MI, RD, RC);
      T = T || genCombineHalf(MI, RD, RC);
      T = T || genExtractLow(MI, RD, RC);
      T = T || simplifyRCmp0(MI, RD);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
      bool T = simplifyTstbit(MI, RD, RC);
      Changed |= T;
      continue;
    }
  }
  return Changed;
}

bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HRI = *HST.getRegisterInfo();
  auto &HII = *HST.getInstrInfo();

  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed;

  Changed = DeadCodeElimination(MF, *MDT).run();

  const HexagonEvaluator HE(HRI, MRI, HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();

  MachineBasicBlock &Entry = MF.front();

  // The sub-transformations run as a fixed pipeline, each visiting the
  // blocks in dominator-tree order starting from the entry block.
  RegisterSet AIG;  // Available registers for IG.
  ConstGeneration ImmG(BT, HII, MRI);
  Changed |= visitBlock(Entry, ImmG, AIG);

  RegisterSet ARE;  // Available registers for RIE.
  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
  bool Ried = visitBlock(Entry, RIE, ARE);
  if (Ried) {
    Changed = true;
    // The IR changed; recompute the bit-tracker information.
    BT.run();
  }

  RegisterSet ACG;  // Available registers for CG.
  CopyGeneration CopyG(BT, HII, HRI, MRI);
  Changed |= visitBlock(Entry, CopyG, ACG);

  RegisterSet ACP;  // Available registers for CP.
  CopyPropagation CopyP(HRI, MRI);
  Changed |= visitBlock(Entry, CopyP, ACP);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  BT.run();
  RegisterSet ABS;  // Available registers for BS.
  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
  Changed |= visitBlock(Entry, BitS, ABS);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  if (Changed) {
    // The replacements may have invalidated kill flags; clear them all
    // before the final dead-code cleanup.
    for (auto &B : MF)
      for (auto &I : B)
        I.clearKillInfo();
    DeadCodeElimination(MF, *MDT).run();
  }
  return Changed;
}

// Recognize loops where the code at the end of the loop matches the code
// before the entry of the loop, and the matching code is such that it can
// be simplified. This pass relies on the bit simplification above and only
// prepares code in a way that can be handled by the bit simplification.
//
// This is the motivating testcase (and explanation):
//
// {
//   loop0(.LBB0_2, r1)      // %for.body.preheader
//   r5:4 = memd(r0++#8)
// }
// {
//   r3 = lsr(r4, #16)
//   r7:6 = combine(r5, r5)
// }
// {
//   r3 = insert(r5, #16, #16)
//   r7:6 = vlsrw(r7:6, #16)
// }
// .LBB0_2:
// {
//   memh(r2+#4) = r5
//   memh(r2+#6) = r6  # R6 is really R5.H
// }
// {
//   r2 = add(r2, #8)
//   memh(r2+#0) = r4
//   memh(r2+#2) = r3  # R3 is really R4.H
// }
// {
//   r5:4 = memd(r0++#8)
// }
// {                          # "Shuffling" code that sets up R3 and R6
//   r3 = lsr(r4, #16)        # so that their halves can be stored in the
//   r7:6 = combine(r5, r5)   # next iteration. This could be folded into
// }                          # the stores if the code was at the beginning
// {                          # of the loop iteration. Since the same code
//   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
//   r7:6 = vlsrw(r7:6, #16)     # there.
// }:endloop0
//
//
// The outcome:
//
// {
//   loop0(.LBB0_2, r1)
//   r5:4 = memd(r0++#8)
// }
// .LBB0_2:
// {
//   memh(r2+#4) = r5
//   memh(r2+#6) = r5.h
// }
// {
//   r2 = add(r2, #8)
//   memh(r2+#0) = r4
//   memh(r2+#2) = r4.h
// }
// {
//   r5:4 = memd(r0++#8)
// }:endloop0

namespace {

class HexagonLoopRescheduling : public MachineFunctionPass {
public:
  static char ID;

  HexagonLoopRescheduling() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  const HexagonInstrInfo *HII = nullptr;
  const HexagonRegisterInfo *HRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  BitTracker *BTP = nullptr;

  // A candidate loop: LB is the loop block, PB the preheader block.
  // (EB is presumably the exit block; its use is not visible here.)
  struct LoopCand {
    LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
          MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}

    MachineBasicBlock *LB, *PB, *EB;
  };
  using InstrList = std::vector<MachineInstr *>;
  // A group of instructions transforming register Inp into register Out.
  struct InstrGroup {
    BitTracker::RegisterRef Inp, Out;
    InstrList Ins;
  };
  // Decomposition of a PHI in the loop block: the defined register, and
  // the incoming value/block pairs for the loop and preheader edges.
  struct PhiInfo {
    PhiInfo(MachineInstr &P, MachineBasicBlock &B);

    unsigned DefR;
    BitTracker::RegisterRef LR, PR;   // Loop Register, Preheader Register
    MachineBasicBlock *LB, *PB;       // Loop Block, Preheader Block
  };

  static unsigned getDefReg(const MachineInstr *MI);
  bool isConst(unsigned Reg) const;
  bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
  bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
  bool isShuffleOf(unsigned OutR, unsigned InpR) const;
  bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
      unsigned &InpR2) const;
  void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
      MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
  bool processLoop(LoopCand &C);
2939 }; 2940 2941 } // end anonymous namespace 2942 2943 char HexagonLoopRescheduling::ID = 0; 2944 2945 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched-pass", 2946 "Hexagon Loop Rescheduling", false, false) 2947 2948 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, 2949 MachineBasicBlock &B) { 2950 DefR = HexagonLoopRescheduling::getDefReg(&P); 2951 LB = &B; 2952 PB = nullptr; 2953 for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { 2954 const MachineOperand &OpB = P.getOperand(i+1); 2955 if (OpB.getMBB() == &B) { 2956 LR = P.getOperand(i); 2957 continue; 2958 } 2959 PB = OpB.getMBB(); 2960 PR = P.getOperand(i); 2961 } 2962 } 2963 2964 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { 2965 RegisterSet Defs; 2966 HBS::getInstrDefs(*MI, Defs); 2967 if (Defs.count() != 1) 2968 return 0; 2969 return Defs.find_first(); 2970 } 2971 2972 bool HexagonLoopRescheduling::isConst(unsigned Reg) const { 2973 if (!BTP->has(Reg)) 2974 return false; 2975 const BitTracker::RegisterCell &RC = BTP->lookup(Reg); 2976 for (unsigned i = 0, w = RC.width(); i < w; ++i) { 2977 const BitTracker::BitValue &V = RC[i]; 2978 if (!V.is(0) && !V.is(1)) 2979 return false; 2980 } 2981 return true; 2982 } 2983 2984 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, 2985 unsigned DefR) const { 2986 unsigned Opc = MI->getOpcode(); 2987 switch (Opc) { 2988 case TargetOpcode::COPY: 2989 case Hexagon::S2_lsr_i_r: 2990 case Hexagon::S2_asr_i_r: 2991 case Hexagon::S2_asl_i_r: 2992 case Hexagon::S2_lsr_i_p: 2993 case Hexagon::S2_asr_i_p: 2994 case Hexagon::S2_asl_i_p: 2995 case Hexagon::S2_insert: 2996 case Hexagon::A2_or: 2997 case Hexagon::A2_orp: 2998 case Hexagon::A2_and: 2999 case Hexagon::A2_andp: 3000 case Hexagon::A2_combinew: 3001 case Hexagon::A4_combineri: 3002 case Hexagon::A4_combineir: 3003 case Hexagon::A2_combineii: 3004 case Hexagon::A4_combineii: 3005 case Hexagon::A2_combine_ll: 3006 case Hexagon::A2_combine_lh: 3007 
case Hexagon::A2_combine_hl: 3008 case Hexagon::A2_combine_hh: 3009 return true; 3010 } 3011 return false; 3012 } 3013 3014 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, 3015 unsigned InpR) const { 3016 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 3017 const MachineOperand &Op = MI->getOperand(i); 3018 if (!Op.isReg()) 3019 continue; 3020 if (Op.getReg() == InpR) 3021 return i == n-1; 3022 } 3023 return false; 3024 } 3025 3026 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { 3027 if (!BTP->has(OutR) || !BTP->has(InpR)) 3028 return false; 3029 const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); 3030 for (unsigned i = 0, w = OutC.width(); i < w; ++i) { 3031 const BitTracker::BitValue &V = OutC[i]; 3032 if (V.Type != BitTracker::BitValue::Ref) 3033 continue; 3034 if (V.RefI.Reg != InpR) 3035 return false; 3036 } 3037 return true; 3038 } 3039 3040 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, 3041 unsigned OutR2, unsigned &InpR2) const { 3042 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) 3043 return false; 3044 const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); 3045 const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); 3046 unsigned W = OutC1.width(); 3047 unsigned MatchR = 0; 3048 if (W != OutC2.width()) 3049 return false; 3050 for (unsigned i = 0; i < W; ++i) { 3051 const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; 3052 if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) 3053 return false; 3054 if (V1.Type != BitTracker::BitValue::Ref) 3055 continue; 3056 if (V1.RefI.Pos != V2.RefI.Pos) 3057 return false; 3058 if (V1.RefI.Reg != InpR1) 3059 return false; 3060 if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) 3061 return false; 3062 if (!MatchR) 3063 MatchR = V2.RefI.Reg; 3064 else if (V2.RefI.Reg != MatchR) 3065 return false; 3066 } 3067 InpR2 = MatchR; 3068 return true; 3069 } 3070 3071 void 
HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
      unsigned NewPredR) {
  // Maps each original vreg defined/used in the group to its clone.
  DenseMap<unsigned,unsigned> RegMap;

  // Create a new PHI merging NewPredR (from the preheader PB) with the
  // group's old input register (over the back edge of LB).
  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
  Register PhiR = MRI->createVirtualRegister(PhiRC);
  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
    .addReg(NewPredR)
    .addMBB(&PB)
    .addReg(G.Inp.Reg)
    .addMBB(&LB);
  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));

  // G.Ins was collected back-to-front; traverse in reverse so clones are
  // emitted in dependence order, remapping operands as we go.
  for (const MachineInstr *SI : llvm::reverse(G.Ins)) {
    unsigned DR = getDefReg(SI);
    const TargetRegisterClass *RC = MRI->getRegClass(DR);
    Register NewDR = MRI->createVirtualRegister(RC);
    DebugLoc DL = SI->getDebugLoc();

    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
    for (const MachineOperand &Op : SI->operands()) {
      if (!Op.isReg()) {
        MIB.add(Op);
        continue;
      }
      if (!Op.isUse())
        continue;
      // Every register use must already be in RegMap: the group has a
      // single external input, which was mapped to the new PHI above.
      unsigned UseR = RegMap[Op.getReg()];
      MIB.addReg(UseR, 0, Op.getSubReg());
    }
    RegMap.insert(std::make_pair(DR, NewDR));
  }

  // Redirect all uses of the old PHI result to the cloned group's output.
  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
}

// Try to move bit-shuffling instruction groups across the back edge of
// loop C. Returns true if any change was made.
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
  LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
                    << "\n");
  // Collect PHIs whose value is used only inside the loop, with at least
  // one use in a bit-shuffling instruction or as a store input.
  std::vector<PhiInfo> Phis;
  for (auto &I : *C.LB) {
    if (!I.isPHI())
      break;
    unsigned PR = getDefReg(&I);
    if (isConst(PR))
      continue;
    bool BadUse = false, GoodUse = false;
    for (const MachineOperand &MO : MRI->use_operands(PR)) {
      const MachineInstr *UseI = MO.getParent();
      if (UseI->getParent() != C.LB) {
        BadUse = true;
        break;
      }
      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
        GoodUse = true;
    }
    if (BadUse || !GoodUse)
      continue;

    Phis.push_back(PhiInfo(I, *C.LB));
  }

  LLVM_DEBUG({
    dbgs() << "Phis: {";
    for (auto &I : Phis) {
      dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
             << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
             << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
             << I.LB->getNumber() << ')';
    }
    dbgs() << " }\n";
  });

  if (Phis.empty())
    return false;

  bool Changed = false;
  InstrList ShufIns;

  // Go backwards in the block: for each bit shuffling instruction, check
  // if that instruction could potentially be moved to the front of the loop:
  // the output of the loop cannot be used in a non-shuffling instruction
  // in this loop.
  for (MachineInstr &MI : llvm::reverse(*C.LB)) {
    if (MI.isTerminator())
      continue;
    if (MI.isPHI())
      break;

    RegisterSet Defs;
    HBS::getInstrDefs(MI, Defs);
    if (Defs.count() != 1)
      continue;
    Register DefR = Defs.find_first();
    if (!DefR.isVirtual())
      continue;
    if (!isBitShuffle(&MI, DefR))
      continue;

    bool BadUse = false;
    for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() == C.LB) {
        if (UseI->isPHI()) {
          // If the use is in a phi node in this loop, then it should be
          // the value corresponding to the back edge.
          unsigned Idx = UI.getOperandNo();
          if (UseI->getOperand(Idx+1).getMBB() != C.LB)
            BadUse = true;
        } else {
          // Any other in-loop user must itself be an already-accepted
          // shuffling instruction (ShufIns is filled back-to-front).
          if (!llvm::is_contained(ShufIns, UseI))
            BadUse = true;
        }
      } else {
        // There is a use outside of the loop, but there is no epilog block
        // suitable for a copy-out.
        if (C.EB == nullptr)
          BadUse = true;
      }
      if (BadUse)
        break;
    }

    if (BadUse)
      continue;
    ShufIns.push_back(&MI);
  }

  // Partition the list of shuffling instructions into instruction groups,
  // where each group has to be moved as a whole (i.e. a group is a chain of
  // dependent instructions). A group produces a single live output register,
  // which is meant to be the input of the loop phi node (although this is
  // not checked here yet). It also uses a single register as its input,
  // which is some value produced in the loop body. After moving the group
  // to the beginning of the loop, that input register would need to be
  // the loop-carried register (through a phi node) instead of the (currently
  // loop-carried) output register.
  using InstrGroupList = std::vector<InstrGroup>;
  InstrGroupList Groups;

  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
    MachineInstr *SI = ShufIns[i];
    if (SI == nullptr)
      continue;

    InstrGroup G;
    G.Ins.push_back(SI);
    G.Out.Reg = getDefReg(SI);
    // Inputs tracks registers the group still needs from outside of it.
    RegisterSet Inputs;
    HBS::getInstrUses(*SI, Inputs);

    for (unsigned j = i+1; j < n; ++j) {
      MachineInstr *MI = ShufIns[j];
      if (MI == nullptr)
        continue;
      RegisterSet Defs;
      HBS::getInstrDefs(*MI, Defs);
      // If this instruction does not define any pending inputs, skip it.
      if (!Defs.intersects(Inputs))
        continue;
      // Otherwise, add it to the current group and remove the inputs that
      // are defined by MI.
      G.Ins.push_back(MI);
      Inputs.remove(Defs);
      // Then add all registers used by MI.
      HBS::getInstrUses(*MI, Inputs);
      ShufIns[j] = nullptr;
    }

    // Only add a group if it requires at most one register.
    if (Inputs.count() > 1)
      continue;
    // The group's output must feed a loop PHI over the back edge.
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    if (llvm::none_of(Phis, LoopInpEq))
      continue;

    G.Inp.Reg = Inputs.find_first();
    Groups.push_back(G);
  }

  LLVM_DEBUG({
    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
      InstrGroup &G = Groups[i];
      dbgs() << "Group[" << i << "] inp: "
             << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
             << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
      for (const MachineInstr *MI : G.Ins)
        dbgs() << " " << MI;
    }
  });

  for (InstrGroup &G : Groups) {
    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    auto F = llvm::find_if(Phis, LoopInpEq);
    if (F == Phis.end())
      continue;
    unsigned PrehR = 0;
    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
      // The preheader value is not the same shuffle of some register.
      // Accept only a transfer of immediate 0: a zero of the required
      // register class can then be materialized in the preheader.
      const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
      unsigned Opc = DefPrehR->getOpcode();
      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
        continue;
      if (!DefPrehR->getOperand(1).isImm())
        continue;
      if (DefPrehR->getOperand(1).getImm() != 0)
        continue;
      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
      if (RC != MRI->getRegClass(F->PR.Reg)) {
        // Create a zero of the matching class (32- or 64-bit transfer)
        // before the preheader's terminator.
        PrehR = MRI->createVirtualRegister(RC);
        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
                                                          : Hexagon::A2_tfrpi;
        auto T = C.PB->getFirstTerminator();
        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
        BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
          .addImm(0);
      } else {
        PrehR = F->PR.Reg;
      }
    }
    // isSameShuffle could match with PrehR being of a wider class than
    // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
    // it would match for the input being a 32-bit register, and PrehR
    // being a 64-bit register (where the low 32 bits match). This could
    // be handled, but for now skip these cases.
    if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
      continue;
    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
    Changed = true;
  }

  return Changed;
}

bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  // Run the bit tracker over the whole function; its results drive the
  // shuffle analysis in processLoop.
  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();
  BTP = &BT;

  std::vector<LoopCand> Cand;

  // Collect candidate loops: blocks with exactly two predecessors and two
  // successors, where one of the predecessors is the block itself.
  for (auto &B : MF) {
    if (B.pred_size() != 2 || B.succ_size() != 2)
      continue;
    MachineBasicBlock *PB = nullptr;
    bool IsLoop = false;
    for (MachineBasicBlock *Pred : B.predecessors()) {
      if (Pred != &B)
        PB = Pred;
      else
        IsLoop = true;
    }
    if (!IsLoop)
      continue;

    MachineBasicBlock *EB = nullptr;
    for (MachineBasicBlock *Succ : B.successors()) {
      if (Succ == &B)
        continue;
      // Set EB to the epilog block, if it has only 1 predecessor (i.e. the
      // edge from B to EB is non-critical).
      // Only the first non-self successor is examined; EB may remain null
      // (LoopCand tolerates a missing epilog block).
      if (Succ->pred_size() == 1)
        EB = Succ;
      break;
    }

    Cand.push_back(LoopCand(&B, PB, EB));
  }

  // Process each candidate loop independently; report whether any changed.
  bool Changed = false;
  for (auto &C : Cand)
    Changed |= processLoop(C);

  return Changed;
}

//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createHexagonLoopRescheduling() {
  return new HexagonLoopRescheduling();
}

FunctionPass *llvm::createHexagonBitSimplify() {
  return new HexagonBitSimplify();
}