1 //===- HexagonStoreWidening.cpp -------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Replace sequences of "narrow" stores to adjacent memory locations with 9 // a fewer "wide" stores that have the same effect. 10 // For example, replace: 11 // S4_storeirb_io %100, 0, 0 ; store-immediate-byte 12 // S4_storeirb_io %100, 1, 0 ; store-immediate-byte 13 // with 14 // S4_storeirh_io %100, 0, 0 ; store-immediate-halfword 15 // The above is the general idea. The actual cases handled by the code 16 // may be a bit more complex. 17 // The purpose of this pass is to reduce the number of outstanding stores, 18 // or as one could say, "reduce store queue pressure". Also, wide stores 19 // mean fewer stores, and since there are only two memory instructions allowed 20 // per packet, it also means fewer packets, and ultimately fewer cycles. 21 //===---------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "hexagon-widen-stores" 24 25 #include "HexagonInstrInfo.h" 26 #include "HexagonRegisterInfo.h" 27 #include "HexagonSubtarget.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/Analysis/AliasAnalysis.h" 30 #include "llvm/Analysis/MemoryLocation.h" 31 #include "llvm/CodeGen/MachineBasicBlock.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineFunctionPass.h" 34 #include "llvm/CodeGen/MachineInstr.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineMemOperand.h" 37 #include "llvm/CodeGen/MachineOperand.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/IR/DebugLoc.h" 40 #include "llvm/MC/MCInstrDesc.h" 41 #include "llvm/Pass.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include <algorithm> 47 #include <cassert> 48 #include <cstdint> 49 #include <iterator> 50 #include <vector> 51 52 using namespace llvm; 53 54 namespace llvm { 55 56 FunctionPass *createHexagonStoreWidening(); 57 void initializeHexagonStoreWideningPass(PassRegistry&); 58 59 } // end namespace llvm 60 61 namespace { 62 63 struct HexagonStoreWidening : public MachineFunctionPass { 64 const HexagonInstrInfo *TII; 65 const HexagonRegisterInfo *TRI; 66 const MachineRegisterInfo *MRI; 67 AliasAnalysis *AA; 68 MachineFunction *MF; 69 70 public: 71 static char ID; 72 73 HexagonStoreWidening() : MachineFunctionPass(ID) { 74 initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); 75 } 76 77 bool runOnMachineFunction(MachineFunction &MF) override; 78 79 StringRef getPassName() const override { return "Hexagon Store Widening"; } 80 81 void getAnalysisUsage(AnalysisUsage &AU) const override { 82 AU.addRequired<AAResultsWrapperPass>(); 83 AU.addPreserved<AAResultsWrapperPass>(); 84 MachineFunctionPass::getAnalysisUsage(AU); 85 } 86 87 static bool handledStoreType(const MachineInstr *MI); 88 89 private: 90 static const int MaxWideSize = 4; 91 92 using InstrGroup = std::vector<MachineInstr *>; 93 using InstrGroupList = std::vector<InstrGroup>; 94 95 bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); 96 bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); 97 void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, 98 InstrGroup::iterator End, InstrGroup &Group); 99 void createStoreGroups(MachineBasicBlock &MBB, 100 InstrGroupList &StoreGroups); 101 bool processBasicBlock(MachineBasicBlock &MBB); 102 bool processStoreGroup(InstrGroup &Group); 103 bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, 104 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); 105 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); 106 bool replaceStores(InstrGroup &OG, InstrGroup &NG); 107 bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); 108 }; 109 110 } // end anonymous namespace 111 112 char HexagonStoreWidening::ID = 0; 113 114 INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", 115 "Hexason Store Widening", false, false) 116 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 117 INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", 118 "Hexagon Store Widening", false, false) 119 120 // Some local helper functions... 121 static unsigned getBaseAddressRegister(const MachineInstr *MI) { 122 const MachineOperand &MO = MI->getOperand(0); 123 assert(MO.isReg() && "Expecting register operand"); 124 return MO.getReg(); 125 } 126 127 static int64_t getStoreOffset(const MachineInstr *MI) { 128 unsigned OpC = MI->getOpcode(); 129 assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); 130 131 switch (OpC) { 132 case Hexagon::S4_storeirb_io: 133 case Hexagon::S4_storeirh_io: 134 case Hexagon::S4_storeiri_io: { 135 const MachineOperand &MO = MI->getOperand(1); 136 assert(MO.isImm() && "Expecting immediate offset"); 137 return MO.getImm(); 138 } 139 } 140 dbgs() << *MI; 141 llvm_unreachable("Store offset calculation missing for a handled opcode"); 142 return 0; 143 } 144 145 static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { 146 assert(!MI->memoperands_empty() && "Expecting memory operands"); 147 return **MI->memoperands_begin(); 148 } 149 150 // Filtering function: any stores whose opcodes are not "approved" of by 151 // this function will not be subjected to widening. 152 inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { 153 // For now, only handle stores of immediate values. 154 // Also, reject stores to stack slots. 155 unsigned Opc = MI->getOpcode(); 156 switch (Opc) { 157 case Hexagon::S4_storeirb_io: 158 case Hexagon::S4_storeirh_io: 159 case Hexagon::S4_storeiri_io: 160 // Base address must be a register. (Implement FI later.) 161 return MI->getOperand(0).isReg(); 162 default: 163 return false; 164 } 165 } 166 167 // Check if the machine memory operand MMO is aliased with any of the 168 // stores in the store group Stores. 169 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 170 const MachineMemOperand &MMO) { 171 if (!MMO.getValue()) 172 return true; 173 174 MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); 175 176 for (auto SI : Stores) { 177 const MachineMemOperand &SMO = getStoreTarget(SI); 178 if (!SMO.getValue()) 179 return true; 180 181 MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); 182 if (AA->alias(L, SL)) 183 return true; 184 } 185 186 return false; 187 } 188 189 // Check if the machine instruction MI accesses any storage aliased with 190 // any store in the group Stores. 191 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 192 const MachineInstr *MI) { 193 for (auto &I : MI->memoperands()) 194 if (instrAliased(Stores, *I)) 195 return true; 196 return false; 197 } 198 199 // Inspect a machine basic block, and generate store groups out of stores 200 // encountered in the block. 201 // 202 // A store group is a group of stores that use the same base register, 203 // and which can be reordered within that group without altering the 204 // semantics of the program. A single store group could be widened as 205 // a whole, if there existed a single store instruction with the same 206 // semantics as the entire group. In many cases, a single store group 207 // may need more than one wide store. 208 void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, 209 InstrGroupList &StoreGroups) { 210 InstrGroup AllInsns; 211 212 // Copy all instruction pointers from the basic block to a temporary 213 // list. This will allow operating on the list, and modifying its 214 // elements without affecting the basic block. 215 for (auto &I : MBB) 216 AllInsns.push_back(&I); 217 218 // Traverse all instructions in the AllInsns list, and if we encounter 219 // a store, then try to create a store group starting at that instruction 220 // i.e. a sequence of independent stores that can be widened. 221 for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { 222 MachineInstr *MI = *I; 223 // Skip null pointers (processed instructions). 224 if (!MI || !handledStoreType(MI)) 225 continue; 226 227 // Found a store. Try to create a store group. 228 InstrGroup G; 229 createStoreGroup(MI, I+1, E, G); 230 if (G.size() > 1) 231 StoreGroups.push_back(G); 232 } 233 } 234 235 // Create a single store group. The stores need to be independent between 236 // themselves, and also there cannot be other instructions between them 237 // that could read or modify storage being stored into. 238 void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, 239 InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { 240 assert(handledStoreType(BaseStore) && "Unexpected instruction"); 241 unsigned BaseReg = getBaseAddressRegister(BaseStore); 242 InstrGroup Other; 243 244 Group.push_back(BaseStore); 245 246 for (auto I = Begin; I != End; ++I) { 247 MachineInstr *MI = *I; 248 if (!MI) 249 continue; 250 251 if (handledStoreType(MI)) { 252 // If this store instruction is aliased with anything already in the 253 // group, terminate the group now. 254 if (instrAliased(Group, getStoreTarget(MI))) 255 return; 256 // If this store is aliased to any of the memory instructions we have 257 // seen so far (that are not a part of this group), terminate the group. 258 if (instrAliased(Other, getStoreTarget(MI))) 259 return; 260 261 unsigned BR = getBaseAddressRegister(MI); 262 if (BR == BaseReg) { 263 Group.push_back(MI); 264 *I = nullptr; 265 continue; 266 } 267 } 268 269 // Assume calls are aliased to everything. 270 if (MI->isCall() || MI->hasUnmodeledSideEffects()) 271 return; 272 273 if (MI->mayLoad() || MI->mayStore()) { 274 if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) 275 return; 276 Other.push_back(MI); 277 } 278 } // for 279 } 280 281 // Check if store instructions S1 and S2 are adjacent. More precisely, 282 // S2 has to access memory immediately following that accessed by S1. 283 bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, 284 const MachineInstr *S2) { 285 if (!handledStoreType(S1) || !handledStoreType(S2)) 286 return false; 287 288 const MachineMemOperand &S1MO = getStoreTarget(S1); 289 290 // Currently only handling immediate stores. 291 int Off1 = S1->getOperand(1).getImm(); 292 int Off2 = S2->getOperand(1).getImm(); 293 294 return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) 295 : int(Off1+S1MO.getSize()) == Off2; 296 } 297 298 /// Given a sequence of adjacent stores, and a maximum size of a single wide 299 /// store, pick a group of stores that can be replaced by a single store 300 /// of size not exceeding MaxSize. The selected sequence will be recorded 301 /// in OG ("old group" of instructions). 302 /// OG should be empty on entry, and should be left empty if the function 303 /// fails. 304 bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, 305 InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, 306 unsigned MaxSize) { 307 assert(Begin != End && "No instructions to analyze"); 308 assert(OG.empty() && "Old group not empty on entry"); 309 310 if (std::distance(Begin, End) <= 1) 311 return false; 312 313 MachineInstr *FirstMI = *Begin; 314 assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); 315 const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); 316 unsigned Alignment = FirstMMO.getAlignment(); 317 unsigned SizeAccum = FirstMMO.getSize(); 318 unsigned FirstOffset = getStoreOffset(FirstMI); 319 320 // The initial value of SizeAccum should always be a power of 2. 321 assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); 322 323 // If the size of the first store equals to or exceeds the limit, do nothing. 324 if (SizeAccum >= MaxSize) 325 return false; 326 327 // If the size of the first store is greater than or equal to the address 328 // stored to, then the store cannot be made any wider. 329 if (SizeAccum >= Alignment) 330 return false; 331 332 // The offset of a store will put restrictions on how wide the store can be. 333 // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. 334 // If the first store already exhausts the offset limits, quit. Test this 335 // by checking if the next wider size would exceed the limit. 336 if ((2*SizeAccum-1) & FirstOffset) 337 return false; 338 339 OG.push_back(FirstMI); 340 MachineInstr *S1 = FirstMI; 341 342 // Pow2Num will be the largest number of elements in OG such that the sum 343 // of sizes of stores 0...Pow2Num-1 will be a power of 2. 344 unsigned Pow2Num = 1; 345 unsigned Pow2Size = SizeAccum; 346 347 // Be greedy: keep accumulating stores as long as they are to adjacent 348 // memory locations, and as long as the total number of bytes stored 349 // does not exceed the limit (MaxSize). 350 // Keep track of when the total size covered is a power of 2, since 351 // this is a size a single store can cover. 352 for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { 353 MachineInstr *S2 = *I; 354 // Stores are sorted, so if S1 and S2 are not adjacent, there won't be 355 // any other store to fill the "hole". 356 if (!storesAreAdjacent(S1, S2)) 357 break; 358 359 unsigned S2Size = getStoreTarget(S2).getSize(); 360 if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) 361 break; 362 363 OG.push_back(S2); 364 SizeAccum += S2Size; 365 if (isPowerOf2_32(SizeAccum)) { 366 Pow2Num = OG.size(); 367 Pow2Size = SizeAccum; 368 } 369 if ((2*Pow2Size-1) & FirstOffset) 370 break; 371 372 S1 = S2; 373 } 374 375 // The stores don't add up to anything that can be widened. Clean up. 376 if (Pow2Num <= 1) { 377 OG.clear(); 378 return false; 379 } 380 381 // Only leave the stored being widened. 382 OG.resize(Pow2Num); 383 TotalSize = Pow2Size; 384 return true; 385 } 386 387 /// Given an "old group" OG of stores, create a "new group" NG of instructions 388 /// to replace them. Ideally, NG would only have a single instruction in it, 389 /// but that may only be possible for store-immediate. 390 bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, 391 unsigned TotalSize) { 392 // XXX Current limitations: 393 // - only expect stores of immediate values in OG, 394 // - only handle a TotalSize of up to 4. 395 396 if (TotalSize > 4) 397 return false; 398 399 unsigned Acc = 0; // Value accumulator. 400 unsigned Shift = 0; 401 402 for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { 403 MachineInstr *MI = *I; 404 const MachineMemOperand &MMO = getStoreTarget(MI); 405 MachineOperand &SO = MI->getOperand(2); // Source. 406 assert(SO.isImm() && "Expecting an immediate operand"); 407 408 unsigned NBits = MMO.getSize()*8; 409 unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); 410 unsigned Val = (SO.getImm() & Mask) << Shift; 411 Acc |= Val; 412 Shift += NBits; 413 } 414 415 MachineInstr *FirstSt = OG.front(); 416 DebugLoc DL = OG.back()->getDebugLoc(); 417 const MachineMemOperand &OldM = getStoreTarget(FirstSt); 418 MachineMemOperand *NewM = 419 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), 420 TotalSize, OldM.getAlignment(), 421 OldM.getAAInfo()); 422 423 if (Acc < 0x10000) { 424 // Create mem[hw] = #Acc 425 unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : 426 (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; 427 assert(WOpc && "Unexpected size"); 428 429 int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); 430 const MCInstrDesc &StD = TII->get(WOpc); 431 MachineOperand &MR = FirstSt->getOperand(0); 432 int64_t Off = FirstSt->getOperand(1).getImm(); 433 MachineInstr *StI = 434 BuildMI(*MF, DL, StD) 435 .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) 436 .addImm(Off) 437 .addImm(Val); 438 StI->addMemOperand(*MF, NewM); 439 NG.push_back(StI); 440 } else { 441 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg 442 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); 443 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); 444 unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); 445 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) 446 .addImm(int(Acc)); 447 NG.push_back(TfrI); 448 449 unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : 450 (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; 451 assert(WOpc && "Unexpected size"); 452 453 const MCInstrDesc &StD = TII->get(WOpc); 454 MachineOperand &MR = FirstSt->getOperand(0); 455 int64_t Off = FirstSt->getOperand(1).getImm(); 456 MachineInstr *StI = 457 BuildMI(*MF, DL, StD) 458 .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) 459 .addImm(Off) 460 .addReg(VReg, RegState::Kill); 461 StI->addMemOperand(*MF, NewM); 462 NG.push_back(StI); 463 } 464 465 return true; 466 } 467 468 // Replace instructions from the old group OG with instructions from the 469 // new group NG. Conceptually, remove all instructions in OG, and then 470 // insert all instructions in NG, starting at where the first instruction 471 // from OG was (in the order in which they appeared in the basic block). 472 // (The ordering in OG does not have to match the order in the basic block.) 473 bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { 474 LLVM_DEBUG({ 475 dbgs() << "Replacing:\n"; 476 for (auto I : OG) 477 dbgs() << " " << *I; 478 dbgs() << "with\n"; 479 for (auto I : NG) 480 dbgs() << " " << *I; 481 }); 482 483 MachineBasicBlock *MBB = OG.back()->getParent(); 484 MachineBasicBlock::iterator InsertAt = MBB->end(); 485 486 // Need to establish the insertion point. The best one is right before 487 // the first store in the OG, but in the order in which the stores occur 488 // in the program list. Since the ordering in OG does not correspond 489 // to the order in the program list, we need to do some work to find 490 // the insertion point. 491 492 // Create a set of all instructions in OG (for quick lookup). 493 SmallPtrSet<MachineInstr*, 4> InstrSet; 494 for (auto I : OG) 495 InstrSet.insert(I); 496 497 // Traverse the block, until we hit an instruction from OG. 498 for (auto &I : *MBB) { 499 if (InstrSet.count(&I)) { 500 InsertAt = I; 501 break; 502 } 503 } 504 505 assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); 506 507 bool AtBBStart = false; 508 509 // InsertAt points at the first instruction that will be removed. We need 510 // to move it out of the way, so it remains valid after removing all the 511 // old stores, and so we are able to recover it back to the proper insertion 512 // position. 513 if (InsertAt != MBB->begin()) 514 --InsertAt; 515 else 516 AtBBStart = true; 517 518 for (auto I : OG) 519 I->eraseFromParent(); 520 521 if (!AtBBStart) 522 ++InsertAt; 523 else 524 InsertAt = MBB->begin(); 525 526 for (auto I : NG) 527 MBB->insert(InsertAt, I); 528 529 return true; 530 } 531 532 // Break up the group into smaller groups, each of which can be replaced by 533 // a single wide store. Widen each such smaller group and replace the old 534 // instructions with the widened ones. 535 bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { 536 bool Changed = false; 537 InstrGroup::iterator I = Group.begin(), E = Group.end(); 538 InstrGroup OG, NG; // Old and new groups. 539 unsigned CollectedSize; 540 541 while (I != E) { 542 OG.clear(); 543 NG.clear(); 544 545 bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && 546 createWideStores(OG, NG, CollectedSize) && 547 replaceStores(OG, NG); 548 if (!Succ) 549 continue; 550 551 assert(OG.size() > 1 && "Created invalid group"); 552 assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); 553 I += OG.size()-1; 554 555 Changed = true; 556 } 557 558 return Changed; 559 } 560 561 // Process a single basic block: create the store groups, and replace them 562 // with the widened stores, if possible. Processing of each basic block 563 // is independent from processing of any other basic block. This transfor- 564 // mation could be stopped after having processed any basic block without 565 // any ill effects (other than not having performed widening in the unpro- 566 // cessed blocks). Also, the basic blocks can be processed in any order. 567 bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { 568 InstrGroupList SGs; 569 bool Changed = false; 570 571 createStoreGroups(MBB, SGs); 572 573 auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { 574 return getStoreOffset(A) < getStoreOffset(B); 575 }; 576 for (auto &G : SGs) { 577 assert(G.size() > 1 && "Store group with fewer than 2 elements"); 578 llvm::sort(G, Less); 579 580 Changed |= processStoreGroup(G); 581 } 582 583 return Changed; 584 } 585 586 bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { 587 if (skipFunction(MFn.getFunction())) 588 return false; 589 590 MF = &MFn; 591 auto &ST = MFn.getSubtarget<HexagonSubtarget>(); 592 TII = ST.getInstrInfo(); 593 TRI = ST.getRegisterInfo(); 594 MRI = &MFn.getRegInfo(); 595 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 596 597 bool Changed = false; 598 599 for (auto &B : MFn) 600 Changed |= processBasicBlock(B); 601 602 return Changed; 603 } 604 605 FunctionPass *llvm::createHexagonStoreWidening() { 606 return new HexagonStoreWidening(); 607 } 608