//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"

STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,  "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,   "Number of 32-bit load / store reduced to 16-bit ones");

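// Testing / debugging knobs: cap how many reductions of each kind are
// performed (-1 means no limit).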
static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {

/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
  uint16_t WideOpc;      // Wide opcode
  uint16_t NarrowOpc1;   // Narrow opcode to transform to
  uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
  uint8_t  Imm1Limit;    // Limit of immediate field (bits)
  uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
  unsigned LowRegs1 : 1; // Only possible if low-registers are used
  unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
  unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                         // 1 - No cc field.
                         // 2 - Always set CPSR.
  unsigned PredCC2  : 2;
  unsigned PartFlag : 1; // 16-bit instruction does partial flag update
  unsigned Special  : 1; // Needs to be dealt with specially
  unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};

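// How to read an entry: the t2ADDri row below, for example, says a wide
// "add.w rd, rn, #imm" can be narrowed to tADDi3 (separate destination,
// 3-bit immediate) or, in two-address form, to tADDi8 (destination equals
// source, 8-bit immediate); both require low registers, and the opcode is
// handled by the "special" reduction path.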
static const ReduceEntry ReduceTable[] = {
  // Wide,          Narrow1,          Narrow2,       imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr,   0,                ARM::tADC,      0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri,   ARM::tADDi3,      ARM::tADDi8,    3,   8,    1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr,   ARM::tADDrr,      ARM::tADDhirr,  0,   0,    1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,  ARM::tADDi3,      ARM::tADDi8,    3,   8,    1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,  ARM::tADDrr,      0,              0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr,   0,                ARM::tAND,      0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri,   ARM::tASRri,      0,              5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr,   0,                ARM::tASRrr,    0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr,   0,                ARM::tBIC,      0,   0,    0,   1,  0,0, 1,0,0 },
  // FIXME: Disable CMN, as CCodes are backwards from compare expectations
  //{ ARM::t2CMNrr, ARM::tCMN,        0,              0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMNzrr,  ARM::tCMNz,       0,              0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri,   ARM::tCMPi8,      0,              8,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr,   ARM::tCMPhir,     0,              0,   0,    0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr,   0,                ARM::tEOR,      0,   0,    0,   1,  0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT, ARM::tLEApcrelJT, 0,         0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2LSLri,   ARM::tLSLri,      0,              5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr,   0,                ARM::tLSLrr,    0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri,   ARM::tLSRri,      0,              5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr,   0,                ARM::tLSRrr,    0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,    ARM::tMOVi8,      0,              8,   0,    1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,  ARM::tMOVi8,      0,              8,   0,    1,   0,  0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,    ARM::tMOVr,       0,              0,   0,    0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,     0,                ARM::tMUL,      0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,    ARM::tMVN,        0,              0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr,   0,                ARM::tORR,      0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,     ARM::tREV,        0,              0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16,   ARM::tREV16,      0,              0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH,   ARM::tREVSH,      0,              0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr,   0,                ARM::tROR,      0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri,   ARM::tRSB,        0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,  ARM::tRSB,        0,              0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr,   0,                ARM::tSBC,      0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri,   ARM::tSUBi3,      ARM::tSUBi8,    3,   8,    1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr,   ARM::tSUBrr,      0,              0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,  ARM::tSUBi3,      ARM::tSUBi8,    3,   8,    1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,  ARM::tSUBrr,      0,              0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,    ARM::tSXTB,       0,              0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,    ARM::tSXTH,       0,              0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr,   ARM::tEOR,        0,              0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr,   ARM::tTST,        0,              0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,    ARM::tUXTB,       0,              0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,    ARM::tUXTH,       0,              0,   0,    1,   0,  1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
  { ARM::t2LDRi12,  ARM::tLDRi,       ARM::tLDRspi,   5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,    ARM::tLDRr,       0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12, ARM::tLDRBi,      0,              5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs,   ARM::tLDRBr,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12, ARM::tLDRHi,      0,              5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs,   ARM::tLDRHr,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,  ARM::tLDRSB,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,  ARM::tLDRSH,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,  ARM::tSTRi,       ARM::tSTRspi,   5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,    ARM::tSTRr,       0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12, ARM::tSTRBi,      0,              5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs,   ARM::tSTRBr,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12, ARM::tSTRHi,      0,              5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs,   ARM::tSTRHr,      0,              0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,  0,              0,   0,    1,   0,  0,0, 0,1,0 },

  { ARM::t2LDMIA,   ARM::tLDMIA,      0,              0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET, 0,              ARM::tPOP_RET,  0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD, ARM::tLDMIA_UPD, ARM::tPOP,     0,   0,    1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA,   ARM::tSTMIA_UPD,  0,              0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD, ARM::tSTMIA_UPD, 0,             0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0,              ARM::tPUSH,     0,   0,    1,   1,  1,1, 0,1,0 }
};

class Thumb2SizeReduce : public MachineFunctionPass {
public:
  static char ID;

  const Thumb2InstrInfo *TII;
  const ARMSubtarget *STI;

  Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return THUMB2_SIZE_REDUCE_NAME;
  }

private:
  /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
  DenseMap<unsigned, unsigned> ReduceOpcodeMap;

  bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

  bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                       bool is2Addr, ARMCC::CondCodes Pred,
                       bool LiveCPSR, bool &HasCC, bool &CCDead);

  bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry);

  bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);

  /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
  /// instruction.
  bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR,
                     bool IsSelfLoop);

  /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
  /// non-two-address instruction.
  bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                      const ReduceEntry &Entry, bool LiveCPSR,
                      bool IsSelfLoop);

  /// ReduceMI - Attempt to reduce MI, return true on success.
  bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                bool IsSelfLoop, bool SkipPrologueEpilogue);

  /// ReduceMBB - Reduce width of instructions in the specified basic block.
  bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);

  bool OptimizeSize;
  bool MinimizeSize;

  // Last instruction to define CPSR in the current block.
  MachineInstr *CPSRDef;
  // Was CPSR last defined by a high latency instruction?
  // When CPSRDef is null, this refers to CPSR defs in predecessors.
  bool HighLatencyCPSR;

  struct MBBInfo {
    // The flags leaving this block have high latency.
    bool HighLatencyCPSR = false;
    // Has this block been visited yet?
    bool Visited = false;

    MBBInfo() = default;
  };

  SmallVector<MBBInfo, 8> BlockInfo;

  std::function<bool(const Function &)> PredicateFtor;
};

char Thumb2SizeReduce::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
                false)

Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  return is_contained(MCID.implicit_defs(), ARM::CPSR);
}

// Check for a likely high-latency flag def.
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch (Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}

/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability to do
/// register renaming magic.
/// This function checks if there is a read-after-write dependency between the
/// last instruction that defines the CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
///    r0   = muls ...
///    r1   = add.w r0, ...
///    ...
///         = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return HighLatencyCPSR || FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (const MachineOperand &MO : CPSRDef->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (const MachineOperand &MO : Use->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // If the current CPSR has high latency, try to avoid the false dependency.
  if (HighLatencyCPSR)
    return true;

  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;

  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If old opcode does not implicitly define CPSR, then it's not ok since
    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

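    // Only a post-increment of exactly 4 bytes matches the semantics of a
    // single-register LDM/STM with base writeback, e.g. "ldr r0, [r1], #4"
    // becomes "ldm r1!, {r0}".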
    if (Offset != 4)
      return false;

    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    Register BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
      if (MO.getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA: {
    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do
    // this if the base register is killed, as then it doesn't matter what its
    // value is after the instruction.
    if (!MI->getOperand(0).isKill())
      return false;

    // If the base register is in the register list and isn't the lowest
    // numbered register (i.e. it's in operand 4 onwards) then with writeback
    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;

    break;
  }
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                                getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always" and the condition flags must not
    // be being set.
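    // For example, "add.w r2, sp, #16" can become the narrow "add r2, sp, #16"
    // (tADDrSPi stores the offset in words, so the encoded immediate is 4).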
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(),
                TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    Register PredReg;
    if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri:
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        [[fallthrough]];
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the main table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  case ARM::t2TEQrr: {
    Register PredReg;
    // Can only convert to eors if we're not in an IT block.
    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
      break;
    // TODO if Operand 0 is not killed but Operand 1 is, then we could write
    // to Op1 instead.
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}

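// Reduce a 32-bit instruction to its 16-bit two-address form, e.g.
// "and.w r0, r0, r1" to "ands r0, r1". The destination must already match one
// of the sources (commuting operands where possible), and the operands must
// generally be low registers.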
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  Register Reg0 = MI->getOperand(0).getReg();
  Register Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    Register Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    Register Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.operands()[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

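// Reduce a 32-bit instruction to its 16-bit non-two-address form, e.g.
// "add.w r0, r1, #7" to "adds r0, r1, #7" (tADDi3). Register operands must
// generally be low registers and any immediate must fit the narrow encoding.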
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.operands()[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);

  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. So add the first operand as a def and then
  // again as a use.
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

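// Conservatively track CPSR liveness across MI: the result is true if MI has
// a non-dead CPSR def or CPSR was already live on entry; DefCPSR records
// whether MI defines CPSR at all.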
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
                               MI->getFlag(MachineInstr::FrameDestroy)))
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
        LiveCPSR = false;
      MachineOperand *MO =
          BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;

  STI = &MF.getSubtarget<ARMSubtarget>();
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction().hasOptSize();
  MinimizeSize = STI->hasMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
                     MF.getFunction().needsUnwindTableEntry();
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}