//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"

STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {

/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow ones.
struct ReduceEntry {
  uint16_t WideOpc;      // Wide opcode
  uint16_t NarrowOpc1;   // Narrow opcode to transform to
  uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
  uint8_t  Imm1Limit;    // Limit of immediate field (bits)
  uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
  unsigned LowRegs1 : 1; // Only possible if low-registers are used
  unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
  unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                         // 1 - No cc field.
                         // 2 - Always set CPSR.
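  // Same encoding as PredCC1, used when reducing to the two-address
  // (NarrowOpc2) form.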
  unsigned PredCC2  : 2;
  unsigned PartFlag : 1; // 16-bit instruction does partial flag update
  unsigned Special  : 1; // Needs to be dealt with specially
  unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};

static const ReduceEntry ReduceTable[] = {
  // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
  { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
  { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
  { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
  { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
  //FIXME: Disable CMN, as CCodes are backwards from compare expectations
  //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
  { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
  { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
  { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
  { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
  { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
  { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
  { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
  { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
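  // Load / store opcodes. These are all marked Special and are handled by
  // ReduceLoadStore() rather than the generic narrowing paths.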
  { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },

  { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};

class Thumb2SizeReduce : public MachineFunctionPass {
public:
  static char ID;

  const Thumb2InstrInfo *TII;
  const ARMSubtarget *STI;

  Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return THUMB2_SIZE_REDUCE_NAME;
  }

private:
  /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
  DenseMap<unsigned, unsigned> ReduceOpcodeMap;

  bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

  bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                       bool is2Addr, ARMCC::CondCodes Pred,
                       bool LiveCPSR, bool &HasCC, bool &CCDead);

  bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry);

  bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);

  /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
  /// instruction.
  bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR,
                     bool IsSelfLoop);

  /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
  /// non-two-address instruction.
  bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                      const ReduceEntry &Entry, bool LiveCPSR,
                      bool IsSelfLoop);

  /// ReduceMI - Attempt to reduce MI, return true on success.
  bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                bool IsSelfLoop, bool SkipPrologueEpilogue);

  /// ReduceMBB - Reduce width of instructions in the specified basic block.
  bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);

  bool OptimizeSize;
  bool MinimizeSize;

  // Last instruction to define CPSR in the current block.
  MachineInstr *CPSRDef;
  // Was CPSR last defined by a high latency instruction?
  // When CPSRDef is null, this refers to CPSR defs in predecessors.
  bool HighLatencyCPSR;

  struct MBBInfo {
    // The flags leaving this block have high latency.
    bool HighLatencyCPSR = false;
    // Has this block been visited yet?
    bool Visited = false;

    MBBInfo() = default;
  };

  SmallVector<MBBInfo, 8> BlockInfo;

  std::function<bool(const Function &)> PredicateFtor;
};

char Thumb2SizeReduce::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
                false)

Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
    if (*Regs == ARM::CPSR)
      return true;
  return false;
}

// Check for a likely high-latency flag def.
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch(Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}

/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability to do
/// register renaming.
/// This function checks whether there is a read-after-write dependency between
/// the last instruction that defines CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note that we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
/// r0 = muls ...
/// r1 = add.w r0, ...
/// ...
///    = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
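    // When CPSRDef is null here, any live flags were produced in a
    // predecessor block; HighLatencyCPSR summarizes those predecessor defs.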
    return HighLatencyCPSR || FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (const MachineOperand &MO : CPSRDef->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (const MachineOperand &MO : Use->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // If the current CPSR has high latency, try to avoid the false dependency.
  if (HighLatencyCPSR)
    return true;

  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;

  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If the old opcode does not implicitly define CPSR, then it's not ok,
    // since the new opcode's CPSR def is not meant to be thrown away,
    // e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

    if (Offset != 4)
      return false;

    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    Register BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
      if (MO.getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA: {
    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do
    // this if the base register is killed, as then it doesn't matter what its
    // value is after the instruction.
    if (!MI->getOperand(0).isKill())
      return false;

    // If the base register is in the register list and isn't the lowest
    // numbered register (i.e. it's in operand 4 onwards) then with writeback
    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;

    break;
  }
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                            getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always", and the instruction must not set
    // the condition flags.
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(),
                TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    Register PredReg;
    if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri:
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        LLVM_FALLTHROUGH;
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the main table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  case ARM::t2TEQrr: {
    Register PredReg;
    // Can only convert to eors if we're not in an IT block.
    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
      break;
    // TODO if Operand 0 is not killed but Operand 1 is, then we could write
    // to Op1 instead.
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  Register Reg0 = MI->getOperand(0).getReg();
  Register Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    Register Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    Register Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);

  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. So add the first operand as a def and then
  // again as a use.
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
                               MI->getFlag(MachineInstr::FrameDestroy)))
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;

  STI = &MF.getSubtarget<ARMSubtarget>();
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction().hasOptSize();
  MinimizeSize = STI->hasMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so the CPSR-def state is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
                     MF.getFunction().needsUnwindTableEntry();
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}