1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands pseudo instructions into target 10 // instructions to allow proper scheduling, if-conversion, and other late 11 // optimizations. This pass should be run after register allocation but before 12 // the post-regalloc scheduling pass. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMBaseRegisterInfo.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMMachineFunctionInfo.h" 21 #include "ARMSubtarget.h" 22 #include "MCTargetDesc/ARMAddressingModes.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineJumpTableInfo.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/Support/Debug.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "arm-pseudo" 33 34 static cl::opt<bool> 35 VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, 36 cl::desc("Verify machine code after expanding ARM pseudos")); 37 38 #define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass" 39 40 namespace { 41 class ARMExpandPseudo : public MachineFunctionPass { 42 public: 43 static char ID; 44 ARMExpandPseudo() : MachineFunctionPass(ID) {} 45 46 const ARMBaseInstrInfo *TII; 47 const TargetRegisterInfo *TRI; 48 const ARMSubtarget *STI; 49 ARMFunctionInfo *AFI; 50 51 bool runOnMachineFunction(MachineFunction &Fn) override; 52 53 MachineFunctionProperties getRequiredProperties() const override { 54 return MachineFunctionProperties().set( 55 MachineFunctionProperties::Property::NoVRegs); 56 } 57 58 StringRef getPassName() const override { 59 return ARM_EXPAND_PSEUDO_NAME; 60 } 61 62 private: 63 bool ExpandMI(MachineBasicBlock &MBB, 64 MachineBasicBlock::iterator MBBI, 65 MachineBasicBlock::iterator &NextMBBI); 66 bool ExpandMBB(MachineBasicBlock &MBB); 67 void ExpandVLD(MachineBasicBlock::iterator &MBBI); 68 void ExpandVST(MachineBasicBlock::iterator &MBBI); 69 void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); 70 void ExpandVTBL(MachineBasicBlock::iterator &MBBI, 71 unsigned Opc, bool IsExt); 72 void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI); 73 void ExpandTMOV32BitImm(MachineBasicBlock &MBB, 74 MachineBasicBlock::iterator &MBBI); 75 void ExpandMOV32BitImm(MachineBasicBlock &MBB, 76 MachineBasicBlock::iterator &MBBI); 77 void CMSEClearGPRegs(MachineBasicBlock &MBB, 78 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 79 const SmallVectorImpl<unsigned> &ClearRegs, 80 unsigned ClobberReg); 81 MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator MBBI); 83 MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB, 84 MachineBasicBlock::iterator MBBI, 85 const BitVector &ClearRegs); 86 MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB, 87 MachineBasicBlock::iterator MBBI, 88 const BitVector &ClearRegs); 89 void CMSESaveClearFPRegs(MachineBasicBlock &MBB, 90 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 91 const LivePhysRegs &LiveRegs, 92 SmallVectorImpl<unsigned> &AvailableRegs); 93 void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB, 94 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 95 const LivePhysRegs &LiveRegs, 96 SmallVectorImpl<unsigned> &ScratchRegs); 97 void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 98 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 99 const LivePhysRegs &LiveRegs); 100 void CMSERestoreFPRegs(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 102 SmallVectorImpl<unsigned> &AvailableRegs); 103 void CMSERestoreFPRegsV8(MachineBasicBlock &MBB, 104 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 105 SmallVectorImpl<unsigned> &AvailableRegs); 106 void CMSERestoreFPRegsV81(MachineBasicBlock &MBB, 107 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 108 SmallVectorImpl<unsigned> &AvailableRegs); 109 bool ExpandCMP_SWAP(MachineBasicBlock &MBB, 110 MachineBasicBlock::iterator MBBI, unsigned LdrexOp, 111 unsigned StrexOp, unsigned UxtOp, 112 MachineBasicBlock::iterator &NextMBBI); 113 114 bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB, 115 MachineBasicBlock::iterator MBBI, 116 MachineBasicBlock::iterator &NextMBBI); 117 }; 118 char ARMExpandPseudo::ID = 0; 119 } 120 121 INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false, 122 false) 123 124 namespace { 125 // Constants for register spacing in NEON load/store instructions. 126 // For quad-register load-lane and store-lane pseudo instructors, the 127 // spacing is initially assumed to be EvenDblSpc, and that is changed to 128 // OddDblSpc depending on the lane number operand. 129 enum NEONRegSpacing { 130 SingleSpc, 131 SingleLowSpc , // Single spacing, low registers, three and four vectors. 132 SingleHighQSpc, // Single spacing, high registers, four vectors. 133 SingleHighTSpc, // Single spacing, high registers, three vectors. 134 EvenDblSpc, 135 OddDblSpc 136 }; 137 138 // Entries for NEON load/store information table. The table is sorted by 139 // PseudoOpc for fast binary-search lookups. 140 struct NEONLdStTableEntry { 141 uint16_t PseudoOpc; 142 uint16_t RealOpc; 143 bool IsLoad; 144 bool isUpdating; 145 bool hasWritebackOperand; 146 uint8_t RegSpacing; // One of type NEONRegSpacing 147 uint8_t NumRegs; // D registers loaded or stored 148 uint8_t RegElts; // elements per D register; used for lane ops 149 // FIXME: Temporary flag to denote whether the real instruction takes 150 // a single register (like the encoding) or all of the registers in 151 // the list (like the asm syntax and the isel DAG). When all definitions 152 // are converted to take only the single encoded register, this will 153 // go away. 154 bool copyAllListRegs; 155 156 // Comparison methods for binary search of the table. 157 bool operator<(const NEONLdStTableEntry &TE) const { 158 return PseudoOpc < TE.PseudoOpc; 159 } 160 friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { 161 return TE.PseudoOpc < PseudoOpc; 162 } 163 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, 164 const NEONLdStTableEntry &TE) { 165 return PseudoOpc < TE.PseudoOpc; 166 } 167 }; 168 } 169 170 static const NEONLdStTableEntry NEONLdStTable[] = { 171 { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, 172 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, 173 { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, 174 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, 175 { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, 176 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, 177 178 { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false}, 179 { ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 180 { ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 ,false}, 181 { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false}, 182 { ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false}, 183 { ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false}, 184 185 { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false}, 186 { ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 187 { ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false}, 188 { ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false}, 189 { ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false}, 190 { ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false}, 191 192 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, 193 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, 194 { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false}, 195 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, 196 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, 197 { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false}, 198 199 { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false}, 200 { ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 201 { ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false}, 202 { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false}, 203 { ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false}, 204 { ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false}, 205 206 { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false}, 207 { ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false}, 208 { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false}, 209 { ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false}, 210 { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false}, 211 { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false}, 212 213 { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false}, 214 { ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false}, 215 { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false}, 216 { ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false}, 217 { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false}, 218 { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false}, 219 220 { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false}, 221 { ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false}, 222 { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false}, 223 { ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false}, 224 { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false}, 225 { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false}, 226 227 { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false}, 228 { ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false}, 229 { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false}, 230 { ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false}, 231 { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false}, 232 { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false}, 233 234 { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false}, 235 { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false}, 236 { ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4 ,false}, 237 { ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4 ,false}, 238 { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false}, 239 { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false}, 240 { ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2 ,false}, 241 { ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2 ,false}, 242 { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false}, 243 { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false}, 244 { ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8 ,false}, 245 { ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8 ,false}, 246 247 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, 248 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, 249 { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, 250 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, 251 { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, 252 { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, 253 { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, 254 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, 255 { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, 256 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, 257 258 { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, 259 { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 260 { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, 261 { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, 262 { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 263 { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, 264 { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, 265 { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 266 { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, 267 268 { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, 269 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, 270 { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, 271 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, 272 { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, 273 { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, 274 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 275 { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true}, 276 { ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 277 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 278 { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true}, 279 { ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 280 { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true}, 281 { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true}, 282 { ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 283 284 { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, 285 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 286 { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, 287 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 288 { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, 289 { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 290 { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 291 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 292 { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 293 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 294 295 { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true}, 296 { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 297 { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, 298 { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 299 { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, 300 { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 301 302 { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 303 { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, 304 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 305 { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 306 { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, 307 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 308 { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, 309 { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, 310 { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 311 312 { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, 313 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, 314 { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, 315 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, 316 { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, 317 { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, 318 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 319 { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true}, 320 { ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 321 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 322 { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true}, 323 { ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 324 { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true}, 325 { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true}, 326 { ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 327 328 { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, 329 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 330 { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, 331 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 332 { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, 333 { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 334 { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 335 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 336 { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 337 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 338 339 { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, 340 { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 341 { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, 342 { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 343 { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true}, 344 { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 345 346 { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 347 { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, 348 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 349 { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 350 { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, 351 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 352 { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, 353 { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, 354 { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 355 356 { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, 357 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, 358 { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, 359 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, 360 { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, 361 { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, 362 363 { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false}, 364 { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 365 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false}, 366 { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false}, 367 { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false}, 368 { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false}, 369 370 { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false}, 371 { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 372 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false}, 373 { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false}, 374 { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false}, 375 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false}, 376 377 { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, 378 { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, 379 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, 380 { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, 381 { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, 382 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, 383 384 { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false}, 385 { ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 386 { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false}, 387 { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false}, 388 { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false}, 389 { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false}, 390 391 { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false}, 392 { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 393 { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false}, 394 { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false}, 395 { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false}, 396 { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false}, 397 398 { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false}, 399 { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 400 { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false}, 401 { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false}, 402 { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false}, 403 { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false}, 404 405 { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false}, 406 { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 407 { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false}, 408 { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false}, 409 { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false}, 410 { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false}, 411 412 { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false}, 413 { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 414 { ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false}, 415 { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false}, 416 { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false}, 417 { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false}, 418 419 { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, 420 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, 421 { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, 422 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, 423 { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, 424 { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, 425 { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, 426 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, 427 { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, 428 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, 429 430 { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, 431 { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 432 { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, 433 { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, 434 { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 435 { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, 436 { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, 437 { ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 438 { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, 439 440 { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, 441 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 442 { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, 443 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 444 { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, 445 { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 446 { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, 447 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, 448 { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, 449 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, 450 451 { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, 452 { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 453 { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, 454 { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 455 { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, 456 { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 457 458 { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, 459 { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, 460 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, 461 { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true}, 462 { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, 463 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, 464 { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, 465 { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, 466 { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, 467 468 { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, 469 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 470 { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, 471 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 472 { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, 473 { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 474 { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, 475 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, 476 { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, 477 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, 478 479 { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, 480 { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 481 { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, 482 { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 483 { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, 484 { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 485 486 { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true}, 487 { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true}, 488 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true}, 489 { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true}, 490 { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true}, 491 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true}, 492 { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true}, 493 { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true}, 494 { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true} 495 }; 496 497 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON 498 /// load or store pseudo instruction. 499 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { 500 #ifndef NDEBUG 501 // Make sure the table is sorted. 502 static std::atomic<bool> TableChecked(false); 503 if (!TableChecked.load(std::memory_order_relaxed)) { 504 assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!"); 505 TableChecked.store(true, std::memory_order_relaxed); 506 } 507 #endif 508 509 auto I = llvm::lower_bound(NEONLdStTable, Opcode); 510 if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode) 511 return I; 512 return nullptr; 513 } 514 515 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, 516 /// corresponding to the specified register spacing. Not all of the results 517 /// are necessarily valid, e.g., a Q register only has 2 D subregisters. 518 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, 519 const TargetRegisterInfo *TRI, unsigned &D0, 520 unsigned &D1, unsigned &D2, unsigned &D3) { 521 if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) { 522 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 523 D1 = TRI->getSubReg(Reg, ARM::dsub_1); 524 D2 = TRI->getSubReg(Reg, ARM::dsub_2); 525 D3 = TRI->getSubReg(Reg, ARM::dsub_3); 526 } else if (RegSpc == SingleHighQSpc) { 527 D0 = TRI->getSubReg(Reg, ARM::dsub_4); 528 D1 = TRI->getSubReg(Reg, ARM::dsub_5); 529 D2 = TRI->getSubReg(Reg, ARM::dsub_6); 530 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 531 } else if (RegSpc == SingleHighTSpc) { 532 D0 = TRI->getSubReg(Reg, ARM::dsub_3); 533 D1 = TRI->getSubReg(Reg, ARM::dsub_4); 534 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 535 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 536 } else if (RegSpc == EvenDblSpc) { 537 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 538 D1 = TRI->getSubReg(Reg, ARM::dsub_2); 539 D2 = TRI->getSubReg(Reg, ARM::dsub_4); 540 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 541 } else { 542 assert(RegSpc == OddDblSpc && "unknown register spacing"); 543 D0 = TRI->getSubReg(Reg, ARM::dsub_1); 544 D1 = TRI->getSubReg(Reg, ARM::dsub_3); 545 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 546 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 547 } 548 } 549 550 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register 551 /// operands to real VLD instructions with D register operands. 552 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { 553 MachineInstr &MI = *MBBI; 554 MachineBasicBlock &MBB = *MI.getParent(); 555 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 556 557 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 558 assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); 559 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 560 unsigned NumRegs = TableEntry->NumRegs; 561 562 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 563 TII->get(TableEntry->RealOpc)); 564 unsigned OpIdx = 0; 565 566 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 567 Register DstReg = MI.getOperand(OpIdx++).getReg(); 568 569 bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 || 570 TableEntry->RealOpc == ARM::VLD2DUPd16x2 || 571 TableEntry->RealOpc == ARM::VLD2DUPd32x2 || 572 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed || 573 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed || 574 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed || 575 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register || 576 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register || 577 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register; 578 579 if (IsVLD2DUP) { 580 unsigned SubRegIndex; 581 if (RegSpc == EvenDblSpc) { 582 SubRegIndex = ARM::dsub_0; 583 } else { 584 assert(RegSpc == OddDblSpc && "Unexpected spacing!"); 585 SubRegIndex = ARM::dsub_1; 586 } 587 Register SubReg = TRI->getSubReg(DstReg, SubRegIndex); 588 unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, 589 &ARM::DPairSpcRegClass); 590 MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead)); 591 } else { 592 unsigned D0, D1, D2, D3; 593 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); 594 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); 595 if (NumRegs > 1 && TableEntry->copyAllListRegs) 596 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 597 if (NumRegs > 2 && TableEntry->copyAllListRegs) 598 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); 599 if (NumRegs > 3 && TableEntry->copyAllListRegs) 600 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); 601 } 602 603 if (TableEntry->isUpdating) 604 MIB.add(MI.getOperand(OpIdx++)); 605 606 // Copy the addrmode6 operands. 607 MIB.add(MI.getOperand(OpIdx++)); 608 MIB.add(MI.getOperand(OpIdx++)); 609 610 // Copy the am6offset operand. 611 if (TableEntry->hasWritebackOperand) { 612 // TODO: The writing-back pseudo instructions we translate here are all 613 // defined to take am6offset nodes that are capable to represent both fixed 614 // and register forms. Some real instructions, however, do not rely on 615 // am6offset and have separate definitions for such forms. When this is the 616 // case, fixed forms do not take any offset nodes, so here we skip them for 617 // such instructions. Once all real and pseudo writing-back instructions are 618 // rewritten without use of am6offset nodes, this code will go away. 619 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 620 if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed || 621 TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed || 622 TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed || 623 TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed || 624 TableEntry->RealOpc == ARM::VLD1d8Twb_fixed || 625 TableEntry->RealOpc == ARM::VLD1d16Twb_fixed || 626 TableEntry->RealOpc == ARM::VLD1d32Twb_fixed || 627 TableEntry->RealOpc == ARM::VLD1d64Twb_fixed || 628 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed || 629 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed || 630 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) { 631 assert(AM6Offset.getReg() == 0 && 632 "A fixed writing-back pseudo instruction provides an offset " 633 "register!"); 634 } else { 635 MIB.add(AM6Offset); 636 } 637 } 638 639 // For an instruction writing double-spaced subregs, the pseudo instruction 640 // has an extra operand that is a use of the super-register. Record the 641 // operand index and skip over it. 642 unsigned SrcOpIdx = 0; 643 if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc || 644 RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc) 645 SrcOpIdx = OpIdx++; 646 647 // Copy the predicate operands. 648 MIB.add(MI.getOperand(OpIdx++)); 649 MIB.add(MI.getOperand(OpIdx++)); 650 651 // Copy the super-register source operand used for double-spaced subregs over 652 // to the new instruction as an implicit operand. 653 if (SrcOpIdx != 0) { 654 MachineOperand MO = MI.getOperand(SrcOpIdx); 655 MO.setImplicit(true); 656 MIB.add(MO); 657 } 658 // Add an implicit def for the super-register. 659 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 660 MIB.copyImplicitOps(MI); 661 662 // Transfer memoperands. 663 MIB.cloneMemRefs(MI); 664 MI.eraseFromParent(); 665 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 666 } 667 668 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register 669 /// operands to real VST instructions with D register operands. 670 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { 671 MachineInstr &MI = *MBBI; 672 MachineBasicBlock &MBB = *MI.getParent(); 673 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 674 675 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 676 assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); 677 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 678 unsigned NumRegs = TableEntry->NumRegs; 679 680 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 681 TII->get(TableEntry->RealOpc)); 682 unsigned OpIdx = 0; 683 if (TableEntry->isUpdating) 684 MIB.add(MI.getOperand(OpIdx++)); 685 686 // Copy the addrmode6 operands. 687 MIB.add(MI.getOperand(OpIdx++)); 688 MIB.add(MI.getOperand(OpIdx++)); 689 690 if (TableEntry->hasWritebackOperand) { 691 // TODO: The writing-back pseudo instructions we translate here are all 692 // defined to take am6offset nodes that are capable to represent both fixed 693 // and register forms. Some real instructions, however, do not rely on 694 // am6offset and have separate definitions for such forms. When this is the 695 // case, fixed forms do not take any offset nodes, so here we skip them for 696 // such instructions. Once all real and pseudo writing-back instructions are 697 // rewritten without use of am6offset nodes, this code will go away. 698 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 699 if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed || 700 TableEntry->RealOpc == ARM::VST1d16Qwb_fixed || 701 TableEntry->RealOpc == ARM::VST1d32Qwb_fixed || 702 TableEntry->RealOpc == ARM::VST1d64Qwb_fixed || 703 TableEntry->RealOpc == ARM::VST1d8Twb_fixed || 704 TableEntry->RealOpc == ARM::VST1d16Twb_fixed || 705 TableEntry->RealOpc == ARM::VST1d32Twb_fixed || 706 TableEntry->RealOpc == ARM::VST1d64Twb_fixed) { 707 assert(AM6Offset.getReg() == 0 && 708 "A fixed writing-back pseudo instruction provides an offset " 709 "register!"); 710 } else { 711 MIB.add(AM6Offset); 712 } 713 } 714 715 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 716 bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); 717 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 718 unsigned D0, D1, D2, D3; 719 GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); 720 MIB.addReg(D0, getUndefRegState(SrcIsUndef)); 721 if (NumRegs > 1 && TableEntry->copyAllListRegs) 722 MIB.addReg(D1, getUndefRegState(SrcIsUndef)); 723 if (NumRegs > 2 && TableEntry->copyAllListRegs) 724 MIB.addReg(D2, getUndefRegState(SrcIsUndef)); 725 if (NumRegs > 3 && TableEntry->copyAllListRegs) 726 MIB.addReg(D3, getUndefRegState(SrcIsUndef)); 727 728 // Copy the predicate operands. 729 MIB.add(MI.getOperand(OpIdx++)); 730 MIB.add(MI.getOperand(OpIdx++)); 731 732 if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. 733 MIB->addRegisterKilled(SrcReg, TRI, true); 734 else if (!SrcIsUndef) 735 MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg. 736 MIB.copyImplicitOps(MI); 737 738 // Transfer memoperands. 739 MIB.cloneMemRefs(MI); 740 MI.eraseFromParent(); 741 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 742 } 743 744 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ 745 /// register operands to real instructions with D register operands. 746 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { 747 MachineInstr &MI = *MBBI; 748 MachineBasicBlock &MBB = *MI.getParent(); 749 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 750 751 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 752 assert(TableEntry && "NEONLdStTable lookup failed"); 753 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 754 unsigned NumRegs = TableEntry->NumRegs; 755 unsigned RegElts = TableEntry->RegElts; 756 757 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 758 TII->get(TableEntry->RealOpc)); 759 unsigned OpIdx = 0; 760 // The lane operand is always the 3rd from last operand, before the 2 761 // predicate operands. 762 unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); 763 764 // Adjust the lane and spacing as needed for Q registers. 765 assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); 766 if (RegSpc == EvenDblSpc && Lane >= RegElts) { 767 RegSpc = OddDblSpc; 768 Lane -= RegElts; 769 } 770 assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); 771 772 unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0; 773 unsigned DstReg = 0; 774 bool DstIsDead = false; 775 if (TableEntry->IsLoad) { 776 DstIsDead = MI.getOperand(OpIdx).isDead(); 777 DstReg = MI.getOperand(OpIdx++).getReg(); 778 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); 779 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); 780 if (NumRegs > 1) 781 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 782 if (NumRegs > 2) 783 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); 784 if (NumRegs > 3) 785 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); 786 } 787 788 if (TableEntry->isUpdating) 789 MIB.add(MI.getOperand(OpIdx++)); 790 791 // Copy the addrmode6 operands. 792 MIB.add(MI.getOperand(OpIdx++)); 793 MIB.add(MI.getOperand(OpIdx++)); 794 // Copy the am6offset operand. 795 if (TableEntry->hasWritebackOperand) 796 MIB.add(MI.getOperand(OpIdx++)); 797 798 // Grab the super-register source. 799 MachineOperand MO = MI.getOperand(OpIdx++); 800 if (!TableEntry->IsLoad) 801 GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); 802 803 // Add the subregs as sources of the new instruction. 804 unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | 805 getKillRegState(MO.isKill())); 806 MIB.addReg(D0, SrcFlags); 807 if (NumRegs > 1) 808 MIB.addReg(D1, SrcFlags); 809 if (NumRegs > 2) 810 MIB.addReg(D2, SrcFlags); 811 if (NumRegs > 3) 812 MIB.addReg(D3, SrcFlags); 813 814 // Add the lane number operand. 815 MIB.addImm(Lane); 816 OpIdx += 1; 817 818 // Copy the predicate operands. 819 MIB.add(MI.getOperand(OpIdx++)); 820 MIB.add(MI.getOperand(OpIdx++)); 821 822 // Copy the super-register source to be an implicit source. 823 MO.setImplicit(true); 824 MIB.add(MO); 825 if (TableEntry->IsLoad) 826 // Add an implicit def for the super-register. 827 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 828 MIB.copyImplicitOps(MI); 829 // Transfer memoperands. 830 MIB.cloneMemRefs(MI); 831 MI.eraseFromParent(); 832 } 833 834 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ 835 /// register operands to real instructions with D register operands. 836 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, 837 unsigned Opc, bool IsExt) { 838 MachineInstr &MI = *MBBI; 839 MachineBasicBlock &MBB = *MI.getParent(); 840 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 841 842 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); 843 unsigned OpIdx = 0; 844 845 // Transfer the destination register operand. 846 MIB.add(MI.getOperand(OpIdx++)); 847 if (IsExt) { 848 MachineOperand VdSrc(MI.getOperand(OpIdx++)); 849 MIB.add(VdSrc); 850 } 851 852 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 853 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 854 unsigned D0, D1, D2, D3; 855 GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); 856 MIB.addReg(D0); 857 858 // Copy the other source register operand. 859 MachineOperand VmSrc(MI.getOperand(OpIdx++)); 860 MIB.add(VmSrc); 861 862 // Copy the predicate operands. 863 MIB.add(MI.getOperand(OpIdx++)); 864 MIB.add(MI.getOperand(OpIdx++)); 865 866 // Add an implicit kill and use for the super-reg. 867 MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); 868 MIB.copyImplicitOps(MI); 869 MI.eraseFromParent(); 870 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 871 } 872 873 void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) { 874 MachineInstr &MI = *MBBI; 875 MachineBasicBlock &MBB = *MI.getParent(); 876 unsigned NewOpc = 877 MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore 878 ? ARM::VSTMDIA 879 : ARM::VLDMDIA; 880 MachineInstrBuilder MIB = 881 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 882 883 unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) | 884 getDefRegState(MI.getOperand(0).isDef()); 885 Register SrcReg = MI.getOperand(0).getReg(); 886 887 // Copy the destination register. 888 MIB.add(MI.getOperand(1)); 889 MIB.add(predOps(ARMCC::AL)); 890 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags); 891 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags); 892 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags); 893 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags); 894 if (MI.getOpcode() == ARM::MQQQQPRStore || 895 MI.getOpcode() == ARM::MQQQQPRLoad) { 896 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags); 897 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags); 898 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags); 899 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags); 900 } 901 902 if (NewOpc == ARM::VSTMDIA) 903 MIB.addReg(SrcReg, RegState::Implicit); 904 905 MIB.copyImplicitOps(MI); 906 MIB.cloneMemRefs(MI); 907 MI.eraseFromParent(); 908 } 909 910 static bool IsAnAddressOperand(const MachineOperand &MO) { 911 // This check is overly conservative. Unless we are certain that the machine 912 // operand is not a symbol reference, we return that it is a symbol reference. 913 // This is important as the load pair may not be split up Windows. 914 switch (MO.getType()) { 915 case MachineOperand::MO_Register: 916 case MachineOperand::MO_Immediate: 917 case MachineOperand::MO_CImmediate: 918 case MachineOperand::MO_FPImmediate: 919 case MachineOperand::MO_ShuffleMask: 920 return false; 921 case MachineOperand::MO_MachineBasicBlock: 922 return true; 923 case MachineOperand::MO_FrameIndex: 924 return false; 925 case MachineOperand::MO_ConstantPoolIndex: 926 case MachineOperand::MO_TargetIndex: 927 case MachineOperand::MO_JumpTableIndex: 928 case MachineOperand::MO_ExternalSymbol: 929 case MachineOperand::MO_GlobalAddress: 930 case MachineOperand::MO_BlockAddress: 931 return true; 932 case MachineOperand::MO_RegisterMask: 933 case MachineOperand::MO_RegisterLiveOut: 934 return false; 935 case MachineOperand::MO_Metadata: 936 case MachineOperand::MO_MCSymbol: 937 return true; 938 case MachineOperand::MO_DbgInstrRef: 939 case MachineOperand::MO_CFIIndex: 940 return false; 941 case MachineOperand::MO_IntrinsicID: 942 case MachineOperand::MO_Predicate: 943 llvm_unreachable("should not exist post-isel"); 944 } 945 llvm_unreachable("unhandled machine operand type"); 946 } 947 948 static MachineOperand makeImplicit(const MachineOperand &MO) { 949 MachineOperand NewMO = MO; 950 NewMO.setImplicit(); 951 return NewMO; 952 } 953 954 static MachineOperand getMovOperand(const MachineOperand &MO, 955 unsigned TargetFlag) { 956 unsigned TF = MO.getTargetFlags() | TargetFlag; 957 switch (MO.getType()) { 958 case MachineOperand::MO_Immediate: { 959 unsigned Imm = MO.getImm(); 960 switch (TargetFlag) { 961 case ARMII::MO_HI_8_15: 962 Imm = (Imm >> 24) & 0xff; 963 break; 964 case ARMII::MO_HI_0_7: 965 Imm = (Imm >> 16) & 0xff; 966 break; 967 case ARMII::MO_LO_8_15: 968 Imm = (Imm >> 8) & 0xff; 969 break; 970 case ARMII::MO_LO_0_7: 971 Imm = Imm & 0xff; 972 break; 973 case ARMII::MO_HI16: 974 Imm = (Imm >> 16) & 0xffff; 975 break; 976 case ARMII::MO_LO16: 977 Imm = Imm & 0xffff; 978 break; 979 default: 980 llvm_unreachable("Only HI/LO target flags are expected"); 981 } 982 return MachineOperand::CreateImm(Imm); 983 } 984 case MachineOperand::MO_ExternalSymbol: 985 return MachineOperand::CreateES(MO.getSymbolName(), TF); 986 case MachineOperand::MO_JumpTableIndex: 987 return MachineOperand::CreateJTI(MO.getIndex(), TF); 988 default: 989 return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF); 990 } 991 } 992 993 void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB, 994 MachineBasicBlock::iterator &MBBI) { 995 MachineInstr &MI = *MBBI; 996 Register DstReg = MI.getOperand(0).getReg(); 997 bool DstIsDead = MI.getOperand(0).isDead(); 998 const MachineOperand &MO = MI.getOperand(1); 999 unsigned MIFlags = MI.getFlags(); 1000 1001 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 1002 1003 // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We 1004 // want to avoid emitting any zero bytes, as they won't change the result, and 1005 // also don't want any pointless shifts, so instead of immediately emitting 1006 // the shift for a byte we keep track of how much we will need to shift and do 1007 // it before the next nonzero byte. 1008 unsigned PendingShift = 0; 1009 for (unsigned Byte = 0; Byte < 4; ++Byte) { 1010 unsigned Flag = Byte == 0 ? ARMII::MO_HI_8_15 1011 : Byte == 1 ? ARMII::MO_HI_0_7 1012 : Byte == 2 ? ARMII::MO_LO_8_15 1013 : ARMII::MO_LO_0_7; 1014 MachineOperand Operand = getMovOperand(MO, Flag); 1015 bool ZeroImm = Operand.isImm() && Operand.getImm() == 0; 1016 unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8; 1017 1018 // Emit the pending shift if we're going to emit this byte or if we've 1019 // reached the end. 1020 if (PendingShift && (!ZeroImm || Byte == 3)) { 1021 MachineInstr *Lsl = 1022 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg) 1023 .add(t1CondCodeOp(true)) 1024 .addReg(DstReg) 1025 .addImm(PendingShift) 1026 .add(predOps(ARMCC::AL)) 1027 .setMIFlags(MIFlags); 1028 (void)Lsl; 1029 LLVM_DEBUG(dbgs() << "And: "; Lsl->dump();); 1030 PendingShift = 0; 1031 } 1032 1033 // Emit this byte if it's nonzero. 1034 if (!ZeroImm) { 1035 MachineInstrBuilder MIB = 1036 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg) 1037 .add(t1CondCodeOp(true)); 1038 if (Op == ARM::tADDi8) 1039 MIB.addReg(DstReg); 1040 MIB.add(Operand); 1041 MIB.add(predOps(ARMCC::AL)); 1042 MIB.setMIFlags(MIFlags); 1043 LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " "; 1044 MIB.getInstr()->dump();); 1045 } 1046 1047 // Don't accumulate the shift value if we've not yet seen a nonzero byte. 1048 if (PendingShift || !ZeroImm) 1049 PendingShift += 8; 1050 } 1051 1052 // The dest is dead on the last instruction we emitted if it was dead on the 1053 // original instruction. 1054 (--MBBI)->getOperand(0).setIsDead(DstIsDead); 1055 1056 MI.eraseFromParent(); 1057 } 1058 1059 void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, 1060 MachineBasicBlock::iterator &MBBI) { 1061 MachineInstr &MI = *MBBI; 1062 unsigned Opcode = MI.getOpcode(); 1063 Register PredReg; 1064 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1065 Register DstReg = MI.getOperand(0).getReg(); 1066 bool DstIsDead = MI.getOperand(0).isDead(); 1067 bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; 1068 const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); 1069 bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); 1070 MachineInstrBuilder LO16, HI16; 1071 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 1072 1073 if (!STI->hasV6T2Ops() && 1074 (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { 1075 // FIXME Windows CE supports older ARM CPUs 1076 assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); 1077 1078 assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); 1079 unsigned ImmVal = (unsigned)MO.getImm(); 1080 unsigned SOImmValV1 = 0, SOImmValV2 = 0; 1081 1082 if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr. 1083 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); 1084 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) 1085 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 1086 .addReg(DstReg); 1087 SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); 1088 SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); 1089 } else { // Expand into a mvn + sub. 1090 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg); 1091 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) 1092 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 1093 .addReg(DstReg); 1094 SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal); 1095 SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal); 1096 SOImmValV1 = ~(-SOImmValV1); 1097 } 1098 1099 unsigned MIFlags = MI.getFlags(); 1100 LO16 = LO16.addImm(SOImmValV1); 1101 HI16 = HI16.addImm(SOImmValV2); 1102 LO16.cloneMemRefs(MI); 1103 HI16.cloneMemRefs(MI); 1104 LO16.setMIFlags(MIFlags); 1105 HI16.setMIFlags(MIFlags); 1106 LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); 1107 HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); 1108 if (isCC) 1109 LO16.add(makeImplicit(MI.getOperand(1))); 1110 LO16.copyImplicitOps(MI); 1111 HI16.copyImplicitOps(MI); 1112 MI.eraseFromParent(); 1113 return; 1114 } 1115 1116 unsigned LO16Opc = 0; 1117 unsigned HI16Opc = 0; 1118 unsigned MIFlags = MI.getFlags(); 1119 if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { 1120 LO16Opc = ARM::t2MOVi16; 1121 HI16Opc = ARM::t2MOVTi16; 1122 } else { 1123 LO16Opc = ARM::MOVi16; 1124 HI16Opc = ARM::MOVTi16; 1125 } 1126 1127 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); 1128 LO16.setMIFlags(MIFlags); 1129 LO16.add(getMovOperand(MO, ARMII::MO_LO16)); 1130 LO16.cloneMemRefs(MI); 1131 LO16.addImm(Pred).addReg(PredReg); 1132 if (isCC) 1133 LO16.add(makeImplicit(MI.getOperand(1))); 1134 LO16.copyImplicitOps(MI); 1135 LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); 1136 1137 MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16); 1138 if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) { 1139 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) 1140 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 1141 .addReg(DstReg); 1142 HI16.setMIFlags(MIFlags); 1143 HI16.add(HIOperand); 1144 HI16.cloneMemRefs(MI); 1145 HI16.addImm(Pred).addReg(PredReg); 1146 HI16.copyImplicitOps(MI); 1147 LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); 1148 } else { 1149 LO16->getOperand(0).setIsDead(DstIsDead); 1150 } 1151 1152 if (RequiresBundling) 1153 finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); 1154 1155 MI.eraseFromParent(); 1156 } 1157 1158 // The size of the area, accessed by that VLSTM/VLLDM 1159 // S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad) 1160 static const int CMSE_FP_SAVE_SIZE = 136; 1161 1162 static void determineGPRegsToClear(const MachineInstr &MI, 1163 const std::initializer_list<unsigned> &Regs, 1164 SmallVectorImpl<unsigned> &ClearRegs) { 1165 SmallVector<unsigned, 4> OpRegs; 1166 for (const MachineOperand &Op : MI.operands()) { 1167 if (!Op.isReg() || !Op.isUse()) 1168 continue; 1169 OpRegs.push_back(Op.getReg()); 1170 } 1171 llvm::sort(OpRegs); 1172 1173 std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(), 1174 std::back_inserter(ClearRegs)); 1175 } 1176 1177 void ARMExpandPseudo::CMSEClearGPRegs( 1178 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 1179 const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs, 1180 unsigned ClobberReg) { 1181 1182 if (STI->hasV8_1MMainlineOps()) { 1183 // Clear the registers using the CLRM instruction. 1184 MachineInstrBuilder CLRM = 1185 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL)); 1186 for (unsigned R : ClearRegs) 1187 CLRM.addReg(R, RegState::Define); 1188 CLRM.addReg(ARM::APSR, RegState::Define); 1189 CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit); 1190 } else { 1191 // Clear the registers and flags by copying ClobberReg into them. 1192 // (Baseline can't do a high register clear in one instruction). 1193 for (unsigned Reg : ClearRegs) { 1194 if (Reg == ClobberReg) 1195 continue; 1196 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg) 1197 .addReg(ClobberReg) 1198 .add(predOps(ARMCC::AL)); 1199 } 1200 1201 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M)) 1202 .addImm(STI->hasDSP() ? 0xc00 : 0x800) 1203 .addReg(ClobberReg) 1204 .add(predOps(ARMCC::AL)); 1205 } 1206 } 1207 1208 // Find which FP registers need to be cleared. The parameter `ClearRegs` is 1209 // initialised with all elements set to true, and this function resets all the 1210 // bits, which correspond to register uses. Returns true if any floating point 1211 // register is defined, false otherwise. 1212 static bool determineFPRegsToClear(const MachineInstr &MI, 1213 BitVector &ClearRegs) { 1214 bool DefFP = false; 1215 for (const MachineOperand &Op : MI.operands()) { 1216 if (!Op.isReg()) 1217 continue; 1218 1219 Register Reg = Op.getReg(); 1220 if (Op.isDef()) { 1221 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || 1222 (Reg >= ARM::D0 && Reg <= ARM::D15) || 1223 (Reg >= ARM::S0 && Reg <= ARM::S31)) 1224 DefFP = true; 1225 continue; 1226 } 1227 1228 if (Reg >= ARM::Q0 && Reg <= ARM::Q7) { 1229 int R = Reg - ARM::Q0; 1230 ClearRegs.reset(R * 4, (R + 1) * 4); 1231 } else if (Reg >= ARM::D0 && Reg <= ARM::D15) { 1232 int R = Reg - ARM::D0; 1233 ClearRegs.reset(R * 2, (R + 1) * 2); 1234 } else if (Reg >= ARM::S0 && Reg <= ARM::S31) { 1235 ClearRegs[Reg - ARM::S0] = false; 1236 } 1237 } 1238 return DefFP; 1239 } 1240 1241 MachineBasicBlock & 1242 ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB, 1243 MachineBasicBlock::iterator MBBI) { 1244 BitVector ClearRegs(16, true); 1245 (void)determineFPRegsToClear(*MBBI, ClearRegs); 1246 1247 if (STI->hasV8_1MMainlineOps()) 1248 return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); 1249 else 1250 return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs); 1251 } 1252 1253 // Clear the FP registers for v8.0-M, by copying over the content 1254 // of LR. Uses R12 as a scratch register. 1255 MachineBasicBlock & 1256 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB, 1257 MachineBasicBlock::iterator MBBI, 1258 const BitVector &ClearRegs) { 1259 if (!STI->hasFPRegs()) 1260 return MBB; 1261 1262 auto &RetI = *MBBI; 1263 const DebugLoc &DL = RetI.getDebugLoc(); 1264 1265 // If optimising for minimum size, clear FP registers unconditionally. 1266 // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and 1267 // don't clear them if they belong to the non-secure state. 1268 MachineBasicBlock *ClearBB, *DoneBB; 1269 if (STI->hasMinSize()) { 1270 ClearBB = DoneBB = &MBB; 1271 } else { 1272 MachineFunction *MF = MBB.getParent(); 1273 ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1274 DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1275 1276 MF->insert(++MBB.getIterator(), ClearBB); 1277 MF->insert(++ClearBB->getIterator(), DoneBB); 1278 1279 DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end()); 1280 DoneBB->transferSuccessors(&MBB); 1281 MBB.addSuccessor(ClearBB); 1282 MBB.addSuccessor(DoneBB); 1283 ClearBB->addSuccessor(DoneBB); 1284 1285 // At the new basic blocks we need to have live-in the registers, used 1286 // for the return value as well as LR, used to clear registers. 1287 for (const MachineOperand &Op : RetI.operands()) { 1288 if (!Op.isReg()) 1289 continue; 1290 Register Reg = Op.getReg(); 1291 if (Reg == ARM::NoRegister || Reg == ARM::LR) 1292 continue; 1293 assert(Reg.isPhysical() && "Unallocated register"); 1294 ClearBB->addLiveIn(Reg); 1295 DoneBB->addLiveIn(Reg); 1296 } 1297 ClearBB->addLiveIn(ARM::LR); 1298 DoneBB->addLiveIn(ARM::LR); 1299 1300 // Read the CONTROL register. 1301 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12) 1302 .addImm(20) 1303 .add(predOps(ARMCC::AL)); 1304 // Check bit 3 (SFPA). 1305 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri)) 1306 .addReg(ARM::R12) 1307 .addImm(8) 1308 .add(predOps(ARMCC::AL)); 1309 // If SFPA is clear, jump over ClearBB to DoneBB. 1310 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc)) 1311 .addMBB(DoneBB) 1312 .addImm(ARMCC::EQ) 1313 .addReg(ARM::CPSR, RegState::Kill); 1314 } 1315 1316 // Emit the clearing sequence 1317 for (unsigned D = 0; D < 8; D++) { 1318 // Attempt to clear as double 1319 if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) { 1320 unsigned Reg = ARM::D0 + D; 1321 BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg) 1322 .addReg(ARM::LR) 1323 .addReg(ARM::LR) 1324 .add(predOps(ARMCC::AL)); 1325 } else { 1326 // Clear first part as single 1327 if (ClearRegs[D * 2 + 0]) { 1328 unsigned Reg = ARM::S0 + D * 2; 1329 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) 1330 .addReg(ARM::LR) 1331 .add(predOps(ARMCC::AL)); 1332 } 1333 // Clear second part as single 1334 if (ClearRegs[D * 2 + 1]) { 1335 unsigned Reg = ARM::S0 + D * 2 + 1; 1336 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) 1337 .addReg(ARM::LR) 1338 .add(predOps(ARMCC::AL)); 1339 } 1340 } 1341 } 1342 1343 // Clear FPSCR bits 0-4, 7, 28-31 1344 // The other bits are program global according to the AAPCS 1345 BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12) 1346 .add(predOps(ARMCC::AL)); 1347 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) 1348 .addReg(ARM::R12) 1349 .addImm(0x0000009F) 1350 .add(predOps(ARMCC::AL)) 1351 .add(condCodeOp()); 1352 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) 1353 .addReg(ARM::R12) 1354 .addImm(0xF0000000) 1355 .add(predOps(ARMCC::AL)) 1356 .add(condCodeOp()); 1357 BuildMI(ClearBB, DL, TII->get(ARM::VMSR)) 1358 .addReg(ARM::R12) 1359 .add(predOps(ARMCC::AL)); 1360 1361 return *DoneBB; 1362 } 1363 1364 MachineBasicBlock & 1365 ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB, 1366 MachineBasicBlock::iterator MBBI, 1367 const BitVector &ClearRegs) { 1368 auto &RetI = *MBBI; 1369 1370 // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for 1371 // each contiguous sequence of S-registers. 1372 int Start = -1, End = -1; 1373 for (int S = 0, E = ClearRegs.size(); S != E; ++S) { 1374 if (ClearRegs[S] && S == End + 1) { 1375 End = S; // extend range 1376 continue; 1377 } 1378 // Emit current range. 1379 if (Start < End) { 1380 MachineInstrBuilder VSCCLRM = 1381 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) 1382 .add(predOps(ARMCC::AL)); 1383 while (++Start <= End) 1384 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); 1385 VSCCLRM.addReg(ARM::VPR, RegState::Define); 1386 } 1387 Start = End = S; 1388 } 1389 // Emit last range. 1390 if (Start < End) { 1391 MachineInstrBuilder VSCCLRM = 1392 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) 1393 .add(predOps(ARMCC::AL)); 1394 while (++Start <= End) 1395 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); 1396 VSCCLRM.addReg(ARM::VPR, RegState::Define); 1397 } 1398 1399 return MBB; 1400 } 1401 1402 void ARMExpandPseudo::CMSESaveClearFPRegs( 1403 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1404 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) { 1405 if (STI->hasV8_1MMainlineOps()) 1406 CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs); 1407 else if (STI->hasV8MMainlineOps()) 1408 CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs); 1409 } 1410 1411 // Save and clear FP registers if present 1412 void ARMExpandPseudo::CMSESaveClearFPRegsV8( 1413 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1414 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) { 1415 1416 // Store an available register for FPSCR clearing 1417 assert(!ScratchRegs.empty()); 1418 unsigned SpareReg = ScratchRegs.front(); 1419 1420 // save space on stack for VLSTM 1421 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) 1422 .addReg(ARM::SP) 1423 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1424 .add(predOps(ARMCC::AL)); 1425 1426 // Use ScratchRegs to store the fp regs 1427 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs; 1428 std::vector<unsigned> NonclearedFPRegs; 1429 for (const MachineOperand &Op : MBBI->operands()) { 1430 if (Op.isReg() && Op.isUse()) { 1431 Register Reg = Op.getReg(); 1432 assert(!ARM::DPRRegClass.contains(Reg) || 1433 ARM::DPR_VFP2RegClass.contains(Reg)); 1434 assert(!ARM::QPRRegClass.contains(Reg)); 1435 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1436 if (ScratchRegs.size() >= 2) { 1437 unsigned SaveReg2 = ScratchRegs.pop_back_val(); 1438 unsigned SaveReg1 = ScratchRegs.pop_back_val(); 1439 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1440 1441 // Save the fp register to the normal registers 1442 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1443 .addReg(SaveReg1, RegState::Define) 1444 .addReg(SaveReg2, RegState::Define) 1445 .addReg(Reg) 1446 .add(predOps(ARMCC::AL)); 1447 } else { 1448 NonclearedFPRegs.push_back(Reg); 1449 } 1450 } else if (ARM::SPRRegClass.contains(Reg)) { 1451 if (ScratchRegs.size() >= 1) { 1452 unsigned SaveReg = ScratchRegs.pop_back_val(); 1453 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1454 1455 // Save the fp register to the normal registers 1456 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1457 .addReg(Reg) 1458 .add(predOps(ARMCC::AL)); 1459 } else { 1460 NonclearedFPRegs.push_back(Reg); 1461 } 1462 } 1463 } 1464 } 1465 1466 bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1467 1468 if (passesFPReg) 1469 assert(STI->hasFPRegs() && "Subtarget needs fpregs"); 1470 1471 // Lazy store all fp registers to the stack. 1472 // This executes as NOP in the absence of floating-point support. 1473 MachineInstrBuilder VLSTM = 1474 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1475 .addReg(ARM::SP) 1476 .add(predOps(ARMCC::AL)) 1477 .addImm(0); // Represents a pseoudo register list, has no effect on 1478 // the encoding. 1479 // Mark non-live registers as undef 1480 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1481 if (MO.isReg() && !MO.isDef()) { 1482 Register Reg = MO.getReg(); 1483 MO.setIsUndef(!LiveRegs.contains(Reg)); 1484 } 1485 } 1486 1487 // Restore all arguments 1488 for (const auto &Regs : ClearedFPRegs) { 1489 unsigned Reg, SaveReg1, SaveReg2; 1490 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1491 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1492 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1493 .addReg(SaveReg1) 1494 .addReg(SaveReg2) 1495 .add(predOps(ARMCC::AL)); 1496 else if (ARM::SPRRegClass.contains(Reg)) 1497 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1498 .addReg(SaveReg1) 1499 .add(predOps(ARMCC::AL)); 1500 } 1501 1502 for (unsigned Reg : NonclearedFPRegs) { 1503 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1504 if (STI->isLittle()) { 1505 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg) 1506 .addReg(ARM::SP) 1507 .addImm((Reg - ARM::D0) * 2) 1508 .add(predOps(ARMCC::AL)); 1509 } else { 1510 // For big-endian targets we need to load the two subregisters of Reg 1511 // manually because VLDRD would load them in wrong order 1512 unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0); 1513 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0) 1514 .addReg(ARM::SP) 1515 .addImm((Reg - ARM::D0) * 2) 1516 .add(predOps(ARMCC::AL)); 1517 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1) 1518 .addReg(ARM::SP) 1519 .addImm((Reg - ARM::D0) * 2 + 1) 1520 .add(predOps(ARMCC::AL)); 1521 } 1522 } else if (ARM::SPRRegClass.contains(Reg)) { 1523 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg) 1524 .addReg(ARM::SP) 1525 .addImm(Reg - ARM::S0) 1526 .add(predOps(ARMCC::AL)); 1527 } 1528 } 1529 // restore FPSCR from stack and clear bits 0-4, 7, 28-31 1530 // The other bits are program global according to the AAPCS 1531 if (passesFPReg) { 1532 BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg) 1533 .addReg(ARM::SP) 1534 .addImm(0x10) 1535 .add(predOps(ARMCC::AL)); 1536 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1537 .addReg(SpareReg) 1538 .addImm(0x0000009F) 1539 .add(predOps(ARMCC::AL)) 1540 .add(condCodeOp()); 1541 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1542 .addReg(SpareReg) 1543 .addImm(0xF0000000) 1544 .add(predOps(ARMCC::AL)) 1545 .add(condCodeOp()); 1546 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR)) 1547 .addReg(SpareReg) 1548 .add(predOps(ARMCC::AL)); 1549 // The ldr must happen after a floating point instruction. To prevent the 1550 // post-ra scheduler to mess with the order, we create a bundle. 1551 finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator()); 1552 } 1553 } 1554 1555 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 1556 MachineBasicBlock::iterator MBBI, 1557 DebugLoc &DL, 1558 const LivePhysRegs &LiveRegs) { 1559 BitVector ClearRegs(32, true); 1560 bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs); 1561 1562 // If the instruction does not write to a FP register and no elements were 1563 // removed from the set, then no FP registers were used to pass 1564 // arguments/returns. 1565 if (!DefFP && ClearRegs.count() == ClearRegs.size()) { 1566 // save space on stack for VLSTM 1567 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) 1568 .addReg(ARM::SP) 1569 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1570 .add(predOps(ARMCC::AL)); 1571 1572 // Lazy store all FP registers to the stack 1573 MachineInstrBuilder VLSTM = 1574 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1575 .addReg(ARM::SP) 1576 .add(predOps(ARMCC::AL)) 1577 .addImm(0); // Represents a pseoudo register list, has no effect on 1578 // the encoding. 1579 // Mark non-live registers as undef 1580 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1581 if (MO.isReg() && MO.isImplicit() && !MO.isDef()) { 1582 Register Reg = MO.getReg(); 1583 MO.setIsUndef(!LiveRegs.contains(Reg)); 1584 } 1585 } 1586 } else { 1587 // Push all the callee-saved registers (s16-s31). 1588 MachineInstrBuilder VPUSH = 1589 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP) 1590 .addReg(ARM::SP) 1591 .add(predOps(ARMCC::AL)); 1592 for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg) 1593 VPUSH.addReg(Reg); 1594 1595 // Clear FP registers with a VSCCLRM. 1596 (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); 1597 1598 // Save floating-point context. 1599 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP) 1600 .addReg(ARM::SP) 1601 .addImm(-8) 1602 .add(predOps(ARMCC::AL)); 1603 } 1604 } 1605 1606 // Restore FP registers if present 1607 void ARMExpandPseudo::CMSERestoreFPRegs( 1608 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1609 SmallVectorImpl<unsigned> &AvailableRegs) { 1610 if (STI->hasV8_1MMainlineOps()) 1611 CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs); 1612 else if (STI->hasV8MMainlineOps()) 1613 CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs); 1614 } 1615 1616 void ARMExpandPseudo::CMSERestoreFPRegsV8( 1617 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1618 SmallVectorImpl<unsigned> &AvailableRegs) { 1619 1620 // Keep a scratch register for the mitigation sequence. 1621 unsigned ScratchReg = ARM::NoRegister; 1622 if (STI->fixCMSE_CVE_2021_35465()) 1623 ScratchReg = AvailableRegs.pop_back_val(); 1624 1625 // Use AvailableRegs to store the fp regs 1626 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs; 1627 std::vector<unsigned> NonclearedFPRegs; 1628 for (const MachineOperand &Op : MBBI->operands()) { 1629 if (Op.isReg() && Op.isDef()) { 1630 Register Reg = Op.getReg(); 1631 assert(!ARM::DPRRegClass.contains(Reg) || 1632 ARM::DPR_VFP2RegClass.contains(Reg)); 1633 assert(!ARM::QPRRegClass.contains(Reg)); 1634 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1635 if (AvailableRegs.size() >= 2) { 1636 unsigned SaveReg2 = AvailableRegs.pop_back_val(); 1637 unsigned SaveReg1 = AvailableRegs.pop_back_val(); 1638 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1639 1640 // Save the fp register to the normal registers 1641 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1642 .addReg(SaveReg1, RegState::Define) 1643 .addReg(SaveReg2, RegState::Define) 1644 .addReg(Reg) 1645 .add(predOps(ARMCC::AL)); 1646 } else { 1647 NonclearedFPRegs.push_back(Reg); 1648 } 1649 } else if (ARM::SPRRegClass.contains(Reg)) { 1650 if (AvailableRegs.size() >= 1) { 1651 unsigned SaveReg = AvailableRegs.pop_back_val(); 1652 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1653 1654 // Save the fp register to the normal registers 1655 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1656 .addReg(Reg) 1657 .add(predOps(ARMCC::AL)); 1658 } else { 1659 NonclearedFPRegs.push_back(Reg); 1660 } 1661 } 1662 } 1663 } 1664 1665 bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1666 1667 if (returnsFPReg) 1668 assert(STI->hasFPRegs() && "Subtarget needs fpregs"); 1669 1670 // Push FP regs that cannot be restored via normal registers on the stack 1671 for (unsigned Reg : NonclearedFPRegs) { 1672 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1673 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD)) 1674 .addReg(Reg) 1675 .addReg(ARM::SP) 1676 .addImm((Reg - ARM::D0) * 2) 1677 .add(predOps(ARMCC::AL)); 1678 else if (ARM::SPRRegClass.contains(Reg)) 1679 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS)) 1680 .addReg(Reg) 1681 .addReg(ARM::SP) 1682 .addImm(Reg - ARM::S0) 1683 .add(predOps(ARMCC::AL)); 1684 } 1685 1686 // Lazy load fp regs from stack. 1687 // This executes as NOP in the absence of floating-point support. 1688 MachineInstrBuilder VLLDM = 1689 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM)) 1690 .addReg(ARM::SP) 1691 .add(predOps(ARMCC::AL)) 1692 .addImm(0); // Represents a pseoudo register list, has no effect on 1693 // the encoding. 1694 1695 if (STI->fixCMSE_CVE_2021_35465()) { 1696 auto Bundler = MIBundleBuilder(MBB, VLLDM); 1697 // Read the CONTROL register. 1698 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M)) 1699 .addReg(ScratchReg, RegState::Define) 1700 .addImm(20) 1701 .add(predOps(ARMCC::AL))); 1702 // Check bit 3 (SFPA). 1703 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri)) 1704 .addReg(ScratchReg) 1705 .addImm(8) 1706 .add(predOps(ARMCC::AL))); 1707 // Emit the IT block. 1708 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT)) 1709 .addImm(ARMCC::NE) 1710 .addImm(8)); 1711 // If SFPA is clear jump over to VLLDM, otherwise execute an instruction 1712 // which has no functional effect apart from causing context creation: 1713 // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40, 1714 // which is defined as NOP if not executed. 1715 if (STI->hasFPRegs()) 1716 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS)) 1717 .addReg(ARM::S0, RegState::Define) 1718 .addReg(ARM::S0, RegState::Undef) 1719 .add(predOps(ARMCC::NE))); 1720 else 1721 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM)) 1722 .addExternalSymbol(".inst.w 0xeeb00a40") 1723 .addImm(InlineAsm::Extra_HasSideEffects)); 1724 finalizeBundle(MBB, Bundler.begin(), Bundler.end()); 1725 } 1726 1727 // Restore all FP registers via normal registers 1728 for (const auto &Regs : ClearedFPRegs) { 1729 unsigned Reg, SaveReg1, SaveReg2; 1730 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1731 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1732 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1733 .addReg(SaveReg1) 1734 .addReg(SaveReg2) 1735 .add(predOps(ARMCC::AL)); 1736 else if (ARM::SPRRegClass.contains(Reg)) 1737 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1738 .addReg(SaveReg1) 1739 .add(predOps(ARMCC::AL)); 1740 } 1741 1742 // Pop the stack space 1743 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP) 1744 .addReg(ARM::SP) 1745 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1746 .add(predOps(ARMCC::AL)); 1747 } 1748 1749 static bool definesOrUsesFPReg(const MachineInstr &MI) { 1750 for (const MachineOperand &Op : MI.operands()) { 1751 if (!Op.isReg()) 1752 continue; 1753 Register Reg = Op.getReg(); 1754 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || 1755 (Reg >= ARM::D0 && Reg <= ARM::D15) || 1756 (Reg >= ARM::S0 && Reg <= ARM::S31)) 1757 return true; 1758 } 1759 return false; 1760 } 1761 1762 void ARMExpandPseudo::CMSERestoreFPRegsV81( 1763 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1764 SmallVectorImpl<unsigned> &AvailableRegs) { 1765 if (!definesOrUsesFPReg(*MBBI)) { 1766 if (STI->fixCMSE_CVE_2021_35465()) { 1767 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS)) 1768 .add(predOps(ARMCC::AL)) 1769 .addReg(ARM::VPR, RegState::Define); 1770 } 1771 1772 // Load FP registers from stack. 1773 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM)) 1774 .addReg(ARM::SP) 1775 .add(predOps(ARMCC::AL)) 1776 .addImm(0); // Represents a pseoudo register list, has no effect on the 1777 // encoding. 1778 1779 // Pop the stack space 1780 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP) 1781 .addReg(ARM::SP) 1782 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1783 .add(predOps(ARMCC::AL)); 1784 } else { 1785 // Restore the floating point context. 1786 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post), 1787 ARM::SP) 1788 .addReg(ARM::SP) 1789 .addImm(8) 1790 .add(predOps(ARMCC::AL)); 1791 1792 // Pop all the callee-saved registers (s16-s31). 1793 MachineInstrBuilder VPOP = 1794 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP) 1795 .addReg(ARM::SP) 1796 .add(predOps(ARMCC::AL)); 1797 for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg) 1798 VPOP.addReg(Reg, RegState::Define); 1799 } 1800 } 1801 1802 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as 1803 /// possible. This only gets used at -O0 so we don't care about efficiency of 1804 /// the generated code. 1805 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, 1806 MachineBasicBlock::iterator MBBI, 1807 unsigned LdrexOp, unsigned StrexOp, 1808 unsigned UxtOp, 1809 MachineBasicBlock::iterator &NextMBBI) { 1810 bool IsThumb = STI->isThumb(); 1811 bool IsThumb1Only = STI->isThumb1Only(); 1812 MachineInstr &MI = *MBBI; 1813 DebugLoc DL = MI.getDebugLoc(); 1814 const MachineOperand &Dest = MI.getOperand(0); 1815 Register TempReg = MI.getOperand(1).getReg(); 1816 // Duplicating undef operands into 2 instructions does not guarantee the same 1817 // value on both; However undef should be replaced by xzr anyway. 1818 assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); 1819 Register AddrReg = MI.getOperand(2).getReg(); 1820 Register DesiredReg = MI.getOperand(3).getReg(); 1821 Register NewReg = MI.getOperand(4).getReg(); 1822 1823 if (IsThumb) { 1824 assert(STI->hasV8MBaselineOps() && 1825 "CMP_SWAP not expected to be custom expanded for Thumb1"); 1826 assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && 1827 "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); 1828 assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) && 1829 "DesiredReg used for UXT op must be tGPR"); 1830 } 1831 1832 MachineFunction *MF = MBB.getParent(); 1833 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1834 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1835 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1836 1837 MF->insert(++MBB.getIterator(), LoadCmpBB); 1838 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 1839 MF->insert(++StoreBB->getIterator(), DoneBB); 1840 1841 if (UxtOp) { 1842 MachineInstrBuilder MIB = 1843 BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) 1844 .addReg(DesiredReg, RegState::Kill); 1845 if (!IsThumb) 1846 MIB.addImm(0); 1847 MIB.add(predOps(ARMCC::AL)); 1848 } 1849 1850 // .Lloadcmp: 1851 // ldrex rDest, [rAddr] 1852 // cmp rDest, rDesired 1853 // bne .Ldone 1854 1855 MachineInstrBuilder MIB; 1856 MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); 1857 MIB.addReg(AddrReg); 1858 if (LdrexOp == ARM::t2LDREX) 1859 MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. 1860 MIB.add(predOps(ARMCC::AL)); 1861 1862 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; 1863 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 1864 .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) 1865 .addReg(DesiredReg) 1866 .add(predOps(ARMCC::AL)); 1867 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; 1868 BuildMI(LoadCmpBB, DL, TII->get(Bcc)) 1869 .addMBB(DoneBB) 1870 .addImm(ARMCC::NE) 1871 .addReg(ARM::CPSR, RegState::Kill); 1872 LoadCmpBB->addSuccessor(DoneBB); 1873 LoadCmpBB->addSuccessor(StoreBB); 1874 1875 // .Lstore: 1876 // strex rTempReg, rNew, [rAddr] 1877 // cmp rTempReg, #0 1878 // bne .Lloadcmp 1879 MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) 1880 .addReg(NewReg) 1881 .addReg(AddrReg); 1882 if (StrexOp == ARM::t2STREX) 1883 MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. 1884 MIB.add(predOps(ARMCC::AL)); 1885 1886 unsigned CMPri = 1887 IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri; 1888 BuildMI(StoreBB, DL, TII->get(CMPri)) 1889 .addReg(TempReg, RegState::Kill) 1890 .addImm(0) 1891 .add(predOps(ARMCC::AL)); 1892 BuildMI(StoreBB, DL, TII->get(Bcc)) 1893 .addMBB(LoadCmpBB) 1894 .addImm(ARMCC::NE) 1895 .addReg(ARM::CPSR, RegState::Kill); 1896 StoreBB->addSuccessor(LoadCmpBB); 1897 StoreBB->addSuccessor(DoneBB); 1898 1899 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 1900 DoneBB->transferSuccessors(&MBB); 1901 1902 MBB.addSuccessor(LoadCmpBB); 1903 1904 NextMBBI = MBB.end(); 1905 MI.eraseFromParent(); 1906 1907 // Recompute livein lists. 1908 LivePhysRegs LiveRegs; 1909 computeAndAddLiveIns(LiveRegs, *DoneBB); 1910 computeAndAddLiveIns(LiveRegs, *StoreBB); 1911 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 1912 // Do an extra pass around the loop to get loop carried registers right. 1913 StoreBB->clearLiveIns(); 1914 computeAndAddLiveIns(LiveRegs, *StoreBB); 1915 LoadCmpBB->clearLiveIns(); 1916 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 1917 1918 return true; 1919 } 1920 1921 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a 1922 /// single GPRPair register), Thumb's take two separate registers so we need to 1923 /// extract the subregs from the pair. 1924 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, 1925 unsigned Flags, bool IsThumb, 1926 const TargetRegisterInfo *TRI) { 1927 if (IsThumb) { 1928 Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); 1929 Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); 1930 MIB.addReg(RegLo, Flags); 1931 MIB.addReg(RegHi, Flags); 1932 } else 1933 MIB.addReg(Reg.getReg(), Flags); 1934 } 1935 1936 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop. 1937 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, 1938 MachineBasicBlock::iterator MBBI, 1939 MachineBasicBlock::iterator &NextMBBI) { 1940 bool IsThumb = STI->isThumb(); 1941 assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!"); 1942 MachineInstr &MI = *MBBI; 1943 DebugLoc DL = MI.getDebugLoc(); 1944 MachineOperand &Dest = MI.getOperand(0); 1945 Register TempReg = MI.getOperand(1).getReg(); 1946 // Duplicating undef operands into 2 instructions does not guarantee the same 1947 // value on both; However undef should be replaced by xzr anyway. 1948 assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); 1949 Register AddrReg = MI.getOperand(2).getReg(); 1950 Register DesiredReg = MI.getOperand(3).getReg(); 1951 MachineOperand New = MI.getOperand(4); 1952 New.setIsKill(false); 1953 1954 Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); 1955 Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); 1956 Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); 1957 Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); 1958 1959 MachineFunction *MF = MBB.getParent(); 1960 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1961 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1962 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1963 1964 MF->insert(++MBB.getIterator(), LoadCmpBB); 1965 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 1966 MF->insert(++StoreBB->getIterator(), DoneBB); 1967 1968 // .Lloadcmp: 1969 // ldrexd rDestLo, rDestHi, [rAddr] 1970 // cmp rDestLo, rDesiredLo 1971 // sbcs dead rTempReg, rDestHi, rDesiredHi 1972 // bne .Ldone 1973 unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; 1974 MachineInstrBuilder MIB; 1975 MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); 1976 addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); 1977 MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); 1978 1979 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; 1980 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 1981 .addReg(DestLo, getKillRegState(Dest.isDead())) 1982 .addReg(DesiredLo) 1983 .add(predOps(ARMCC::AL)); 1984 1985 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 1986 .addReg(DestHi, getKillRegState(Dest.isDead())) 1987 .addReg(DesiredHi) 1988 .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill); 1989 1990 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; 1991 BuildMI(LoadCmpBB, DL, TII->get(Bcc)) 1992 .addMBB(DoneBB) 1993 .addImm(ARMCC::NE) 1994 .addReg(ARM::CPSR, RegState::Kill); 1995 LoadCmpBB->addSuccessor(DoneBB); 1996 LoadCmpBB->addSuccessor(StoreBB); 1997 1998 // .Lstore: 1999 // strexd rTempReg, rNewLo, rNewHi, [rAddr] 2000 // cmp rTempReg, #0 2001 // bne .Lloadcmp 2002 unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; 2003 MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); 2004 unsigned Flags = getKillRegState(New.isDead()); 2005 addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI); 2006 MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); 2007 2008 unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; 2009 BuildMI(StoreBB, DL, TII->get(CMPri)) 2010 .addReg(TempReg, RegState::Kill) 2011 .addImm(0) 2012 .add(predOps(ARMCC::AL)); 2013 BuildMI(StoreBB, DL, TII->get(Bcc)) 2014 .addMBB(LoadCmpBB) 2015 .addImm(ARMCC::NE) 2016 .addReg(ARM::CPSR, RegState::Kill); 2017 StoreBB->addSuccessor(LoadCmpBB); 2018 StoreBB->addSuccessor(DoneBB); 2019 2020 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 2021 DoneBB->transferSuccessors(&MBB); 2022 2023 MBB.addSuccessor(LoadCmpBB); 2024 2025 NextMBBI = MBB.end(); 2026 MI.eraseFromParent(); 2027 2028 // Recompute livein lists. 2029 LivePhysRegs LiveRegs; 2030 computeAndAddLiveIns(LiveRegs, *DoneBB); 2031 computeAndAddLiveIns(LiveRegs, *StoreBB); 2032 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 2033 // Do an extra pass around the loop to get loop carried registers right. 2034 StoreBB->clearLiveIns(); 2035 computeAndAddLiveIns(LiveRegs, *StoreBB); 2036 LoadCmpBB->clearLiveIns(); 2037 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 2038 2039 return true; 2040 } 2041 2042 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, 2043 MachineBasicBlock &MBB, 2044 MachineBasicBlock::iterator MBBI, int JumpReg, 2045 const LivePhysRegs &LiveRegs, bool Thumb1Only) { 2046 const DebugLoc &DL = MBBI->getDebugLoc(); 2047 if (Thumb1Only) { // push Lo and Hi regs separately 2048 MachineInstrBuilder PushMIB = 2049 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 2050 for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) { 2051 PushMIB.addReg( 2052 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); 2053 } 2054 2055 // Thumb1 can only tPUSH low regs, so we copy the high regs to the low 2056 // regs that we just saved and push the low regs again, taking care to 2057 // not clobber JumpReg. If JumpReg is one of the low registers, push first 2058 // the values of r9-r11, and then r8. That would leave them ordered in 2059 // memory, and allow us to later pop them with a single instructions. 2060 // FIXME: Could also use any of r0-r3 that are free (including in the 2061 // first PUSH above). 2062 for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) { 2063 if (JumpReg == LoReg) 2064 continue; 2065 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) 2066 .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef) 2067 .add(predOps(ARMCC::AL)); 2068 --HiReg; 2069 } 2070 MachineInstrBuilder PushMIB2 = 2071 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 2072 for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) { 2073 if (Reg == JumpReg) 2074 continue; 2075 PushMIB2.addReg(Reg, RegState::Kill); 2076 } 2077 2078 // If we couldn't use a low register for temporary storage (because it was 2079 // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been 2080 // saved. 2081 if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) { 2082 int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4; 2083 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) 2084 .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef) 2085 .add(predOps(ARMCC::AL)); 2086 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)) 2087 .add(predOps(ARMCC::AL)) 2088 .addReg(LoReg, RegState::Kill); 2089 } 2090 } else { // push Lo and Hi registers with a single instruction 2091 MachineInstrBuilder PushMIB = 2092 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP) 2093 .addReg(ARM::SP) 2094 .add(predOps(ARMCC::AL)); 2095 for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) { 2096 PushMIB.addReg( 2097 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); 2098 } 2099 } 2100 } 2101 2102 static void CMSEPopCalleeSaves(const TargetInstrInfo &TII, 2103 MachineBasicBlock &MBB, 2104 MachineBasicBlock::iterator MBBI, int JumpReg, 2105 bool Thumb1Only) { 2106 const DebugLoc &DL = MBBI->getDebugLoc(); 2107 if (Thumb1Only) { 2108 MachineInstrBuilder PopMIB = 2109 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 2110 for (int R = 0; R < 4; ++R) { 2111 PopMIB.addReg(ARM::R4 + R, RegState::Define); 2112 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R) 2113 .addReg(ARM::R4 + R, RegState::Kill) 2114 .add(predOps(ARMCC::AL)); 2115 } 2116 MachineInstrBuilder PopMIB2 = 2117 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 2118 for (int R = 0; R < 4; ++R) 2119 PopMIB2.addReg(ARM::R4 + R, RegState::Define); 2120 } else { // pop Lo and Hi registers with a single instruction 2121 MachineInstrBuilder PopMIB = 2122 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP) 2123 .addReg(ARM::SP) 2124 .add(predOps(ARMCC::AL)); 2125 for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) 2126 PopMIB.addReg(Reg, RegState::Define); 2127 } 2128 } 2129 2130 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, 2131 MachineBasicBlock::iterator MBBI, 2132 MachineBasicBlock::iterator &NextMBBI) { 2133 MachineInstr &MI = *MBBI; 2134 unsigned Opcode = MI.getOpcode(); 2135 switch (Opcode) { 2136 default: 2137 return false; 2138 2139 case ARM::VBSPd: 2140 case ARM::VBSPq: { 2141 Register DstReg = MI.getOperand(0).getReg(); 2142 if (DstReg == MI.getOperand(3).getReg()) { 2143 // Expand to VBIT 2144 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq; 2145 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2146 .add(MI.getOperand(0)) 2147 .add(MI.getOperand(3)) 2148 .add(MI.getOperand(2)) 2149 .add(MI.getOperand(1)) 2150 .addImm(MI.getOperand(4).getImm()) 2151 .add(MI.getOperand(5)); 2152 } else if (DstReg == MI.getOperand(2).getReg()) { 2153 // Expand to VBIF 2154 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; 2155 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2156 .add(MI.getOperand(0)) 2157 .add(MI.getOperand(2)) 2158 .add(MI.getOperand(3)) 2159 .add(MI.getOperand(1)) 2160 .addImm(MI.getOperand(4).getImm()) 2161 .add(MI.getOperand(5)); 2162 } else { 2163 // Expand to VBSL 2164 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; 2165 if (DstReg == MI.getOperand(1).getReg()) { 2166 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2167 .add(MI.getOperand(0)) 2168 .add(MI.getOperand(1)) 2169 .add(MI.getOperand(2)) 2170 .add(MI.getOperand(3)) 2171 .addImm(MI.getOperand(4).getImm()) 2172 .add(MI.getOperand(5)); 2173 } else { 2174 // Use move to satisfy constraints 2175 unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq; 2176 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) 2177 .addReg(DstReg, 2178 RegState::Define | 2179 getRenamableRegState(MI.getOperand(0).isRenamable())) 2180 .add(MI.getOperand(1)) 2181 .add(MI.getOperand(1)) 2182 .addImm(MI.getOperand(4).getImm()) 2183 .add(MI.getOperand(5)); 2184 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2185 .add(MI.getOperand(0)) 2186 .addReg(DstReg, 2187 RegState::Kill | 2188 getRenamableRegState(MI.getOperand(0).isRenamable())) 2189 .add(MI.getOperand(2)) 2190 .add(MI.getOperand(3)) 2191 .addImm(MI.getOperand(4).getImm()) 2192 .add(MI.getOperand(5)); 2193 } 2194 } 2195 MI.eraseFromParent(); 2196 return true; 2197 } 2198 2199 case ARM::TCRETURNdi: 2200 case ARM::TCRETURNri: 2201 case ARM::TCRETURNrinotr12: { 2202 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 2203 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2204 MBBI--; 2205 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2206 MBBI--; 2207 assert(MBBI->isReturn() && 2208 "Can only insert epilog into returning blocks"); 2209 unsigned RetOpcode = MBBI->getOpcode(); 2210 DebugLoc dl = MBBI->getDebugLoc(); 2211 const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( 2212 MBB.getParent()->getSubtarget().getInstrInfo()); 2213 2214 // Tail call return: adjust the stack pointer and jump to callee. 2215 MBBI = MBB.getLastNonDebugInstr(); 2216 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2217 MBBI--; 2218 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2219 MBBI--; 2220 MachineOperand &JumpTarget = MBBI->getOperand(0); 2221 2222 // Jump to label or value in register. 2223 if (RetOpcode == ARM::TCRETURNdi) { 2224 MachineFunction *MF = MBB.getParent(); 2225 bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && 2226 MF->getFunction().needsUnwindTableEntry(); 2227 unsigned TCOpcode = 2228 STI->isThumb() 2229 ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd 2230 : ARM::tTAILJMPdND) 2231 : ARM::TAILJMPd; 2232 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 2233 if (JumpTarget.isGlobal()) 2234 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 2235 JumpTarget.getTargetFlags()); 2236 else { 2237 assert(JumpTarget.isSymbol()); 2238 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 2239 JumpTarget.getTargetFlags()); 2240 } 2241 2242 // Add the default predicate in Thumb mode. 2243 if (STI->isThumb()) 2244 MIB.add(predOps(ARMCC::AL)); 2245 } else if (RetOpcode == ARM::TCRETURNri || 2246 RetOpcode == ARM::TCRETURNrinotr12) { 2247 unsigned Opcode = 2248 STI->isThumb() ? ARM::tTAILJMPr 2249 : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); 2250 BuildMI(MBB, MBBI, dl, 2251 TII.get(Opcode)) 2252 .addReg(JumpTarget.getReg(), RegState::Kill); 2253 } 2254 2255 auto NewMI = std::prev(MBBI); 2256 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) 2257 NewMI->addOperand(MBBI->getOperand(i)); 2258 2259 2260 // Update call site info and delete the pseudo instruction TCRETURN. 2261 if (MI.isCandidateForCallSiteEntry()) 2262 MI.getMF()->moveCallSiteInfo(&MI, &*NewMI); 2263 // Copy nomerge flag over to new instruction. 2264 if (MI.getFlag(MachineInstr::NoMerge)) 2265 NewMI->setFlag(MachineInstr::NoMerge); 2266 MBB.erase(MBBI); 2267 2268 MBBI = NewMI; 2269 return true; 2270 } 2271 case ARM::tBXNS_RET: { 2272 // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which 2273 // uses R12 as a scratch register. 2274 if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress()) 2275 BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT)); 2276 2277 MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); 2278 2279 if (STI->hasV8_1MMainlineOps()) { 2280 // Restore the non-secure floating point context. 2281 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 2282 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP) 2283 .addReg(ARM::SP) 2284 .addImm(4) 2285 .add(predOps(ARMCC::AL)); 2286 2287 if (AFI->shouldSignReturnAddress()) 2288 BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT)); 2289 } 2290 2291 // Clear all GPR that are not a use of the return instruction. 2292 assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) { 2293 return !Op.isReg() || Op.getReg() != ARM::R12; 2294 })); 2295 SmallVector<unsigned, 5> ClearRegs; 2296 determineGPRegsToClear( 2297 *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs); 2298 CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs, 2299 ARM::LR); 2300 2301 MachineInstrBuilder NewMI = 2302 BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), 2303 TII->get(ARM::tBXNS)) 2304 .addReg(ARM::LR) 2305 .add(predOps(ARMCC::AL)); 2306 for (const MachineOperand &Op : MI.operands()) 2307 NewMI->addOperand(Op); 2308 MI.eraseFromParent(); 2309 return true; 2310 } 2311 case ARM::tBLXNS_CALL: { 2312 DebugLoc DL = MBBI->getDebugLoc(); 2313 Register JumpReg = MBBI->getOperand(0).getReg(); 2314 2315 // Figure out which registers are live at the point immediately before the 2316 // call. When we indiscriminately push a set of registers, the live 2317 // registers are added as ordinary use operands, whereas dead registers 2318 // are "undef". 2319 LivePhysRegs LiveRegs(*TRI); 2320 LiveRegs.addLiveOuts(MBB); 2321 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse())) 2322 LiveRegs.stepBackward(MI); 2323 LiveRegs.stepBackward(*MBBI); 2324 2325 CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs, 2326 AFI->isThumb1OnlyFunction()); 2327 2328 SmallVector<unsigned, 16> ClearRegs; 2329 determineGPRegsToClear(*MBBI, 2330 {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 2331 ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, 2332 ARM::R10, ARM::R11, ARM::R12}, 2333 ClearRegs); 2334 auto OriginalClearRegs = ClearRegs; 2335 2336 // Get the first cleared register as a scratch (to use later with tBIC). 2337 // We need to use the first so we can ensure it is a low register. 2338 unsigned ScratchReg = ClearRegs.front(); 2339 2340 // Clear LSB of JumpReg 2341 if (AFI->isThumb2Function()) { 2342 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg) 2343 .addReg(JumpReg) 2344 .addImm(1) 2345 .add(predOps(ARMCC::AL)) 2346 .add(condCodeOp()); 2347 } else { 2348 // We need to use an extra register to cope with 8M Baseline, 2349 // since we have saved all of the registers we are ok to trash a non 2350 // argument register here. 2351 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg) 2352 .add(condCodeOp()) 2353 .addImm(1) 2354 .add(predOps(ARMCC::AL)); 2355 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg) 2356 .addReg(ARM::CPSR, RegState::Define) 2357 .addReg(JumpReg) 2358 .addReg(ScratchReg) 2359 .add(predOps(ARMCC::AL)); 2360 } 2361 2362 CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs, 2363 ClearRegs); // save+clear FP regs with ClearRegs 2364 CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); 2365 2366 const MachineInstrBuilder NewCall = 2367 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr)) 2368 .add(predOps(ARMCC::AL)) 2369 .addReg(JumpReg, RegState::Kill); 2370 2371 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 2372 NewCall->addOperand(MO); 2373 if (MI.isCandidateForCallSiteEntry()) 2374 MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr()); 2375 2376 CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers 2377 2378 CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction()); 2379 2380 MI.eraseFromParent(); 2381 return true; 2382 } 2383 case ARM::VMOVHcc: 2384 case ARM::VMOVScc: 2385 case ARM::VMOVDcc: { 2386 unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; 2387 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), 2388 MI.getOperand(1).getReg()) 2389 .add(MI.getOperand(2)) 2390 .addImm(MI.getOperand(3).getImm()) // 'pred' 2391 .add(MI.getOperand(4)) 2392 .add(makeImplicit(MI.getOperand(1))); 2393 2394 MI.eraseFromParent(); 2395 return true; 2396 } 2397 case ARM::t2MOVCCr: 2398 case ARM::MOVCCr: { 2399 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr; 2400 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2401 MI.getOperand(1).getReg()) 2402 .add(MI.getOperand(2)) 2403 .addImm(MI.getOperand(3).getImm()) // 'pred' 2404 .add(MI.getOperand(4)) 2405 .add(condCodeOp()) // 's' bit 2406 .add(makeImplicit(MI.getOperand(1))); 2407 2408 MI.eraseFromParent(); 2409 return true; 2410 } 2411 case ARM::MOVCCsi: { 2412 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2413 (MI.getOperand(1).getReg())) 2414 .add(MI.getOperand(2)) 2415 .addImm(MI.getOperand(3).getImm()) 2416 .addImm(MI.getOperand(4).getImm()) // 'pred' 2417 .add(MI.getOperand(5)) 2418 .add(condCodeOp()) // 's' bit 2419 .add(makeImplicit(MI.getOperand(1))); 2420 2421 MI.eraseFromParent(); 2422 return true; 2423 } 2424 case ARM::MOVCCsr: { 2425 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), 2426 (MI.getOperand(1).getReg())) 2427 .add(MI.getOperand(2)) 2428 .add(MI.getOperand(3)) 2429 .addImm(MI.getOperand(4).getImm()) 2430 .addImm(MI.getOperand(5).getImm()) // 'pred' 2431 .add(MI.getOperand(6)) 2432 .add(condCodeOp()) // 's' bit 2433 .add(makeImplicit(MI.getOperand(1))); 2434 2435 MI.eraseFromParent(); 2436 return true; 2437 } 2438 case ARM::t2MOVCCi16: 2439 case ARM::MOVCCi16: { 2440 unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; 2441 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2442 MI.getOperand(1).getReg()) 2443 .addImm(MI.getOperand(2).getImm()) 2444 .addImm(MI.getOperand(3).getImm()) // 'pred' 2445 .add(MI.getOperand(4)) 2446 .add(makeImplicit(MI.getOperand(1))); 2447 MI.eraseFromParent(); 2448 return true; 2449 } 2450 case ARM::t2MOVCCi: 2451 case ARM::MOVCCi: { 2452 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; 2453 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2454 MI.getOperand(1).getReg()) 2455 .addImm(MI.getOperand(2).getImm()) 2456 .addImm(MI.getOperand(3).getImm()) // 'pred' 2457 .add(MI.getOperand(4)) 2458 .add(condCodeOp()) // 's' bit 2459 .add(makeImplicit(MI.getOperand(1))); 2460 2461 MI.eraseFromParent(); 2462 return true; 2463 } 2464 case ARM::t2MVNCCi: 2465 case ARM::MVNCCi: { 2466 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; 2467 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2468 MI.getOperand(1).getReg()) 2469 .addImm(MI.getOperand(2).getImm()) 2470 .addImm(MI.getOperand(3).getImm()) // 'pred' 2471 .add(MI.getOperand(4)) 2472 .add(condCodeOp()) // 's' bit 2473 .add(makeImplicit(MI.getOperand(1))); 2474 2475 MI.eraseFromParent(); 2476 return true; 2477 } 2478 case ARM::t2MOVCClsl: 2479 case ARM::t2MOVCClsr: 2480 case ARM::t2MOVCCasr: 2481 case ARM::t2MOVCCror: { 2482 unsigned NewOpc; 2483 switch (Opcode) { 2484 case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; 2485 case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; 2486 case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; 2487 case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; 2488 default: llvm_unreachable("unexpeced conditional move"); 2489 } 2490 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2491 MI.getOperand(1).getReg()) 2492 .add(MI.getOperand(2)) 2493 .addImm(MI.getOperand(3).getImm()) 2494 .addImm(MI.getOperand(4).getImm()) // 'pred' 2495 .add(MI.getOperand(5)) 2496 .add(condCodeOp()) // 's' bit 2497 .add(makeImplicit(MI.getOperand(1))); 2498 MI.eraseFromParent(); 2499 return true; 2500 } 2501 case ARM::Int_eh_sjlj_dispatchsetup: { 2502 MachineFunction &MF = *MI.getParent()->getParent(); 2503 const ARMBaseInstrInfo *AII = 2504 static_cast<const ARMBaseInstrInfo*>(TII); 2505 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 2506 // For functions using a base pointer, we rematerialize it (via the frame 2507 // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it 2508 // for us. Otherwise, expand to nothing. 2509 if (RI.hasBasePointer(MF)) { 2510 int32_t NumBytes = AFI->getFramePtrSpillOffset(); 2511 Register FramePtr = RI.getFrameRegister(MF); 2512 assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && 2513 "base pointer without frame pointer?"); 2514 2515 if (AFI->isThumb2Function()) { 2516 emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2517 FramePtr, -NumBytes, ARMCC::AL, 0, *TII); 2518 } else if (AFI->isThumbFunction()) { 2519 emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2520 FramePtr, -NumBytes, *TII, RI); 2521 } else { 2522 emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2523 FramePtr, -NumBytes, ARMCC::AL, 0, 2524 *TII); 2525 } 2526 // If there's dynamic realignment, adjust for it. 2527 if (RI.hasStackRealignment(MF)) { 2528 MachineFrameInfo &MFI = MF.getFrameInfo(); 2529 Align MaxAlign = MFI.getMaxAlign(); 2530 assert (!AFI->isThumb1OnlyFunction()); 2531 // Emit bic r6, r6, MaxAlign 2532 assert(MaxAlign <= Align(256) && 2533 "The BIC instruction cannot encode " 2534 "immediates larger than 256 with all lower " 2535 "bits set."); 2536 unsigned bicOpc = AFI->isThumbFunction() ? 2537 ARM::t2BICri : ARM::BICri; 2538 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) 2539 .addReg(ARM::R6, RegState::Kill) 2540 .addImm(MaxAlign.value() - 1) 2541 .add(predOps(ARMCC::AL)) 2542 .add(condCodeOp()); 2543 } 2544 } 2545 MI.eraseFromParent(); 2546 return true; 2547 } 2548 2549 case ARM::MOVsrl_glue: 2550 case ARM::MOVsra_glue: { 2551 // These are just fancy MOVs instructions. 2552 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2553 MI.getOperand(0).getReg()) 2554 .add(MI.getOperand(1)) 2555 .addImm(ARM_AM::getSORegOpc( 2556 (Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1)) 2557 .add(predOps(ARMCC::AL)) 2558 .addReg(ARM::CPSR, RegState::Define); 2559 MI.eraseFromParent(); 2560 return true; 2561 } 2562 case ARM::RRX: { 2563 // This encodes as "MOVs Rd, Rm, rrx 2564 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2565 MI.getOperand(0).getReg()) 2566 .add(MI.getOperand(1)) 2567 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) 2568 .add(predOps(ARMCC::AL)) 2569 .add(condCodeOp()) 2570 .copyImplicitOps(MI); 2571 MI.eraseFromParent(); 2572 return true; 2573 } 2574 case ARM::tTPsoft: 2575 case ARM::TPsoft: { 2576 const bool Thumb = Opcode == ARM::tTPsoft; 2577 2578 MachineInstrBuilder MIB; 2579 MachineFunction *MF = MBB.getParent(); 2580 if (STI->genLongCalls()) { 2581 MachineConstantPool *MCP = MF->getConstantPool(); 2582 unsigned PCLabelID = AFI->createPICLabelUId(); 2583 MachineConstantPoolValue *CPV = 2584 ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), 2585 "__aeabi_read_tp", PCLabelID, 0); 2586 Register Reg = MI.getOperand(0).getReg(); 2587 MIB = 2588 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2589 TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) 2590 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2591 if (!Thumb) 2592 MIB.addImm(0); 2593 MIB.add(predOps(ARMCC::AL)); 2594 2595 MIB = 2596 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2597 TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); 2598 if (Thumb) 2599 MIB.add(predOps(ARMCC::AL)); 2600 MIB.addReg(Reg, RegState::Kill); 2601 } else { 2602 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2603 TII->get(Thumb ? ARM::tBL : ARM::BL)); 2604 if (Thumb) 2605 MIB.add(predOps(ARMCC::AL)); 2606 MIB.addExternalSymbol("__aeabi_read_tp", 0); 2607 } 2608 2609 MIB.cloneMemRefs(MI); 2610 MIB.copyImplicitOps(MI); 2611 // Update the call site info. 2612 if (MI.isCandidateForCallSiteEntry()) 2613 MF->moveCallSiteInfo(&MI, &*MIB); 2614 MI.eraseFromParent(); 2615 return true; 2616 } 2617 case ARM::tLDRpci_pic: 2618 case ARM::t2LDRpci_pic: { 2619 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) 2620 ? ARM::tLDRpci : ARM::t2LDRpci; 2621 Register DstReg = MI.getOperand(0).getReg(); 2622 bool DstIsDead = MI.getOperand(0).isDead(); 2623 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) 2624 .add(MI.getOperand(1)) 2625 .add(predOps(ARMCC::AL)) 2626 .cloneMemRefs(MI) 2627 .copyImplicitOps(MI); 2628 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) 2629 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2630 .addReg(DstReg) 2631 .add(MI.getOperand(2)) 2632 .copyImplicitOps(MI); 2633 MI.eraseFromParent(); 2634 return true; 2635 } 2636 2637 case ARM::LDRLIT_ga_abs: 2638 case ARM::LDRLIT_ga_pcrel: 2639 case ARM::LDRLIT_ga_pcrel_ldr: 2640 case ARM::tLDRLIT_ga_abs: 2641 case ARM::t2LDRLIT_ga_pcrel: 2642 case ARM::tLDRLIT_ga_pcrel: { 2643 Register DstReg = MI.getOperand(0).getReg(); 2644 bool DstIsDead = MI.getOperand(0).isDead(); 2645 const MachineOperand &MO1 = MI.getOperand(1); 2646 auto Flags = MO1.getTargetFlags(); 2647 const GlobalValue *GV = MO1.getGlobal(); 2648 bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && 2649 Opcode != ARM::tLDRLIT_ga_abs && 2650 Opcode != ARM::t2LDRLIT_ga_pcrel; 2651 bool IsPIC = 2652 Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; 2653 unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; 2654 if (Opcode == ARM::t2LDRLIT_ga_pcrel) 2655 LDRLITOpc = ARM::t2LDRpci; 2656 unsigned PICAddOpc = 2657 IsARM 2658 ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2659 : ARM::tPICADD; 2660 2661 // We need a new const-pool entry to load from. 2662 MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); 2663 unsigned ARMPCLabelIndex = 0; 2664 MachineConstantPoolValue *CPV; 2665 2666 if (IsPIC) { 2667 unsigned PCAdj = IsARM ? 8 : 4; 2668 auto Modifier = (Flags & ARMII::MO_GOT) 2669 ? ARMCP::GOT_PREL 2670 : ARMCP::no_modifier; 2671 ARMPCLabelIndex = AFI->createPICLabelUId(); 2672 CPV = ARMConstantPoolConstant::Create( 2673 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier, 2674 /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL); 2675 } else 2676 CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); 2677 2678 MachineInstrBuilder MIB = 2679 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) 2680 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2681 if (IsARM) 2682 MIB.addImm(0); 2683 MIB.add(predOps(ARMCC::AL)); 2684 2685 if (IsPIC) { 2686 MachineInstrBuilder MIB = 2687 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) 2688 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2689 .addReg(DstReg) 2690 .addImm(ARMPCLabelIndex); 2691 2692 if (IsARM) 2693 MIB.add(predOps(ARMCC::AL)); 2694 } 2695 2696 MI.eraseFromParent(); 2697 return true; 2698 } 2699 case ARM::MOV_ga_pcrel: 2700 case ARM::MOV_ga_pcrel_ldr: 2701 case ARM::t2MOV_ga_pcrel: { 2702 // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 2703 unsigned LabelId = AFI->createPICLabelUId(); 2704 Register DstReg = MI.getOperand(0).getReg(); 2705 bool DstIsDead = MI.getOperand(0).isDead(); 2706 const MachineOperand &MO1 = MI.getOperand(1); 2707 const GlobalValue *GV = MO1.getGlobal(); 2708 unsigned TF = MO1.getTargetFlags(); 2709 bool isARM = Opcode != ARM::t2MOV_ga_pcrel; 2710 unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; 2711 unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; 2712 unsigned LO16TF = TF | ARMII::MO_LO16; 2713 unsigned HI16TF = TF | ARMII::MO_HI16; 2714 unsigned PICAddOpc = isARM 2715 ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2716 : ARM::tPICADD; 2717 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg) 2718 .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) 2719 .addImm(LabelId) 2720 .copyImplicitOps(MI); 2721 2722 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) 2723 .addReg(DstReg) 2724 .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) 2725 .addImm(LabelId) 2726 .copyImplicitOps(MI); 2727 2728 MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2729 TII->get(PICAddOpc)) 2730 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2731 .addReg(DstReg).addImm(LabelId); 2732 if (isARM) { 2733 MIB3.add(predOps(ARMCC::AL)); 2734 if (Opcode == ARM::MOV_ga_pcrel_ldr) 2735 MIB3.cloneMemRefs(MI); 2736 } 2737 MIB3.copyImplicitOps(MI); 2738 MI.eraseFromParent(); 2739 return true; 2740 } 2741 2742 case ARM::MOVi32imm: 2743 case ARM::MOVCCi32imm: 2744 case ARM::t2MOVi32imm: 2745 case ARM::t2MOVCCi32imm: 2746 ExpandMOV32BitImm(MBB, MBBI); 2747 return true; 2748 2749 case ARM::tMOVi32imm: 2750 ExpandTMOV32BitImm(MBB, MBBI); 2751 return true; 2752 2753 case ARM::tLEApcrelJT: 2754 // Inline jump tables are handled in ARMAsmPrinter. 2755 if (MI.getMF()->getJumpTableInfo()->getEntryKind() == 2756 MachineJumpTableInfo::EK_Inline) 2757 return false; 2758 2759 // Use a 32-bit immediate move to generate the address of the jump table. 2760 assert(STI->isThumb() && "Non-inline jump tables expected only in thumb"); 2761 ExpandTMOV32BitImm(MBB, MBBI); 2762 return true; 2763 2764 case ARM::SUBS_PC_LR: { 2765 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) 2766 .addReg(ARM::LR) 2767 .add(MI.getOperand(0)) 2768 .add(MI.getOperand(1)) 2769 .add(MI.getOperand(2)) 2770 .addReg(ARM::CPSR, RegState::Undef) 2771 .copyImplicitOps(MI); 2772 MI.eraseFromParent(); 2773 return true; 2774 } 2775 case ARM::VLDMQIA: { 2776 unsigned NewOpc = ARM::VLDMDIA; 2777 MachineInstrBuilder MIB = 2778 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2779 unsigned OpIdx = 0; 2780 2781 // Grab the Q register destination. 2782 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 2783 Register DstReg = MI.getOperand(OpIdx++).getReg(); 2784 2785 // Copy the source register. 2786 MIB.add(MI.getOperand(OpIdx++)); 2787 2788 // Copy the predicate operands. 2789 MIB.add(MI.getOperand(OpIdx++)); 2790 MIB.add(MI.getOperand(OpIdx++)); 2791 2792 // Add the destination operands (D subregs). 2793 Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); 2794 Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); 2795 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) 2796 .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 2797 2798 // Add an implicit def for the super-register. 2799 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 2800 MIB.copyImplicitOps(MI); 2801 MIB.cloneMemRefs(MI); 2802 MI.eraseFromParent(); 2803 return true; 2804 } 2805 2806 case ARM::VSTMQIA: { 2807 unsigned NewOpc = ARM::VSTMDIA; 2808 MachineInstrBuilder MIB = 2809 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2810 unsigned OpIdx = 0; 2811 2812 // Grab the Q register source. 2813 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 2814 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 2815 2816 // Copy the destination register. 2817 MachineOperand Dst(MI.getOperand(OpIdx++)); 2818 MIB.add(Dst); 2819 2820 // Copy the predicate operands. 2821 MIB.add(MI.getOperand(OpIdx++)); 2822 MIB.add(MI.getOperand(OpIdx++)); 2823 2824 // Add the source operands (D subregs). 2825 Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); 2826 Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); 2827 MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) 2828 .addReg(D1, SrcIsKill ? RegState::Kill : 0); 2829 2830 if (SrcIsKill) // Add an implicit kill for the Q register. 2831 MIB->addRegisterKilled(SrcReg, TRI, true); 2832 2833 MIB.copyImplicitOps(MI); 2834 MIB.cloneMemRefs(MI); 2835 MI.eraseFromParent(); 2836 return true; 2837 } 2838 2839 case ARM::VLD2q8Pseudo: 2840 case ARM::VLD2q16Pseudo: 2841 case ARM::VLD2q32Pseudo: 2842 case ARM::VLD2q8PseudoWB_fixed: 2843 case ARM::VLD2q16PseudoWB_fixed: 2844 case ARM::VLD2q32PseudoWB_fixed: 2845 case ARM::VLD2q8PseudoWB_register: 2846 case ARM::VLD2q16PseudoWB_register: 2847 case ARM::VLD2q32PseudoWB_register: 2848 case ARM::VLD3d8Pseudo: 2849 case ARM::VLD3d16Pseudo: 2850 case ARM::VLD3d32Pseudo: 2851 case ARM::VLD1d8TPseudo: 2852 case ARM::VLD1d8TPseudoWB_fixed: 2853 case ARM::VLD1d8TPseudoWB_register: 2854 case ARM::VLD1d16TPseudo: 2855 case ARM::VLD1d16TPseudoWB_fixed: 2856 case ARM::VLD1d16TPseudoWB_register: 2857 case ARM::VLD1d32TPseudo: 2858 case ARM::VLD1d32TPseudoWB_fixed: 2859 case ARM::VLD1d32TPseudoWB_register: 2860 case ARM::VLD1d64TPseudo: 2861 case ARM::VLD1d64TPseudoWB_fixed: 2862 case ARM::VLD1d64TPseudoWB_register: 2863 case ARM::VLD3d8Pseudo_UPD: 2864 case ARM::VLD3d16Pseudo_UPD: 2865 case ARM::VLD3d32Pseudo_UPD: 2866 case ARM::VLD3q8Pseudo_UPD: 2867 case ARM::VLD3q16Pseudo_UPD: 2868 case ARM::VLD3q32Pseudo_UPD: 2869 case ARM::VLD3q8oddPseudo: 2870 case ARM::VLD3q16oddPseudo: 2871 case ARM::VLD3q32oddPseudo: 2872 case ARM::VLD3q8oddPseudo_UPD: 2873 case ARM::VLD3q16oddPseudo_UPD: 2874 case ARM::VLD3q32oddPseudo_UPD: 2875 case ARM::VLD4d8Pseudo: 2876 case ARM::VLD4d16Pseudo: 2877 case ARM::VLD4d32Pseudo: 2878 case ARM::VLD1d8QPseudo: 2879 case ARM::VLD1d8QPseudoWB_fixed: 2880 case ARM::VLD1d8QPseudoWB_register: 2881 case ARM::VLD1d16QPseudo: 2882 case ARM::VLD1d16QPseudoWB_fixed: 2883 case ARM::VLD1d16QPseudoWB_register: 2884 case ARM::VLD1d32QPseudo: 2885 case ARM::VLD1d32QPseudoWB_fixed: 2886 case ARM::VLD1d32QPseudoWB_register: 2887 case ARM::VLD1d64QPseudo: 2888 case ARM::VLD1d64QPseudoWB_fixed: 2889 case ARM::VLD1d64QPseudoWB_register: 2890 case ARM::VLD1q8HighQPseudo: 2891 case ARM::VLD1q8HighQPseudo_UPD: 2892 case ARM::VLD1q8LowQPseudo_UPD: 2893 case ARM::VLD1q8HighTPseudo: 2894 case ARM::VLD1q8HighTPseudo_UPD: 2895 case ARM::VLD1q8LowTPseudo_UPD: 2896 case ARM::VLD1q16HighQPseudo: 2897 case ARM::VLD1q16HighQPseudo_UPD: 2898 case ARM::VLD1q16LowQPseudo_UPD: 2899 case ARM::VLD1q16HighTPseudo: 2900 case ARM::VLD1q16HighTPseudo_UPD: 2901 case ARM::VLD1q16LowTPseudo_UPD: 2902 case ARM::VLD1q32HighQPseudo: 2903 case ARM::VLD1q32HighQPseudo_UPD: 2904 case ARM::VLD1q32LowQPseudo_UPD: 2905 case ARM::VLD1q32HighTPseudo: 2906 case ARM::VLD1q32HighTPseudo_UPD: 2907 case ARM::VLD1q32LowTPseudo_UPD: 2908 case ARM::VLD1q64HighQPseudo: 2909 case ARM::VLD1q64HighQPseudo_UPD: 2910 case ARM::VLD1q64LowQPseudo_UPD: 2911 case ARM::VLD1q64HighTPseudo: 2912 case ARM::VLD1q64HighTPseudo_UPD: 2913 case ARM::VLD1q64LowTPseudo_UPD: 2914 case ARM::VLD4d8Pseudo_UPD: 2915 case ARM::VLD4d16Pseudo_UPD: 2916 case ARM::VLD4d32Pseudo_UPD: 2917 case ARM::VLD4q8Pseudo_UPD: 2918 case ARM::VLD4q16Pseudo_UPD: 2919 case ARM::VLD4q32Pseudo_UPD: 2920 case ARM::VLD4q8oddPseudo: 2921 case ARM::VLD4q16oddPseudo: 2922 case ARM::VLD4q32oddPseudo: 2923 case ARM::VLD4q8oddPseudo_UPD: 2924 case ARM::VLD4q16oddPseudo_UPD: 2925 case ARM::VLD4q32oddPseudo_UPD: 2926 case ARM::VLD3DUPd8Pseudo: 2927 case ARM::VLD3DUPd16Pseudo: 2928 case ARM::VLD3DUPd32Pseudo: 2929 case ARM::VLD3DUPd8Pseudo_UPD: 2930 case ARM::VLD3DUPd16Pseudo_UPD: 2931 case ARM::VLD3DUPd32Pseudo_UPD: 2932 case ARM::VLD4DUPd8Pseudo: 2933 case ARM::VLD4DUPd16Pseudo: 2934 case ARM::VLD4DUPd32Pseudo: 2935 case ARM::VLD4DUPd8Pseudo_UPD: 2936 case ARM::VLD4DUPd16Pseudo_UPD: 2937 case ARM::VLD4DUPd32Pseudo_UPD: 2938 case ARM::VLD2DUPq8EvenPseudo: 2939 case ARM::VLD2DUPq8OddPseudo: 2940 case ARM::VLD2DUPq16EvenPseudo: 2941 case ARM::VLD2DUPq16OddPseudo: 2942 case ARM::VLD2DUPq32EvenPseudo: 2943 case ARM::VLD2DUPq32OddPseudo: 2944 case ARM::VLD2DUPq8OddPseudoWB_fixed: 2945 case ARM::VLD2DUPq8OddPseudoWB_register: 2946 case ARM::VLD2DUPq16OddPseudoWB_fixed: 2947 case ARM::VLD2DUPq16OddPseudoWB_register: 2948 case ARM::VLD2DUPq32OddPseudoWB_fixed: 2949 case ARM::VLD2DUPq32OddPseudoWB_register: 2950 case ARM::VLD3DUPq8EvenPseudo: 2951 case ARM::VLD3DUPq8OddPseudo: 2952 case ARM::VLD3DUPq16EvenPseudo: 2953 case ARM::VLD3DUPq16OddPseudo: 2954 case ARM::VLD3DUPq32EvenPseudo: 2955 case ARM::VLD3DUPq32OddPseudo: 2956 case ARM::VLD3DUPq8OddPseudo_UPD: 2957 case ARM::VLD3DUPq16OddPseudo_UPD: 2958 case ARM::VLD3DUPq32OddPseudo_UPD: 2959 case ARM::VLD4DUPq8EvenPseudo: 2960 case ARM::VLD4DUPq8OddPseudo: 2961 case ARM::VLD4DUPq16EvenPseudo: 2962 case ARM::VLD4DUPq16OddPseudo: 2963 case ARM::VLD4DUPq32EvenPseudo: 2964 case ARM::VLD4DUPq32OddPseudo: 2965 case ARM::VLD4DUPq8OddPseudo_UPD: 2966 case ARM::VLD4DUPq16OddPseudo_UPD: 2967 case ARM::VLD4DUPq32OddPseudo_UPD: 2968 ExpandVLD(MBBI); 2969 return true; 2970 2971 case ARM::VST2q8Pseudo: 2972 case ARM::VST2q16Pseudo: 2973 case ARM::VST2q32Pseudo: 2974 case ARM::VST2q8PseudoWB_fixed: 2975 case ARM::VST2q16PseudoWB_fixed: 2976 case ARM::VST2q32PseudoWB_fixed: 2977 case ARM::VST2q8PseudoWB_register: 2978 case ARM::VST2q16PseudoWB_register: 2979 case ARM::VST2q32PseudoWB_register: 2980 case ARM::VST3d8Pseudo: 2981 case ARM::VST3d16Pseudo: 2982 case ARM::VST3d32Pseudo: 2983 case ARM::VST1d8TPseudo: 2984 case ARM::VST1d8TPseudoWB_fixed: 2985 case ARM::VST1d8TPseudoWB_register: 2986 case ARM::VST1d16TPseudo: 2987 case ARM::VST1d16TPseudoWB_fixed: 2988 case ARM::VST1d16TPseudoWB_register: 2989 case ARM::VST1d32TPseudo: 2990 case ARM::VST1d32TPseudoWB_fixed: 2991 case ARM::VST1d32TPseudoWB_register: 2992 case ARM::VST1d64TPseudo: 2993 case ARM::VST1d64TPseudoWB_fixed: 2994 case ARM::VST1d64TPseudoWB_register: 2995 case ARM::VST3d8Pseudo_UPD: 2996 case ARM::VST3d16Pseudo_UPD: 2997 case ARM::VST3d32Pseudo_UPD: 2998 case ARM::VST3q8Pseudo_UPD: 2999 case ARM::VST3q16Pseudo_UPD: 3000 case ARM::VST3q32Pseudo_UPD: 3001 case ARM::VST3q8oddPseudo: 3002 case ARM::VST3q16oddPseudo: 3003 case ARM::VST3q32oddPseudo: 3004 case ARM::VST3q8oddPseudo_UPD: 3005 case ARM::VST3q16oddPseudo_UPD: 3006 case ARM::VST3q32oddPseudo_UPD: 3007 case ARM::VST4d8Pseudo: 3008 case ARM::VST4d16Pseudo: 3009 case ARM::VST4d32Pseudo: 3010 case ARM::VST1d8QPseudo: 3011 case ARM::VST1d8QPseudoWB_fixed: 3012 case ARM::VST1d8QPseudoWB_register: 3013 case ARM::VST1d16QPseudo: 3014 case ARM::VST1d16QPseudoWB_fixed: 3015 case ARM::VST1d16QPseudoWB_register: 3016 case ARM::VST1d32QPseudo: 3017 case ARM::VST1d32QPseudoWB_fixed: 3018 case ARM::VST1d32QPseudoWB_register: 3019 case ARM::VST1d64QPseudo: 3020 case ARM::VST1d64QPseudoWB_fixed: 3021 case ARM::VST1d64QPseudoWB_register: 3022 case ARM::VST4d8Pseudo_UPD: 3023 case ARM::VST4d16Pseudo_UPD: 3024 case ARM::VST4d32Pseudo_UPD: 3025 case ARM::VST1q8HighQPseudo: 3026 case ARM::VST1q8LowQPseudo_UPD: 3027 case ARM::VST1q8HighTPseudo: 3028 case ARM::VST1q8LowTPseudo_UPD: 3029 case ARM::VST1q16HighQPseudo: 3030 case ARM::VST1q16LowQPseudo_UPD: 3031 case ARM::VST1q16HighTPseudo: 3032 case ARM::VST1q16LowTPseudo_UPD: 3033 case ARM::VST1q32HighQPseudo: 3034 case ARM::VST1q32LowQPseudo_UPD: 3035 case ARM::VST1q32HighTPseudo: 3036 case ARM::VST1q32LowTPseudo_UPD: 3037 case ARM::VST1q64HighQPseudo: 3038 case ARM::VST1q64LowQPseudo_UPD: 3039 case ARM::VST1q64HighTPseudo: 3040 case ARM::VST1q64LowTPseudo_UPD: 3041 case ARM::VST1q8HighTPseudo_UPD: 3042 case ARM::VST1q16HighTPseudo_UPD: 3043 case ARM::VST1q32HighTPseudo_UPD: 3044 case ARM::VST1q64HighTPseudo_UPD: 3045 case ARM::VST1q8HighQPseudo_UPD: 3046 case ARM::VST1q16HighQPseudo_UPD: 3047 case ARM::VST1q32HighQPseudo_UPD: 3048 case ARM::VST1q64HighQPseudo_UPD: 3049 case ARM::VST4q8Pseudo_UPD: 3050 case ARM::VST4q16Pseudo_UPD: 3051 case ARM::VST4q32Pseudo_UPD: 3052 case ARM::VST4q8oddPseudo: 3053 case ARM::VST4q16oddPseudo: 3054 case ARM::VST4q32oddPseudo: 3055 case ARM::VST4q8oddPseudo_UPD: 3056 case ARM::VST4q16oddPseudo_UPD: 3057 case ARM::VST4q32oddPseudo_UPD: 3058 ExpandVST(MBBI); 3059 return true; 3060 3061 case ARM::VLD1LNq8Pseudo: 3062 case ARM::VLD1LNq16Pseudo: 3063 case ARM::VLD1LNq32Pseudo: 3064 case ARM::VLD1LNq8Pseudo_UPD: 3065 case ARM::VLD1LNq16Pseudo_UPD: 3066 case ARM::VLD1LNq32Pseudo_UPD: 3067 case ARM::VLD2LNd8Pseudo: 3068 case ARM::VLD2LNd16Pseudo: 3069 case ARM::VLD2LNd32Pseudo: 3070 case ARM::VLD2LNq16Pseudo: 3071 case ARM::VLD2LNq32Pseudo: 3072 case ARM::VLD2LNd8Pseudo_UPD: 3073 case ARM::VLD2LNd16Pseudo_UPD: 3074 case ARM::VLD2LNd32Pseudo_UPD: 3075 case ARM::VLD2LNq16Pseudo_UPD: 3076 case ARM::VLD2LNq32Pseudo_UPD: 3077 case ARM::VLD3LNd8Pseudo: 3078 case ARM::VLD3LNd16Pseudo: 3079 case ARM::VLD3LNd32Pseudo: 3080 case ARM::VLD3LNq16Pseudo: 3081 case ARM::VLD3LNq32Pseudo: 3082 case ARM::VLD3LNd8Pseudo_UPD: 3083 case ARM::VLD3LNd16Pseudo_UPD: 3084 case ARM::VLD3LNd32Pseudo_UPD: 3085 case ARM::VLD3LNq16Pseudo_UPD: 3086 case ARM::VLD3LNq32Pseudo_UPD: 3087 case ARM::VLD4LNd8Pseudo: 3088 case ARM::VLD4LNd16Pseudo: 3089 case ARM::VLD4LNd32Pseudo: 3090 case ARM::VLD4LNq16Pseudo: 3091 case ARM::VLD4LNq32Pseudo: 3092 case ARM::VLD4LNd8Pseudo_UPD: 3093 case ARM::VLD4LNd16Pseudo_UPD: 3094 case ARM::VLD4LNd32Pseudo_UPD: 3095 case ARM::VLD4LNq16Pseudo_UPD: 3096 case ARM::VLD4LNq32Pseudo_UPD: 3097 case ARM::VST1LNq8Pseudo: 3098 case ARM::VST1LNq16Pseudo: 3099 case ARM::VST1LNq32Pseudo: 3100 case ARM::VST1LNq8Pseudo_UPD: 3101 case ARM::VST1LNq16Pseudo_UPD: 3102 case ARM::VST1LNq32Pseudo_UPD: 3103 case ARM::VST2LNd8Pseudo: 3104 case ARM::VST2LNd16Pseudo: 3105 case ARM::VST2LNd32Pseudo: 3106 case ARM::VST2LNq16Pseudo: 3107 case ARM::VST2LNq32Pseudo: 3108 case ARM::VST2LNd8Pseudo_UPD: 3109 case ARM::VST2LNd16Pseudo_UPD: 3110 case ARM::VST2LNd32Pseudo_UPD: 3111 case ARM::VST2LNq16Pseudo_UPD: 3112 case ARM::VST2LNq32Pseudo_UPD: 3113 case ARM::VST3LNd8Pseudo: 3114 case ARM::VST3LNd16Pseudo: 3115 case ARM::VST3LNd32Pseudo: 3116 case ARM::VST3LNq16Pseudo: 3117 case ARM::VST3LNq32Pseudo: 3118 case ARM::VST3LNd8Pseudo_UPD: 3119 case ARM::VST3LNd16Pseudo_UPD: 3120 case ARM::VST3LNd32Pseudo_UPD: 3121 case ARM::VST3LNq16Pseudo_UPD: 3122 case ARM::VST3LNq32Pseudo_UPD: 3123 case ARM::VST4LNd8Pseudo: 3124 case ARM::VST4LNd16Pseudo: 3125 case ARM::VST4LNd32Pseudo: 3126 case ARM::VST4LNq16Pseudo: 3127 case ARM::VST4LNq32Pseudo: 3128 case ARM::VST4LNd8Pseudo_UPD: 3129 case ARM::VST4LNd16Pseudo_UPD: 3130 case ARM::VST4LNd32Pseudo_UPD: 3131 case ARM::VST4LNq16Pseudo_UPD: 3132 case ARM::VST4LNq32Pseudo_UPD: 3133 ExpandLaneOp(MBBI); 3134 return true; 3135 3136 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; 3137 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; 3138 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; 3139 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; 3140 3141 case ARM::MQQPRLoad: 3142 case ARM::MQQPRStore: 3143 case ARM::MQQQQPRLoad: 3144 case ARM::MQQQQPRStore: 3145 ExpandMQQPRLoadStore(MBBI); 3146 return true; 3147 3148 case ARM::tCMP_SWAP_8: 3149 assert(STI->isThumb()); 3150 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, 3151 NextMBBI); 3152 case ARM::tCMP_SWAP_16: 3153 assert(STI->isThumb()); 3154 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, 3155 NextMBBI); 3156 case ARM::tCMP_SWAP_32: 3157 assert(STI->isThumb()); 3158 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI); 3159 3160 case ARM::CMP_SWAP_8: 3161 assert(!STI->isThumb()); 3162 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, 3163 NextMBBI); 3164 case ARM::CMP_SWAP_16: 3165 assert(!STI->isThumb()); 3166 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, 3167 NextMBBI); 3168 case ARM::CMP_SWAP_32: 3169 assert(!STI->isThumb()); 3170 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); 3171 3172 case ARM::CMP_SWAP_64: 3173 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); 3174 3175 case ARM::tBL_PUSHLR: 3176 case ARM::BL_PUSHLR: { 3177 const bool Thumb = Opcode == ARM::tBL_PUSHLR; 3178 Register Reg = MI.getOperand(0).getReg(); 3179 assert(Reg == ARM::LR && "expect LR register!"); 3180 MachineInstrBuilder MIB; 3181 if (Thumb) { 3182 // push {lr} 3183 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) 3184 .add(predOps(ARMCC::AL)) 3185 .addReg(Reg); 3186 3187 // bl __gnu_mcount_nc 3188 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); 3189 } else { 3190 // stmdb sp!, {lr} 3191 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) 3192 .addReg(ARM::SP, RegState::Define) 3193 .addReg(ARM::SP) 3194 .add(predOps(ARMCC::AL)) 3195 .addReg(Reg); 3196 3197 // bl __gnu_mcount_nc 3198 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); 3199 } 3200 MIB.cloneMemRefs(MI); 3201 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3202 MIB.add(MO); 3203 MI.eraseFromParent(); 3204 return true; 3205 } 3206 case ARM::t2CALL_BTI: { 3207 MachineFunction &MF = *MI.getMF(); 3208 MachineInstrBuilder MIB = 3209 BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL)); 3210 MIB.cloneMemRefs(MI); 3211 for (unsigned i = 0; i < MI.getNumOperands(); ++i) 3212 MIB.add(MI.getOperand(i)); 3213 if (MI.isCandidateForCallSiteEntry()) 3214 MF.moveCallSiteInfo(&MI, MIB.getInstr()); 3215 MIBundleBuilder Bundler(MBB, MI); 3216 Bundler.append(MIB); 3217 Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI))); 3218 finalizeBundle(MBB, Bundler.begin(), Bundler.end()); 3219 MI.eraseFromParent(); 3220 return true; 3221 } 3222 case ARM::LOADDUAL: 3223 case ARM::STOREDUAL: { 3224 Register PairReg = MI.getOperand(0).getReg(); 3225 3226 MachineInstrBuilder MIB = 3227 BuildMI(MBB, MBBI, MI.getDebugLoc(), 3228 TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) 3229 .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), 3230 Opcode == ARM::LOADDUAL ? RegState::Define : 0) 3231 .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), 3232 Opcode == ARM::LOADDUAL ? RegState::Define : 0); 3233 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3234 MIB.add(MO); 3235 MIB.add(predOps(ARMCC::AL)); 3236 MIB.cloneMemRefs(MI); 3237 MI.eraseFromParent(); 3238 return true; 3239 } 3240 } 3241 } 3242 3243 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { 3244 bool Modified = false; 3245 3246 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 3247 while (MBBI != E) { 3248 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 3249 Modified |= ExpandMI(MBB, MBBI, NMBBI); 3250 MBBI = NMBBI; 3251 } 3252 3253 return Modified; 3254 } 3255 3256 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { 3257 STI = &MF.getSubtarget<ARMSubtarget>(); 3258 TII = STI->getInstrInfo(); 3259 TRI = STI->getRegisterInfo(); 3260 AFI = MF.getInfo<ARMFunctionInfo>(); 3261 3262 LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n" 3263 << "********** Function: " << MF.getName() << '\n'); 3264 3265 bool Modified = false; 3266 for (MachineBasicBlock &MBB : MF) 3267 Modified |= ExpandMBB(MBB); 3268 if (VerifyARMPseudo) 3269 MF.verify(this, "After expanding ARM pseudo instructions."); 3270 3271 LLVM_DEBUG(dbgs() << "***************************************************\n"); 3272 return Modified; 3273 } 3274 3275 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction 3276 /// expansion pass. 3277 FunctionPass *llvm::createARMExpandPseudoPass() { 3278 return new ARMExpandPseudo(); 3279 } 3280