1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands pseudo instructions into target 10 // instructions to allow proper scheduling, if-conversion, and other late 11 // optimizations. This pass should be run after register allocation but before 12 // the post-regalloc scheduling pass. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMBaseRegisterInfo.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMMachineFunctionInfo.h" 21 #include "ARMSubtarget.h" 22 #include "MCTargetDesc/ARMAddressingModes.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineJumpTableInfo.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/Support/Debug.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "arm-pseudo" 33 34 static cl::opt<bool> 35 VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, 36 cl::desc("Verify machine code after expanding ARM pseudos")); 37 38 #define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass" 39 40 namespace { 41 class ARMExpandPseudo : public MachineFunctionPass { 42 public: 43 static char ID; 44 ARMExpandPseudo() : MachineFunctionPass(ID) {} 45 46 const ARMBaseInstrInfo *TII; 47 const TargetRegisterInfo *TRI; 48 const ARMSubtarget *STI; 49 ARMFunctionInfo *AFI; 50 51 bool runOnMachineFunction(MachineFunction &Fn) override; 52 53 MachineFunctionProperties getRequiredProperties() const override { 54 return 
MachineFunctionProperties().setNoVRegs(); 55 } 56 57 StringRef getPassName() const override { 58 return ARM_EXPAND_PSEUDO_NAME; 59 } 60 61 private: 62 bool ExpandMI(MachineBasicBlock &MBB, 63 MachineBasicBlock::iterator MBBI, 64 MachineBasicBlock::iterator &NextMBBI); 65 bool ExpandMBB(MachineBasicBlock &MBB); 66 void ExpandVLD(MachineBasicBlock::iterator &MBBI); 67 void ExpandVST(MachineBasicBlock::iterator &MBBI); 68 void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); 69 void ExpandVTBL(MachineBasicBlock::iterator &MBBI, 70 unsigned Opc, bool IsExt); 71 void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI); 72 void ExpandTMOV32BitImm(MachineBasicBlock &MBB, 73 MachineBasicBlock::iterator &MBBI); 74 void ExpandMOV32BitImm(MachineBasicBlock &MBB, 75 MachineBasicBlock::iterator &MBBI); 76 void CMSEClearGPRegs(MachineBasicBlock &MBB, 77 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 78 const SmallVectorImpl<unsigned> &ClearRegs, 79 unsigned ClobberReg); 80 MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB, 81 MachineBasicBlock::iterator MBBI); 82 MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB, 83 MachineBasicBlock::iterator MBBI, 84 const BitVector &ClearRegs); 85 MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB, 86 MachineBasicBlock::iterator MBBI, 87 const BitVector &ClearRegs); 88 void CMSESaveClearFPRegs(MachineBasicBlock &MBB, 89 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 90 const LivePhysRegs &LiveRegs, 91 SmallVectorImpl<unsigned> &AvailableRegs); 92 void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB, 93 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 94 const LivePhysRegs &LiveRegs, 95 SmallVectorImpl<unsigned> &ScratchRegs); 96 void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 97 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 98 const LivePhysRegs &LiveRegs); 99 void CMSERestoreFPRegs(MachineBasicBlock &MBB, 100 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 101 SmallVectorImpl<unsigned> 
&AvailableRegs); 102 void CMSERestoreFPRegsV8(MachineBasicBlock &MBB, 103 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 104 SmallVectorImpl<unsigned> &AvailableRegs); 105 void CMSERestoreFPRegsV81(MachineBasicBlock &MBB, 106 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 107 SmallVectorImpl<unsigned> &AvailableRegs); 108 bool ExpandCMP_SWAP(MachineBasicBlock &MBB, 109 MachineBasicBlock::iterator MBBI, unsigned LdrexOp, 110 unsigned StrexOp, unsigned UxtOp, 111 MachineBasicBlock::iterator &NextMBBI); 112 113 bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB, 114 MachineBasicBlock::iterator MBBI, 115 MachineBasicBlock::iterator &NextMBBI); 116 }; 117 char ARMExpandPseudo::ID = 0; 118 } 119 120 INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false, 121 false) 122 123 namespace { 124 // Constants for register spacing in NEON load/store instructions. 125 // For quad-register load-lane and store-lane pseudo instructors, the 126 // spacing is initially assumed to be EvenDblSpc, and that is changed to 127 // OddDblSpc depending on the lane number operand. 128 enum NEONRegSpacing { 129 SingleSpc, 130 SingleLowSpc , // Single spacing, low registers, three and four vectors. 131 SingleHighQSpc, // Single spacing, high registers, four vectors. 132 SingleHighTSpc, // Single spacing, high registers, three vectors. 133 EvenDblSpc, 134 OddDblSpc 135 }; 136 137 // Entries for NEON load/store information table. The table is sorted by 138 // PseudoOpc for fast binary-search lookups. 
139 struct NEONLdStTableEntry { 140 uint16_t PseudoOpc; 141 uint16_t RealOpc; 142 bool IsLoad; 143 bool isUpdating; 144 bool hasWritebackOperand; 145 uint8_t RegSpacing; // One of type NEONRegSpacing 146 uint8_t NumRegs; // D registers loaded or stored 147 uint8_t RegElts; // elements per D register; used for lane ops 148 // FIXME: Temporary flag to denote whether the real instruction takes 149 // a single register (like the encoding) or all of the registers in 150 // the list (like the asm syntax and the isel DAG). When all definitions 151 // are converted to take only the single encoded register, this will 152 // go away. 153 bool copyAllListRegs; 154 155 // Comparison methods for binary search of the table. 156 bool operator<(const NEONLdStTableEntry &TE) const { 157 return PseudoOpc < TE.PseudoOpc; 158 } 159 friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { 160 return TE.PseudoOpc < PseudoOpc; 161 } 162 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, 163 const NEONLdStTableEntry &TE) { 164 return PseudoOpc < TE.PseudoOpc; 165 } 166 }; 167 } 168 169 static const NEONLdStTableEntry NEONLdStTable[] = { 170 { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, 171 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, 172 { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, 173 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, 174 { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, 175 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, 176 177 { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false}, 178 { ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 179 { ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 
,false}, 180 { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false}, 181 { ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false}, 182 { ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false}, 183 184 { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false}, 185 { ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 186 { ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false}, 187 { ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false}, 188 { ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false}, 189 { ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false}, 190 191 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, 192 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, 193 { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false}, 194 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, 195 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, 196 { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false}, 197 198 { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false}, 199 { ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 200 { ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false}, 201 { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false}, 202 { ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false}, 203 { 
ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false}, 204 205 { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false}, 206 { ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false}, 207 { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false}, 208 { ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false}, 209 { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false}, 210 { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false}, 211 212 { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false}, 213 { ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false}, 214 { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false}, 215 { ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false}, 216 { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false}, 217 { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false}, 218 219 { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false}, 220 { ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false}, 221 { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false}, 222 { ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false}, 223 { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false}, 224 { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false}, 225 226 { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, 
false, false, SingleHighQSpc, 4, 8 ,false}, 227 { ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false}, 228 { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false}, 229 { ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false}, 230 { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false}, 231 { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false}, 232 233 { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false}, 234 { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false}, 235 { ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4 ,false}, 236 { ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4 ,false}, 237 { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false}, 238 { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false}, 239 { ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2 ,false}, 240 { ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2 ,false}, 241 { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false}, 242 { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false}, 243 { ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8 ,false}, 244 { ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8 ,false}, 245 246 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, 247 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, 248 { 
ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, 249 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, 250 { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, 251 { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, 252 { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, 253 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, 254 { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, 255 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, 256 257 { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, 258 { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 259 { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, 260 { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, 261 { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 262 { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, 263 { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, 264 { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 265 { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, 266 267 { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, 268 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, 269 { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, 270 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, 271 { ARM::VLD3DUPd8Pseudo, 
ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, 272 { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, 273 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 274 { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true}, 275 { ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 276 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 277 { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true}, 278 { ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 279 { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true}, 280 { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true}, 281 { ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 282 283 { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, 284 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 285 { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, 286 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 287 { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, 288 { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 289 { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 290 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 291 { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 292 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 293 294 { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 
4 ,true}, 295 { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 296 { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, 297 { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 298 { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, 299 { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 300 301 { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 302 { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, 303 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 304 { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 305 { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, 306 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 307 { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, 308 { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, 309 { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 310 311 { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, 312 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, 313 { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, 314 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, 315 { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, 316 { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, 317 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 318 { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true}, 319 
{ ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 320 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 321 { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true}, 322 { ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 323 { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true}, 324 { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true}, 325 { ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 326 327 { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, 328 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 329 { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, 330 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 331 { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, 332 { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 333 { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 334 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 335 { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 336 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 337 338 { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, 339 { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 340 { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, 341 { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 342 { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 
4, 8 ,true}, 343 { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 344 345 { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 346 { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, 347 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 348 { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 349 { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, 350 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 351 { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, 352 { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, 353 { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 354 355 { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, 356 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, 357 { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, 358 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, 359 { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, 360 { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, 361 362 { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false}, 363 { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 364 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false}, 365 { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false}, 366 { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false}, 367 { 
ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false}, 368 369 { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false}, 370 { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 371 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false}, 372 { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false}, 373 { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false}, 374 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false}, 375 376 { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, 377 { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, 378 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, 379 { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, 380 { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, 381 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, 382 383 { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false}, 384 { ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 385 { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false}, 386 { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false}, 387 { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false}, 388 { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false}, 389 390 { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false}, 
391 { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 392 { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false}, 393 { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false}, 394 { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false}, 395 { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false}, 396 397 { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false}, 398 { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 399 { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false}, 400 { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false}, 401 { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false}, 402 { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false}, 403 404 { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false}, 405 { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 406 { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false}, 407 { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false}, 408 { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false}, 409 { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false}, 410 411 { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false}, 412 { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 413 { ARM::VST1q8HighTPseudo, 
ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false}, 414 { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false}, 415 { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false}, 416 { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false}, 417 418 { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, 419 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, 420 { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, 421 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, 422 { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, 423 { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, 424 { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, 425 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, 426 { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, 427 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, 428 429 { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, 430 { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 431 { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, 432 { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, 433 { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 434 { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, 435 { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, 436 { 
ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 437 { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, 438 439 { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, 440 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 441 { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, 442 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 443 { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, 444 { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 445 { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, 446 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, 447 { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, 448 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, 449 450 { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, 451 { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 452 { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, 453 { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 454 { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, 455 { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 456 457 { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, 458 { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, 459 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, 460 { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, 
EvenDblSpc, 3, 2 ,true}, 461 { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, 462 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, 463 { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, 464 { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, 465 { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, 466 467 { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, 468 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 469 { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, 470 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 471 { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, 472 { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 473 { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, 474 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, 475 { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, 476 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, 477 478 { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, 479 { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 480 { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, 481 { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 482 { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, 483 { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 484 485 { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, 
EvenDblSpc, 4, 4 ,true}, 486 { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true}, 487 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true}, 488 { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true}, 489 { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true}, 490 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true}, 491 { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true}, 492 { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true}, 493 { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true} 494 }; 495 496 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON 497 /// load or store pseudo instruction. 498 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { 499 #ifndef NDEBUG 500 // Make sure the table is sorted. 501 static std::atomic<bool> TableChecked(false); 502 if (!TableChecked.load(std::memory_order_relaxed)) { 503 assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!"); 504 TableChecked.store(true, std::memory_order_relaxed); 505 } 506 #endif 507 508 auto I = llvm::lower_bound(NEONLdStTable, Opcode); 509 if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode) 510 return I; 511 return nullptr; 512 } 513 514 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, 515 /// corresponding to the specified register spacing. Not all of the results 516 /// are necessarily valid, e.g., a Q register only has 2 D subregisters. 
517 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, 518 const TargetRegisterInfo *TRI, MCRegister &D0, 519 MCRegister &D1, MCRegister &D2, MCRegister &D3) { 520 if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) { 521 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 522 D1 = TRI->getSubReg(Reg, ARM::dsub_1); 523 D2 = TRI->getSubReg(Reg, ARM::dsub_2); 524 D3 = TRI->getSubReg(Reg, ARM::dsub_3); 525 } else if (RegSpc == SingleHighQSpc) { 526 D0 = TRI->getSubReg(Reg, ARM::dsub_4); 527 D1 = TRI->getSubReg(Reg, ARM::dsub_5); 528 D2 = TRI->getSubReg(Reg, ARM::dsub_6); 529 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 530 } else if (RegSpc == SingleHighTSpc) { 531 D0 = TRI->getSubReg(Reg, ARM::dsub_3); 532 D1 = TRI->getSubReg(Reg, ARM::dsub_4); 533 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 534 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 535 } else if (RegSpc == EvenDblSpc) { 536 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 537 D1 = TRI->getSubReg(Reg, ARM::dsub_2); 538 D2 = TRI->getSubReg(Reg, ARM::dsub_4); 539 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 540 } else { 541 assert(RegSpc == OddDblSpc && "unknown register spacing"); 542 D0 = TRI->getSubReg(Reg, ARM::dsub_1); 543 D1 = TRI->getSubReg(Reg, ARM::dsub_3); 544 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 545 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 546 } 547 } 548 549 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register 550 /// operands to real VLD instructions with D register operands. 
551 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { 552 MachineInstr &MI = *MBBI; 553 MachineBasicBlock &MBB = *MI.getParent(); 554 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 555 556 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 557 assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); 558 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 559 unsigned NumRegs = TableEntry->NumRegs; 560 561 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 562 TII->get(TableEntry->RealOpc)); 563 unsigned OpIdx = 0; 564 565 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 566 Register DstReg = MI.getOperand(OpIdx++).getReg(); 567 568 bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 || 569 TableEntry->RealOpc == ARM::VLD2DUPd16x2 || 570 TableEntry->RealOpc == ARM::VLD2DUPd32x2 || 571 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed || 572 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed || 573 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed || 574 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register || 575 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register || 576 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register; 577 578 if (IsVLD2DUP) { 579 unsigned SubRegIndex; 580 if (RegSpc == EvenDblSpc) { 581 SubRegIndex = ARM::dsub_0; 582 } else { 583 assert(RegSpc == OddDblSpc && "Unexpected spacing!"); 584 SubRegIndex = ARM::dsub_1; 585 } 586 Register SubReg = TRI->getSubReg(DstReg, SubRegIndex); 587 MCRegister DstRegPair = 588 TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, &ARM::DPairSpcRegClass); 589 MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead)); 590 } else { 591 MCRegister D0, D1, D2, D3; 592 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); 593 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); 594 if (NumRegs > 1 && TableEntry->copyAllListRegs) 595 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 596 if (NumRegs > 2 && 
TableEntry->copyAllListRegs) 597 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); 598 if (NumRegs > 3 && TableEntry->copyAllListRegs) 599 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); 600 } 601 602 if (TableEntry->isUpdating) 603 MIB.add(MI.getOperand(OpIdx++)); 604 605 // Copy the addrmode6 operands. 606 MIB.add(MI.getOperand(OpIdx++)); 607 MIB.add(MI.getOperand(OpIdx++)); 608 609 // Copy the am6offset operand. 610 if (TableEntry->hasWritebackOperand) { 611 // TODO: The writing-back pseudo instructions we translate here are all 612 // defined to take am6offset nodes that are capable to represent both fixed 613 // and register forms. Some real instructions, however, do not rely on 614 // am6offset and have separate definitions for such forms. When this is the 615 // case, fixed forms do not take any offset nodes, so here we skip them for 616 // such instructions. Once all real and pseudo writing-back instructions are 617 // rewritten without use of am6offset nodes, this code will go away. 
618 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 619 if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed || 620 TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed || 621 TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed || 622 TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed || 623 TableEntry->RealOpc == ARM::VLD1d8Twb_fixed || 624 TableEntry->RealOpc == ARM::VLD1d16Twb_fixed || 625 TableEntry->RealOpc == ARM::VLD1d32Twb_fixed || 626 TableEntry->RealOpc == ARM::VLD1d64Twb_fixed || 627 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed || 628 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed || 629 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) { 630 assert(AM6Offset.getReg() == 0 && 631 "A fixed writing-back pseudo instruction provides an offset " 632 "register!"); 633 } else { 634 MIB.add(AM6Offset); 635 } 636 } 637 638 // For an instruction writing double-spaced subregs, the pseudo instruction 639 // has an extra operand that is a use of the super-register. Record the 640 // operand index and skip over it. 641 unsigned SrcOpIdx = 0; 642 if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc || 643 RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc) 644 SrcOpIdx = OpIdx++; 645 646 // Copy the predicate operands. 647 MIB.add(MI.getOperand(OpIdx++)); 648 MIB.add(MI.getOperand(OpIdx++)); 649 650 // Copy the super-register source operand used for double-spaced subregs over 651 // to the new instruction as an implicit operand. 652 if (SrcOpIdx != 0) { 653 MachineOperand MO = MI.getOperand(SrcOpIdx); 654 MO.setImplicit(true); 655 MIB.add(MO); 656 } 657 // Add an implicit def for the super-register. 658 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 659 MIB.copyImplicitOps(MI); 660 661 // Transfer memoperands. 
662 MIB.cloneMemRefs(MI); 663 MI.eraseFromParent(); 664 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 665 } 666 667 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register 668 /// operands to real VST instructions with D register operands. 669 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { 670 MachineInstr &MI = *MBBI; 671 MachineBasicBlock &MBB = *MI.getParent(); 672 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 673 674 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 675 assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); 676 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 677 unsigned NumRegs = TableEntry->NumRegs; 678 679 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 680 TII->get(TableEntry->RealOpc)); 681 unsigned OpIdx = 0; 682 if (TableEntry->isUpdating) 683 MIB.add(MI.getOperand(OpIdx++)); 684 685 // Copy the addrmode6 operands. 686 MIB.add(MI.getOperand(OpIdx++)); 687 MIB.add(MI.getOperand(OpIdx++)); 688 689 if (TableEntry->hasWritebackOperand) { 690 // TODO: The writing-back pseudo instructions we translate here are all 691 // defined to take am6offset nodes that are capable to represent both fixed 692 // and register forms. Some real instructions, however, do not rely on 693 // am6offset and have separate definitions for such forms. When this is the 694 // case, fixed forms do not take any offset nodes, so here we skip them for 695 // such instructions. Once all real and pseudo writing-back instructions are 696 // rewritten without use of am6offset nodes, this code will go away. 
697 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 698 if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed || 699 TableEntry->RealOpc == ARM::VST1d16Qwb_fixed || 700 TableEntry->RealOpc == ARM::VST1d32Qwb_fixed || 701 TableEntry->RealOpc == ARM::VST1d64Qwb_fixed || 702 TableEntry->RealOpc == ARM::VST1d8Twb_fixed || 703 TableEntry->RealOpc == ARM::VST1d16Twb_fixed || 704 TableEntry->RealOpc == ARM::VST1d32Twb_fixed || 705 TableEntry->RealOpc == ARM::VST1d64Twb_fixed) { 706 assert(AM6Offset.getReg() == 0 && 707 "A fixed writing-back pseudo instruction provides an offset " 708 "register!"); 709 } else { 710 MIB.add(AM6Offset); 711 } 712 } 713 714 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 715 bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); 716 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 717 MCRegister D0, D1, D2, D3; 718 GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); 719 MIB.addReg(D0, getUndefRegState(SrcIsUndef)); 720 if (NumRegs > 1 && TableEntry->copyAllListRegs) 721 MIB.addReg(D1, getUndefRegState(SrcIsUndef)); 722 if (NumRegs > 2 && TableEntry->copyAllListRegs) 723 MIB.addReg(D2, getUndefRegState(SrcIsUndef)); 724 if (NumRegs > 3 && TableEntry->copyAllListRegs) 725 MIB.addReg(D3, getUndefRegState(SrcIsUndef)); 726 727 // Copy the predicate operands. 728 MIB.add(MI.getOperand(OpIdx++)); 729 MIB.add(MI.getOperand(OpIdx++)); 730 731 if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. 732 MIB->addRegisterKilled(SrcReg, TRI, true); 733 else if (!SrcIsUndef) 734 MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg. 735 MIB.copyImplicitOps(MI); 736 737 // Transfer memoperands. 738 MIB.cloneMemRefs(MI); 739 MI.eraseFromParent(); 740 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 741 } 742 743 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ 744 /// register operands to real instructions with D register operands. 
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;
  unsigned RegElts = TableEntry->RegElts;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;
  // The lane operand is always the 3rd from last operand, before the 2
  // predicate operands.
  unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();

  // Adjust the lane and spacing as needed for Q registers.
  // A lane index at or beyond RegElts is rebased by RegElts and the odd
  // D sub-registers are selected instead of the even ones.
  assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
  if (RegSpc == EvenDblSpc && Lane >= RegElts) {
    RegSpc = OddDblSpc;
    Lane -= RegElts;
  }
  assert(Lane < RegElts && "out of range lane for VLD/VST-lane");

  MCRegister D0, D1, D2, D3;
  unsigned DstReg = 0;
  bool DstIsDead = false;
  // Loads define the first NumRegs D sub-registers of the destination.
  if (TableEntry->IsLoad) {
    DstIsDead = MI.getOperand(OpIdx).isDead();
    DstReg = MI.getOperand(OpIdx++).getReg();
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));
  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand)
    MIB.add(MI.getOperand(OpIdx++));

  // Grab the super-register source.
  // For stores this operand also determines D0-D3; for loads they were
  // already computed from the destination above.
  MachineOperand MO = MI.getOperand(OpIdx++);
  if (!TableEntry->IsLoad)
    GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);

  // Add the subregs as sources of the new instruction.
  unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
                       getKillRegState(MO.isKill()));
  MIB.addReg(D0, SrcFlags);
  if (NumRegs > 1)
    MIB.addReg(D1, SrcFlags);
  if (NumRegs > 2)
    MIB.addReg(D2, SrcFlags);
  if (NumRegs > 3)
    MIB.addReg(D3, SrcFlags);

  // Add the lane number operand.
  // The (possibly rebased) lane replaces the pseudo's own lane operand, which
  // is skipped by bumping OpIdx.
  MIB.addImm(Lane);
  OpIdx += 1;

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source to be an implicit source.
  MO.setImplicit(true);
  MIB.add(MO);
  if (TableEntry->IsLoad)
    // Add an implicit def for the super-register.
    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  MIB.copyImplicitOps(MI);
  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
///
/// \p Opc is the real opcode to emit. When \p IsExt is true the pseudo carries
/// one additional source operand right after the destination, which is copied
/// through unchanged.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                                 unsigned Opc, bool IsExt) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
  unsigned OpIdx = 0;

  // Transfer the destination register operand.
  MIB.add(MI.getOperand(OpIdx++));
  if (IsExt) {
    MachineOperand VdSrc(MI.getOperand(OpIdx++));
    MIB.add(VdSrc);
  }

  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  MCRegister D0, D1, D2, D3;
  GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
  // Only the first D sub-register is named explicitly; the whole table
  // register is attached as an implicit use further down.
  MIB.addReg(D0);

  // Copy the other source register operand.
  MachineOperand VmSrc(MI.getOperand(OpIdx++));
  MIB.add(VmSrc);

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Add an implicit kill and use for the super-reg.
  MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
  MIB.copyImplicitOps(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// ExpandMQQPRLoadStore - Replace an MQQPR/MQQQQPR load or store pseudo with a
/// VLDMDIA or VSTMDIA that lists the D sub-registers of the pseudo's register
/// operand: dsub_0..dsub_3, plus dsub_4..dsub_7 for the MQQQQPR variants.
void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  // Stores become VSTMDIA; every other opcode routed here becomes VLDMDIA.
  unsigned NewOpc =
      MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
          ? ARM::VSTMDIA
          : ARM::VLDMDIA;
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));

  // Operand 0 is the wide register; its kill/def state is propagated to every
  // D sub-register added below.
  unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
                   getDefRegState(MI.getOperand(0).isDef());
  Register SrcReg = MI.getOperand(0).getReg();

  // Copy the destination register.
  MIB.add(MI.getOperand(1));
  MIB.add(predOps(ARMCC::AL));
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
  if (MI.getOpcode() == ARM::MQQQQPRStore ||
      MI.getOpcode() == ARM::MQQQQPRLoad) {
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
  }

  // A store additionally reads the whole wide register.
  if (NewOpc == ARM::VSTMDIA)
    MIB.addReg(SrcReg, RegState::Implicit);

  MIB.copyImplicitOps(MI);
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

/// IsAnAddressOperand - Conservatively decide whether \p MO may be a symbol
/// reference. Returns false only for operand kinds listed below as known
/// non-addresses.
static bool IsAnAddressOperand(const MachineOperand &MO) {
  // This check is overly conservative. Unless we are certain that the machine
  // operand is not a symbol reference, we return that it is a symbol reference.
  // This is important as the load pair may not be split up on Windows.
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
  case MachineOperand::MO_Immediate:
  case MachineOperand::MO_CImmediate:
  case MachineOperand::MO_FPImmediate:
  case MachineOperand::MO_ShuffleMask:
    return false;
  case MachineOperand::MO_MachineBasicBlock:
    return true;
  case MachineOperand::MO_FrameIndex:
    return false;
  case MachineOperand::MO_ConstantPoolIndex:
  case MachineOperand::MO_TargetIndex:
  case MachineOperand::MO_JumpTableIndex:
  case MachineOperand::MO_ExternalSymbol:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_BlockAddress:
    return true;
  case MachineOperand::MO_RegisterMask:
  case MachineOperand::MO_RegisterLiveOut:
    return false;
  case MachineOperand::MO_Metadata:
  case MachineOperand::MO_MCSymbol:
    return true;
  case MachineOperand::MO_DbgInstrRef:
  case MachineOperand::MO_CFIIndex:
    return false;
  case MachineOperand::MO_IntrinsicID:
  case MachineOperand::MO_Predicate:
    llvm_unreachable("should not exist post-isel");
  }
  llvm_unreachable("unhandled machine operand type");
}

/// makeImplicit - Return a copy of \p MO that is marked implicit; \p MO itself
/// is left untouched.
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand NewMO = MO;
  NewMO.setImplicit();
  return NewMO;
}

/// getMovOperand - Build the operand for one piece of a split 32-bit move.
///
/// For an immediate \p MO the bits selected by \p TargetFlag are extracted
/// (one byte for the MO_HI_*/MO_LO_* byte flags, 16 bits for MO_HI16/MO_LO16)
/// and returned as a new immediate. For symbolic operands the operand is
/// recreated with \p TargetFlag OR'ed into its existing target flags.
static MachineOperand getMovOperand(const MachineOperand &MO,
                                    unsigned TargetFlag) {
  unsigned TF = MO.getTargetFlags() | TargetFlag;
  switch (MO.getType()) {
  case MachineOperand::MO_Immediate: {
    unsigned Imm = MO.getImm();
    switch (TargetFlag) {
    case ARMII::MO_HI_8_15:
      Imm = (Imm >> 24) & 0xff;
      break;
    case ARMII::MO_HI_0_7:
      Imm = (Imm >> 16) & 0xff;
      break;
    case ARMII::MO_LO_8_15:
      Imm = (Imm >> 8) & 0xff;
      break;
    case ARMII::MO_LO_0_7:
      Imm = Imm & 0xff;
      break;
    case ARMII::MO_HI16:
      Imm = (Imm >> 16) & 0xffff;
      break;
    case ARMII::MO_LO16:
      Imm = Imm & 0xffff;
      break;
    default:
      llvm_unreachable("Only HI/LO target flags are expected");
    }
    return MachineOperand::CreateImm(Imm);
  }
  case MachineOperand::MO_ExternalSymbol:
    return MachineOperand::CreateES(MO.getSymbolName(), TF);
  case MachineOperand::MO_JumpTableIndex:
    return MachineOperand::CreateJTI(MO.getIndex(), TF);
  default:
    // NOTE(review): every remaining operand kind is treated as a global
    // address here; presumably callers never pass anything else - confirm.
    return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF);
  }
}

/// ExpandTMOV32BitImm - Expand a 32-bit move pseudo into a sequence of
/// tMOVi8 / tLSLri / tADDi8 instructions that assembles the value one byte at
/// a time, most significant byte first.
void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  const MachineOperand &MO = MI.getOperand(1);
  unsigned MIFlags = MI.getFlags();

  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We
  // want to avoid emitting any zero bytes, as they won't change the result, and
  // also don't want any pointless shifts, so instead of immediately emitting
  // the shift for a byte we keep track of how much we will need to shift and do
  // it before the next nonzero byte.
  unsigned PendingShift = 0;
  for (unsigned Byte = 0; Byte < 4; ++Byte) {
    unsigned Flag = Byte == 0   ? ARMII::MO_HI_8_15
                    : Byte == 1 ? ARMII::MO_HI_0_7
                    : Byte == 2 ? ARMII::MO_LO_8_15
                                : ARMII::MO_LO_0_7;
    MachineOperand Operand = getMovOperand(MO, Flag);
    bool ZeroImm = Operand.isImm() && Operand.getImm() == 0;
    // A pending shift means an earlier byte was already emitted, so this byte
    // has to be added to the register rather than moved into it.
    unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8;

    // Emit the pending shift if we're going to emit this byte or if we've
    // reached the end.
    if (PendingShift && (!ZeroImm || Byte == 3)) {
      MachineInstr *Lsl =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg)
              .add(t1CondCodeOp(true))
              .addReg(DstReg)
              .addImm(PendingShift)
              .add(predOps(ARMCC::AL))
              .setMIFlags(MIFlags);
      (void)Lsl;
      LLVM_DEBUG(dbgs() << "And: "; Lsl->dump(););
      PendingShift = 0;
    }

    // Emit this byte if it's nonzero.
    if (!ZeroImm) {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg)
              .add(t1CondCodeOp(true));
      if (Op == ARM::tADDi8)
        MIB.addReg(DstReg);
      MIB.add(Operand);
      MIB.add(predOps(ARMCC::AL));
      MIB.setMIFlags(MIFlags);
      LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " ";
                 MIB.getInstr()->dump(););
    }

    // Don't accumulate the shift value if we've not yet seen a nonzero byte.
    if (PendingShift || !ZeroImm)
      PendingShift += 8;
  }

  // The dest is dead on the last instruction we emitted if it was dead on the
  // original instruction.
  (--MBBI)->getOperand(0).setIsDead(DstIsDead);

  MI.eraseFromParent();
}

/// ExpandMOV32BitImm - Expand a 32-bit move pseudo (plain or conditional, ARM
/// or Thumb2 flavour) into a pair of real instructions.
///
/// Without V6T2 support the ARM-mode pseudos become MOVi+ORRri or MVNi+SUBri
/// on two shifter-operand immediates. Otherwise a MOVi16/MOVTi16 (or
/// t2MOVi16/t2MOVTi16) pair is emitted, with the MOVT omitted when the high
/// half is an immediate zero. On Windows targets the emitted instructions are
/// bundled when the source operand may be an address.
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  // The conditional forms carry one extra operand before the value (operand 1),
  // which is re-attached to the first emitted instruction as an implicit
  // operand below.
  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
  bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
  MachineInstrBuilder LO16, HI16;
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  if (!STI->hasV6T2Ops() &&
      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    unsigned ImmVal = (unsigned)MO.getImm();
    unsigned SOImmValV1 = 0, SOImmValV2 = 0;

    if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    } else { // Expand into a mvn + sub.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg);
      // The two parts are taken from the negated value; the first one is then
      // rewritten into the immediate that MVN has to be given.
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
      SOImmValV1 = ~(-SOImmValV1);
    }

    unsigned MIFlags = MI.getFlags();
    LO16 = LO16.addImm(SOImmValV1);
    HI16 = HI16.addImm(SOImmValV2);
    LO16.cloneMemRefs(MI);
    HI16.cloneMemRefs(MI);
    LO16.setMIFlags(MIFlags);
    HI16.setMIFlags(MIFlags);
    LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    if (isCC)
      LO16.add(makeImplicit(MI.getOperand(1)));
    LO16.copyImplicitOps(MI);
    HI16.copyImplicitOps(MI);
    MI.eraseFromParent();
    return;
  }

  unsigned LO16Opc = 0;
  unsigned HI16Opc = 0;
  unsigned MIFlags = MI.getFlags();
  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    LO16Opc = ARM::t2MOVi16;
    HI16Opc = ARM::t2MOVTi16;
  } else {
    LO16Opc = ARM::MOVi16;
    HI16Opc = ARM::MOVTi16;
  }

  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  LO16.setMIFlags(MIFlags);
  LO16.add(getMovOperand(MO, ARMII::MO_LO16));
  LO16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  if (isCC)
    LO16.add(makeImplicit(MI.getOperand(1)));
  LO16.copyImplicitOps(MI);
  LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););

  // The high-half instruction is only needed when the high half is not a
  // known-zero immediate; otherwise the low-half move is the last writer and
  // inherits the dead flag of the destination.
  MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16);
  if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) {
    HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
               .addReg(DstReg);
    HI16.setMIFlags(MIFlags);
    HI16.add(HIOperand);
    HI16.cloneMemRefs(MI);
    HI16.addImm(Pred).addReg(PredReg);
    HI16.copyImplicitOps(MI);
    LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
  } else {
    LO16->getOperand(0).setIsDead(DstIsDead);
  }

  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  MI.eraseFromParent();
}

// The size of the area, accessed by that VLSTM/VLLDM
// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
static const int CMSE_FP_SAVE_SIZE = 136;

/// determineGPRegsToClear - Append to \p ClearRegs every register of \p Regs
/// that is not a register use operand of \p MI.
///
/// The result is computed with std::set_difference, which expects both input
/// ranges to be sorted: the operand registers are sorted here, so \p Regs is
/// expected to be passed in already sorted.
static void determineGPRegsToClear(const MachineInstr &MI,
                                   const std::initializer_list<unsigned> &Regs,
                                   SmallVectorImpl<unsigned> &ClearRegs) {
  SmallVector<unsigned, 4> OpRegs;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
      continue;
    OpRegs.push_back(Op.getReg());
  }
  llvm::sort(OpRegs);

  std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
                      std::back_inserter(ClearRegs));
}

/// CMSEClearGPRegs - Emit, before \p MBBI, code that clears the registers in
/// \p ClearRegs.
///
/// With v8.1-M Mainline a single t2CLRM is emitted that also defines APSR.
/// Otherwise \p ClobberReg is copied into every listed register (except
/// itself) and then written with t2MSR_M.
void ARMExpandPseudo::CMSEClearGPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
    unsigned ClobberReg) {

  if (STI->hasV8_1MMainlineOps()) {
    // Clear the registers using the CLRM instruction.
    MachineInstrBuilder CLRM =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
    for (unsigned R : ClearRegs)
      CLRM.addReg(R, RegState::Define);
    CLRM.addReg(ARM::APSR, RegState::Define);
    CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
  } else {
    // Clear the registers and flags by copying ClobberReg into them.
    // (Baseline can't do a high register clear in one instruction).
    for (unsigned Reg : ClearRegs) {
      if (Reg == ClobberReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
          .addReg(ClobberReg)
          .add(predOps(ARMCC::AL));
    }

    // The MSR mask operand differs depending on whether the subtarget has DSP.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
        .addImm(STI->hasDSP() ? 0xc00 : 0x800)
        .addReg(ClobberReg)
        .add(predOps(ARMCC::AL));
  }
}

// Find which FP registers need to be cleared. The parameter `ClearRegs` is
// initialised with all elements set to true, and this function resets all the
// bits, which correspond to register uses. Returns true if any floating point
// register is defined, false otherwise.
static bool determineFPRegsToClear(const MachineInstr &MI,
                                   BitVector &ClearRegs) {
  bool DefFP = false;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;

    Register Reg = Op.getReg();
    // Definitions only feed the return value; they never change ClearRegs.
    if (Op.isDef()) {
      if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
          (Reg >= ARM::D0 && Reg <= ARM::D15) ||
          (Reg >= ARM::S0 && Reg <= ARM::S31))
        DefFP = true;
      continue;
    }

    // ClearRegs is indexed by S-register number, so a used Q register maps to
    // four consecutive bits and a used D register to two.
    if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
      int R = Reg - ARM::Q0;
      ClearRegs.reset(R * 4, (R + 1) * 4);
    } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
      int R = Reg - ARM::D0;
      ClearRegs.reset(R * 2, (R + 1) * 2);
    } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
      ClearRegs[Reg - ARM::S0] = false;
    }
  }
  return DefFP;
}

/// CMSEClearFPRegs - Emit code that clears the FP registers not used by the
/// instruction at \p MBBI, dispatching to the v8.1-M or v8.0-M implementation.
/// Returns the block reported by that implementation.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI) {
  // Start with 16 bits, all marked for clearing, and drop the bits whose
  // registers the instruction uses.
  BitVector ClearRegs(16, true);
  (void)determineFPRegsToClear(*MBBI, ClearRegs);

  if (STI->hasV8_1MMainlineOps())
    return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
  else
    return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
}

// Clear the FP registers for v8.0-M, by copying over the content
// of LR. Uses R12 as a scratch register.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   const BitVector &ClearRegs) {
  // Nothing to clear without FP registers.
  if (!STI->hasFPRegs())
    return MBB;

  auto &RetI = *MBBI;
  const DebugLoc &DL = RetI.getDebugLoc();

  // If optimising for minimum size, clear FP registers unconditionally.
  // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
  // don't clear them if they belong to the non-secure state.
  MachineBasicBlock *ClearBB, *DoneBB;
  if (STI->hasMinSize()) {
    // NOTE(review): in this path ClearBB is MBB itself; confirm where the
    // BuildMI(ClearBB, ...) calls below insert relative to MBBI.
    ClearBB = DoneBB = &MBB;
  } else {
    // Split the block: MBB keeps everything before MBBI and gains the SFPA
    // test, ClearBB holds the clearing code, and DoneBB receives MBBI and
    // everything after it together with MBB's former successors.
    MachineFunction *MF = MBB.getParent();
    ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
    DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

    MF->insert(++MBB.getIterator(), ClearBB);
    MF->insert(++ClearBB->getIterator(), DoneBB);

    DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
    DoneBB->transferSuccessors(&MBB);
    MBB.addSuccessor(ClearBB);
    MBB.addSuccessor(DoneBB);
    ClearBB->addSuccessor(DoneBB);

    // At the new basic blocks we need to have live-in the registers, used
    // for the return value as well as LR, used to clear registers.
    for (const MachineOperand &Op : RetI.operands()) {
      if (!Op.isReg())
        continue;
      Register Reg = Op.getReg();
      if (Reg == ARM::NoRegister || Reg == ARM::LR)
        continue;
      assert(Reg.isPhysical() && "Unallocated register");
      ClearBB->addLiveIn(Reg);
      DoneBB->addLiveIn(Reg);
    }
    ClearBB->addLiveIn(ARM::LR);
    DoneBB->addLiveIn(ARM::LR);

    // Read the CONTROL register.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
        .addImm(20)
        .add(predOps(ARMCC::AL));
    // Check bit 3 (SFPA).
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
        .addReg(ARM::R12)
        .addImm(8)
        .add(predOps(ARMCC::AL));
    // If SFPA is clear, jump over ClearBB to DoneBB.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
        .addMBB(DoneBB)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR, RegState::Kill);
  }

  // Emit the clearing sequence
  // ClearRegs holds one bit per S register; D<n> overlays bits 2n and 2n+1,
  // so a D register is written in one go when both of its halves are marked.
  for (unsigned D = 0; D < 8; D++) {
    // Attempt to clear as double
    if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
      unsigned Reg = ARM::D0 + D;
      BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(ARM::LR)
          .addReg(ARM::LR)
          .add(predOps(ARMCC::AL));
    } else {
      // Clear first part as single
      if (ClearRegs[D * 2 + 0]) {
        unsigned Reg = ARM::S0 + D * 2;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
      // Clear second part as single
      if (ClearRegs[D * 2 + 1]) {
        unsigned Reg = ARM::S0 + D * 2 + 1;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
    }
  }

  // Clear FPSCR bits 0-4, 7, 28-31
  // The other bits are program global according to the AAPCS
  // (read FPSCR into R12, mask with two BICs, write it back).
  BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
      .add(predOps(ARMCC::AL));
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0x0000009F)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0xF0000000)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
      .addReg(ARM::R12)
      .add(predOps(ARMCC::AL));

  return *DoneBB;
}

// Clear the FP registers for v8.1-M using VSCCLRM. All instructions are
// inserted before MBBI and no new blocks are created, so MBB itself is
// returned.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const BitVector &ClearRegs) {
  auto &RetI = *MBBI;

  // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
  // each contiguous sequence of S-registers.
  // Start is an exclusive lower bound: the current run is S(Start+1)..S(End),
  // and Start == End means there is no run to emit.
  int Start = -1, End = -1;
  for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
    if (ClearRegs[S] && S == End + 1) {
      End = S; // extend range
      continue;
    }
    // Emit current range.
    if (Start < End) {
      MachineInstrBuilder VSCCLRM =
          BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
              .add(predOps(ARMCC::AL));
      while (++Start <= End)
        VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
      VSCCLRM.addReg(ARM::VPR, RegState::Define);
    }
    // S is not part of any run; restart the search just after it.
    Start = End = S;
  }
  // Emit last range.
  if (Start < End) {
    MachineInstrBuilder VSCCLRM =
        BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
            .add(predOps(ARMCC::AL));
    while (++Start <= End)
      VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
    VSCCLRM.addReg(ARM::VPR, RegState::Define);
  }

  return MBB;
}

// Save and clear the FP registers, dispatching on the architecture version:
// the v8.1-M Mainline variant is preferred, then the v8-M Mainline one.
// Nothing is emitted when the subtarget has neither.
void ARMExpandPseudo::CMSESaveClearFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
  else if (STI->hasV8MMainlineOps())
    CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}

// Save and clear FP registers if present
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {

  // Store an available register for FPSCR clearing
  assert(!ScratchRegs.empty());
  unsigned SpareReg = ScratchRegs.front();

  // save space on stack for VLSTM
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
.add(predOps(ARMCC::AL)); 1424 1425 // Use ScratchRegs to store the fp regs 1426 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs; 1427 std::vector<unsigned> NonclearedFPRegs; 1428 bool ReturnsFPReg = false; 1429 for (const MachineOperand &Op : MBBI->operands()) { 1430 if (Op.isReg() && Op.isUse()) { 1431 Register Reg = Op.getReg(); 1432 assert(!ARM::DPRRegClass.contains(Reg) || 1433 ARM::DPR_VFP2RegClass.contains(Reg)); 1434 assert(!ARM::QPRRegClass.contains(Reg)); 1435 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1436 if (ScratchRegs.size() >= 2) { 1437 unsigned SaveReg2 = ScratchRegs.pop_back_val(); 1438 unsigned SaveReg1 = ScratchRegs.pop_back_val(); 1439 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1440 1441 // Save the fp register to the normal registers 1442 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1443 .addReg(SaveReg1, RegState::Define) 1444 .addReg(SaveReg2, RegState::Define) 1445 .addReg(Reg) 1446 .add(predOps(ARMCC::AL)); 1447 } else { 1448 NonclearedFPRegs.push_back(Reg); 1449 } 1450 } else if (ARM::SPRRegClass.contains(Reg)) { 1451 if (ScratchRegs.size() >= 1) { 1452 unsigned SaveReg = ScratchRegs.pop_back_val(); 1453 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1454 1455 // Save the fp register to the normal registers 1456 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1457 .addReg(Reg) 1458 .add(predOps(ARMCC::AL)); 1459 } else { 1460 NonclearedFPRegs.push_back(Reg); 1461 } 1462 } 1463 } else if (Op.isReg() && Op.isDef()) { 1464 Register Reg = Op.getReg(); 1465 if (ARM::SPRRegClass.contains(Reg) || ARM::DPRRegClass.contains(Reg) || 1466 ARM::QPRRegClass.contains(Reg)) 1467 ReturnsFPReg = true; 1468 } 1469 } 1470 1471 bool PassesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1472 1473 if (PassesFPReg || ReturnsFPReg) 1474 assert(STI->hasFPRegs() && "Subtarget needs fpregs"); 1475 1476 // CVE-2024-7883 1477 // 1478 // The VLLDM/VLSTM instructions set up lazy state preservation, but they 1479 // 
execute as NOPs if the FP register file is not considered to contain 1480 // secure data, represented by the CONTROL_S.SFPA bit. This means that the 1481 // state of CONTROL_S.SFPA must be the same when these two instructions are 1482 // executed. That might not be the case if we haven't used any FP 1483 // instructions before the VLSTM, so CONTROL_S.SFPA is clear, but do have one 1484 // before the VLLDM, which sets it.. 1485 // 1486 // If we can't prove that SFPA will be the same for the VLSTM and VLLDM, we 1487 // execute a "vmov s0, s0" instruction before the VLSTM to ensure that 1488 // CONTROL_S.SFPA is set for both. 1489 // 1490 // That can only happen for callees which take no FP arguments (or we'd have 1491 // inserted a VMOV above) and which return values in FP regs (so that we need 1492 // to use a VMOV to back-up the return value before the VLLDM). It also can't 1493 // happen if the call is dominated by other existing floating-point 1494 // instructions, but we don't currently check for that case. 1495 // 1496 // These conditions mean that we only emit this instruction when using the 1497 // hard-float ABI, which means we can assume that FP instructions are 1498 // available, and don't need to make it conditional like we do for the 1499 // CVE-2021-35465 workaround. 1500 if (ReturnsFPReg && !PassesFPReg) { 1501 bool S0Dead = !LiveRegs.contains(ARM::S0); 1502 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVS)) 1503 .addReg(ARM::S0, RegState::Define | getDeadRegState(S0Dead)) 1504 .addReg(ARM::S0, getUndefRegState(S0Dead)) 1505 .add(predOps(ARMCC::AL)); 1506 } 1507 1508 // Lazy store all fp registers to the stack. 1509 // This executes as NOP in the absence of floating-point support. 1510 MachineInstrBuilder VLSTM = 1511 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1512 .addReg(ARM::SP) 1513 .add(predOps(ARMCC::AL)) 1514 .addImm(0); // Represents a pseoudo register list, has no effect on 1515 // the encoding. 
1516 // Mark non-live registers as undef 1517 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1518 if (MO.isReg() && !MO.isDef()) { 1519 Register Reg = MO.getReg(); 1520 MO.setIsUndef(!LiveRegs.contains(Reg)); 1521 } 1522 } 1523 1524 // Restore all arguments 1525 for (const auto &Regs : ClearedFPRegs) { 1526 unsigned Reg, SaveReg1, SaveReg2; 1527 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1528 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1529 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1530 .addReg(SaveReg1) 1531 .addReg(SaveReg2) 1532 .add(predOps(ARMCC::AL)); 1533 else if (ARM::SPRRegClass.contains(Reg)) 1534 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1535 .addReg(SaveReg1) 1536 .add(predOps(ARMCC::AL)); 1537 } 1538 1539 for (unsigned Reg : NonclearedFPRegs) { 1540 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1541 if (STI->isLittle()) { 1542 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg) 1543 .addReg(ARM::SP) 1544 .addImm((Reg - ARM::D0) * 2) 1545 .add(predOps(ARMCC::AL)); 1546 } else { 1547 // For big-endian targets we need to load the two subregisters of Reg 1548 // manually because VLDRD would load them in wrong order 1549 MCRegister SReg0 = TRI->getSubReg(Reg, ARM::ssub_0); 1550 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0) 1551 .addReg(ARM::SP) 1552 .addImm((Reg - ARM::D0) * 2) 1553 .add(predOps(ARMCC::AL)); 1554 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1) 1555 .addReg(ARM::SP) 1556 .addImm((Reg - ARM::D0) * 2 + 1) 1557 .add(predOps(ARMCC::AL)); 1558 } 1559 } else if (ARM::SPRRegClass.contains(Reg)) { 1560 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg) 1561 .addReg(ARM::SP) 1562 .addImm(Reg - ARM::S0) 1563 .add(predOps(ARMCC::AL)); 1564 } 1565 } 1566 // restore FPSCR from stack and clear bits 0-4, 7, 28-31 1567 // The other bits are program global according to the AAPCS 1568 if (PassesFPReg) { 1569 BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg) 1570 .addReg(ARM::SP) 1571 .addImm(0x10) 1572 
.add(predOps(ARMCC::AL)); 1573 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1574 .addReg(SpareReg) 1575 .addImm(0x0000009F) 1576 .add(predOps(ARMCC::AL)) 1577 .add(condCodeOp()); 1578 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1579 .addReg(SpareReg) 1580 .addImm(0xF0000000) 1581 .add(predOps(ARMCC::AL)) 1582 .add(condCodeOp()); 1583 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR)) 1584 .addReg(SpareReg) 1585 .add(predOps(ARMCC::AL)); 1586 // The ldr must happen after a floating point instruction. To prevent the 1587 // post-ra scheduler to mess with the order, we create a bundle. 1588 finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator()); 1589 } 1590 } 1591 1592 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 1593 MachineBasicBlock::iterator MBBI, 1594 DebugLoc &DL, 1595 const LivePhysRegs &LiveRegs) { 1596 BitVector ClearRegs(32, true); 1597 bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs); 1598 1599 // If the instruction does not write to a FP register and no elements were 1600 // removed from the set, then no FP registers were used to pass 1601 // arguments/returns. 1602 if (!DefFP && ClearRegs.count() == ClearRegs.size()) { 1603 // save space on stack for VLSTM 1604 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) 1605 .addReg(ARM::SP) 1606 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1607 .add(predOps(ARMCC::AL)); 1608 1609 // Lazy store all FP registers to the stack 1610 MachineInstrBuilder VLSTM = 1611 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1612 .addReg(ARM::SP) 1613 .add(predOps(ARMCC::AL)) 1614 .addImm(0); // Represents a pseoudo register list, has no effect on 1615 // the encoding. 1616 // Mark non-live registers as undef 1617 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1618 if (MO.isReg() && !MO.isDef()) { 1619 Register Reg = MO.getReg(); 1620 MO.setIsUndef(!LiveRegs.contains(Reg)); 1621 } 1622 } 1623 } else { 1624 // Push all the callee-saved registers (s16-s31). 
1625 MachineInstrBuilder VPUSH = 1626 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP) 1627 .addReg(ARM::SP) 1628 .add(predOps(ARMCC::AL)); 1629 for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg) 1630 VPUSH.addReg(Reg); 1631 1632 // Clear FP registers with a VSCCLRM. 1633 (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); 1634 1635 // Save floating-point context. 1636 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP) 1637 .addReg(ARM::SP) 1638 .addImm(-8) 1639 .add(predOps(ARMCC::AL)); 1640 } 1641 } 1642 1643 // Restore FP registers if present 1644 void ARMExpandPseudo::CMSERestoreFPRegs( 1645 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1646 SmallVectorImpl<unsigned> &AvailableRegs) { 1647 if (STI->hasV8_1MMainlineOps()) 1648 CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs); 1649 else if (STI->hasV8MMainlineOps()) 1650 CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs); 1651 } 1652 1653 void ARMExpandPseudo::CMSERestoreFPRegsV8( 1654 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1655 SmallVectorImpl<unsigned> &AvailableRegs) { 1656 1657 // Keep a scratch register for the mitigation sequence. 
1658 unsigned ScratchReg = ARM::NoRegister; 1659 if (STI->fixCMSE_CVE_2021_35465()) 1660 ScratchReg = AvailableRegs.pop_back_val(); 1661 1662 // Use AvailableRegs to store the fp regs 1663 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs; 1664 std::vector<unsigned> NonclearedFPRegs; 1665 for (const MachineOperand &Op : MBBI->operands()) { 1666 if (Op.isReg() && Op.isDef()) { 1667 Register Reg = Op.getReg(); 1668 assert(!ARM::DPRRegClass.contains(Reg) || 1669 ARM::DPR_VFP2RegClass.contains(Reg)); 1670 assert(!ARM::QPRRegClass.contains(Reg)); 1671 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1672 if (AvailableRegs.size() >= 2) { 1673 unsigned SaveReg2 = AvailableRegs.pop_back_val(); 1674 unsigned SaveReg1 = AvailableRegs.pop_back_val(); 1675 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1676 1677 // Save the fp register to the normal registers 1678 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1679 .addReg(SaveReg1, RegState::Define) 1680 .addReg(SaveReg2, RegState::Define) 1681 .addReg(Reg) 1682 .add(predOps(ARMCC::AL)); 1683 } else { 1684 NonclearedFPRegs.push_back(Reg); 1685 } 1686 } else if (ARM::SPRRegClass.contains(Reg)) { 1687 if (AvailableRegs.size() >= 1) { 1688 unsigned SaveReg = AvailableRegs.pop_back_val(); 1689 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1690 1691 // Save the fp register to the normal registers 1692 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1693 .addReg(Reg) 1694 .add(predOps(ARMCC::AL)); 1695 } else { 1696 NonclearedFPRegs.push_back(Reg); 1697 } 1698 } 1699 } 1700 } 1701 1702 bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1703 1704 if (returnsFPReg) 1705 assert(STI->hasFPRegs() && "Subtarget needs fpregs"); 1706 1707 // Push FP regs that cannot be restored via normal registers on the stack 1708 for (unsigned Reg : NonclearedFPRegs) { 1709 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1710 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD)) 1711 .addReg(Reg) 1712 .addReg(ARM::SP) 
1713 .addImm((Reg - ARM::D0) * 2) 1714 .add(predOps(ARMCC::AL)); 1715 else if (ARM::SPRRegClass.contains(Reg)) 1716 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS)) 1717 .addReg(Reg) 1718 .addReg(ARM::SP) 1719 .addImm(Reg - ARM::S0) 1720 .add(predOps(ARMCC::AL)); 1721 } 1722 1723 // Lazy load fp regs from stack. 1724 // This executes as NOP in the absence of floating-point support. 1725 MachineInstrBuilder VLLDM = 1726 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM)) 1727 .addReg(ARM::SP) 1728 .add(predOps(ARMCC::AL)) 1729 .addImm(0); // Represents a pseoudo register list, has no effect on 1730 // the encoding. 1731 1732 if (STI->fixCMSE_CVE_2021_35465()) { 1733 auto Bundler = MIBundleBuilder(MBB, VLLDM); 1734 // Read the CONTROL register. 1735 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M)) 1736 .addReg(ScratchReg, RegState::Define) 1737 .addImm(20) 1738 .add(predOps(ARMCC::AL))); 1739 // Check bit 3 (SFPA). 1740 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri)) 1741 .addReg(ScratchReg) 1742 .addImm(8) 1743 .add(predOps(ARMCC::AL))); 1744 // Emit the IT block. 1745 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT)) 1746 .addImm(ARMCC::NE) 1747 .addImm(8)); 1748 // If SFPA is clear jump over to VLLDM, otherwise execute an instruction 1749 // which has no functional effect apart from causing context creation: 1750 // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40, 1751 // which is defined as NOP if not executed. 
1752 if (STI->hasFPRegs()) 1753 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS)) 1754 .addReg(ARM::S0, RegState::Define) 1755 .addReg(ARM::S0, RegState::Undef) 1756 .add(predOps(ARMCC::NE))); 1757 else 1758 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM)) 1759 .addExternalSymbol(".inst.w 0xeeb00a40") 1760 .addImm(InlineAsm::Extra_HasSideEffects)); 1761 finalizeBundle(MBB, Bundler.begin(), Bundler.end()); 1762 } 1763 1764 // Restore all FP registers via normal registers 1765 for (const auto &Regs : ClearedFPRegs) { 1766 unsigned Reg, SaveReg1, SaveReg2; 1767 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1768 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1769 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1770 .addReg(SaveReg1) 1771 .addReg(SaveReg2) 1772 .add(predOps(ARMCC::AL)); 1773 else if (ARM::SPRRegClass.contains(Reg)) 1774 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1775 .addReg(SaveReg1) 1776 .add(predOps(ARMCC::AL)); 1777 } 1778 1779 // Pop the stack space 1780 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP) 1781 .addReg(ARM::SP) 1782 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1783 .add(predOps(ARMCC::AL)); 1784 } 1785 1786 static bool definesOrUsesFPReg(const MachineInstr &MI) { 1787 for (const MachineOperand &Op : MI.operands()) { 1788 if (!Op.isReg()) 1789 continue; 1790 Register Reg = Op.getReg(); 1791 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || 1792 (Reg >= ARM::D0 && Reg <= ARM::D15) || 1793 (Reg >= ARM::S0 && Reg <= ARM::S31)) 1794 return true; 1795 } 1796 return false; 1797 } 1798 1799 void ARMExpandPseudo::CMSERestoreFPRegsV81( 1800 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1801 SmallVectorImpl<unsigned> &AvailableRegs) { 1802 if (!definesOrUsesFPReg(*MBBI)) { 1803 if (STI->fixCMSE_CVE_2021_35465()) { 1804 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS)) 1805 .add(predOps(ARMCC::AL)) 1806 .addReg(ARM::VPR, RegState::Define); 1807 } 1808 1809 // Load FP registers from stack. 
1810 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM)) 1811 .addReg(ARM::SP) 1812 .add(predOps(ARMCC::AL)) 1813 .addImm(0); // Represents a pseoudo register list, has no effect on the 1814 // encoding. 1815 1816 // Pop the stack space 1817 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP) 1818 .addReg(ARM::SP) 1819 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1820 .add(predOps(ARMCC::AL)); 1821 } else { 1822 // Restore the floating point context. 1823 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post), 1824 ARM::SP) 1825 .addReg(ARM::SP) 1826 .addImm(8) 1827 .add(predOps(ARMCC::AL)); 1828 1829 // Pop all the callee-saved registers (s16-s31). 1830 MachineInstrBuilder VPOP = 1831 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP) 1832 .addReg(ARM::SP) 1833 .add(predOps(ARMCC::AL)); 1834 for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg) 1835 VPOP.addReg(Reg, RegState::Define); 1836 } 1837 } 1838 1839 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as 1840 /// possible. This only gets used at -O0 so we don't care about efficiency of 1841 /// the generated code. 1842 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, 1843 MachineBasicBlock::iterator MBBI, 1844 unsigned LdrexOp, unsigned StrexOp, 1845 unsigned UxtOp, 1846 MachineBasicBlock::iterator &NextMBBI) { 1847 bool IsThumb = STI->isThumb(); 1848 bool IsThumb1Only = STI->isThumb1Only(); 1849 MachineInstr &MI = *MBBI; 1850 DebugLoc DL = MI.getDebugLoc(); 1851 const MachineOperand &Dest = MI.getOperand(0); 1852 Register TempReg = MI.getOperand(1).getReg(); 1853 // Duplicating undef operands into 2 instructions does not guarantee the same 1854 // value on both; However undef should be replaced by xzr anyway. 
1855 assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); 1856 Register AddrReg = MI.getOperand(2).getReg(); 1857 Register DesiredReg = MI.getOperand(3).getReg(); 1858 Register NewReg = MI.getOperand(4).getReg(); 1859 1860 if (IsThumb) { 1861 assert(STI->hasV8MBaselineOps() && 1862 "CMP_SWAP not expected to be custom expanded for Thumb1"); 1863 assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && 1864 "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); 1865 assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) && 1866 "DesiredReg used for UXT op must be tGPR"); 1867 } 1868 1869 MachineFunction *MF = MBB.getParent(); 1870 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1871 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1872 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1873 1874 MF->insert(++MBB.getIterator(), LoadCmpBB); 1875 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 1876 MF->insert(++StoreBB->getIterator(), DoneBB); 1877 1878 if (UxtOp) { 1879 MachineInstrBuilder MIB = 1880 BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) 1881 .addReg(DesiredReg, RegState::Kill); 1882 if (!IsThumb) 1883 MIB.addImm(0); 1884 MIB.add(predOps(ARMCC::AL)); 1885 } 1886 1887 // .Lloadcmp: 1888 // ldrex rDest, [rAddr] 1889 // cmp rDest, rDesired 1890 // bne .Ldone 1891 1892 MachineInstrBuilder MIB; 1893 MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); 1894 MIB.addReg(AddrReg); 1895 if (LdrexOp == ARM::t2LDREX) 1896 MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. 1897 MIB.add(predOps(ARMCC::AL)); 1898 1899 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; 1900 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 1901 .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) 1902 .addReg(DesiredReg) 1903 .add(predOps(ARMCC::AL)); 1904 unsigned Bcc = IsThumb ? 
ARM::tBcc : ARM::Bcc; 1905 BuildMI(LoadCmpBB, DL, TII->get(Bcc)) 1906 .addMBB(DoneBB) 1907 .addImm(ARMCC::NE) 1908 .addReg(ARM::CPSR, RegState::Kill); 1909 LoadCmpBB->addSuccessor(DoneBB); 1910 LoadCmpBB->addSuccessor(StoreBB); 1911 1912 // .Lstore: 1913 // strex rTempReg, rNew, [rAddr] 1914 // cmp rTempReg, #0 1915 // bne .Lloadcmp 1916 MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) 1917 .addReg(NewReg) 1918 .addReg(AddrReg); 1919 if (StrexOp == ARM::t2STREX) 1920 MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. 1921 MIB.add(predOps(ARMCC::AL)); 1922 1923 unsigned CMPri = 1924 IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri; 1925 BuildMI(StoreBB, DL, TII->get(CMPri)) 1926 .addReg(TempReg, RegState::Kill) 1927 .addImm(0) 1928 .add(predOps(ARMCC::AL)); 1929 BuildMI(StoreBB, DL, TII->get(Bcc)) 1930 .addMBB(LoadCmpBB) 1931 .addImm(ARMCC::NE) 1932 .addReg(ARM::CPSR, RegState::Kill); 1933 StoreBB->addSuccessor(LoadCmpBB); 1934 StoreBB->addSuccessor(DoneBB); 1935 1936 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 1937 DoneBB->transferSuccessors(&MBB); 1938 1939 MBB.addSuccessor(LoadCmpBB); 1940 1941 NextMBBI = MBB.end(); 1942 MI.eraseFromParent(); 1943 1944 // Recompute livein lists. 1945 LivePhysRegs LiveRegs; 1946 computeAndAddLiveIns(LiveRegs, *DoneBB); 1947 computeAndAddLiveIns(LiveRegs, *StoreBB); 1948 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 1949 // Do an extra pass around the loop to get loop carried registers right. 1950 StoreBB->clearLiveIns(); 1951 computeAndAddLiveIns(LiveRegs, *StoreBB); 1952 LoadCmpBB->clearLiveIns(); 1953 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 1954 1955 return true; 1956 } 1957 1958 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a 1959 /// single GPRPair register), Thumb's take two separate registers so we need to 1960 /// extract the subregs from the pair. 
1961 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, 1962 unsigned Flags, bool IsThumb, 1963 const TargetRegisterInfo *TRI) { 1964 if (IsThumb) { 1965 Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); 1966 Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); 1967 MIB.addReg(RegLo, Flags); 1968 MIB.addReg(RegHi, Flags); 1969 } else 1970 MIB.addReg(Reg.getReg(), Flags); 1971 } 1972 1973 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop. 1974 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, 1975 MachineBasicBlock::iterator MBBI, 1976 MachineBasicBlock::iterator &NextMBBI) { 1977 bool IsThumb = STI->isThumb(); 1978 assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!"); 1979 MachineInstr &MI = *MBBI; 1980 DebugLoc DL = MI.getDebugLoc(); 1981 MachineOperand &Dest = MI.getOperand(0); 1982 // Duplicating undef operands into 2 instructions does not guarantee the same 1983 // value on both; However undef should be replaced by xzr anyway. 
1984 assert(!MI.getOperand(1).isUndef() && "cannot handle undef"); 1985 Register AddrAndTempReg = MI.getOperand(1).getReg(); 1986 Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0); 1987 Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1); 1988 assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() && 1989 "tied operands have different registers"); 1990 Register DesiredReg = MI.getOperand(3).getReg(); 1991 MachineOperand New = MI.getOperand(4); 1992 New.setIsKill(false); 1993 1994 Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); 1995 Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); 1996 Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); 1997 Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); 1998 1999 MachineFunction *MF = MBB.getParent(); 2000 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 2001 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 2002 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 2003 2004 MF->insert(++MBB.getIterator(), LoadCmpBB); 2005 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 2006 MF->insert(++StoreBB->getIterator(), DoneBB); 2007 2008 // .Lloadcmp: 2009 // ldrexd rDestLo, rDestHi, [rAddr] 2010 // cmp rDestLo, rDesiredLo 2011 // sbcs dead rTempReg, rDestHi, rDesiredHi 2012 // bne .Ldone 2013 unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; 2014 MachineInstrBuilder MIB; 2015 MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); 2016 addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); 2017 MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); 2018 2019 unsigned CMPrr = IsThumb ? 
ARM::tCMPhir : ARM::CMPrr; 2020 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 2021 .addReg(DestLo, getKillRegState(Dest.isDead())) 2022 .addReg(DesiredLo) 2023 .add(predOps(ARMCC::AL)); 2024 2025 BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) 2026 .addReg(DestHi, getKillRegState(Dest.isDead())) 2027 .addReg(DesiredHi) 2028 .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill); 2029 2030 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; 2031 BuildMI(LoadCmpBB, DL, TII->get(Bcc)) 2032 .addMBB(DoneBB) 2033 .addImm(ARMCC::NE) 2034 .addReg(ARM::CPSR, RegState::Kill); 2035 LoadCmpBB->addSuccessor(DoneBB); 2036 LoadCmpBB->addSuccessor(StoreBB); 2037 2038 // .Lstore: 2039 // strexd rTempReg, rNewLo, rNewHi, [rAddr] 2040 // cmp rTempReg, #0 2041 // bne .Lloadcmp 2042 unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; 2043 MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); 2044 unsigned Flags = getKillRegState(New.isDead()); 2045 addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI); 2046 MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); 2047 2048 unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; 2049 BuildMI(StoreBB, DL, TII->get(CMPri)) 2050 .addReg(TempReg, RegState::Kill) 2051 .addImm(0) 2052 .add(predOps(ARMCC::AL)); 2053 BuildMI(StoreBB, DL, TII->get(Bcc)) 2054 .addMBB(LoadCmpBB) 2055 .addImm(ARMCC::NE) 2056 .addReg(ARM::CPSR, RegState::Kill); 2057 StoreBB->addSuccessor(LoadCmpBB); 2058 StoreBB->addSuccessor(DoneBB); 2059 2060 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 2061 DoneBB->transferSuccessors(&MBB); 2062 2063 MBB.addSuccessor(LoadCmpBB); 2064 2065 NextMBBI = MBB.end(); 2066 MI.eraseFromParent(); 2067 2068 // Recompute livein lists. 2069 LivePhysRegs LiveRegs; 2070 computeAndAddLiveIns(LiveRegs, *DoneBB); 2071 computeAndAddLiveIns(LiveRegs, *StoreBB); 2072 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 2073 // Do an extra pass around the loop to get loop carried registers right. 
2074 StoreBB->clearLiveIns(); 2075 computeAndAddLiveIns(LiveRegs, *StoreBB); 2076 LoadCmpBB->clearLiveIns(); 2077 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 2078 2079 return true; 2080 } 2081 2082 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, 2083 MachineBasicBlock &MBB, 2084 MachineBasicBlock::iterator MBBI, 2085 Register JumpReg, const LivePhysRegs &LiveRegs, 2086 bool Thumb1Only) { 2087 const DebugLoc &DL = MBBI->getDebugLoc(); 2088 if (Thumb1Only) { // push Lo and Hi regs separately 2089 MachineInstrBuilder PushMIB = 2090 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 2091 for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) { 2092 PushMIB.addReg( 2093 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); 2094 } 2095 2096 // Thumb1 can only tPUSH low regs, so we copy the high regs to the low 2097 // regs that we just saved and push the low regs again, taking care to 2098 // not clobber JumpReg. If JumpReg is one of the low registers, push first 2099 // the values of r9-r11, and then r8. That would leave them ordered in 2100 // memory, and allow us to later pop them with a single instructions. 2101 // FIXME: Could also use any of r0-r3 that are free (including in the 2102 // first PUSH above). 2103 for (unsigned LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; 2104 --LoReg) { 2105 if (JumpReg == LoReg) 2106 continue; 2107 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) 2108 .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef) 2109 .add(predOps(ARMCC::AL)); 2110 --HiReg; 2111 } 2112 MachineInstrBuilder PushMIB2 = 2113 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 2114 for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) { 2115 if (Reg == JumpReg) 2116 continue; 2117 PushMIB2.addReg(Reg, RegState::Kill); 2118 } 2119 2120 // If we couldn't use a low register for temporary storage (because it was 2121 // the JumpReg), use r4 or r5, whichever is not JumpReg. 
It has already been 2122 // saved. 2123 if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) { 2124 Register LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4; 2125 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) 2126 .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef) 2127 .add(predOps(ARMCC::AL)); 2128 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)) 2129 .add(predOps(ARMCC::AL)) 2130 .addReg(LoReg, RegState::Kill); 2131 } 2132 } else { // push Lo and Hi registers with a single instruction 2133 MachineInstrBuilder PushMIB = 2134 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP) 2135 .addReg(ARM::SP) 2136 .add(predOps(ARMCC::AL)); 2137 for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) { 2138 PushMIB.addReg( 2139 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); 2140 } 2141 } 2142 } 2143 2144 static void CMSEPopCalleeSaves(const TargetInstrInfo &TII, 2145 MachineBasicBlock &MBB, 2146 MachineBasicBlock::iterator MBBI, 2147 bool Thumb1Only) { 2148 const DebugLoc &DL = MBBI->getDebugLoc(); 2149 if (Thumb1Only) { 2150 MachineInstrBuilder PopMIB = 2151 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 2152 for (int R = 0; R < 4; ++R) { 2153 PopMIB.addReg(ARM::R4 + R, RegState::Define); 2154 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R) 2155 .addReg(ARM::R4 + R, RegState::Kill) 2156 .add(predOps(ARMCC::AL)); 2157 } 2158 MachineInstrBuilder PopMIB2 = 2159 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 2160 for (int R = 0; R < 4; ++R) 2161 PopMIB2.addReg(ARM::R4 + R, RegState::Define); 2162 } else { // pop Lo and Hi registers with a single instruction 2163 MachineInstrBuilder PopMIB = 2164 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP) 2165 .addReg(ARM::SP) 2166 .add(predOps(ARMCC::AL)); 2167 for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) 2168 PopMIB.addReg(Reg, RegState::Define); 2169 } 2170 } 2171 2172 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, 2173 
MachineBasicBlock::iterator MBBI, 2174 MachineBasicBlock::iterator &NextMBBI) { 2175 MachineInstr &MI = *MBBI; 2176 unsigned Opcode = MI.getOpcode(); 2177 switch (Opcode) { 2178 default: 2179 return false; 2180 2181 case ARM::VBSPd: 2182 case ARM::VBSPq: { 2183 Register DstReg = MI.getOperand(0).getReg(); 2184 if (DstReg == MI.getOperand(3).getReg()) { 2185 // Expand to VBIT 2186 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq; 2187 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2188 .add(MI.getOperand(0)) 2189 .add(MI.getOperand(3)) 2190 .add(MI.getOperand(2)) 2191 .add(MI.getOperand(1)) 2192 .addImm(MI.getOperand(4).getImm()) 2193 .add(MI.getOperand(5)); 2194 } else if (DstReg == MI.getOperand(2).getReg()) { 2195 // Expand to VBIF 2196 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; 2197 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2198 .add(MI.getOperand(0)) 2199 .add(MI.getOperand(2)) 2200 .add(MI.getOperand(3)) 2201 .add(MI.getOperand(1)) 2202 .addImm(MI.getOperand(4).getImm()) 2203 .add(MI.getOperand(5)); 2204 } else { 2205 // Expand to VBSL 2206 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; 2207 if (DstReg == MI.getOperand(1).getReg()) { 2208 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2209 .add(MI.getOperand(0)) 2210 .add(MI.getOperand(1)) 2211 .add(MI.getOperand(2)) 2212 .add(MI.getOperand(3)) 2213 .addImm(MI.getOperand(4).getImm()) 2214 .add(MI.getOperand(5)); 2215 } else { 2216 // Use move to satisfy constraints 2217 unsigned MoveOpc = Opcode == ARM::VBSPd ? 
ARM::VORRd : ARM::VORRq; 2218 unsigned MO1Flags = getRegState(MI.getOperand(1)) & ~RegState::Kill; 2219 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) 2220 .addReg(DstReg, 2221 RegState::Define | 2222 getRenamableRegState(MI.getOperand(0).isRenamable())) 2223 .addReg(MI.getOperand(1).getReg(), MO1Flags) 2224 .addReg(MI.getOperand(1).getReg(), MO1Flags) 2225 .addImm(MI.getOperand(4).getImm()) 2226 .add(MI.getOperand(5)); 2227 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2228 .add(MI.getOperand(0)) 2229 .addReg(DstReg, 2230 RegState::Kill | 2231 getRenamableRegState(MI.getOperand(0).isRenamable())) 2232 .add(MI.getOperand(2)) 2233 .add(MI.getOperand(3)) 2234 .addImm(MI.getOperand(4).getImm()) 2235 .add(MI.getOperand(5)); 2236 } 2237 } 2238 MI.eraseFromParent(); 2239 return true; 2240 } 2241 2242 case ARM::TCRETURNdi: 2243 case ARM::TCRETURNri: 2244 case ARM::TCRETURNrinotr12: { 2245 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 2246 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2247 MBBI--; 2248 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2249 MBBI--; 2250 assert(MBBI->isReturn() && 2251 "Can only insert epilog into returning blocks"); 2252 unsigned RetOpcode = MBBI->getOpcode(); 2253 DebugLoc dl = MBBI->getDebugLoc(); 2254 const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( 2255 MBB.getParent()->getSubtarget().getInstrInfo()); 2256 2257 // Tail call return: adjust the stack pointer and jump to callee. 2258 MBBI = MBB.getLastNonDebugInstr(); 2259 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2260 MBBI--; 2261 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2262 MBBI--; 2263 MachineOperand &JumpTarget = MBBI->getOperand(0); 2264 2265 // Jump to label or value in register. 
2266 if (RetOpcode == ARM::TCRETURNdi) { 2267 MachineFunction *MF = MBB.getParent(); 2268 bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && 2269 MF->getFunction().needsUnwindTableEntry(); 2270 unsigned TCOpcode = 2271 STI->isThumb() 2272 ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd 2273 : ARM::tTAILJMPdND) 2274 : ARM::TAILJMPd; 2275 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 2276 if (JumpTarget.isGlobal()) 2277 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 2278 JumpTarget.getTargetFlags()); 2279 else { 2280 assert(JumpTarget.isSymbol()); 2281 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 2282 JumpTarget.getTargetFlags()); 2283 } 2284 2285 // Add the default predicate in Thumb mode. 2286 if (STI->isThumb()) 2287 MIB.add(predOps(ARMCC::AL)); 2288 } else if (RetOpcode == ARM::TCRETURNri || 2289 RetOpcode == ARM::TCRETURNrinotr12) { 2290 unsigned Opcode = 2291 STI->isThumb() ? ARM::tTAILJMPr 2292 : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); 2293 BuildMI(MBB, MBBI, dl, 2294 TII.get(Opcode)) 2295 .addReg(JumpTarget.getReg(), RegState::Kill); 2296 } 2297 2298 auto NewMI = std::prev(MBBI); 2299 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) 2300 NewMI->addOperand(MBBI->getOperand(i)); 2301 2302 // Update call info and delete the pseudo instruction TCRETURN. 2303 if (MI.isCandidateForAdditionalCallInfo()) 2304 MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI); 2305 // Copy nomerge flag over to new instruction. 2306 if (MI.getFlag(MachineInstr::NoMerge)) 2307 NewMI->setFlag(MachineInstr::NoMerge); 2308 MBB.erase(MBBI); 2309 2310 MBBI = NewMI; 2311 return true; 2312 } 2313 case ARM::tBXNS_RET: { 2314 // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which 2315 // uses R12 as a scratch register. 
2316 if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress()) 2317 BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT)); 2318 2319 MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); 2320 2321 if (STI->hasV8_1MMainlineOps()) { 2322 // Restore the non-secure floating point context. 2323 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 2324 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP) 2325 .addReg(ARM::SP) 2326 .addImm(4) 2327 .add(predOps(ARMCC::AL)); 2328 2329 if (AFI->shouldSignReturnAddress()) 2330 BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT)); 2331 } 2332 2333 // Clear all GPR that are not a use of the return instruction. 2334 assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) { 2335 return !Op.isReg() || Op.getReg() != ARM::R12; 2336 })); 2337 SmallVector<unsigned, 5> ClearRegs; 2338 determineGPRegsToClear( 2339 *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs); 2340 CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs, 2341 ARM::LR); 2342 2343 MachineInstrBuilder NewMI = 2344 BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), 2345 TII->get(ARM::tBXNS)) 2346 .addReg(ARM::LR) 2347 .add(predOps(ARMCC::AL)); 2348 for (const MachineOperand &Op : MI.operands()) 2349 NewMI->addOperand(Op); 2350 MI.eraseFromParent(); 2351 return true; 2352 } 2353 case ARM::tBLXNS_CALL: { 2354 DebugLoc DL = MBBI->getDebugLoc(); 2355 Register JumpReg = MBBI->getOperand(0).getReg(); 2356 2357 // Figure out which registers are live at the point immediately before the 2358 // call. When we indiscriminately push a set of registers, the live 2359 // registers are added as ordinary use operands, whereas dead registers 2360 // are "undef". 
2361 LivePhysRegs LiveRegs(*TRI); 2362 LiveRegs.addLiveOuts(MBB); 2363 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse())) 2364 LiveRegs.stepBackward(MI); 2365 LiveRegs.stepBackward(*MBBI); 2366 2367 CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs, 2368 AFI->isThumb1OnlyFunction()); 2369 2370 SmallVector<unsigned, 16> ClearRegs; 2371 determineGPRegsToClear(*MBBI, 2372 {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 2373 ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, 2374 ARM::R10, ARM::R11, ARM::R12}, 2375 ClearRegs); 2376 auto OriginalClearRegs = ClearRegs; 2377 2378 // Get the first cleared register as a scratch (to use later with tBIC). 2379 // We need to use the first so we can ensure it is a low register. 2380 unsigned ScratchReg = ClearRegs.front(); 2381 2382 // Clear LSB of JumpReg 2383 if (AFI->isThumb2Function()) { 2384 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg) 2385 .addReg(JumpReg) 2386 .addImm(1) 2387 .add(predOps(ARMCC::AL)) 2388 .add(condCodeOp()); 2389 } else { 2390 // We need to use an extra register to cope with 8M Baseline, 2391 // since we have saved all of the registers we are ok to trash a non 2392 // argument register here. 
2393 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg) 2394 .add(condCodeOp()) 2395 .addImm(1) 2396 .add(predOps(ARMCC::AL)); 2397 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg) 2398 .addReg(ARM::CPSR, RegState::Define) 2399 .addReg(JumpReg) 2400 .addReg(ScratchReg) 2401 .add(predOps(ARMCC::AL)); 2402 } 2403 2404 CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs, 2405 ClearRegs); // save+clear FP regs with ClearRegs 2406 CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); 2407 2408 const MachineInstrBuilder NewCall = 2409 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr)) 2410 .add(predOps(ARMCC::AL)) 2411 .addReg(JumpReg, RegState::Kill); 2412 2413 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 2414 NewCall->addOperand(MO); 2415 if (MI.isCandidateForAdditionalCallInfo()) 2416 MI.getMF()->moveAdditionalCallInfo(&MI, NewCall.getInstr()); 2417 2418 CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers 2419 2420 CMSEPopCalleeSaves(*TII, MBB, MBBI, AFI->isThumb1OnlyFunction()); 2421 2422 MI.eraseFromParent(); 2423 return true; 2424 } 2425 case ARM::VMOVHcc: 2426 case ARM::VMOVScc: 2427 case ARM::VMOVDcc: { 2428 unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; 2429 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), 2430 MI.getOperand(1).getReg()) 2431 .add(MI.getOperand(2)) 2432 .addImm(MI.getOperand(3).getImm()) // 'pred' 2433 .add(MI.getOperand(4)) 2434 .add(makeImplicit(MI.getOperand(1))); 2435 2436 MI.eraseFromParent(); 2437 return true; 2438 } 2439 case ARM::t2MOVCCr: 2440 case ARM::MOVCCr: { 2441 unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVr : ARM::MOVr; 2442 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2443 MI.getOperand(1).getReg()) 2444 .add(MI.getOperand(2)) 2445 .addImm(MI.getOperand(3).getImm()) // 'pred' 2446 .add(MI.getOperand(4)) 2447 .add(condCodeOp()) // 's' bit 2448 .add(makeImplicit(MI.getOperand(1))); 2449 2450 MI.eraseFromParent(); 2451 return true; 2452 } 2453 case ARM::MOVCCsi: { 2454 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2455 (MI.getOperand(1).getReg())) 2456 .add(MI.getOperand(2)) 2457 .addImm(MI.getOperand(3).getImm()) 2458 .addImm(MI.getOperand(4).getImm()) // 'pred' 2459 .add(MI.getOperand(5)) 2460 .add(condCodeOp()) // 's' bit 2461 .add(makeImplicit(MI.getOperand(1))); 2462 2463 MI.eraseFromParent(); 2464 return true; 2465 } 2466 case ARM::MOVCCsr: { 2467 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), 2468 (MI.getOperand(1).getReg())) 2469 .add(MI.getOperand(2)) 2470 .add(MI.getOperand(3)) 2471 .addImm(MI.getOperand(4).getImm()) 2472 .addImm(MI.getOperand(5).getImm()) // 'pred' 2473 .add(MI.getOperand(6)) 2474 .add(condCodeOp()) // 's' bit 2475 .add(makeImplicit(MI.getOperand(1))); 2476 2477 MI.eraseFromParent(); 2478 return true; 2479 } 2480 case ARM::t2MOVCCi16: 2481 case ARM::MOVCCi16: { 2482 unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; 2483 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2484 MI.getOperand(1).getReg()) 2485 .addImm(MI.getOperand(2).getImm()) 2486 .addImm(MI.getOperand(3).getImm()) // 'pred' 2487 .add(MI.getOperand(4)) 2488 .add(makeImplicit(MI.getOperand(1))); 2489 MI.eraseFromParent(); 2490 return true; 2491 } 2492 case ARM::t2MOVCCi: 2493 case ARM::MOVCCi: { 2494 unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVi : ARM::MOVi; 2495 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2496 MI.getOperand(1).getReg()) 2497 .addImm(MI.getOperand(2).getImm()) 2498 .addImm(MI.getOperand(3).getImm()) // 'pred' 2499 .add(MI.getOperand(4)) 2500 .add(condCodeOp()) // 's' bit 2501 .add(makeImplicit(MI.getOperand(1))); 2502 2503 MI.eraseFromParent(); 2504 return true; 2505 } 2506 case ARM::t2MVNCCi: 2507 case ARM::MVNCCi: { 2508 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; 2509 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2510 MI.getOperand(1).getReg()) 2511 .addImm(MI.getOperand(2).getImm()) 2512 .addImm(MI.getOperand(3).getImm()) // 'pred' 2513 .add(MI.getOperand(4)) 2514 .add(condCodeOp()) // 's' bit 2515 .add(makeImplicit(MI.getOperand(1))); 2516 2517 MI.eraseFromParent(); 2518 return true; 2519 } 2520 case ARM::t2MOVCClsl: 2521 case ARM::t2MOVCClsr: 2522 case ARM::t2MOVCCasr: 2523 case ARM::t2MOVCCror: { 2524 unsigned NewOpc; 2525 switch (Opcode) { 2526 case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; 2527 case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; 2528 case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; 2529 case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; 2530 default: llvm_unreachable("unexpeced conditional move"); 2531 } 2532 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2533 MI.getOperand(1).getReg()) 2534 .add(MI.getOperand(2)) 2535 .addImm(MI.getOperand(3).getImm()) 2536 .addImm(MI.getOperand(4).getImm()) // 'pred' 2537 .add(MI.getOperand(5)) 2538 .add(condCodeOp()) // 's' bit 2539 .add(makeImplicit(MI.getOperand(1))); 2540 MI.eraseFromParent(); 2541 return true; 2542 } 2543 case ARM::Int_eh_sjlj_dispatchsetup: { 2544 MachineFunction &MF = *MI.getParent()->getParent(); 2545 const ARMBaseInstrInfo *AII = 2546 static_cast<const ARMBaseInstrInfo*>(TII); 2547 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 2548 // For functions using a base pointer, we rematerialize it (via the frame 2549 // pointer) here 
since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it 2550 // for us. Otherwise, expand to nothing. 2551 if (RI.hasBasePointer(MF)) { 2552 int32_t NumBytes = AFI->getFramePtrSpillOffset(); 2553 Register FramePtr = RI.getFrameRegister(MF); 2554 assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && 2555 "base pointer without frame pointer?"); 2556 2557 if (AFI->isThumb2Function()) { 2558 emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2559 FramePtr, -NumBytes, ARMCC::AL, 0, *TII); 2560 } else if (AFI->isThumbFunction()) { 2561 emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2562 FramePtr, -NumBytes, *TII, RI); 2563 } else { 2564 emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2565 FramePtr, -NumBytes, ARMCC::AL, 0, 2566 *TII); 2567 } 2568 // If there's dynamic realignment, adjust for it. 2569 if (RI.hasStackRealignment(MF)) { 2570 MachineFrameInfo &MFI = MF.getFrameInfo(); 2571 Align MaxAlign = MFI.getMaxAlign(); 2572 assert (!AFI->isThumb1OnlyFunction()); 2573 // Emit bic r6, r6, MaxAlign 2574 assert(MaxAlign <= Align(256) && 2575 "The BIC instruction cannot encode " 2576 "immediates larger than 256 with all lower " 2577 "bits set."); 2578 unsigned bicOpc = AFI->isThumbFunction() ? 2579 ARM::t2BICri : ARM::BICri; 2580 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) 2581 .addReg(ARM::R6, RegState::Kill) 2582 .addImm(MaxAlign.value() - 1) 2583 .add(predOps(ARMCC::AL)) 2584 .add(condCodeOp()); 2585 } 2586 } 2587 MI.eraseFromParent(); 2588 return true; 2589 } 2590 2591 case ARM::LSRs1: 2592 case ARM::ASRs1: { 2593 // These are just fancy MOVs instructions. 2594 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2595 MI.getOperand(0).getReg()) 2596 .add(MI.getOperand(1)) 2597 .addImm(ARM_AM::getSORegOpc( 2598 (Opcode == ARM::LSRs1 ? 
ARM_AM::lsr : ARM_AM::asr), 1)) 2599 .add(predOps(ARMCC::AL)) 2600 .addReg(ARM::CPSR, RegState::Define); 2601 MI.eraseFromParent(); 2602 return true; 2603 } 2604 case ARM::RRX: { 2605 // This encodes as "MOVs Rd, Rm, rrx 2606 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2607 MI.getOperand(0).getReg()) 2608 .add(MI.getOperand(1)) 2609 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) 2610 .add(predOps(ARMCC::AL)) 2611 .add(condCodeOp()) 2612 .copyImplicitOps(MI); 2613 MI.eraseFromParent(); 2614 return true; 2615 } 2616 case ARM::tTPsoft: 2617 case ARM::TPsoft: { 2618 const bool Thumb = Opcode == ARM::tTPsoft; 2619 2620 MachineInstrBuilder MIB; 2621 MachineFunction *MF = MBB.getParent(); 2622 if (STI->genLongCalls()) { 2623 MachineConstantPool *MCP = MF->getConstantPool(); 2624 unsigned PCLabelID = AFI->createPICLabelUId(); 2625 MachineConstantPoolValue *CPV = 2626 ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), 2627 "__aeabi_read_tp", PCLabelID, 0); 2628 Register Reg = MI.getOperand(0).getReg(); 2629 MIB = 2630 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2631 TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) 2632 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2633 if (!Thumb) 2634 MIB.addImm(0); 2635 MIB.add(predOps(ARMCC::AL)); 2636 2637 MIB = 2638 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2639 TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); 2640 if (Thumb) 2641 MIB.add(predOps(ARMCC::AL)); 2642 MIB.addReg(Reg, RegState::Kill); 2643 } else { 2644 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2645 TII->get(Thumb ? ARM::tBL : ARM::BL)); 2646 if (Thumb) 2647 MIB.add(predOps(ARMCC::AL)); 2648 MIB.addExternalSymbol("__aeabi_read_tp", 0); 2649 } 2650 2651 MIB.cloneMemRefs(MI); 2652 MIB.copyImplicitOps(MI); 2653 // Update the call info. 
2654 if (MI.isCandidateForAdditionalCallInfo()) 2655 MF->moveAdditionalCallInfo(&MI, &*MIB); 2656 MI.eraseFromParent(); 2657 return true; 2658 } 2659 case ARM::tLDRpci_pic: 2660 case ARM::t2LDRpci_pic: { 2661 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) 2662 ? ARM::tLDRpci : ARM::t2LDRpci; 2663 Register DstReg = MI.getOperand(0).getReg(); 2664 bool DstIsDead = MI.getOperand(0).isDead(); 2665 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) 2666 .add(MI.getOperand(1)) 2667 .add(predOps(ARMCC::AL)) 2668 .cloneMemRefs(MI) 2669 .copyImplicitOps(MI); 2670 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) 2671 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2672 .addReg(DstReg) 2673 .add(MI.getOperand(2)) 2674 .copyImplicitOps(MI); 2675 MI.eraseFromParent(); 2676 return true; 2677 } 2678 2679 case ARM::LDRLIT_ga_abs: 2680 case ARM::LDRLIT_ga_pcrel: 2681 case ARM::LDRLIT_ga_pcrel_ldr: 2682 case ARM::tLDRLIT_ga_abs: 2683 case ARM::t2LDRLIT_ga_pcrel: 2684 case ARM::tLDRLIT_ga_pcrel: { 2685 Register DstReg = MI.getOperand(0).getReg(); 2686 bool DstIsDead = MI.getOperand(0).isDead(); 2687 const MachineOperand &MO1 = MI.getOperand(1); 2688 auto Flags = MO1.getTargetFlags(); 2689 const GlobalValue *GV = MO1.getGlobal(); 2690 bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && 2691 Opcode != ARM::tLDRLIT_ga_abs && 2692 Opcode != ARM::t2LDRLIT_ga_pcrel; 2693 bool IsPIC = 2694 Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; 2695 unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; 2696 if (Opcode == ARM::t2LDRLIT_ga_pcrel) 2697 LDRLITOpc = ARM::t2LDRpci; 2698 unsigned PICAddOpc = 2699 IsARM 2700 ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2701 : ARM::tPICADD; 2702 2703 // We need a new const-pool entry to load from. 
2704 MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); 2705 unsigned ARMPCLabelIndex = 0; 2706 MachineConstantPoolValue *CPV; 2707 2708 if (IsPIC) { 2709 unsigned PCAdj = IsARM ? 8 : 4; 2710 auto Modifier = (Flags & ARMII::MO_GOT) 2711 ? ARMCP::GOT_PREL 2712 : ARMCP::no_modifier; 2713 ARMPCLabelIndex = AFI->createPICLabelUId(); 2714 CPV = ARMConstantPoolConstant::Create( 2715 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier, 2716 /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL); 2717 } else 2718 CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); 2719 2720 MachineInstrBuilder MIB = 2721 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) 2722 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2723 if (IsARM) 2724 MIB.addImm(0); 2725 MIB.add(predOps(ARMCC::AL)); 2726 2727 if (IsPIC) { 2728 MachineInstrBuilder MIB = 2729 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) 2730 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2731 .addReg(DstReg) 2732 .addImm(ARMPCLabelIndex); 2733 2734 if (IsARM) 2735 MIB.add(predOps(ARMCC::AL)); 2736 } 2737 2738 MI.eraseFromParent(); 2739 return true; 2740 } 2741 case ARM::MOV_ga_pcrel: 2742 case ARM::MOV_ga_pcrel_ldr: 2743 case ARM::t2MOV_ga_pcrel: { 2744 // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 2745 unsigned LabelId = AFI->createPICLabelUId(); 2746 Register DstReg = MI.getOperand(0).getReg(); 2747 bool DstIsDead = MI.getOperand(0).isDead(); 2748 const MachineOperand &MO1 = MI.getOperand(1); 2749 const GlobalValue *GV = MO1.getGlobal(); 2750 unsigned TF = MO1.getTargetFlags(); 2751 bool isARM = Opcode != ARM::t2MOV_ga_pcrel; 2752 unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; 2753 unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; 2754 unsigned LO16TF = TF | ARMII::MO_LO16; 2755 unsigned HI16TF = TF | ARMII::MO_HI16; 2756 unsigned PICAddOpc = isARM 2757 ? 
(Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2758 : ARM::tPICADD; 2759 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg) 2760 .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) 2761 .addImm(LabelId) 2762 .copyImplicitOps(MI); 2763 2764 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) 2765 .addReg(DstReg) 2766 .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) 2767 .addImm(LabelId) 2768 .copyImplicitOps(MI); 2769 2770 MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2771 TII->get(PICAddOpc)) 2772 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2773 .addReg(DstReg).addImm(LabelId); 2774 if (isARM) { 2775 MIB3.add(predOps(ARMCC::AL)); 2776 if (Opcode == ARM::MOV_ga_pcrel_ldr) 2777 MIB3.cloneMemRefs(MI); 2778 } 2779 MIB3.copyImplicitOps(MI); 2780 MI.eraseFromParent(); 2781 return true; 2782 } 2783 2784 case ARM::MOVi32imm: 2785 case ARM::MOVCCi32imm: 2786 case ARM::t2MOVi32imm: 2787 case ARM::t2MOVCCi32imm: 2788 ExpandMOV32BitImm(MBB, MBBI); 2789 return true; 2790 2791 case ARM::tMOVi32imm: 2792 ExpandTMOV32BitImm(MBB, MBBI); 2793 return true; 2794 2795 case ARM::tLEApcrelJT: 2796 // Inline jump tables are handled in ARMAsmPrinter. 2797 if (MI.getMF()->getJumpTableInfo()->getEntryKind() == 2798 MachineJumpTableInfo::EK_Inline) 2799 return false; 2800 2801 // Use a 32-bit immediate move to generate the address of the jump table. 
2802 assert(STI->isThumb() && "Non-inline jump tables expected only in thumb"); 2803 ExpandTMOV32BitImm(MBB, MBBI); 2804 return true; 2805 2806 case ARM::SUBS_PC_LR: { 2807 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) 2808 .addReg(ARM::LR) 2809 .add(MI.getOperand(0)) 2810 .add(MI.getOperand(1)) 2811 .add(MI.getOperand(2)) 2812 .addReg(ARM::CPSR, RegState::Undef) 2813 .copyImplicitOps(MI); 2814 MI.eraseFromParent(); 2815 return true; 2816 } 2817 case ARM::VLDMQIA: { 2818 unsigned NewOpc = ARM::VLDMDIA; 2819 MachineInstrBuilder MIB = 2820 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2821 unsigned OpIdx = 0; 2822 2823 // Grab the Q register destination. 2824 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 2825 Register DstReg = MI.getOperand(OpIdx++).getReg(); 2826 2827 // Copy the source register. 2828 MIB.add(MI.getOperand(OpIdx++)); 2829 2830 // Copy the predicate operands. 2831 MIB.add(MI.getOperand(OpIdx++)); 2832 MIB.add(MI.getOperand(OpIdx++)); 2833 2834 // Add the destination operands (D subregs). 2835 Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); 2836 Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); 2837 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) 2838 .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 2839 2840 // Add an implicit def for the super-register. 2841 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 2842 MIB.copyImplicitOps(MI); 2843 MIB.cloneMemRefs(MI); 2844 MI.eraseFromParent(); 2845 return true; 2846 } 2847 2848 case ARM::VSTMQIA: { 2849 unsigned NewOpc = ARM::VSTMDIA; 2850 MachineInstrBuilder MIB = 2851 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2852 unsigned OpIdx = 0; 2853 2854 // Grab the Q register source. 2855 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 2856 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 2857 2858 // Copy the destination register. 
2859 MachineOperand Dst(MI.getOperand(OpIdx++)); 2860 MIB.add(Dst); 2861 2862 // Copy the predicate operands. 2863 MIB.add(MI.getOperand(OpIdx++)); 2864 MIB.add(MI.getOperand(OpIdx++)); 2865 2866 // Add the source operands (D subregs). 2867 Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); 2868 Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); 2869 MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) 2870 .addReg(D1, SrcIsKill ? RegState::Kill : 0); 2871 2872 if (SrcIsKill) // Add an implicit kill for the Q register. 2873 MIB->addRegisterKilled(SrcReg, TRI, true); 2874 2875 MIB.copyImplicitOps(MI); 2876 MIB.cloneMemRefs(MI); 2877 MI.eraseFromParent(); 2878 return true; 2879 } 2880 2881 case ARM::VLD2q8Pseudo: 2882 case ARM::VLD2q16Pseudo: 2883 case ARM::VLD2q32Pseudo: 2884 case ARM::VLD2q8PseudoWB_fixed: 2885 case ARM::VLD2q16PseudoWB_fixed: 2886 case ARM::VLD2q32PseudoWB_fixed: 2887 case ARM::VLD2q8PseudoWB_register: 2888 case ARM::VLD2q16PseudoWB_register: 2889 case ARM::VLD2q32PseudoWB_register: 2890 case ARM::VLD3d8Pseudo: 2891 case ARM::VLD3d16Pseudo: 2892 case ARM::VLD3d32Pseudo: 2893 case ARM::VLD1d8TPseudo: 2894 case ARM::VLD1d8TPseudoWB_fixed: 2895 case ARM::VLD1d8TPseudoWB_register: 2896 case ARM::VLD1d16TPseudo: 2897 case ARM::VLD1d16TPseudoWB_fixed: 2898 case ARM::VLD1d16TPseudoWB_register: 2899 case ARM::VLD1d32TPseudo: 2900 case ARM::VLD1d32TPseudoWB_fixed: 2901 case ARM::VLD1d32TPseudoWB_register: 2902 case ARM::VLD1d64TPseudo: 2903 case ARM::VLD1d64TPseudoWB_fixed: 2904 case ARM::VLD1d64TPseudoWB_register: 2905 case ARM::VLD3d8Pseudo_UPD: 2906 case ARM::VLD3d16Pseudo_UPD: 2907 case ARM::VLD3d32Pseudo_UPD: 2908 case ARM::VLD3q8Pseudo_UPD: 2909 case ARM::VLD3q16Pseudo_UPD: 2910 case ARM::VLD3q32Pseudo_UPD: 2911 case ARM::VLD3q8oddPseudo: 2912 case ARM::VLD3q16oddPseudo: 2913 case ARM::VLD3q32oddPseudo: 2914 case ARM::VLD3q8oddPseudo_UPD: 2915 case ARM::VLD3q16oddPseudo_UPD: 2916 case ARM::VLD3q32oddPseudo_UPD: 2917 case ARM::VLD4d8Pseudo: 2918 case 
ARM::VLD4d16Pseudo: 2919 case ARM::VLD4d32Pseudo: 2920 case ARM::VLD1d8QPseudo: 2921 case ARM::VLD1d8QPseudoWB_fixed: 2922 case ARM::VLD1d8QPseudoWB_register: 2923 case ARM::VLD1d16QPseudo: 2924 case ARM::VLD1d16QPseudoWB_fixed: 2925 case ARM::VLD1d16QPseudoWB_register: 2926 case ARM::VLD1d32QPseudo: 2927 case ARM::VLD1d32QPseudoWB_fixed: 2928 case ARM::VLD1d32QPseudoWB_register: 2929 case ARM::VLD1d64QPseudo: 2930 case ARM::VLD1d64QPseudoWB_fixed: 2931 case ARM::VLD1d64QPseudoWB_register: 2932 case ARM::VLD1q8HighQPseudo: 2933 case ARM::VLD1q8HighQPseudo_UPD: 2934 case ARM::VLD1q8LowQPseudo_UPD: 2935 case ARM::VLD1q8HighTPseudo: 2936 case ARM::VLD1q8HighTPseudo_UPD: 2937 case ARM::VLD1q8LowTPseudo_UPD: 2938 case ARM::VLD1q16HighQPseudo: 2939 case ARM::VLD1q16HighQPseudo_UPD: 2940 case ARM::VLD1q16LowQPseudo_UPD: 2941 case ARM::VLD1q16HighTPseudo: 2942 case ARM::VLD1q16HighTPseudo_UPD: 2943 case ARM::VLD1q16LowTPseudo_UPD: 2944 case ARM::VLD1q32HighQPseudo: 2945 case ARM::VLD1q32HighQPseudo_UPD: 2946 case ARM::VLD1q32LowQPseudo_UPD: 2947 case ARM::VLD1q32HighTPseudo: 2948 case ARM::VLD1q32HighTPseudo_UPD: 2949 case ARM::VLD1q32LowTPseudo_UPD: 2950 case ARM::VLD1q64HighQPseudo: 2951 case ARM::VLD1q64HighQPseudo_UPD: 2952 case ARM::VLD1q64LowQPseudo_UPD: 2953 case ARM::VLD1q64HighTPseudo: 2954 case ARM::VLD1q64HighTPseudo_UPD: 2955 case ARM::VLD1q64LowTPseudo_UPD: 2956 case ARM::VLD4d8Pseudo_UPD: 2957 case ARM::VLD4d16Pseudo_UPD: 2958 case ARM::VLD4d32Pseudo_UPD: 2959 case ARM::VLD4q8Pseudo_UPD: 2960 case ARM::VLD4q16Pseudo_UPD: 2961 case ARM::VLD4q32Pseudo_UPD: 2962 case ARM::VLD4q8oddPseudo: 2963 case ARM::VLD4q16oddPseudo: 2964 case ARM::VLD4q32oddPseudo: 2965 case ARM::VLD4q8oddPseudo_UPD: 2966 case ARM::VLD4q16oddPseudo_UPD: 2967 case ARM::VLD4q32oddPseudo_UPD: 2968 case ARM::VLD3DUPd8Pseudo: 2969 case ARM::VLD3DUPd16Pseudo: 2970 case ARM::VLD3DUPd32Pseudo: 2971 case ARM::VLD3DUPd8Pseudo_UPD: 2972 case ARM::VLD3DUPd16Pseudo_UPD: 2973 case 
ARM::VLD3DUPd32Pseudo_UPD: 2974 case ARM::VLD4DUPd8Pseudo: 2975 case ARM::VLD4DUPd16Pseudo: 2976 case ARM::VLD4DUPd32Pseudo: 2977 case ARM::VLD4DUPd8Pseudo_UPD: 2978 case ARM::VLD4DUPd16Pseudo_UPD: 2979 case ARM::VLD4DUPd32Pseudo_UPD: 2980 case ARM::VLD2DUPq8EvenPseudo: 2981 case ARM::VLD2DUPq8OddPseudo: 2982 case ARM::VLD2DUPq16EvenPseudo: 2983 case ARM::VLD2DUPq16OddPseudo: 2984 case ARM::VLD2DUPq32EvenPseudo: 2985 case ARM::VLD2DUPq32OddPseudo: 2986 case ARM::VLD2DUPq8OddPseudoWB_fixed: 2987 case ARM::VLD2DUPq8OddPseudoWB_register: 2988 case ARM::VLD2DUPq16OddPseudoWB_fixed: 2989 case ARM::VLD2DUPq16OddPseudoWB_register: 2990 case ARM::VLD2DUPq32OddPseudoWB_fixed: 2991 case ARM::VLD2DUPq32OddPseudoWB_register: 2992 case ARM::VLD3DUPq8EvenPseudo: 2993 case ARM::VLD3DUPq8OddPseudo: 2994 case ARM::VLD3DUPq16EvenPseudo: 2995 case ARM::VLD3DUPq16OddPseudo: 2996 case ARM::VLD3DUPq32EvenPseudo: 2997 case ARM::VLD3DUPq32OddPseudo: 2998 case ARM::VLD3DUPq8OddPseudo_UPD: 2999 case ARM::VLD3DUPq16OddPseudo_UPD: 3000 case ARM::VLD3DUPq32OddPseudo_UPD: 3001 case ARM::VLD4DUPq8EvenPseudo: 3002 case ARM::VLD4DUPq8OddPseudo: 3003 case ARM::VLD4DUPq16EvenPseudo: 3004 case ARM::VLD4DUPq16OddPseudo: 3005 case ARM::VLD4DUPq32EvenPseudo: 3006 case ARM::VLD4DUPq32OddPseudo: 3007 case ARM::VLD4DUPq8OddPseudo_UPD: 3008 case ARM::VLD4DUPq16OddPseudo_UPD: 3009 case ARM::VLD4DUPq32OddPseudo_UPD: 3010 ExpandVLD(MBBI); 3011 return true; 3012 3013 case ARM::VST2q8Pseudo: 3014 case ARM::VST2q16Pseudo: 3015 case ARM::VST2q32Pseudo: 3016 case ARM::VST2q8PseudoWB_fixed: 3017 case ARM::VST2q16PseudoWB_fixed: 3018 case ARM::VST2q32PseudoWB_fixed: 3019 case ARM::VST2q8PseudoWB_register: 3020 case ARM::VST2q16PseudoWB_register: 3021 case ARM::VST2q32PseudoWB_register: 3022 case ARM::VST3d8Pseudo: 3023 case ARM::VST3d16Pseudo: 3024 case ARM::VST3d32Pseudo: 3025 case ARM::VST1d8TPseudo: 3026 case ARM::VST1d8TPseudoWB_fixed: 3027 case ARM::VST1d8TPseudoWB_register: 3028 case ARM::VST1d16TPseudo: 3029 
case ARM::VST1d16TPseudoWB_fixed: 3030 case ARM::VST1d16TPseudoWB_register: 3031 case ARM::VST1d32TPseudo: 3032 case ARM::VST1d32TPseudoWB_fixed: 3033 case ARM::VST1d32TPseudoWB_register: 3034 case ARM::VST1d64TPseudo: 3035 case ARM::VST1d64TPseudoWB_fixed: 3036 case ARM::VST1d64TPseudoWB_register: 3037 case ARM::VST3d8Pseudo_UPD: 3038 case ARM::VST3d16Pseudo_UPD: 3039 case ARM::VST3d32Pseudo_UPD: 3040 case ARM::VST3q8Pseudo_UPD: 3041 case ARM::VST3q16Pseudo_UPD: 3042 case ARM::VST3q32Pseudo_UPD: 3043 case ARM::VST3q8oddPseudo: 3044 case ARM::VST3q16oddPseudo: 3045 case ARM::VST3q32oddPseudo: 3046 case ARM::VST3q8oddPseudo_UPD: 3047 case ARM::VST3q16oddPseudo_UPD: 3048 case ARM::VST3q32oddPseudo_UPD: 3049 case ARM::VST4d8Pseudo: 3050 case ARM::VST4d16Pseudo: 3051 case ARM::VST4d32Pseudo: 3052 case ARM::VST1d8QPseudo: 3053 case ARM::VST1d8QPseudoWB_fixed: 3054 case ARM::VST1d8QPseudoWB_register: 3055 case ARM::VST1d16QPseudo: 3056 case ARM::VST1d16QPseudoWB_fixed: 3057 case ARM::VST1d16QPseudoWB_register: 3058 case ARM::VST1d32QPseudo: 3059 case ARM::VST1d32QPseudoWB_fixed: 3060 case ARM::VST1d32QPseudoWB_register: 3061 case ARM::VST1d64QPseudo: 3062 case ARM::VST1d64QPseudoWB_fixed: 3063 case ARM::VST1d64QPseudoWB_register: 3064 case ARM::VST4d8Pseudo_UPD: 3065 case ARM::VST4d16Pseudo_UPD: 3066 case ARM::VST4d32Pseudo_UPD: 3067 case ARM::VST1q8HighQPseudo: 3068 case ARM::VST1q8LowQPseudo_UPD: 3069 case ARM::VST1q8HighTPseudo: 3070 case ARM::VST1q8LowTPseudo_UPD: 3071 case ARM::VST1q16HighQPseudo: 3072 case ARM::VST1q16LowQPseudo_UPD: 3073 case ARM::VST1q16HighTPseudo: 3074 case ARM::VST1q16LowTPseudo_UPD: 3075 case ARM::VST1q32HighQPseudo: 3076 case ARM::VST1q32LowQPseudo_UPD: 3077 case ARM::VST1q32HighTPseudo: 3078 case ARM::VST1q32LowTPseudo_UPD: 3079 case ARM::VST1q64HighQPseudo: 3080 case ARM::VST1q64LowQPseudo_UPD: 3081 case ARM::VST1q64HighTPseudo: 3082 case ARM::VST1q64LowTPseudo_UPD: 3083 case ARM::VST1q8HighTPseudo_UPD: 3084 case 
ARM::VST1q16HighTPseudo_UPD: 3085 case ARM::VST1q32HighTPseudo_UPD: 3086 case ARM::VST1q64HighTPseudo_UPD: 3087 case ARM::VST1q8HighQPseudo_UPD: 3088 case ARM::VST1q16HighQPseudo_UPD: 3089 case ARM::VST1q32HighQPseudo_UPD: 3090 case ARM::VST1q64HighQPseudo_UPD: 3091 case ARM::VST4q8Pseudo_UPD: 3092 case ARM::VST4q16Pseudo_UPD: 3093 case ARM::VST4q32Pseudo_UPD: 3094 case ARM::VST4q8oddPseudo: 3095 case ARM::VST4q16oddPseudo: 3096 case ARM::VST4q32oddPseudo: 3097 case ARM::VST4q8oddPseudo_UPD: 3098 case ARM::VST4q16oddPseudo_UPD: 3099 case ARM::VST4q32oddPseudo_UPD: 3100 ExpandVST(MBBI); 3101 return true; 3102 3103 case ARM::VLD1LNq8Pseudo: 3104 case ARM::VLD1LNq16Pseudo: 3105 case ARM::VLD1LNq32Pseudo: 3106 case ARM::VLD1LNq8Pseudo_UPD: 3107 case ARM::VLD1LNq16Pseudo_UPD: 3108 case ARM::VLD1LNq32Pseudo_UPD: 3109 case ARM::VLD2LNd8Pseudo: 3110 case ARM::VLD2LNd16Pseudo: 3111 case ARM::VLD2LNd32Pseudo: 3112 case ARM::VLD2LNq16Pseudo: 3113 case ARM::VLD2LNq32Pseudo: 3114 case ARM::VLD2LNd8Pseudo_UPD: 3115 case ARM::VLD2LNd16Pseudo_UPD: 3116 case ARM::VLD2LNd32Pseudo_UPD: 3117 case ARM::VLD2LNq16Pseudo_UPD: 3118 case ARM::VLD2LNq32Pseudo_UPD: 3119 case ARM::VLD3LNd8Pseudo: 3120 case ARM::VLD3LNd16Pseudo: 3121 case ARM::VLD3LNd32Pseudo: 3122 case ARM::VLD3LNq16Pseudo: 3123 case ARM::VLD3LNq32Pseudo: 3124 case ARM::VLD3LNd8Pseudo_UPD: 3125 case ARM::VLD3LNd16Pseudo_UPD: 3126 case ARM::VLD3LNd32Pseudo_UPD: 3127 case ARM::VLD3LNq16Pseudo_UPD: 3128 case ARM::VLD3LNq32Pseudo_UPD: 3129 case ARM::VLD4LNd8Pseudo: 3130 case ARM::VLD4LNd16Pseudo: 3131 case ARM::VLD4LNd32Pseudo: 3132 case ARM::VLD4LNq16Pseudo: 3133 case ARM::VLD4LNq32Pseudo: 3134 case ARM::VLD4LNd8Pseudo_UPD: 3135 case ARM::VLD4LNd16Pseudo_UPD: 3136 case ARM::VLD4LNd32Pseudo_UPD: 3137 case ARM::VLD4LNq16Pseudo_UPD: 3138 case ARM::VLD4LNq32Pseudo_UPD: 3139 case ARM::VST1LNq8Pseudo: 3140 case ARM::VST1LNq16Pseudo: 3141 case ARM::VST1LNq32Pseudo: 3142 case ARM::VST1LNq8Pseudo_UPD: 3143 case ARM::VST1LNq16Pseudo_UPD: 
3144 case ARM::VST1LNq32Pseudo_UPD: 3145 case ARM::VST2LNd8Pseudo: 3146 case ARM::VST2LNd16Pseudo: 3147 case ARM::VST2LNd32Pseudo: 3148 case ARM::VST2LNq16Pseudo: 3149 case ARM::VST2LNq32Pseudo: 3150 case ARM::VST2LNd8Pseudo_UPD: 3151 case ARM::VST2LNd16Pseudo_UPD: 3152 case ARM::VST2LNd32Pseudo_UPD: 3153 case ARM::VST2LNq16Pseudo_UPD: 3154 case ARM::VST2LNq32Pseudo_UPD: 3155 case ARM::VST3LNd8Pseudo: 3156 case ARM::VST3LNd16Pseudo: 3157 case ARM::VST3LNd32Pseudo: 3158 case ARM::VST3LNq16Pseudo: 3159 case ARM::VST3LNq32Pseudo: 3160 case ARM::VST3LNd8Pseudo_UPD: 3161 case ARM::VST3LNd16Pseudo_UPD: 3162 case ARM::VST3LNd32Pseudo_UPD: 3163 case ARM::VST3LNq16Pseudo_UPD: 3164 case ARM::VST3LNq32Pseudo_UPD: 3165 case ARM::VST4LNd8Pseudo: 3166 case ARM::VST4LNd16Pseudo: 3167 case ARM::VST4LNd32Pseudo: 3168 case ARM::VST4LNq16Pseudo: 3169 case ARM::VST4LNq32Pseudo: 3170 case ARM::VST4LNd8Pseudo_UPD: 3171 case ARM::VST4LNd16Pseudo_UPD: 3172 case ARM::VST4LNd32Pseudo_UPD: 3173 case ARM::VST4LNq16Pseudo_UPD: 3174 case ARM::VST4LNq32Pseudo_UPD: 3175 ExpandLaneOp(MBBI); 3176 return true; 3177 3178 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; 3179 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; 3180 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; 3181 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; 3182 3183 case ARM::MQQPRLoad: 3184 case ARM::MQQPRStore: 3185 case ARM::MQQQQPRLoad: 3186 case ARM::MQQQQPRStore: 3187 ExpandMQQPRLoadStore(MBBI); 3188 return true; 3189 3190 case ARM::tCMP_SWAP_8: 3191 assert(STI->isThumb()); 3192 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, 3193 NextMBBI); 3194 case ARM::tCMP_SWAP_16: 3195 assert(STI->isThumb()); 3196 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, 3197 NextMBBI); 3198 case ARM::tCMP_SWAP_32: 3199 assert(STI->isThumb()); 3200 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, 
ARM::t2STREX, 0, NextMBBI); 3201 3202 case ARM::CMP_SWAP_8: 3203 assert(!STI->isThumb()); 3204 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, 3205 NextMBBI); 3206 case ARM::CMP_SWAP_16: 3207 assert(!STI->isThumb()); 3208 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, 3209 NextMBBI); 3210 case ARM::CMP_SWAP_32: 3211 assert(!STI->isThumb()); 3212 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); 3213 3214 case ARM::CMP_SWAP_64: 3215 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); 3216 3217 case ARM::tBL_PUSHLR: 3218 case ARM::BL_PUSHLR: { 3219 const bool Thumb = Opcode == ARM::tBL_PUSHLR; 3220 Register Reg = MI.getOperand(0).getReg(); 3221 assert(Reg == ARM::LR && "expect LR register!"); 3222 MachineInstrBuilder MIB; 3223 if (Thumb) { 3224 // push {lr} 3225 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) 3226 .add(predOps(ARMCC::AL)) 3227 .addReg(Reg); 3228 3229 // bl __gnu_mcount_nc 3230 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); 3231 } else { 3232 // stmdb sp!, {lr} 3233 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) 3234 .addReg(ARM::SP, RegState::Define) 3235 .addReg(ARM::SP) 3236 .add(predOps(ARMCC::AL)) 3237 .addReg(Reg); 3238 3239 // bl __gnu_mcount_nc 3240 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); 3241 } 3242 MIB.cloneMemRefs(MI); 3243 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3244 MIB.add(MO); 3245 MI.eraseFromParent(); 3246 return true; 3247 } 3248 case ARM::t2CALL_BTI: { 3249 MachineFunction &MF = *MI.getMF(); 3250 MachineInstrBuilder MIB = 3251 BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL)); 3252 MIB.cloneMemRefs(MI); 3253 for (unsigned i = 0; i < MI.getNumOperands(); ++i) 3254 MIB.add(MI.getOperand(i)); 3255 if (MI.isCandidateForAdditionalCallInfo()) 3256 MF.moveAdditionalCallInfo(&MI, MIB.getInstr()); 3257 MIBundleBuilder Bundler(MBB, MI); 3258 Bundler.append(MIB); 3259 
Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI))); 3260 finalizeBundle(MBB, Bundler.begin(), Bundler.end()); 3261 MI.eraseFromParent(); 3262 return true; 3263 } 3264 case ARM::LOADDUAL: 3265 case ARM::STOREDUAL: { 3266 Register PairReg = MI.getOperand(0).getReg(); 3267 3268 MachineInstrBuilder MIB = 3269 BuildMI(MBB, MBBI, MI.getDebugLoc(), 3270 TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) 3271 .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), 3272 Opcode == ARM::LOADDUAL ? RegState::Define : 0) 3273 .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), 3274 Opcode == ARM::LOADDUAL ? RegState::Define : 0); 3275 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3276 MIB.add(MO); 3277 MIB.add(predOps(ARMCC::AL)); 3278 MIB.cloneMemRefs(MI); 3279 MI.eraseFromParent(); 3280 return true; 3281 } 3282 } 3283 } 3284 3285 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { 3286 bool Modified = false; 3287 3288 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 3289 while (MBBI != E) { 3290 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 3291 Modified |= ExpandMI(MBB, MBBI, NMBBI); 3292 MBBI = NMBBI; 3293 } 3294 3295 return Modified; 3296 } 3297 3298 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { 3299 STI = &MF.getSubtarget<ARMSubtarget>(); 3300 TII = STI->getInstrInfo(); 3301 TRI = STI->getRegisterInfo(); 3302 AFI = MF.getInfo<ARMFunctionInfo>(); 3303 3304 LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n" 3305 << "********** Function: " << MF.getName() << '\n'); 3306 3307 bool Modified = false; 3308 for (MachineBasicBlock &MBB : MF) 3309 Modified |= ExpandMBB(MBB); 3310 if (VerifyARMPseudo) 3311 MF.verify(this, "After expanding ARM pseudo instructions."); 3312 3313 LLVM_DEBUG(dbgs() << "***************************************************\n"); 3314 return Modified; 3315 } 3316 3317 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction 3318 /// 
expansion pass. 3319 FunctionPass *llvm::createARMExpandPseudoPass() { 3320 return new ARMExpandPseudo(); 3321 } 3322