1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling, if-conversion, and other late
11 // optimizations. This pass should be run after register allocation but before
12 // the post-regalloc scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMBaseRegisterInfo.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMSubtarget.h"
22 #include "MCTargetDesc/ARMAddressingModes.h"
23 #include "llvm/CodeGen/LivePhysRegs.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/Support/Debug.h"
29
30 using namespace llvm;
31
// Short identifier for this pass; passed as the second argument of the
// INITIALIZE_PASS invocation further down in this file.
#define DEBUG_TYPE "arm-pseudo"

// Hidden boolean command-line flag "verify-arm-pseudo-expand". No explicit
// initial value is given here, and the place where the flag is consulted is
// not visible in this part of the file.
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
                cl::desc("Verify machine code after expanding ARM pseudos"));

// Human-readable pass name: returned by ARMExpandPseudo::getPassName() and
// passed to INITIALIZE_PASS.
#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"
39
40 namespace {
41 class ARMExpandPseudo : public MachineFunctionPass {
42 public:
43 static char ID;
ARMExpandPseudo()44 ARMExpandPseudo() : MachineFunctionPass(ID) {}
45
46 const ARMBaseInstrInfo *TII;
47 const TargetRegisterInfo *TRI;
48 const ARMSubtarget *STI;
49 ARMFunctionInfo *AFI;
50
51 bool runOnMachineFunction(MachineFunction &Fn) override;
52
getRequiredProperties() const53 MachineFunctionProperties getRequiredProperties() const override {
54 return MachineFunctionProperties().setNoVRegs();
55 }
56
getPassName() const57 StringRef getPassName() const override {
58 return ARM_EXPAND_PSEUDO_NAME;
59 }
60
61 private:
62 bool ExpandMI(MachineBasicBlock &MBB,
63 MachineBasicBlock::iterator MBBI,
64 MachineBasicBlock::iterator &NextMBBI);
65 bool ExpandMBB(MachineBasicBlock &MBB);
66 void ExpandVLD(MachineBasicBlock::iterator &MBBI);
67 void ExpandVST(MachineBasicBlock::iterator &MBBI);
68 void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
69 void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
70 unsigned Opc, bool IsExt);
71 void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
72 void ExpandTMOV32BitImm(MachineBasicBlock &MBB,
73 MachineBasicBlock::iterator &MBBI);
74 void ExpandMOV32BitImm(MachineBasicBlock &MBB,
75 MachineBasicBlock::iterator &MBBI);
76 void CMSEClearGPRegs(MachineBasicBlock &MBB,
77 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
78 const SmallVectorImpl<unsigned> &ClearRegs,
79 unsigned ClobberReg);
80 MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
81 MachineBasicBlock::iterator MBBI);
82 MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
83 MachineBasicBlock::iterator MBBI,
84 const BitVector &ClearRegs);
85 MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
86 MachineBasicBlock::iterator MBBI,
87 const BitVector &ClearRegs);
88 void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
89 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
90 const LivePhysRegs &LiveRegs,
91 SmallVectorImpl<unsigned> &AvailableRegs);
92 void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
93 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
94 const LivePhysRegs &LiveRegs,
95 SmallVectorImpl<unsigned> &ScratchRegs);
96 void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
97 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
98 const LivePhysRegs &LiveRegs);
99 void CMSERestoreFPRegs(MachineBasicBlock &MBB,
100 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
101 SmallVectorImpl<unsigned> &AvailableRegs);
102 void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
103 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
104 SmallVectorImpl<unsigned> &AvailableRegs);
105 void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
107 SmallVectorImpl<unsigned> &AvailableRegs);
108 bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
109 MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
110 unsigned StrexOp, unsigned UxtOp,
111 MachineBasicBlock::iterator &NextMBBI);
112
113 bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
114 MachineBasicBlock::iterator MBBI,
115 MachineBasicBlock::iterator &NextMBBI);
116 };
117 char ARMExpandPseudo::ID = 0;
118 }
119
// Register ARMExpandPseudo using DEBUG_TYPE as its short argument and
// ARM_EXPAND_PSEUDO_NAME as its description.
// NOTE(review): the two trailing `false` values are presumably the usual
// cfg-only / is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
                false)
122
namespace {
  // Constants for register spacing in NEON load/store instructions.
  // For quad-register load-lane and store-lane pseudo instructions, the
  // spacing is initially assumed to be EvenDblSpc, and that is changed to
  // OddDblSpc depending on the lane number operand.
  //
  // This stays an unscoped enum on purpose: table rows store the enumerators
  // directly into the uint8_t RegSpacing field of NEONLdStTableEntry.
  enum NEONRegSpacing {
    SingleSpc,
    SingleLowSpc ,  // Single spacing, low registers, three and four vectors.
    SingleHighQSpc, // Single spacing, high registers, four vectors.
    SingleHighTSpc, // Single spacing, high registers, three vectors.
    EvenDblSpc,
    OddDblSpc
  };

  // Entries for NEON load/store information table.  The table is sorted by
  // PseudoOpc for fast binary-search lookups.
  //
  // The struct must remain an aggregate with exactly this member order,
  // because table rows are written as positional brace-initializers.
  struct NEONLdStTableEntry {
    uint16_t PseudoOpc;       // Opcode of the pseudo instruction (sort key).
    uint16_t RealOpc;         // Opcode of the instruction it expands to.
    bool IsLoad;              // true for load rows, false for store rows.
    bool isUpdating;
    bool hasWritebackOperand;
    uint8_t RegSpacing; // One of type NEONRegSpacing
    uint8_t NumRegs; // D registers loaded or stored
    uint8_t RegElts; // elements per D register; used for lane ops
    // FIXME: Temporary flag to denote whether the real instruction takes
    // a single register (like the encoding) or all of the registers in
    // the list (like the asm syntax and the isel DAG). When all definitions
    // are converted to take only the single encoded register, this will
    // go away.
    bool copyAllListRegs;

    // Comparison methods for binary search of the table.

    // Entry vs. entry: orders rows by PseudoOpc.
    bool operator<(const NEONLdStTableEntry &TE) const {
      return PseudoOpc < TE.PseudoOpc;
    }
    // Entry vs. raw opcode: lets a search compare rows against a bare key.
    friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
      return TE.PseudoOpc < PseudoOpc;
    }
    // Raw opcode vs. entry: the mirrored form. Not every standard-library
    // search algorithm calls it, hence the standard [[maybe_unused]]
    // attribute to keep unused-function warnings quiet.
    [[maybe_unused]] friend bool operator<(unsigned PseudoOpc,
                                           const NEONLdStTableEntry &TE) {
      return PseudoOpc < TE.PseudoOpc;
    }
  };
}
168
169 static const NEONLdStTableEntry NEONLdStTable[] = {
170 { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
171 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
172 { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
173 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
174 { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
175 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
176
177 { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
178 { ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
179 { ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 ,false},
180 { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
181 { ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false},
182 { ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false},
183
184 { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
185 { ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
186 { ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false},
187 { ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
188 { ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false},
189 { ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false},
190
191 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
192 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
193 { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
194 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
195 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
196 { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
197
198 { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
199 { ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
200 { ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false},
201 { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
202 { ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false},
203 { ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false},
204
205 { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
206 { ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false},
207 { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
208 { ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false},
209 { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
210 { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
211
212 { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
213 { ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false},
214 { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
215 { ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false},
216 { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
217 { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
218
219 { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
220 { ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false},
221 { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
222 { ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false},
223 { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
224 { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
225
226 { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
227 { ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false},
228 { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
229 { ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false},
230 { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
231 { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
232
233 { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false},
234 { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false},
235 { ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4 ,false},
236 { ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4 ,false},
237 { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false},
238 { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false},
239 { ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2 ,false},
240 { ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2 ,false},
241 { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false},
242 { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false},
243 { ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8 ,false},
244 { ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8 ,false},
245
246 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
247 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
248 { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
249 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
250 { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
251 { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
252 { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
253 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
254 { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
255 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
256
257 { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
258 { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
259 { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
260 { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
261 { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
262 { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
263 { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
264 { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
265 { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
266
267 { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
268 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
269 { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
270 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
271 { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
272 { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
273 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true},
274 { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true},
275 { ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
276 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true},
277 { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true},
278 { ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
279 { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true},
280 { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true},
281 { ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
282
283 { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
284 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
285 { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
286 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
287 { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
288 { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
289 { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
290 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
291 { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
292 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
293
294 { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
295 { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
296 { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
297 { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
298 { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
299 { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
300
301 { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
302 { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
303 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
304 { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
305 { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
306 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
307 { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
308 { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
309 { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
310
311 { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
312 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
313 { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
314 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
315 { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
316 { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
317 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true},
318 { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true},
319 { ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
320 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true},
321 { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true},
322 { ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
323 { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true},
324 { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true},
325 { ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
326
327 { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
328 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
329 { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
330 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
331 { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
332 { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
333 { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
334 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
335 { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
336 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
337
338 { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
339 { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
340 { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
341 { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
342 { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
343 { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
344
345 { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
346 { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
347 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
348 { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
349 { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
350 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
351 { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
352 { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
353 { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
354
355 { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
356 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
357 { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
358 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
359 { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
360 { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
361
362 { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false},
363 { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
364 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false},
365 { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false},
366 { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false},
367 { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false},
368
369 { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false},
370 { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
371 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false},
372 { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false},
373 { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false},
374 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false},
375
376 { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
377 { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
378 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
379 { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
380 { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
381 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
382
383 { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false},
384 { ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
385 { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false},
386 { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false},
387 { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false},
388 { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false},
389
390 { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false},
391 { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
392 { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false},
393 { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false},
394 { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false},
395 { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false},
396
397 { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false},
398 { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
399 { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false},
400 { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false},
401 { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false},
402 { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false},
403
404 { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false},
405 { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
406 { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false},
407 { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false},
408 { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false},
409 { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false},
410
411 { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false},
412 { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
413 { ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false},
414 { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false},
415 { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false},
416 { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false},
417
418 { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
419 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
420 { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
421 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
422 { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
423 { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
424 { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
425 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
426 { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
427 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
428
429 { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
430 { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
431 { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
432 { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
433 { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
434 { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
435 { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
436 { ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
437 { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
438
439 { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
440 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
441 { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
442 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
443 { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
444 { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
445 { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
446 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
447 { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
448 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
449
450 { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
451 { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
452 { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
453 { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
454 { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
455 { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
456
457 { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
458 { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
459 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
460 { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
461 { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
462 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
463 { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
464 { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
465 { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
466
467 { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
468 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
469 { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
470 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
471 { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
472 { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
473 { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
474 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
475 { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
476 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
477
478 { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
479 { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
480 { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
481 { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
482 { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
483 { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
484
485 { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
486 { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
487 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
488 { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
489 { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
490 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
491 { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
492 { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
493 { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
494 };
495
496 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
497 /// load or store pseudo instruction.
LookupNEONLdSt(unsigned Opcode)498 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
499 #ifndef NDEBUG
500 // Make sure the table is sorted.
501 static std::atomic<bool> TableChecked(false);
502 if (!TableChecked.load(std::memory_order_relaxed)) {
503 assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
504 TableChecked.store(true, std::memory_order_relaxed);
505 }
506 #endif
507
508 auto I = llvm::lower_bound(NEONLdStTable, Opcode);
509 if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
510 return I;
511 return nullptr;
512 }
513
514 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
515 /// corresponding to the specified register spacing. Not all of the results
516 /// are necessarily valid, e.g., a Q register only has 2 D subregisters.
GetDSubRegs(unsigned Reg,NEONRegSpacing RegSpc,const TargetRegisterInfo * TRI,MCRegister & D0,MCRegister & D1,MCRegister & D2,MCRegister & D3)517 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
518 const TargetRegisterInfo *TRI, MCRegister &D0,
519 MCRegister &D1, MCRegister &D2, MCRegister &D3) {
520 if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
521 D0 = TRI->getSubReg(Reg, ARM::dsub_0);
522 D1 = TRI->getSubReg(Reg, ARM::dsub_1);
523 D2 = TRI->getSubReg(Reg, ARM::dsub_2);
524 D3 = TRI->getSubReg(Reg, ARM::dsub_3);
525 } else if (RegSpc == SingleHighQSpc) {
526 D0 = TRI->getSubReg(Reg, ARM::dsub_4);
527 D1 = TRI->getSubReg(Reg, ARM::dsub_5);
528 D2 = TRI->getSubReg(Reg, ARM::dsub_6);
529 D3 = TRI->getSubReg(Reg, ARM::dsub_7);
530 } else if (RegSpc == SingleHighTSpc) {
531 D0 = TRI->getSubReg(Reg, ARM::dsub_3);
532 D1 = TRI->getSubReg(Reg, ARM::dsub_4);
533 D2 = TRI->getSubReg(Reg, ARM::dsub_5);
534 D3 = TRI->getSubReg(Reg, ARM::dsub_6);
535 } else if (RegSpc == EvenDblSpc) {
536 D0 = TRI->getSubReg(Reg, ARM::dsub_0);
537 D1 = TRI->getSubReg(Reg, ARM::dsub_2);
538 D2 = TRI->getSubReg(Reg, ARM::dsub_4);
539 D3 = TRI->getSubReg(Reg, ARM::dsub_6);
540 } else {
541 assert(RegSpc == OddDblSpc && "unknown register spacing");
542 D0 = TRI->getSubReg(Reg, ARM::dsub_1);
543 D1 = TRI->getSubReg(Reg, ARM::dsub_3);
544 D2 = TRI->getSubReg(Reg, ARM::dsub_5);
545 D3 = TRI->getSubReg(Reg, ARM::dsub_7);
546 }
547 }
548
549 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
550 /// operands to real VLD instructions with D register operands.
ExpandVLD(MachineBasicBlock::iterator & MBBI)551 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
552 MachineInstr &MI = *MBBI;
553 MachineBasicBlock &MBB = *MI.getParent();
554 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
555
556 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
557 assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
558 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
559 unsigned NumRegs = TableEntry->NumRegs;
560
561 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
562 TII->get(TableEntry->RealOpc));
563 unsigned OpIdx = 0;
564
565 bool DstIsDead = MI.getOperand(OpIdx).isDead();
566 Register DstReg = MI.getOperand(OpIdx++).getReg();
567
568 bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
569 TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
570 TableEntry->RealOpc == ARM::VLD2DUPd32x2 ||
571 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
572 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
573 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed ||
574 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register ||
575 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register ||
576 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register;
577
578 if (IsVLD2DUP) {
579 unsigned SubRegIndex;
580 if (RegSpc == EvenDblSpc) {
581 SubRegIndex = ARM::dsub_0;
582 } else {
583 assert(RegSpc == OddDblSpc && "Unexpected spacing!");
584 SubRegIndex = ARM::dsub_1;
585 }
586 Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
587 MCRegister DstRegPair =
588 TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, &ARM::DPairSpcRegClass);
589 MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
590 } else {
591 MCRegister D0, D1, D2, D3;
592 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
593 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
594 if (NumRegs > 1 && TableEntry->copyAllListRegs)
595 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
596 if (NumRegs > 2 && TableEntry->copyAllListRegs)
597 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
598 if (NumRegs > 3 && TableEntry->copyAllListRegs)
599 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
600 }
601
602 if (TableEntry->isUpdating)
603 MIB.add(MI.getOperand(OpIdx++));
604
605 // Copy the addrmode6 operands.
606 MIB.add(MI.getOperand(OpIdx++));
607 MIB.add(MI.getOperand(OpIdx++));
608
609 // Copy the am6offset operand.
610 if (TableEntry->hasWritebackOperand) {
611 // TODO: The writing-back pseudo instructions we translate here are all
612 // defined to take am6offset nodes that are capable to represent both fixed
613 // and register forms. Some real instructions, however, do not rely on
614 // am6offset and have separate definitions for such forms. When this is the
615 // case, fixed forms do not take any offset nodes, so here we skip them for
616 // such instructions. Once all real and pseudo writing-back instructions are
617 // rewritten without use of am6offset nodes, this code will go away.
618 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
619 if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
620 TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
621 TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
622 TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
623 TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
624 TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
625 TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
626 TableEntry->RealOpc == ARM::VLD1d64Twb_fixed ||
627 TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
628 TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
629 TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) {
630 assert(AM6Offset.getReg() == 0 &&
631 "A fixed writing-back pseudo instruction provides an offset "
632 "register!");
633 } else {
634 MIB.add(AM6Offset);
635 }
636 }
637
638 // For an instruction writing double-spaced subregs, the pseudo instruction
639 // has an extra operand that is a use of the super-register. Record the
640 // operand index and skip over it.
641 unsigned SrcOpIdx = 0;
642 if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc ||
643 RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc)
644 SrcOpIdx = OpIdx++;
645
646 // Copy the predicate operands.
647 MIB.add(MI.getOperand(OpIdx++));
648 MIB.add(MI.getOperand(OpIdx++));
649
650 // Copy the super-register source operand used for double-spaced subregs over
651 // to the new instruction as an implicit operand.
652 if (SrcOpIdx != 0) {
653 MachineOperand MO = MI.getOperand(SrcOpIdx);
654 MO.setImplicit(true);
655 MIB.add(MO);
656 }
657 // Add an implicit def for the super-register.
658 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
659 MIB.copyImplicitOps(MI);
660
661 // Transfer memoperands.
662 MIB.cloneMemRefs(MI);
663 MI.eraseFromParent();
664 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
665 }
666
667 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
668 /// operands to real VST instructions with D register operands.
ExpandVST(MachineBasicBlock::iterator & MBBI)669 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
670 MachineInstr &MI = *MBBI;
671 MachineBasicBlock &MBB = *MI.getParent();
672 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
673
674 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
675 assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
676 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
677 unsigned NumRegs = TableEntry->NumRegs;
678
679 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
680 TII->get(TableEntry->RealOpc));
681 unsigned OpIdx = 0;
682 if (TableEntry->isUpdating)
683 MIB.add(MI.getOperand(OpIdx++));
684
685 // Copy the addrmode6 operands.
686 MIB.add(MI.getOperand(OpIdx++));
687 MIB.add(MI.getOperand(OpIdx++));
688
689 if (TableEntry->hasWritebackOperand) {
690 // TODO: The writing-back pseudo instructions we translate here are all
691 // defined to take am6offset nodes that are capable to represent both fixed
692 // and register forms. Some real instructions, however, do not rely on
693 // am6offset and have separate definitions for such forms. When this is the
694 // case, fixed forms do not take any offset nodes, so here we skip them for
695 // such instructions. Once all real and pseudo writing-back instructions are
696 // rewritten without use of am6offset nodes, this code will go away.
697 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
698 if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
699 TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
700 TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
701 TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
702 TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
703 TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
704 TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
705 TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
706 assert(AM6Offset.getReg() == 0 &&
707 "A fixed writing-back pseudo instruction provides an offset "
708 "register!");
709 } else {
710 MIB.add(AM6Offset);
711 }
712 }
713
714 bool SrcIsKill = MI.getOperand(OpIdx).isKill();
715 bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
716 Register SrcReg = MI.getOperand(OpIdx++).getReg();
717 MCRegister D0, D1, D2, D3;
718 GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
719 MIB.addReg(D0, getUndefRegState(SrcIsUndef));
720 if (NumRegs > 1 && TableEntry->copyAllListRegs)
721 MIB.addReg(D1, getUndefRegState(SrcIsUndef));
722 if (NumRegs > 2 && TableEntry->copyAllListRegs)
723 MIB.addReg(D2, getUndefRegState(SrcIsUndef));
724 if (NumRegs > 3 && TableEntry->copyAllListRegs)
725 MIB.addReg(D3, getUndefRegState(SrcIsUndef));
726
727 // Copy the predicate operands.
728 MIB.add(MI.getOperand(OpIdx++));
729 MIB.add(MI.getOperand(OpIdx++));
730
731 if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
732 MIB->addRegisterKilled(SrcReg, TRI, true);
733 else if (!SrcIsUndef)
734 MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
735 MIB.copyImplicitOps(MI);
736
737 // Transfer memoperands.
738 MIB.cloneMemRefs(MI);
739 MI.eraseFromParent();
740 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
741 }
742
743 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
744 /// register operands to real instructions with D register operands.
ExpandLaneOp(MachineBasicBlock::iterator & MBBI)745 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
746 MachineInstr &MI = *MBBI;
747 MachineBasicBlock &MBB = *MI.getParent();
748 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
749
750 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
751 assert(TableEntry && "NEONLdStTable lookup failed");
752 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
753 unsigned NumRegs = TableEntry->NumRegs;
754 unsigned RegElts = TableEntry->RegElts;
755
756 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
757 TII->get(TableEntry->RealOpc));
758 unsigned OpIdx = 0;
759 // The lane operand is always the 3rd from last operand, before the 2
760 // predicate operands.
761 unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
762
763 // Adjust the lane and spacing as needed for Q registers.
764 assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
765 if (RegSpc == EvenDblSpc && Lane >= RegElts) {
766 RegSpc = OddDblSpc;
767 Lane -= RegElts;
768 }
769 assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
770
771 MCRegister D0, D1, D2, D3;
772 unsigned DstReg = 0;
773 bool DstIsDead = false;
774 if (TableEntry->IsLoad) {
775 DstIsDead = MI.getOperand(OpIdx).isDead();
776 DstReg = MI.getOperand(OpIdx++).getReg();
777 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
778 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
779 if (NumRegs > 1)
780 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
781 if (NumRegs > 2)
782 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
783 if (NumRegs > 3)
784 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
785 }
786
787 if (TableEntry->isUpdating)
788 MIB.add(MI.getOperand(OpIdx++));
789
790 // Copy the addrmode6 operands.
791 MIB.add(MI.getOperand(OpIdx++));
792 MIB.add(MI.getOperand(OpIdx++));
793 // Copy the am6offset operand.
794 if (TableEntry->hasWritebackOperand)
795 MIB.add(MI.getOperand(OpIdx++));
796
797 // Grab the super-register source.
798 MachineOperand MO = MI.getOperand(OpIdx++);
799 if (!TableEntry->IsLoad)
800 GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
801
802 // Add the subregs as sources of the new instruction.
803 unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
804 getKillRegState(MO.isKill()));
805 MIB.addReg(D0, SrcFlags);
806 if (NumRegs > 1)
807 MIB.addReg(D1, SrcFlags);
808 if (NumRegs > 2)
809 MIB.addReg(D2, SrcFlags);
810 if (NumRegs > 3)
811 MIB.addReg(D3, SrcFlags);
812
813 // Add the lane number operand.
814 MIB.addImm(Lane);
815 OpIdx += 1;
816
817 // Copy the predicate operands.
818 MIB.add(MI.getOperand(OpIdx++));
819 MIB.add(MI.getOperand(OpIdx++));
820
821 // Copy the super-register source to be an implicit source.
822 MO.setImplicit(true);
823 MIB.add(MO);
824 if (TableEntry->IsLoad)
825 // Add an implicit def for the super-register.
826 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
827 MIB.copyImplicitOps(MI);
828 // Transfer memoperands.
829 MIB.cloneMemRefs(MI);
830 MI.eraseFromParent();
831 }
832
833 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
834 /// register operands to real instructions with D register operands.
ExpandVTBL(MachineBasicBlock::iterator & MBBI,unsigned Opc,bool IsExt)835 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
836 unsigned Opc, bool IsExt) {
837 MachineInstr &MI = *MBBI;
838 MachineBasicBlock &MBB = *MI.getParent();
839 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
840
841 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
842 unsigned OpIdx = 0;
843
844 // Transfer the destination register operand.
845 MIB.add(MI.getOperand(OpIdx++));
846 if (IsExt) {
847 MachineOperand VdSrc(MI.getOperand(OpIdx++));
848 MIB.add(VdSrc);
849 }
850
851 bool SrcIsKill = MI.getOperand(OpIdx).isKill();
852 Register SrcReg = MI.getOperand(OpIdx++).getReg();
853 MCRegister D0, D1, D2, D3;
854 GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
855 MIB.addReg(D0);
856
857 // Copy the other source register operand.
858 MachineOperand VmSrc(MI.getOperand(OpIdx++));
859 MIB.add(VmSrc);
860
861 // Copy the predicate operands.
862 MIB.add(MI.getOperand(OpIdx++));
863 MIB.add(MI.getOperand(OpIdx++));
864
865 // Add an implicit kill and use for the super-reg.
866 MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
867 MIB.copyImplicitOps(MI);
868 MI.eraseFromParent();
869 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
870 }
871
ExpandMQQPRLoadStore(MachineBasicBlock::iterator & MBBI)872 void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
873 MachineInstr &MI = *MBBI;
874 MachineBasicBlock &MBB = *MI.getParent();
875 unsigned NewOpc =
876 MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
877 ? ARM::VSTMDIA
878 : ARM::VLDMDIA;
879 MachineInstrBuilder MIB =
880 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
881
882 unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
883 getDefRegState(MI.getOperand(0).isDef());
884 Register SrcReg = MI.getOperand(0).getReg();
885
886 // Copy the destination register.
887 MIB.add(MI.getOperand(1));
888 MIB.add(predOps(ARMCC::AL));
889 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
890 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
891 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
892 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
893 if (MI.getOpcode() == ARM::MQQQQPRStore ||
894 MI.getOpcode() == ARM::MQQQQPRLoad) {
895 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
896 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
897 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
898 MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
899 }
900
901 if (NewOpc == ARM::VSTMDIA)
902 MIB.addReg(SrcReg, RegState::Implicit);
903
904 MIB.copyImplicitOps(MI);
905 MIB.cloneMemRefs(MI);
906 MI.eraseFromParent();
907 }
908
IsAnAddressOperand(const MachineOperand & MO)909 static bool IsAnAddressOperand(const MachineOperand &MO) {
910 // This check is overly conservative. Unless we are certain that the machine
911 // operand is not a symbol reference, we return that it is a symbol reference.
912 // This is important as the load pair may not be split up Windows.
913 switch (MO.getType()) {
914 case MachineOperand::MO_Register:
915 case MachineOperand::MO_Immediate:
916 case MachineOperand::MO_CImmediate:
917 case MachineOperand::MO_FPImmediate:
918 case MachineOperand::MO_ShuffleMask:
919 return false;
920 case MachineOperand::MO_MachineBasicBlock:
921 return true;
922 case MachineOperand::MO_FrameIndex:
923 return false;
924 case MachineOperand::MO_ConstantPoolIndex:
925 case MachineOperand::MO_TargetIndex:
926 case MachineOperand::MO_JumpTableIndex:
927 case MachineOperand::MO_ExternalSymbol:
928 case MachineOperand::MO_GlobalAddress:
929 case MachineOperand::MO_BlockAddress:
930 return true;
931 case MachineOperand::MO_RegisterMask:
932 case MachineOperand::MO_RegisterLiveOut:
933 return false;
934 case MachineOperand::MO_Metadata:
935 case MachineOperand::MO_MCSymbol:
936 return true;
937 case MachineOperand::MO_DbgInstrRef:
938 case MachineOperand::MO_CFIIndex:
939 return false;
940 case MachineOperand::MO_IntrinsicID:
941 case MachineOperand::MO_Predicate:
942 llvm_unreachable("should not exist post-isel");
943 }
944 llvm_unreachable("unhandled machine operand type");
945 }
946
/// Return a copy of \p MO with setImplicit() applied; \p MO itself is left
/// untouched.
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand ImplicitCopy(MO);
  ImplicitCopy.setImplicit();
  return ImplicitCopy;
}
952
/// Build the operand for one piece of a split 32-bit move of \p MO.
///
/// For an immediate, \p TargetFlag selects which byte (MO_HI_8_15, MO_HI_0_7,
/// MO_LO_8_15, MO_LO_0_7) or halfword (MO_HI16, MO_LO16) of the low 32 bits
/// is returned as a new immediate. For an external symbol, a jump table index
/// or (in every other case) a global address, the operand is recreated with
/// \p TargetFlag OR'ed into its existing target flags.
static MachineOperand getMovOperand(const MachineOperand &MO,
                                    unsigned TargetFlag) {
  const unsigned TF = MO.getTargetFlags() | TargetFlag;

  if (MO.isImm()) {
    // Express each piece as "shift right, then mask".
    unsigned Shift = 0;
    unsigned Mask = 0;
    switch (TargetFlag) {
    case ARMII::MO_HI_8_15:
      Shift = 24;
      Mask = 0xff;
      break;
    case ARMII::MO_HI_0_7:
      Shift = 16;
      Mask = 0xff;
      break;
    case ARMII::MO_LO_8_15:
      Shift = 8;
      Mask = 0xff;
      break;
    case ARMII::MO_LO_0_7:
      Shift = 0;
      Mask = 0xff;
      break;
    case ARMII::MO_HI16:
      Shift = 16;
      Mask = 0xffff;
      break;
    case ARMII::MO_LO16:
      Shift = 0;
      Mask = 0xffff;
      break;
    default:
      llvm_unreachable("Only HI/LO target flags are expected");
    }
    // Only the low 32 bits of the immediate take part.
    const unsigned Imm = MO.getImm();
    return MachineOperand::CreateImm((Imm >> Shift) & Mask);
  }

  if (MO.isSymbol())
    return MachineOperand::CreateES(MO.getSymbolName(), TF);
  if (MO.isJTI())
    return MachineOperand::CreateJTI(MO.getIndex(), TF);
  // Anything else is handled as a global address.
  return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF);
}
991
/// Expand a 32-bit move pseudo (destination in operand 0, value in operand 1)
/// into a byte-by-byte sequence of tMOVi8 / tLSLri / tADDi8 instructions
/// inserted before \p MBBI in \p MBB, then erase the pseudo.
///
/// The value is split with getMovOperand, most significant byte first. The
/// original instruction's MI flags are copied to every emitted instruction,
/// and its dead flag on the destination is applied to the last one.
void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  const MachineOperand &MO = MI.getOperand(1);
  unsigned MIFlags = MI.getFlags();

  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We
  // want to avoid emitting any zero bytes, as they won't change the result, and
  // also don't want any pointless shifts, so instead of immediately emitting
  // the shift for a byte we keep track of how much we will need to shift and do
  // it before the next nonzero byte.
  unsigned PendingShift = 0;
  for (unsigned Byte = 0; Byte < 4; ++Byte) {
    // Byte 0 is the most significant byte, byte 3 the least significant.
    unsigned Flag = Byte == 0   ? ARMII::MO_HI_8_15
                    : Byte == 1 ? ARMII::MO_HI_0_7
                    : Byte == 2 ? ARMII::MO_LO_8_15
                                : ARMII::MO_LO_0_7;
    MachineOperand Operand = getMovOperand(MO, Flag);
    // Only a literal immediate can be recognised as zero here; any other
    // operand kind is always emitted.
    bool ZeroImm = Operand.isImm() && Operand.getImm() == 0;
    // A nonzero PendingShift means something was already written to DstReg,
    // so this byte is added to it; otherwise this is the initial move. This
    // must be decided before PendingShift is reset below.
    unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8;

    // Emit the pending shift if we're going to emit this byte or if we've
    // reached the end.
    if (PendingShift && (!ZeroImm || Byte == 3)) {
      MachineInstr *Lsl =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg)
              .add(t1CondCodeOp(true))
              .addReg(DstReg)
              .addImm(PendingShift)
              .add(predOps(ARMCC::AL))
              .setMIFlags(MIFlags);
      // Lsl is only read by the debug output below.
      (void)Lsl;
      LLVM_DEBUG(dbgs() << "And: "; Lsl->dump(););
      PendingShift = 0;
    }

    // Emit this byte if it's nonzero.
    if (!ZeroImm) {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg)
              .add(t1CondCodeOp(true));
      // The add form reads DstReg as well as writing it.
      if (Op == ARM::tADDi8)
        MIB.addReg(DstReg);
      MIB.add(Operand);
      MIB.add(predOps(ARMCC::AL));
      MIB.setMIFlags(MIFlags);
      LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " ";
                 MIB.getInstr()->dump(););
    }

    // Don't accumulate the shift value if we've not yet seen a nonzero byte.
    if (PendingShift || !ZeroImm)
      PendingShift += 8;
  }

  // The dest is dead on the last instruction we emitted if it was dead on the
  // original instruction.
  // NOTE(review): this steps the caller's iterator back by one and assumes at
  // least one instruction was emitted above (i.e. the value is not an all-zero
  // immediate) — confirm that callers guarantee this.
  (--MBBI)->getOperand(0).setIsDead(DstIsDead);

  MI.eraseFromParent();
}
1057
/// Expand a 32-bit move pseudo (MOVi32imm, MOVCCi32imm, t2MOVi32imm or
/// t2MOVCCi32imm) into at most two real instructions inserted before \p MBBI
/// in \p MBB, then erase the pseudo.
///
/// Without V6T2 operations, the ARM-mode forms become MOVi + ORRri or
/// MVNi + SUBri on a two-part shifter-operand immediate. Otherwise the value
/// is split into a MOVi16/t2MOVi16 for the low half and, unless the high half
/// is a zero immediate, a MOVTi16/t2MOVTi16 for the high half. The pseudo's
/// predicate, MI flags, memory operands and implicit operands are carried over
/// to the new instructions.
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  // The conditional forms have one extra operand (operand 1) ahead of the
  // value being moved.
  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
  // On Windows, a pair that materialises an address is bundled at the end of
  // this function so that it stays together.
  bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
  MachineInstrBuilder LO16, HI16;
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  if (!STI->hasV6T2Ops() &&
      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    unsigned ImmVal = (unsigned)MO.getImm();
    unsigned SOImmValV1 = 0, SOImmValV2 = 0;

    if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    } else { // Expand into a mvn + sub.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg);
      // Split the negated value into two parts; the first part is then
      // rewritten as the operand MVN needs (MVN writes the bitwise NOT of its
      // operand) and the second part is subtracted.
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
      SOImmValV1 = ~(-SOImmValV1);
    }

    // Finish both instructions: immediate, memory operands, flags, predicate
    // and the optional-def operand supplied by condCodeOp().
    unsigned MIFlags = MI.getFlags();
    LO16 = LO16.addImm(SOImmValV1);
    HI16 = HI16.addImm(SOImmValV2);
    LO16.cloneMemRefs(MI);
    HI16.cloneMemRefs(MI);
    LO16.setMIFlags(MIFlags);
    HI16.setMIFlags(MIFlags);
    LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    // For the conditional form, operand 1 of the pseudo becomes an implicit
    // operand of the first instruction.
    if (isCC)
      LO16.add(makeImplicit(MI.getOperand(1)));
    LO16.copyImplicitOps(MI);
    HI16.copyImplicitOps(MI);
    MI.eraseFromParent();
    return;
  }

  // movw/movt-style expansion: pick the Thumb2 or ARM opcode pair.
  unsigned LO16Opc = 0;
  unsigned HI16Opc = 0;
  unsigned MIFlags = MI.getFlags();
  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    LO16Opc = ARM::t2MOVi16;
    HI16Opc = ARM::t2MOVTi16;
  } else {
    LO16Opc = ARM::MOVi16;
    HI16Opc = ARM::MOVTi16;
  }

  // Low half first.
  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  LO16.setMIFlags(MIFlags);
  LO16.add(getMovOperand(MO, ARMII::MO_LO16));
  LO16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  if (isCC)
    LO16.add(makeImplicit(MI.getOperand(1)));
  LO16.copyImplicitOps(MI);
  LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););

  // The high half is only emitted when it is not a known-zero immediate; a
  // non-immediate operand is always emitted.
  MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16);
  if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) {
    HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
               .addReg(DstReg);
    HI16.setMIFlags(MIFlags);
    HI16.add(HIOperand);
    HI16.cloneMemRefs(MI);
    HI16.addImm(Pred).addReg(PredReg);
    HI16.copyImplicitOps(MI);
    LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
  } else {
    // With no second instruction, the first one is the last writer of DstReg
    // and takes over the pseudo's dead flag.
    LO16->getOperand(0).setIsDead(DstIsDead);
  }

  // Bundle the emitted instructions, from LO16 up to (not including) the
  // pseudo.
  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  MI.eraseFromParent();
}
1156
// The size, in bytes, of the memory area accessed by VLSTM/VLLDM.
// S0-S31 take 32 * 4 = 128 bytes; the remaining 8 bytes hold FPSCR plus
// either VPR or padding (presumably 4 bytes each — TODO confirm against the
// Armv8-M Architecture Reference Manual).
static const int CMSE_FP_SAVE_SIZE = 136;
1160
determineGPRegsToClear(const MachineInstr & MI,const std::initializer_list<unsigned> & Regs,SmallVectorImpl<unsigned> & ClearRegs)1161 static void determineGPRegsToClear(const MachineInstr &MI,
1162 const std::initializer_list<unsigned> &Regs,
1163 SmallVectorImpl<unsigned> &ClearRegs) {
1164 SmallVector<unsigned, 4> OpRegs;
1165 for (const MachineOperand &Op : MI.operands()) {
1166 if (!Op.isReg() || !Op.isUse())
1167 continue;
1168 OpRegs.push_back(Op.getReg());
1169 }
1170 llvm::sort(OpRegs);
1171
1172 std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
1173 std::back_inserter(ClearRegs));
1174 }
1175
CMSEClearGPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,const SmallVectorImpl<unsigned> & ClearRegs,unsigned ClobberReg)1176 void ARMExpandPseudo::CMSEClearGPRegs(
1177 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1178 const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
1179 unsigned ClobberReg) {
1180
1181 if (STI->hasV8_1MMainlineOps()) {
1182 // Clear the registers using the CLRM instruction.
1183 MachineInstrBuilder CLRM =
1184 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
1185 for (unsigned R : ClearRegs)
1186 CLRM.addReg(R, RegState::Define);
1187 CLRM.addReg(ARM::APSR, RegState::Define);
1188 CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
1189 } else {
1190 // Clear the registers and flags by copying ClobberReg into them.
1191 // (Baseline can't do a high register clear in one instruction).
1192 for (unsigned Reg : ClearRegs) {
1193 if (Reg == ClobberReg)
1194 continue;
1195 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
1196 .addReg(ClobberReg)
1197 .add(predOps(ARMCC::AL));
1198 }
1199
1200 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
1201 .addImm(STI->hasDSP() ? 0xc00 : 0x800)
1202 .addReg(ClobberReg)
1203 .add(predOps(ARMCC::AL));
1204 }
1205 }
1206
1207 // Find which FP registers need to be cleared. The parameter `ClearRegs` is
1208 // initialised with all elements set to true, and this function resets all the
1209 // bits, which correspond to register uses. Returns true if any floating point
1210 // register is defined, false otherwise.
determineFPRegsToClear(const MachineInstr & MI,BitVector & ClearRegs)1211 static bool determineFPRegsToClear(const MachineInstr &MI,
1212 BitVector &ClearRegs) {
1213 bool DefFP = false;
1214 for (const MachineOperand &Op : MI.operands()) {
1215 if (!Op.isReg())
1216 continue;
1217
1218 Register Reg = Op.getReg();
1219 if (Op.isDef()) {
1220 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
1221 (Reg >= ARM::D0 && Reg <= ARM::D15) ||
1222 (Reg >= ARM::S0 && Reg <= ARM::S31))
1223 DefFP = true;
1224 continue;
1225 }
1226
1227 if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
1228 int R = Reg - ARM::Q0;
1229 ClearRegs.reset(R * 4, (R + 1) * 4);
1230 } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
1231 int R = Reg - ARM::D0;
1232 ClearRegs.reset(R * 2, (R + 1) * 2);
1233 } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
1234 ClearRegs[Reg - ARM::S0] = false;
1235 }
1236 }
1237 return DefFP;
1238 }
1239
1240 MachineBasicBlock &
CMSEClearFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI)1241 ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
1242 MachineBasicBlock::iterator MBBI) {
1243 BitVector ClearRegs(16, true);
1244 (void)determineFPRegsToClear(*MBBI, ClearRegs);
1245
1246 if (STI->hasV8_1MMainlineOps())
1247 return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
1248 else
1249 return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
1250 }
1251
1252 // Clear the FP registers for v8.0-M, by copying over the content
1253 // of LR. Uses R12 as a scratch register.
1254 MachineBasicBlock &
CMSEClearFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const BitVector & ClearRegs)1255 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
1256 MachineBasicBlock::iterator MBBI,
1257 const BitVector &ClearRegs) {
1258 if (!STI->hasFPRegs())
1259 return MBB;
1260
1261 auto &RetI = *MBBI;
1262 const DebugLoc &DL = RetI.getDebugLoc();
1263
1264 // If optimising for minimum size, clear FP registers unconditionally.
1265 // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
1266 // don't clear them if they belong to the non-secure state.
1267 MachineBasicBlock *ClearBB, *DoneBB;
1268 if (STI->hasMinSize()) {
1269 ClearBB = DoneBB = &MBB;
1270 } else {
1271 MachineFunction *MF = MBB.getParent();
1272 ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1273 DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1274
1275 MF->insert(++MBB.getIterator(), ClearBB);
1276 MF->insert(++ClearBB->getIterator(), DoneBB);
1277
1278 DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
1279 DoneBB->transferSuccessors(&MBB);
1280 MBB.addSuccessor(ClearBB);
1281 MBB.addSuccessor(DoneBB);
1282 ClearBB->addSuccessor(DoneBB);
1283
1284 // At the new basic blocks we need to have live-in the registers, used
1285 // for the return value as well as LR, used to clear registers.
1286 for (const MachineOperand &Op : RetI.operands()) {
1287 if (!Op.isReg())
1288 continue;
1289 Register Reg = Op.getReg();
1290 if (Reg == ARM::NoRegister || Reg == ARM::LR)
1291 continue;
1292 assert(Reg.isPhysical() && "Unallocated register");
1293 ClearBB->addLiveIn(Reg);
1294 DoneBB->addLiveIn(Reg);
1295 }
1296 ClearBB->addLiveIn(ARM::LR);
1297 DoneBB->addLiveIn(ARM::LR);
1298
1299 // Read the CONTROL register.
1300 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
1301 .addImm(20)
1302 .add(predOps(ARMCC::AL));
1303 // Check bit 3 (SFPA).
1304 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
1305 .addReg(ARM::R12)
1306 .addImm(8)
1307 .add(predOps(ARMCC::AL));
1308 // If SFPA is clear, jump over ClearBB to DoneBB.
1309 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
1310 .addMBB(DoneBB)
1311 .addImm(ARMCC::EQ)
1312 .addReg(ARM::CPSR, RegState::Kill);
1313 }
1314
1315 // Emit the clearing sequence
1316 for (unsigned D = 0; D < 8; D++) {
1317 // Attempt to clear as double
1318 if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
1319 unsigned Reg = ARM::D0 + D;
1320 BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
1321 .addReg(ARM::LR)
1322 .addReg(ARM::LR)
1323 .add(predOps(ARMCC::AL));
1324 } else {
1325 // Clear first part as single
1326 if (ClearRegs[D * 2 + 0]) {
1327 unsigned Reg = ARM::S0 + D * 2;
1328 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
1329 .addReg(ARM::LR)
1330 .add(predOps(ARMCC::AL));
1331 }
1332 // Clear second part as single
1333 if (ClearRegs[D * 2 + 1]) {
1334 unsigned Reg = ARM::S0 + D * 2 + 1;
1335 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
1336 .addReg(ARM::LR)
1337 .add(predOps(ARMCC::AL));
1338 }
1339 }
1340 }
1341
1342 // Clear FPSCR bits 0-4, 7, 28-31
1343 // The other bits are program global according to the AAPCS
1344 BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
1345 .add(predOps(ARMCC::AL));
1346 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
1347 .addReg(ARM::R12)
1348 .addImm(0x0000009F)
1349 .add(predOps(ARMCC::AL))
1350 .add(condCodeOp());
1351 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
1352 .addReg(ARM::R12)
1353 .addImm(0xF0000000)
1354 .add(predOps(ARMCC::AL))
1355 .add(condCodeOp());
1356 BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
1357 .addReg(ARM::R12)
1358 .add(predOps(ARMCC::AL));
1359
1360 return *DoneBB;
1361 }
1362
1363 MachineBasicBlock &
CMSEClearFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const BitVector & ClearRegs)1364 ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
1365 MachineBasicBlock::iterator MBBI,
1366 const BitVector &ClearRegs) {
1367 auto &RetI = *MBBI;
1368
1369 // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
1370 // each contiguous sequence of S-registers.
1371 int Start = -1, End = -1;
1372 for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
1373 if (ClearRegs[S] && S == End + 1) {
1374 End = S; // extend range
1375 continue;
1376 }
1377 // Emit current range.
1378 if (Start < End) {
1379 MachineInstrBuilder VSCCLRM =
1380 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
1381 .add(predOps(ARMCC::AL));
1382 while (++Start <= End)
1383 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
1384 VSCCLRM.addReg(ARM::VPR, RegState::Define);
1385 }
1386 Start = End = S;
1387 }
1388 // Emit last range.
1389 if (Start < End) {
1390 MachineInstrBuilder VSCCLRM =
1391 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
1392 .add(predOps(ARMCC::AL));
1393 while (++Start <= End)
1394 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
1395 VSCCLRM.addReg(ARM::VPR, RegState::Define);
1396 }
1397
1398 return MBB;
1399 }
1400
CMSESaveClearFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs,SmallVectorImpl<unsigned> & ScratchRegs)1401 void ARMExpandPseudo::CMSESaveClearFPRegs(
1402 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1403 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
1404 if (STI->hasV8_1MMainlineOps())
1405 CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
1406 else if (STI->hasV8MMainlineOps())
1407 CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
1408 }
1409
1410 // Save and clear FP registers if present
CMSESaveClearFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs,SmallVectorImpl<unsigned> & ScratchRegs)1411 void ARMExpandPseudo::CMSESaveClearFPRegsV8(
1412 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1413 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
1414
1415 // Store an available register for FPSCR clearing
1416 assert(!ScratchRegs.empty());
1417 unsigned SpareReg = ScratchRegs.front();
1418
1419 // save space on stack for VLSTM
1420 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
1421 .addReg(ARM::SP)
1422 .addImm(CMSE_FP_SAVE_SIZE >> 2)
1423 .add(predOps(ARMCC::AL));
1424
1425 // Use ScratchRegs to store the fp regs
1426 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
1427 std::vector<unsigned> NonclearedFPRegs;
1428 bool ReturnsFPReg = false;
1429 for (const MachineOperand &Op : MBBI->operands()) {
1430 if (Op.isReg() && Op.isUse()) {
1431 Register Reg = Op.getReg();
1432 assert(!ARM::DPRRegClass.contains(Reg) ||
1433 ARM::DPR_VFP2RegClass.contains(Reg));
1434 assert(!ARM::QPRRegClass.contains(Reg));
1435 if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1436 if (ScratchRegs.size() >= 2) {
1437 unsigned SaveReg2 = ScratchRegs.pop_back_val();
1438 unsigned SaveReg1 = ScratchRegs.pop_back_val();
1439 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
1440
1441 // Save the fp register to the normal registers
1442 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
1443 .addReg(SaveReg1, RegState::Define)
1444 .addReg(SaveReg2, RegState::Define)
1445 .addReg(Reg)
1446 .add(predOps(ARMCC::AL));
1447 } else {
1448 NonclearedFPRegs.push_back(Reg);
1449 }
1450 } else if (ARM::SPRRegClass.contains(Reg)) {
1451 if (ScratchRegs.size() >= 1) {
1452 unsigned SaveReg = ScratchRegs.pop_back_val();
1453 ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
1454
1455 // Save the fp register to the normal registers
1456 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
1457 .addReg(Reg)
1458 .add(predOps(ARMCC::AL));
1459 } else {
1460 NonclearedFPRegs.push_back(Reg);
1461 }
1462 }
1463 } else if (Op.isReg() && Op.isDef()) {
1464 Register Reg = Op.getReg();
1465 if (ARM::SPRRegClass.contains(Reg) || ARM::DPRRegClass.contains(Reg) ||
1466 ARM::QPRRegClass.contains(Reg))
1467 ReturnsFPReg = true;
1468 }
1469 }
1470
1471 bool PassesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
1472
1473 if (PassesFPReg || ReturnsFPReg)
1474 assert(STI->hasFPRegs() && "Subtarget needs fpregs");
1475
1476 // CVE-2024-7883
1477 //
1478 // The VLLDM/VLSTM instructions set up lazy state preservation, but they
1479 // execute as NOPs if the FP register file is not considered to contain
1480 // secure data, represented by the CONTROL_S.SFPA bit. This means that the
1481 // state of CONTROL_S.SFPA must be the same when these two instructions are
1482 // executed. That might not be the case if we haven't used any FP
1483 // instructions before the VLSTM, so CONTROL_S.SFPA is clear, but do have one
1484 // before the VLLDM, which sets it..
1485 //
1486 // If we can't prove that SFPA will be the same for the VLSTM and VLLDM, we
1487 // execute a "vmov s0, s0" instruction before the VLSTM to ensure that
1488 // CONTROL_S.SFPA is set for both.
1489 //
1490 // That can only happen for callees which take no FP arguments (or we'd have
1491 // inserted a VMOV above) and which return values in FP regs (so that we need
1492 // to use a VMOV to back-up the return value before the VLLDM). It also can't
1493 // happen if the call is dominated by other existing floating-point
1494 // instructions, but we don't currently check for that case.
1495 //
1496 // These conditions mean that we only emit this instruction when using the
1497 // hard-float ABI, which means we can assume that FP instructions are
1498 // available, and don't need to make it conditional like we do for the
1499 // CVE-2021-35465 workaround.
1500 if (ReturnsFPReg && !PassesFPReg) {
1501 bool S0Dead = !LiveRegs.contains(ARM::S0);
1502 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVS))
1503 .addReg(ARM::S0, RegState::Define | getDeadRegState(S0Dead))
1504 .addReg(ARM::S0, getUndefRegState(S0Dead))
1505 .add(predOps(ARMCC::AL));
1506 }
1507
1508 // Lazy store all fp registers to the stack.
1509 // This executes as NOP in the absence of floating-point support.
1510 MachineInstrBuilder VLSTM =
1511 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1512 .addReg(ARM::SP)
1513 .add(predOps(ARMCC::AL))
1514 .addImm(0); // Represents a pseoudo register list, has no effect on
1515 // the encoding.
1516 // Mark non-live registers as undef
1517 for (MachineOperand &MO : VLSTM->implicit_operands()) {
1518 if (MO.isReg() && !MO.isDef()) {
1519 Register Reg = MO.getReg();
1520 MO.setIsUndef(!LiveRegs.contains(Reg));
1521 }
1522 }
1523
1524 // Restore all arguments
1525 for (const auto &Regs : ClearedFPRegs) {
1526 unsigned Reg, SaveReg1, SaveReg2;
1527 std::tie(Reg, SaveReg1, SaveReg2) = Regs;
1528 if (ARM::DPR_VFP2RegClass.contains(Reg))
1529 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
1530 .addReg(SaveReg1)
1531 .addReg(SaveReg2)
1532 .add(predOps(ARMCC::AL));
1533 else if (ARM::SPRRegClass.contains(Reg))
1534 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
1535 .addReg(SaveReg1)
1536 .add(predOps(ARMCC::AL));
1537 }
1538
1539 for (unsigned Reg : NonclearedFPRegs) {
1540 if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1541 if (STI->isLittle()) {
1542 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
1543 .addReg(ARM::SP)
1544 .addImm((Reg - ARM::D0) * 2)
1545 .add(predOps(ARMCC::AL));
1546 } else {
1547 // For big-endian targets we need to load the two subregisters of Reg
1548 // manually because VLDRD would load them in wrong order
1549 MCRegister SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
1550 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
1551 .addReg(ARM::SP)
1552 .addImm((Reg - ARM::D0) * 2)
1553 .add(predOps(ARMCC::AL));
1554 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
1555 .addReg(ARM::SP)
1556 .addImm((Reg - ARM::D0) * 2 + 1)
1557 .add(predOps(ARMCC::AL));
1558 }
1559 } else if (ARM::SPRRegClass.contains(Reg)) {
1560 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
1561 .addReg(ARM::SP)
1562 .addImm(Reg - ARM::S0)
1563 .add(predOps(ARMCC::AL));
1564 }
1565 }
1566 // restore FPSCR from stack and clear bits 0-4, 7, 28-31
1567 // The other bits are program global according to the AAPCS
1568 if (PassesFPReg) {
1569 BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg)
1570 .addReg(ARM::SP)
1571 .addImm(0x10)
1572 .add(predOps(ARMCC::AL));
1573 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
1574 .addReg(SpareReg)
1575 .addImm(0x0000009F)
1576 .add(predOps(ARMCC::AL))
1577 .add(condCodeOp());
1578 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
1579 .addReg(SpareReg)
1580 .addImm(0xF0000000)
1581 .add(predOps(ARMCC::AL))
1582 .add(condCodeOp());
1583 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
1584 .addReg(SpareReg)
1585 .add(predOps(ARMCC::AL));
1586 // The ldr must happen after a floating point instruction. To prevent the
1587 // post-ra scheduler to mess with the order, we create a bundle.
1588 finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
1589 }
1590 }
1591
CMSESaveClearFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs)1592 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
1593 MachineBasicBlock::iterator MBBI,
1594 DebugLoc &DL,
1595 const LivePhysRegs &LiveRegs) {
1596 BitVector ClearRegs(32, true);
1597 bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
1598
1599 // If the instruction does not write to a FP register and no elements were
1600 // removed from the set, then no FP registers were used to pass
1601 // arguments/returns.
1602 if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
1603 // save space on stack for VLSTM
1604 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
1605 .addReg(ARM::SP)
1606 .addImm(CMSE_FP_SAVE_SIZE >> 2)
1607 .add(predOps(ARMCC::AL));
1608
1609 // Lazy store all FP registers to the stack
1610 MachineInstrBuilder VLSTM =
1611 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1612 .addReg(ARM::SP)
1613 .add(predOps(ARMCC::AL))
1614 .addImm(0); // Represents a pseoudo register list, has no effect on
1615 // the encoding.
1616 // Mark non-live registers as undef
1617 for (MachineOperand &MO : VLSTM->implicit_operands()) {
1618 if (MO.isReg() && !MO.isDef()) {
1619 Register Reg = MO.getReg();
1620 MO.setIsUndef(!LiveRegs.contains(Reg));
1621 }
1622 }
1623 } else {
1624 // Push all the callee-saved registers (s16-s31).
1625 MachineInstrBuilder VPUSH =
1626 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
1627 .addReg(ARM::SP)
1628 .add(predOps(ARMCC::AL));
1629 for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
1630 VPUSH.addReg(Reg);
1631
1632 // Clear FP registers with a VSCCLRM.
1633 (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
1634
1635 // Save floating-point context.
1636 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
1637 .addReg(ARM::SP)
1638 .addImm(-8)
1639 .add(predOps(ARMCC::AL));
1640 }
1641 }
1642
1643 // Restore FP registers if present
CMSERestoreFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1644 void ARMExpandPseudo::CMSERestoreFPRegs(
1645 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1646 SmallVectorImpl<unsigned> &AvailableRegs) {
1647 if (STI->hasV8_1MMainlineOps())
1648 CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
1649 else if (STI->hasV8MMainlineOps())
1650 CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
1651 }
1652
CMSERestoreFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1653 void ARMExpandPseudo::CMSERestoreFPRegsV8(
1654 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1655 SmallVectorImpl<unsigned> &AvailableRegs) {
1656
1657 // Keep a scratch register for the mitigation sequence.
1658 unsigned ScratchReg = ARM::NoRegister;
1659 if (STI->fixCMSE_CVE_2021_35465())
1660 ScratchReg = AvailableRegs.pop_back_val();
1661
1662 // Use AvailableRegs to store the fp regs
1663 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
1664 std::vector<unsigned> NonclearedFPRegs;
1665 for (const MachineOperand &Op : MBBI->operands()) {
1666 if (Op.isReg() && Op.isDef()) {
1667 Register Reg = Op.getReg();
1668 assert(!ARM::DPRRegClass.contains(Reg) ||
1669 ARM::DPR_VFP2RegClass.contains(Reg));
1670 assert(!ARM::QPRRegClass.contains(Reg));
1671 if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1672 if (AvailableRegs.size() >= 2) {
1673 unsigned SaveReg2 = AvailableRegs.pop_back_val();
1674 unsigned SaveReg1 = AvailableRegs.pop_back_val();
1675 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
1676
1677 // Save the fp register to the normal registers
1678 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
1679 .addReg(SaveReg1, RegState::Define)
1680 .addReg(SaveReg2, RegState::Define)
1681 .addReg(Reg)
1682 .add(predOps(ARMCC::AL));
1683 } else {
1684 NonclearedFPRegs.push_back(Reg);
1685 }
1686 } else if (ARM::SPRRegClass.contains(Reg)) {
1687 if (AvailableRegs.size() >= 1) {
1688 unsigned SaveReg = AvailableRegs.pop_back_val();
1689 ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
1690
1691 // Save the fp register to the normal registers
1692 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
1693 .addReg(Reg)
1694 .add(predOps(ARMCC::AL));
1695 } else {
1696 NonclearedFPRegs.push_back(Reg);
1697 }
1698 }
1699 }
1700 }
1701
1702 bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
1703
1704 if (returnsFPReg)
1705 assert(STI->hasFPRegs() && "Subtarget needs fpregs");
1706
1707 // Push FP regs that cannot be restored via normal registers on the stack
1708 for (unsigned Reg : NonclearedFPRegs) {
1709 if (ARM::DPR_VFP2RegClass.contains(Reg))
1710 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD))
1711 .addReg(Reg)
1712 .addReg(ARM::SP)
1713 .addImm((Reg - ARM::D0) * 2)
1714 .add(predOps(ARMCC::AL));
1715 else if (ARM::SPRRegClass.contains(Reg))
1716 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS))
1717 .addReg(Reg)
1718 .addReg(ARM::SP)
1719 .addImm(Reg - ARM::S0)
1720 .add(predOps(ARMCC::AL));
1721 }
1722
1723 // Lazy load fp regs from stack.
1724 // This executes as NOP in the absence of floating-point support.
1725 MachineInstrBuilder VLLDM =
1726 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1727 .addReg(ARM::SP)
1728 .add(predOps(ARMCC::AL))
1729 .addImm(0); // Represents a pseoudo register list, has no effect on
1730 // the encoding.
1731
1732 if (STI->fixCMSE_CVE_2021_35465()) {
1733 auto Bundler = MIBundleBuilder(MBB, VLLDM);
1734 // Read the CONTROL register.
1735 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M))
1736 .addReg(ScratchReg, RegState::Define)
1737 .addImm(20)
1738 .add(predOps(ARMCC::AL)));
1739 // Check bit 3 (SFPA).
1740 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri))
1741 .addReg(ScratchReg)
1742 .addImm(8)
1743 .add(predOps(ARMCC::AL)));
1744 // Emit the IT block.
1745 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT))
1746 .addImm(ARMCC::NE)
1747 .addImm(8));
1748 // If SFPA is clear jump over to VLLDM, otherwise execute an instruction
1749 // which has no functional effect apart from causing context creation:
1750 // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40,
1751 // which is defined as NOP if not executed.
1752 if (STI->hasFPRegs())
1753 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS))
1754 .addReg(ARM::S0, RegState::Define)
1755 .addReg(ARM::S0, RegState::Undef)
1756 .add(predOps(ARMCC::NE)));
1757 else
1758 Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM))
1759 .addExternalSymbol(".inst.w 0xeeb00a40")
1760 .addImm(InlineAsm::Extra_HasSideEffects));
1761 finalizeBundle(MBB, Bundler.begin(), Bundler.end());
1762 }
1763
1764 // Restore all FP registers via normal registers
1765 for (const auto &Regs : ClearedFPRegs) {
1766 unsigned Reg, SaveReg1, SaveReg2;
1767 std::tie(Reg, SaveReg1, SaveReg2) = Regs;
1768 if (ARM::DPR_VFP2RegClass.contains(Reg))
1769 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
1770 .addReg(SaveReg1)
1771 .addReg(SaveReg2)
1772 .add(predOps(ARMCC::AL));
1773 else if (ARM::SPRRegClass.contains(Reg))
1774 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
1775 .addReg(SaveReg1)
1776 .add(predOps(ARMCC::AL));
1777 }
1778
1779 // Pop the stack space
1780 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
1781 .addReg(ARM::SP)
1782 .addImm(CMSE_FP_SAVE_SIZE >> 2)
1783 .add(predOps(ARMCC::AL));
1784 }
1785
definesOrUsesFPReg(const MachineInstr & MI)1786 static bool definesOrUsesFPReg(const MachineInstr &MI) {
1787 for (const MachineOperand &Op : MI.operands()) {
1788 if (!Op.isReg())
1789 continue;
1790 Register Reg = Op.getReg();
1791 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
1792 (Reg >= ARM::D0 && Reg <= ARM::D15) ||
1793 (Reg >= ARM::S0 && Reg <= ARM::S31))
1794 return true;
1795 }
1796 return false;
1797 }
1798
CMSERestoreFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1799 void ARMExpandPseudo::CMSERestoreFPRegsV81(
1800 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1801 SmallVectorImpl<unsigned> &AvailableRegs) {
1802 if (!definesOrUsesFPReg(*MBBI)) {
1803 if (STI->fixCMSE_CVE_2021_35465()) {
1804 BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS))
1805 .add(predOps(ARMCC::AL))
1806 .addReg(ARM::VPR, RegState::Define);
1807 }
1808
1809 // Load FP registers from stack.
1810 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1811 .addReg(ARM::SP)
1812 .add(predOps(ARMCC::AL))
1813 .addImm(0); // Represents a pseoudo register list, has no effect on the
1814 // encoding.
1815
1816 // Pop the stack space
1817 BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
1818 .addReg(ARM::SP)
1819 .addImm(CMSE_FP_SAVE_SIZE >> 2)
1820 .add(predOps(ARMCC::AL));
1821 } else {
1822 // Restore the floating point context.
1823 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
1824 ARM::SP)
1825 .addReg(ARM::SP)
1826 .addImm(8)
1827 .add(predOps(ARMCC::AL));
1828
1829 // Pop all the callee-saved registers (s16-s31).
1830 MachineInstrBuilder VPOP =
1831 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
1832 .addReg(ARM::SP)
1833 .add(predOps(ARMCC::AL));
1834 for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
1835 VPOP.addReg(Reg, RegState::Define);
1836 }
1837 }
1838
1839 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
1840 /// possible. This only gets used at -O0 so we don't care about efficiency of
1841 /// the generated code.
ExpandCMP_SWAP(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned LdrexOp,unsigned StrexOp,unsigned UxtOp,MachineBasicBlock::iterator & NextMBBI)1842 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
1843 MachineBasicBlock::iterator MBBI,
1844 unsigned LdrexOp, unsigned StrexOp,
1845 unsigned UxtOp,
1846 MachineBasicBlock::iterator &NextMBBI) {
1847 bool IsThumb = STI->isThumb();
1848 bool IsThumb1Only = STI->isThumb1Only();
1849 MachineInstr &MI = *MBBI;
1850 DebugLoc DL = MI.getDebugLoc();
1851 const MachineOperand &Dest = MI.getOperand(0);
1852 Register TempReg = MI.getOperand(1).getReg();
1853 // Duplicating undef operands into 2 instructions does not guarantee the same
1854 // value on both; However undef should be replaced by xzr anyway.
1855 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
1856 Register AddrReg = MI.getOperand(2).getReg();
1857 Register DesiredReg = MI.getOperand(3).getReg();
1858 Register NewReg = MI.getOperand(4).getReg();
1859
1860 if (IsThumb) {
1861 assert(STI->hasV8MBaselineOps() &&
1862 "CMP_SWAP not expected to be custom expanded for Thumb1");
1863 assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
1864 "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
1865 assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
1866 "DesiredReg used for UXT op must be tGPR");
1867 }
1868
1869 MachineFunction *MF = MBB.getParent();
1870 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1871 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1872 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1873
1874 MF->insert(++MBB.getIterator(), LoadCmpBB);
1875 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
1876 MF->insert(++StoreBB->getIterator(), DoneBB);
1877
1878 if (UxtOp) {
1879 MachineInstrBuilder MIB =
1880 BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
1881 .addReg(DesiredReg, RegState::Kill);
1882 if (!IsThumb)
1883 MIB.addImm(0);
1884 MIB.add(predOps(ARMCC::AL));
1885 }
1886
1887 // .Lloadcmp:
1888 // ldrex rDest, [rAddr]
1889 // cmp rDest, rDesired
1890 // bne .Ldone
1891
1892 MachineInstrBuilder MIB;
1893 MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
1894 MIB.addReg(AddrReg);
1895 if (LdrexOp == ARM::t2LDREX)
1896 MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
1897 MIB.add(predOps(ARMCC::AL));
1898
1899 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
1900 BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
1901 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
1902 .addReg(DesiredReg)
1903 .add(predOps(ARMCC::AL));
1904 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
1905 BuildMI(LoadCmpBB, DL, TII->get(Bcc))
1906 .addMBB(DoneBB)
1907 .addImm(ARMCC::NE)
1908 .addReg(ARM::CPSR, RegState::Kill);
1909 LoadCmpBB->addSuccessor(DoneBB);
1910 LoadCmpBB->addSuccessor(StoreBB);
1911
1912 // .Lstore:
1913 // strex rTempReg, rNew, [rAddr]
1914 // cmp rTempReg, #0
1915 // bne .Lloadcmp
1916 MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
1917 .addReg(NewReg)
1918 .addReg(AddrReg);
1919 if (StrexOp == ARM::t2STREX)
1920 MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
1921 MIB.add(predOps(ARMCC::AL));
1922
1923 unsigned CMPri =
1924 IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri;
1925 BuildMI(StoreBB, DL, TII->get(CMPri))
1926 .addReg(TempReg, RegState::Kill)
1927 .addImm(0)
1928 .add(predOps(ARMCC::AL));
1929 BuildMI(StoreBB, DL, TII->get(Bcc))
1930 .addMBB(LoadCmpBB)
1931 .addImm(ARMCC::NE)
1932 .addReg(ARM::CPSR, RegState::Kill);
1933 StoreBB->addSuccessor(LoadCmpBB);
1934 StoreBB->addSuccessor(DoneBB);
1935
1936 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
1937 DoneBB->transferSuccessors(&MBB);
1938
1939 MBB.addSuccessor(LoadCmpBB);
1940
1941 NextMBBI = MBB.end();
1942 MI.eraseFromParent();
1943
1944 // Recompute livein lists.
1945 LivePhysRegs LiveRegs;
1946 computeAndAddLiveIns(LiveRegs, *DoneBB);
1947 computeAndAddLiveIns(LiveRegs, *StoreBB);
1948 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1949 // Do an extra pass around the loop to get loop carried registers right.
1950 StoreBB->clearLiveIns();
1951 computeAndAddLiveIns(LiveRegs, *StoreBB);
1952 LoadCmpBB->clearLiveIns();
1953 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1954
1955 return true;
1956 }
1957
1958 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
1959 /// single GPRPair register), Thumb's take two separate registers so we need to
1960 /// extract the subregs from the pair.
addExclusiveRegPair(MachineInstrBuilder & MIB,MachineOperand & Reg,unsigned Flags,bool IsThumb,const TargetRegisterInfo * TRI)1961 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
1962 unsigned Flags, bool IsThumb,
1963 const TargetRegisterInfo *TRI) {
1964 if (IsThumb) {
1965 Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
1966 Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
1967 MIB.addReg(RegLo, Flags);
1968 MIB.addReg(RegHi, Flags);
1969 } else
1970 MIB.addReg(Reg.getReg(), Flags);
1971 }
1972
1973 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
ExpandCMP_SWAP_64(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)1974 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
1975 MachineBasicBlock::iterator MBBI,
1976 MachineBasicBlock::iterator &NextMBBI) {
1977 bool IsThumb = STI->isThumb();
1978 assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!");
1979 MachineInstr &MI = *MBBI;
1980 DebugLoc DL = MI.getDebugLoc();
1981 MachineOperand &Dest = MI.getOperand(0);
1982 // Duplicating undef operands into 2 instructions does not guarantee the same
1983 // value on both; However undef should be replaced by xzr anyway.
1984 assert(!MI.getOperand(1).isUndef() && "cannot handle undef");
1985 Register AddrAndTempReg = MI.getOperand(1).getReg();
1986 Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0);
1987 Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1);
1988 assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
1989 "tied operands have different registers");
1990 Register DesiredReg = MI.getOperand(3).getReg();
1991 MachineOperand New = MI.getOperand(4);
1992 New.setIsKill(false);
1993
1994 Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
1995 Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
1996 Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
1997 Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
1998
1999 MachineFunction *MF = MBB.getParent();
2000 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
2001 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
2002 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
2003
2004 MF->insert(++MBB.getIterator(), LoadCmpBB);
2005 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
2006 MF->insert(++StoreBB->getIterator(), DoneBB);
2007
2008 // .Lloadcmp:
2009 // ldrexd rDestLo, rDestHi, [rAddr]
2010 // cmp rDestLo, rDesiredLo
2011 // sbcs dead rTempReg, rDestHi, rDesiredHi
2012 // bne .Ldone
2013 unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
2014 MachineInstrBuilder MIB;
2015 MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
2016 addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
2017 MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
2018
2019 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
2020 BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
2021 .addReg(DestLo, getKillRegState(Dest.isDead()))
2022 .addReg(DesiredLo)
2023 .add(predOps(ARMCC::AL));
2024
2025 BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
2026 .addReg(DestHi, getKillRegState(Dest.isDead()))
2027 .addReg(DesiredHi)
2028 .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
2029
2030 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
2031 BuildMI(LoadCmpBB, DL, TII->get(Bcc))
2032 .addMBB(DoneBB)
2033 .addImm(ARMCC::NE)
2034 .addReg(ARM::CPSR, RegState::Kill);
2035 LoadCmpBB->addSuccessor(DoneBB);
2036 LoadCmpBB->addSuccessor(StoreBB);
2037
2038 // .Lstore:
2039 // strexd rTempReg, rNewLo, rNewHi, [rAddr]
2040 // cmp rTempReg, #0
2041 // bne .Lloadcmp
2042 unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
2043 MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
2044 unsigned Flags = getKillRegState(New.isDead());
2045 addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
2046 MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
2047
2048 unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
2049 BuildMI(StoreBB, DL, TII->get(CMPri))
2050 .addReg(TempReg, RegState::Kill)
2051 .addImm(0)
2052 .add(predOps(ARMCC::AL));
2053 BuildMI(StoreBB, DL, TII->get(Bcc))
2054 .addMBB(LoadCmpBB)
2055 .addImm(ARMCC::NE)
2056 .addReg(ARM::CPSR, RegState::Kill);
2057 StoreBB->addSuccessor(LoadCmpBB);
2058 StoreBB->addSuccessor(DoneBB);
2059
2060 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
2061 DoneBB->transferSuccessors(&MBB);
2062
2063 MBB.addSuccessor(LoadCmpBB);
2064
2065 NextMBBI = MBB.end();
2066 MI.eraseFromParent();
2067
2068 // Recompute livein lists.
2069 LivePhysRegs LiveRegs;
2070 computeAndAddLiveIns(LiveRegs, *DoneBB);
2071 computeAndAddLiveIns(LiveRegs, *StoreBB);
2072 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
2073 // Do an extra pass around the loop to get loop carried registers right.
2074 StoreBB->clearLiveIns();
2075 computeAndAddLiveIns(LiveRegs, *StoreBB);
2076 LoadCmpBB->clearLiveIns();
2077 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
2078
2079 return true;
2080 }
2081
CMSEPushCalleeSaves(const TargetInstrInfo & TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,Register JumpReg,const LivePhysRegs & LiveRegs,bool Thumb1Only)2082 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
2083 MachineBasicBlock &MBB,
2084 MachineBasicBlock::iterator MBBI,
2085 Register JumpReg, const LivePhysRegs &LiveRegs,
2086 bool Thumb1Only) {
2087 const DebugLoc &DL = MBBI->getDebugLoc();
2088 if (Thumb1Only) { // push Lo and Hi regs separately
2089 MachineInstrBuilder PushMIB =
2090 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
2091 for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
2092 PushMIB.addReg(
2093 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
2094 }
2095
2096 // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
2097 // regs that we just saved and push the low regs again, taking care to
2098 // not clobber JumpReg. If JumpReg is one of the low registers, push first
2099 // the values of r9-r11, and then r8. That would leave them ordered in
2100 // memory, and allow us to later pop them with a single instructions.
2101 // FIXME: Could also use any of r0-r3 that are free (including in the
2102 // first PUSH above).
2103 for (unsigned LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4;
2104 --LoReg) {
2105 if (JumpReg == LoReg)
2106 continue;
2107 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
2108 .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
2109 .add(predOps(ARMCC::AL));
2110 --HiReg;
2111 }
2112 MachineInstrBuilder PushMIB2 =
2113 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
2114 for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
2115 if (Reg == JumpReg)
2116 continue;
2117 PushMIB2.addReg(Reg, RegState::Kill);
2118 }
2119
2120 // If we couldn't use a low register for temporary storage (because it was
2121 // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
2122 // saved.
2123 if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
2124 Register LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
2125 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
2126 .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
2127 .add(predOps(ARMCC::AL));
2128 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
2129 .add(predOps(ARMCC::AL))
2130 .addReg(LoReg, RegState::Kill);
2131 }
2132 } else { // push Lo and Hi registers with a single instruction
2133 MachineInstrBuilder PushMIB =
2134 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
2135 .addReg(ARM::SP)
2136 .add(predOps(ARMCC::AL));
2137 for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
2138 PushMIB.addReg(
2139 Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
2140 }
2141 }
2142 }
2143
CMSEPopCalleeSaves(const TargetInstrInfo & TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool Thumb1Only)2144 static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
2145 MachineBasicBlock &MBB,
2146 MachineBasicBlock::iterator MBBI,
2147 bool Thumb1Only) {
2148 const DebugLoc &DL = MBBI->getDebugLoc();
2149 if (Thumb1Only) {
2150 MachineInstrBuilder PopMIB =
2151 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
2152 for (int R = 0; R < 4; ++R) {
2153 PopMIB.addReg(ARM::R4 + R, RegState::Define);
2154 BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
2155 .addReg(ARM::R4 + R, RegState::Kill)
2156 .add(predOps(ARMCC::AL));
2157 }
2158 MachineInstrBuilder PopMIB2 =
2159 BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
2160 for (int R = 0; R < 4; ++R)
2161 PopMIB2.addReg(ARM::R4 + R, RegState::Define);
2162 } else { // pop Lo and Hi registers with a single instruction
2163 MachineInstrBuilder PopMIB =
2164 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
2165 .addReg(ARM::SP)
2166 .add(predOps(ARMCC::AL));
2167 for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg)
2168 PopMIB.addReg(Reg, RegState::Define);
2169 }
2170 }
2171
ExpandMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)2172 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
2173 MachineBasicBlock::iterator MBBI,
2174 MachineBasicBlock::iterator &NextMBBI) {
2175 MachineInstr &MI = *MBBI;
2176 unsigned Opcode = MI.getOpcode();
2177 switch (Opcode) {
2178 default:
2179 return false;
2180
2181 case ARM::VBSPd:
2182 case ARM::VBSPq: {
2183 Register DstReg = MI.getOperand(0).getReg();
2184 if (DstReg == MI.getOperand(3).getReg()) {
2185 // Expand to VBIT
2186 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
2187 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
2188 .add(MI.getOperand(0))
2189 .add(MI.getOperand(3))
2190 .add(MI.getOperand(2))
2191 .add(MI.getOperand(1))
2192 .addImm(MI.getOperand(4).getImm())
2193 .add(MI.getOperand(5));
2194 } else if (DstReg == MI.getOperand(2).getReg()) {
2195 // Expand to VBIF
2196 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
2197 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
2198 .add(MI.getOperand(0))
2199 .add(MI.getOperand(2))
2200 .add(MI.getOperand(3))
2201 .add(MI.getOperand(1))
2202 .addImm(MI.getOperand(4).getImm())
2203 .add(MI.getOperand(5));
2204 } else {
2205 // Expand to VBSL
2206 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
2207 if (DstReg == MI.getOperand(1).getReg()) {
2208 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
2209 .add(MI.getOperand(0))
2210 .add(MI.getOperand(1))
2211 .add(MI.getOperand(2))
2212 .add(MI.getOperand(3))
2213 .addImm(MI.getOperand(4).getImm())
2214 .add(MI.getOperand(5));
2215 } else {
2216 // Use move to satisfy constraints
2217 unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
2218 unsigned MO1Flags = getRegState(MI.getOperand(1)) & ~RegState::Kill;
2219 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
2220 .addReg(DstReg,
2221 RegState::Define |
2222 getRenamableRegState(MI.getOperand(0).isRenamable()))
2223 .addReg(MI.getOperand(1).getReg(), MO1Flags)
2224 .addReg(MI.getOperand(1).getReg(), MO1Flags)
2225 .addImm(MI.getOperand(4).getImm())
2226 .add(MI.getOperand(5));
2227 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
2228 .add(MI.getOperand(0))
2229 .addReg(DstReg,
2230 RegState::Kill |
2231 getRenamableRegState(MI.getOperand(0).isRenamable()))
2232 .add(MI.getOperand(2))
2233 .add(MI.getOperand(3))
2234 .addImm(MI.getOperand(4).getImm())
2235 .add(MI.getOperand(5));
2236 }
2237 }
2238 MI.eraseFromParent();
2239 return true;
2240 }
2241
2242 case ARM::TCRETURNdi:
2243 case ARM::TCRETURNri:
2244 case ARM::TCRETURNrinotr12: {
2245 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
2246 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
2247 MBBI--;
2248 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
2249 MBBI--;
2250 assert(MBBI->isReturn() &&
2251 "Can only insert epilog into returning blocks");
2252 unsigned RetOpcode = MBBI->getOpcode();
2253 DebugLoc dl = MBBI->getDebugLoc();
2254 const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
2255 MBB.getParent()->getSubtarget().getInstrInfo());
2256
2257 // Tail call return: adjust the stack pointer and jump to callee.
2258 MBBI = MBB.getLastNonDebugInstr();
2259 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
2260 MBBI--;
2261 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
2262 MBBI--;
2263 MachineOperand &JumpTarget = MBBI->getOperand(0);
2264
2265 // Jump to label or value in register.
2266 if (RetOpcode == ARM::TCRETURNdi) {
2267 MachineFunction *MF = MBB.getParent();
2268 bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
2269 MF->getFunction().needsUnwindTableEntry();
2270 unsigned TCOpcode =
2271 STI->isThumb()
2272 ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
2273 : ARM::tTAILJMPdND)
2274 : ARM::TAILJMPd;
2275 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
2276 if (JumpTarget.isGlobal())
2277 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
2278 JumpTarget.getTargetFlags());
2279 else {
2280 assert(JumpTarget.isSymbol());
2281 MIB.addExternalSymbol(JumpTarget.getSymbolName(),
2282 JumpTarget.getTargetFlags());
2283 }
2284
2285 // Add the default predicate in Thumb mode.
2286 if (STI->isThumb())
2287 MIB.add(predOps(ARMCC::AL));
2288 } else if (RetOpcode == ARM::TCRETURNri ||
2289 RetOpcode == ARM::TCRETURNrinotr12) {
2290 unsigned Opcode =
2291 STI->isThumb() ? ARM::tTAILJMPr
2292 : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
2293 BuildMI(MBB, MBBI, dl,
2294 TII.get(Opcode))
2295 .addReg(JumpTarget.getReg(), RegState::Kill);
2296 }
2297
2298 auto NewMI = std::prev(MBBI);
2299 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
2300 NewMI->addOperand(MBBI->getOperand(i));
2301
2302 // Update call info and delete the pseudo instruction TCRETURN.
2303 if (MI.isCandidateForAdditionalCallInfo())
2304 MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI);
2305 // Copy nomerge flag over to new instruction.
2306 if (MI.getFlag(MachineInstr::NoMerge))
2307 NewMI->setFlag(MachineInstr::NoMerge);
2308 MBB.erase(MBBI);
2309
2310 MBBI = NewMI;
2311 return true;
2312 }
2313 case ARM::tBXNS_RET: {
2314 // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which
2315 // uses R12 as a scratch register.
2316 if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress())
2317 BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT));
2318
2319 MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
2320
2321 if (STI->hasV8_1MMainlineOps()) {
2322 // Restore the non-secure floating point context.
2323 BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
2324 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
2325 .addReg(ARM::SP)
2326 .addImm(4)
2327 .add(predOps(ARMCC::AL));
2328
2329 if (AFI->shouldSignReturnAddress())
2330 BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT));
2331 }
2332
2333 // Clear all GPR that are not a use of the return instruction.
2334 assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
2335 return !Op.isReg() || Op.getReg() != ARM::R12;
2336 }));
2337 SmallVector<unsigned, 5> ClearRegs;
2338 determineGPRegsToClear(
2339 *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
2340 CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
2341 ARM::LR);
2342
2343 MachineInstrBuilder NewMI =
2344 BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
2345 TII->get(ARM::tBXNS))
2346 .addReg(ARM::LR)
2347 .add(predOps(ARMCC::AL));
2348 for (const MachineOperand &Op : MI.operands())
2349 NewMI->addOperand(Op);
2350 MI.eraseFromParent();
2351 return true;
2352 }
2353 case ARM::tBLXNS_CALL: {
2354 DebugLoc DL = MBBI->getDebugLoc();
2355 Register JumpReg = MBBI->getOperand(0).getReg();
2356
2357 // Figure out which registers are live at the point immediately before the
2358 // call. When we indiscriminately push a set of registers, the live
2359 // registers are added as ordinary use operands, whereas dead registers
2360 // are "undef".
2361 LivePhysRegs LiveRegs(*TRI);
2362 LiveRegs.addLiveOuts(MBB);
2363 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
2364 LiveRegs.stepBackward(MI);
2365 LiveRegs.stepBackward(*MBBI);
2366
2367 CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
2368 AFI->isThumb1OnlyFunction());
2369
2370 SmallVector<unsigned, 16> ClearRegs;
2371 determineGPRegsToClear(*MBBI,
2372 {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
2373 ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
2374 ARM::R10, ARM::R11, ARM::R12},
2375 ClearRegs);
2376 auto OriginalClearRegs = ClearRegs;
2377
2378 // Get the first cleared register as a scratch (to use later with tBIC).
2379 // We need to use the first so we can ensure it is a low register.
2380 unsigned ScratchReg = ClearRegs.front();
2381
2382 // Clear LSB of JumpReg
2383 if (AFI->isThumb2Function()) {
2384 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
2385 .addReg(JumpReg)
2386 .addImm(1)
2387 .add(predOps(ARMCC::AL))
2388 .add(condCodeOp());
2389 } else {
2390 // We need to use an extra register to cope with 8M Baseline,
2391 // since we have saved all of the registers we are ok to trash a non
2392 // argument register here.
2393 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
2394 .add(condCodeOp())
2395 .addImm(1)
2396 .add(predOps(ARMCC::AL));
2397 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
2398 .addReg(ARM::CPSR, RegState::Define)
2399 .addReg(JumpReg)
2400 .addReg(ScratchReg)
2401 .add(predOps(ARMCC::AL));
2402 }
2403
2404 CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
2405 ClearRegs); // save+clear FP regs with ClearRegs
2406 CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
2407
2408 const MachineInstrBuilder NewCall =
2409 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
2410 .add(predOps(ARMCC::AL))
2411 .addReg(JumpReg, RegState::Kill);
2412
2413 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
2414 NewCall->addOperand(MO);
2415 if (MI.isCandidateForAdditionalCallInfo())
2416 MI.getMF()->moveAdditionalCallInfo(&MI, NewCall.getInstr());
2417
2418 CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
2419
2420 CMSEPopCalleeSaves(*TII, MBB, MBBI, AFI->isThumb1OnlyFunction());
2421
2422 MI.eraseFromParent();
2423 return true;
2424 }
2425 case ARM::VMOVHcc:
2426 case ARM::VMOVScc:
2427 case ARM::VMOVDcc: {
2428 unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
2429 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
2430 MI.getOperand(1).getReg())
2431 .add(MI.getOperand(2))
2432 .addImm(MI.getOperand(3).getImm()) // 'pred'
2433 .add(MI.getOperand(4))
2434 .add(makeImplicit(MI.getOperand(1)));
2435
2436 MI.eraseFromParent();
2437 return true;
2438 }
2439 case ARM::t2MOVCCr:
2440 case ARM::MOVCCr: {
2441 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
2442 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2443 MI.getOperand(1).getReg())
2444 .add(MI.getOperand(2))
2445 .addImm(MI.getOperand(3).getImm()) // 'pred'
2446 .add(MI.getOperand(4))
2447 .add(condCodeOp()) // 's' bit
2448 .add(makeImplicit(MI.getOperand(1)));
2449
2450 MI.eraseFromParent();
2451 return true;
2452 }
2453 case ARM::MOVCCsi: {
2454 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2455 (MI.getOperand(1).getReg()))
2456 .add(MI.getOperand(2))
2457 .addImm(MI.getOperand(3).getImm())
2458 .addImm(MI.getOperand(4).getImm()) // 'pred'
2459 .add(MI.getOperand(5))
2460 .add(condCodeOp()) // 's' bit
2461 .add(makeImplicit(MI.getOperand(1)));
2462
2463 MI.eraseFromParent();
2464 return true;
2465 }
2466 case ARM::MOVCCsr: {
2467 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
2468 (MI.getOperand(1).getReg()))
2469 .add(MI.getOperand(2))
2470 .add(MI.getOperand(3))
2471 .addImm(MI.getOperand(4).getImm())
2472 .addImm(MI.getOperand(5).getImm()) // 'pred'
2473 .add(MI.getOperand(6))
2474 .add(condCodeOp()) // 's' bit
2475 .add(makeImplicit(MI.getOperand(1)));
2476
2477 MI.eraseFromParent();
2478 return true;
2479 }
2480 case ARM::t2MOVCCi16:
2481 case ARM::MOVCCi16: {
2482 unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
2483 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
2484 MI.getOperand(1).getReg())
2485 .addImm(MI.getOperand(2).getImm())
2486 .addImm(MI.getOperand(3).getImm()) // 'pred'
2487 .add(MI.getOperand(4))
2488 .add(makeImplicit(MI.getOperand(1)));
2489 MI.eraseFromParent();
2490 return true;
2491 }
2492 case ARM::t2MOVCCi:
2493 case ARM::MOVCCi: {
2494 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
2495 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2496 MI.getOperand(1).getReg())
2497 .addImm(MI.getOperand(2).getImm())
2498 .addImm(MI.getOperand(3).getImm()) // 'pred'
2499 .add(MI.getOperand(4))
2500 .add(condCodeOp()) // 's' bit
2501 .add(makeImplicit(MI.getOperand(1)));
2502
2503 MI.eraseFromParent();
2504 return true;
2505 }
2506 case ARM::t2MVNCCi:
2507 case ARM::MVNCCi: {
2508 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
2509 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2510 MI.getOperand(1).getReg())
2511 .addImm(MI.getOperand(2).getImm())
2512 .addImm(MI.getOperand(3).getImm()) // 'pred'
2513 .add(MI.getOperand(4))
2514 .add(condCodeOp()) // 's' bit
2515 .add(makeImplicit(MI.getOperand(1)));
2516
2517 MI.eraseFromParent();
2518 return true;
2519 }
2520 case ARM::t2MOVCClsl:
2521 case ARM::t2MOVCClsr:
2522 case ARM::t2MOVCCasr:
2523 case ARM::t2MOVCCror: {
2524 unsigned NewOpc;
2525 switch (Opcode) {
2526 case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
2527 case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
2528 case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
2529 case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
2530 default: llvm_unreachable("unexpeced conditional move");
2531 }
2532 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
2533 MI.getOperand(1).getReg())
2534 .add(MI.getOperand(2))
2535 .addImm(MI.getOperand(3).getImm())
2536 .addImm(MI.getOperand(4).getImm()) // 'pred'
2537 .add(MI.getOperand(5))
2538 .add(condCodeOp()) // 's' bit
2539 .add(makeImplicit(MI.getOperand(1)));
2540 MI.eraseFromParent();
2541 return true;
2542 }
2543 case ARM::Int_eh_sjlj_dispatchsetup: {
2544 MachineFunction &MF = *MI.getParent()->getParent();
2545 const ARMBaseInstrInfo *AII =
2546 static_cast<const ARMBaseInstrInfo*>(TII);
2547 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
2548 // For functions using a base pointer, we rematerialize it (via the frame
2549 // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
2550 // for us. Otherwise, expand to nothing.
2551 if (RI.hasBasePointer(MF)) {
2552 int32_t NumBytes = AFI->getFramePtrSpillOffset();
2553 Register FramePtr = RI.getFrameRegister(MF);
2554 assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
2555 "base pointer without frame pointer?");
2556
2557 if (AFI->isThumb2Function()) {
2558 emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2559 FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
2560 } else if (AFI->isThumbFunction()) {
2561 emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2562 FramePtr, -NumBytes, *TII, RI);
2563 } else {
2564 emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2565 FramePtr, -NumBytes, ARMCC::AL, 0,
2566 *TII);
2567 }
2568 // If there's dynamic realignment, adjust for it.
2569 if (RI.hasStackRealignment(MF)) {
2570 MachineFrameInfo &MFI = MF.getFrameInfo();
2571 Align MaxAlign = MFI.getMaxAlign();
2572 assert (!AFI->isThumb1OnlyFunction());
2573 // Emit bic r6, r6, MaxAlign
2574 assert(MaxAlign <= Align(256) &&
2575 "The BIC instruction cannot encode "
2576 "immediates larger than 256 with all lower "
2577 "bits set.");
2578 unsigned bicOpc = AFI->isThumbFunction() ?
2579 ARM::t2BICri : ARM::BICri;
2580 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
2581 .addReg(ARM::R6, RegState::Kill)
2582 .addImm(MaxAlign.value() - 1)
2583 .add(predOps(ARMCC::AL))
2584 .add(condCodeOp());
2585 }
2586 }
2587 MI.eraseFromParent();
2588 return true;
2589 }
2590
2591 case ARM::LSRs1:
2592 case ARM::ASRs1: {
2593 // These are just fancy MOVs instructions.
2594 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2595 MI.getOperand(0).getReg())
2596 .add(MI.getOperand(1))
2597 .addImm(ARM_AM::getSORegOpc(
2598 (Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1))
2599 .add(predOps(ARMCC::AL))
2600 .addReg(ARM::CPSR, RegState::Define);
2601 MI.eraseFromParent();
2602 return true;
2603 }
2604 case ARM::RRX: {
2605 // This encodes as "MOVs Rd, Rm, rrx
2606 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2607 MI.getOperand(0).getReg())
2608 .add(MI.getOperand(1))
2609 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
2610 .add(predOps(ARMCC::AL))
2611 .add(condCodeOp())
2612 .copyImplicitOps(MI);
2613 MI.eraseFromParent();
2614 return true;
2615 }
2616 case ARM::tTPsoft:
2617 case ARM::TPsoft: {
2618 const bool Thumb = Opcode == ARM::tTPsoft;
2619
2620 MachineInstrBuilder MIB;
2621 MachineFunction *MF = MBB.getParent();
2622 if (STI->genLongCalls()) {
2623 MachineConstantPool *MCP = MF->getConstantPool();
2624 unsigned PCLabelID = AFI->createPICLabelUId();
2625 MachineConstantPoolValue *CPV =
2626 ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
2627 "__aeabi_read_tp", PCLabelID, 0);
2628 Register Reg = MI.getOperand(0).getReg();
2629 MIB =
2630 BuildMI(MBB, MBBI, MI.getDebugLoc(),
2631 TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
2632 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
2633 if (!Thumb)
2634 MIB.addImm(0);
2635 MIB.add(predOps(ARMCC::AL));
2636
2637 MIB =
2638 BuildMI(MBB, MBBI, MI.getDebugLoc(),
2639 TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
2640 if (Thumb)
2641 MIB.add(predOps(ARMCC::AL));
2642 MIB.addReg(Reg, RegState::Kill);
2643 } else {
2644 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
2645 TII->get(Thumb ? ARM::tBL : ARM::BL));
2646 if (Thumb)
2647 MIB.add(predOps(ARMCC::AL));
2648 MIB.addExternalSymbol("__aeabi_read_tp", 0);
2649 }
2650
2651 MIB.cloneMemRefs(MI);
2652 MIB.copyImplicitOps(MI);
2653 // Update the call info.
2654 if (MI.isCandidateForAdditionalCallInfo())
2655 MF->moveAdditionalCallInfo(&MI, &*MIB);
2656 MI.eraseFromParent();
2657 return true;
2658 }
2659 case ARM::tLDRpci_pic:
2660 case ARM::t2LDRpci_pic: {
2661 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
2662 ? ARM::tLDRpci : ARM::t2LDRpci;
2663 Register DstReg = MI.getOperand(0).getReg();
2664 bool DstIsDead = MI.getOperand(0).isDead();
2665 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
2666 .add(MI.getOperand(1))
2667 .add(predOps(ARMCC::AL))
2668 .cloneMemRefs(MI)
2669 .copyImplicitOps(MI);
2670 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
2671 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2672 .addReg(DstReg)
2673 .add(MI.getOperand(2))
2674 .copyImplicitOps(MI);
2675 MI.eraseFromParent();
2676 return true;
2677 }
2678
2679 case ARM::LDRLIT_ga_abs:
2680 case ARM::LDRLIT_ga_pcrel:
2681 case ARM::LDRLIT_ga_pcrel_ldr:
2682 case ARM::tLDRLIT_ga_abs:
2683 case ARM::t2LDRLIT_ga_pcrel:
2684 case ARM::tLDRLIT_ga_pcrel: {
2685 Register DstReg = MI.getOperand(0).getReg();
2686 bool DstIsDead = MI.getOperand(0).isDead();
2687 const MachineOperand &MO1 = MI.getOperand(1);
2688 auto Flags = MO1.getTargetFlags();
2689 const GlobalValue *GV = MO1.getGlobal();
2690 bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel &&
2691 Opcode != ARM::tLDRLIT_ga_abs &&
2692 Opcode != ARM::t2LDRLIT_ga_pcrel;
2693 bool IsPIC =
2694 Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
2695 unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
2696 if (Opcode == ARM::t2LDRLIT_ga_pcrel)
2697 LDRLITOpc = ARM::t2LDRpci;
2698 unsigned PICAddOpc =
2699 IsARM
2700 ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
2701 : ARM::tPICADD;
2702
2703 // We need a new const-pool entry to load from.
2704 MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
2705 unsigned ARMPCLabelIndex = 0;
2706 MachineConstantPoolValue *CPV;
2707
2708 if (IsPIC) {
2709 unsigned PCAdj = IsARM ? 8 : 4;
2710 auto Modifier = (Flags & ARMII::MO_GOT)
2711 ? ARMCP::GOT_PREL
2712 : ARMCP::no_modifier;
2713 ARMPCLabelIndex = AFI->createPICLabelUId();
2714 CPV = ARMConstantPoolConstant::Create(
2715 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
2716 /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
2717 } else
2718 CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
2719
2720 MachineInstrBuilder MIB =
2721 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
2722 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
2723 if (IsARM)
2724 MIB.addImm(0);
2725 MIB.add(predOps(ARMCC::AL));
2726
2727 if (IsPIC) {
2728 MachineInstrBuilder MIB =
2729 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
2730 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2731 .addReg(DstReg)
2732 .addImm(ARMPCLabelIndex);
2733
2734 if (IsARM)
2735 MIB.add(predOps(ARMCC::AL));
2736 }
2737
2738 MI.eraseFromParent();
2739 return true;
2740 }
2741 case ARM::MOV_ga_pcrel:
2742 case ARM::MOV_ga_pcrel_ldr:
2743 case ARM::t2MOV_ga_pcrel: {
2744 // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
2745 unsigned LabelId = AFI->createPICLabelUId();
2746 Register DstReg = MI.getOperand(0).getReg();
2747 bool DstIsDead = MI.getOperand(0).isDead();
2748 const MachineOperand &MO1 = MI.getOperand(1);
2749 const GlobalValue *GV = MO1.getGlobal();
2750 unsigned TF = MO1.getTargetFlags();
2751 bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
2752 unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
2753 unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
2754 unsigned LO16TF = TF | ARMII::MO_LO16;
2755 unsigned HI16TF = TF | ARMII::MO_HI16;
2756 unsigned PICAddOpc = isARM
2757 ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
2758 : ARM::tPICADD;
2759 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg)
2760 .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
2761 .addImm(LabelId)
2762 .copyImplicitOps(MI);
2763
2764 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
2765 .addReg(DstReg)
2766 .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
2767 .addImm(LabelId)
2768 .copyImplicitOps(MI);
2769
2770 MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
2771 TII->get(PICAddOpc))
2772 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2773 .addReg(DstReg).addImm(LabelId);
2774 if (isARM) {
2775 MIB3.add(predOps(ARMCC::AL));
2776 if (Opcode == ARM::MOV_ga_pcrel_ldr)
2777 MIB3.cloneMemRefs(MI);
2778 }
2779 MIB3.copyImplicitOps(MI);
2780 MI.eraseFromParent();
2781 return true;
2782 }
2783
2784 case ARM::MOVi32imm:
2785 case ARM::MOVCCi32imm:
2786 case ARM::t2MOVi32imm:
2787 case ARM::t2MOVCCi32imm:
2788 ExpandMOV32BitImm(MBB, MBBI);
2789 return true;
2790
2791 case ARM::tMOVi32imm:
2792 ExpandTMOV32BitImm(MBB, MBBI);
2793 return true;
2794
2795 case ARM::tLEApcrelJT:
2796 // Inline jump tables are handled in ARMAsmPrinter.
2797 if (MI.getMF()->getJumpTableInfo()->getEntryKind() ==
2798 MachineJumpTableInfo::EK_Inline)
2799 return false;
2800
2801 // Use a 32-bit immediate move to generate the address of the jump table.
2802 assert(STI->isThumb() && "Non-inline jump tables expected only in thumb");
2803 ExpandTMOV32BitImm(MBB, MBBI);
2804 return true;
2805
2806 case ARM::SUBS_PC_LR: {
2807 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
2808 .addReg(ARM::LR)
2809 .add(MI.getOperand(0))
2810 .add(MI.getOperand(1))
2811 .add(MI.getOperand(2))
2812 .addReg(ARM::CPSR, RegState::Undef)
2813 .copyImplicitOps(MI);
2814 MI.eraseFromParent();
2815 return true;
2816 }
2817 case ARM::VLDMQIA: {
2818 unsigned NewOpc = ARM::VLDMDIA;
2819 MachineInstrBuilder MIB =
2820 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
2821 unsigned OpIdx = 0;
2822
2823 // Grab the Q register destination.
2824 bool DstIsDead = MI.getOperand(OpIdx).isDead();
2825 Register DstReg = MI.getOperand(OpIdx++).getReg();
2826
2827 // Copy the source register.
2828 MIB.add(MI.getOperand(OpIdx++));
2829
2830 // Copy the predicate operands.
2831 MIB.add(MI.getOperand(OpIdx++));
2832 MIB.add(MI.getOperand(OpIdx++));
2833
2834 // Add the destination operands (D subregs).
2835 Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
2836 Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
2837 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
2838 .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
2839
2840 // Add an implicit def for the super-register.
2841 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
2842 MIB.copyImplicitOps(MI);
2843 MIB.cloneMemRefs(MI);
2844 MI.eraseFromParent();
2845 return true;
2846 }
2847
2848 case ARM::VSTMQIA: {
2849 unsigned NewOpc = ARM::VSTMDIA;
2850 MachineInstrBuilder MIB =
2851 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
2852 unsigned OpIdx = 0;
2853
2854 // Grab the Q register source.
2855 bool SrcIsKill = MI.getOperand(OpIdx).isKill();
2856 Register SrcReg = MI.getOperand(OpIdx++).getReg();
2857
2858 // Copy the destination register.
2859 MachineOperand Dst(MI.getOperand(OpIdx++));
2860 MIB.add(Dst);
2861
2862 // Copy the predicate operands.
2863 MIB.add(MI.getOperand(OpIdx++));
2864 MIB.add(MI.getOperand(OpIdx++));
2865
2866 // Add the source operands (D subregs).
2867 Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
2868 Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
2869 MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
2870 .addReg(D1, SrcIsKill ? RegState::Kill : 0);
2871
2872 if (SrcIsKill) // Add an implicit kill for the Q register.
2873 MIB->addRegisterKilled(SrcReg, TRI, true);
2874
2875 MIB.copyImplicitOps(MI);
2876 MIB.cloneMemRefs(MI);
2877 MI.eraseFromParent();
2878 return true;
2879 }
2880
2881 case ARM::VLD2q8Pseudo:
2882 case ARM::VLD2q16Pseudo:
2883 case ARM::VLD2q32Pseudo:
2884 case ARM::VLD2q8PseudoWB_fixed:
2885 case ARM::VLD2q16PseudoWB_fixed:
2886 case ARM::VLD2q32PseudoWB_fixed:
2887 case ARM::VLD2q8PseudoWB_register:
2888 case ARM::VLD2q16PseudoWB_register:
2889 case ARM::VLD2q32PseudoWB_register:
2890 case ARM::VLD3d8Pseudo:
2891 case ARM::VLD3d16Pseudo:
2892 case ARM::VLD3d32Pseudo:
2893 case ARM::VLD1d8TPseudo:
2894 case ARM::VLD1d8TPseudoWB_fixed:
2895 case ARM::VLD1d8TPseudoWB_register:
2896 case ARM::VLD1d16TPseudo:
2897 case ARM::VLD1d16TPseudoWB_fixed:
2898 case ARM::VLD1d16TPseudoWB_register:
2899 case ARM::VLD1d32TPseudo:
2900 case ARM::VLD1d32TPseudoWB_fixed:
2901 case ARM::VLD1d32TPseudoWB_register:
2902 case ARM::VLD1d64TPseudo:
2903 case ARM::VLD1d64TPseudoWB_fixed:
2904 case ARM::VLD1d64TPseudoWB_register:
2905 case ARM::VLD3d8Pseudo_UPD:
2906 case ARM::VLD3d16Pseudo_UPD:
2907 case ARM::VLD3d32Pseudo_UPD:
2908 case ARM::VLD3q8Pseudo_UPD:
2909 case ARM::VLD3q16Pseudo_UPD:
2910 case ARM::VLD3q32Pseudo_UPD:
2911 case ARM::VLD3q8oddPseudo:
2912 case ARM::VLD3q16oddPseudo:
2913 case ARM::VLD3q32oddPseudo:
2914 case ARM::VLD3q8oddPseudo_UPD:
2915 case ARM::VLD3q16oddPseudo_UPD:
2916 case ARM::VLD3q32oddPseudo_UPD:
2917 case ARM::VLD4d8Pseudo:
2918 case ARM::VLD4d16Pseudo:
2919 case ARM::VLD4d32Pseudo:
2920 case ARM::VLD1d8QPseudo:
2921 case ARM::VLD1d8QPseudoWB_fixed:
2922 case ARM::VLD1d8QPseudoWB_register:
2923 case ARM::VLD1d16QPseudo:
2924 case ARM::VLD1d16QPseudoWB_fixed:
2925 case ARM::VLD1d16QPseudoWB_register:
2926 case ARM::VLD1d32QPseudo:
2927 case ARM::VLD1d32QPseudoWB_fixed:
2928 case ARM::VLD1d32QPseudoWB_register:
2929 case ARM::VLD1d64QPseudo:
2930 case ARM::VLD1d64QPseudoWB_fixed:
2931 case ARM::VLD1d64QPseudoWB_register:
2932 case ARM::VLD1q8HighQPseudo:
2933 case ARM::VLD1q8HighQPseudo_UPD:
2934 case ARM::VLD1q8LowQPseudo_UPD:
2935 case ARM::VLD1q8HighTPseudo:
2936 case ARM::VLD1q8HighTPseudo_UPD:
2937 case ARM::VLD1q8LowTPseudo_UPD:
2938 case ARM::VLD1q16HighQPseudo:
2939 case ARM::VLD1q16HighQPseudo_UPD:
2940 case ARM::VLD1q16LowQPseudo_UPD:
2941 case ARM::VLD1q16HighTPseudo:
2942 case ARM::VLD1q16HighTPseudo_UPD:
2943 case ARM::VLD1q16LowTPseudo_UPD:
2944 case ARM::VLD1q32HighQPseudo:
2945 case ARM::VLD1q32HighQPseudo_UPD:
2946 case ARM::VLD1q32LowQPseudo_UPD:
2947 case ARM::VLD1q32HighTPseudo:
2948 case ARM::VLD1q32HighTPseudo_UPD:
2949 case ARM::VLD1q32LowTPseudo_UPD:
2950 case ARM::VLD1q64HighQPseudo:
2951 case ARM::VLD1q64HighQPseudo_UPD:
2952 case ARM::VLD1q64LowQPseudo_UPD:
2953 case ARM::VLD1q64HighTPseudo:
2954 case ARM::VLD1q64HighTPseudo_UPD:
2955 case ARM::VLD1q64LowTPseudo_UPD:
2956 case ARM::VLD4d8Pseudo_UPD:
2957 case ARM::VLD4d16Pseudo_UPD:
2958 case ARM::VLD4d32Pseudo_UPD:
2959 case ARM::VLD4q8Pseudo_UPD:
2960 case ARM::VLD4q16Pseudo_UPD:
2961 case ARM::VLD4q32Pseudo_UPD:
2962 case ARM::VLD4q8oddPseudo:
2963 case ARM::VLD4q16oddPseudo:
2964 case ARM::VLD4q32oddPseudo:
2965 case ARM::VLD4q8oddPseudo_UPD:
2966 case ARM::VLD4q16oddPseudo_UPD:
2967 case ARM::VLD4q32oddPseudo_UPD:
2968 case ARM::VLD3DUPd8Pseudo:
2969 case ARM::VLD3DUPd16Pseudo:
2970 case ARM::VLD3DUPd32Pseudo:
2971 case ARM::VLD3DUPd8Pseudo_UPD:
2972 case ARM::VLD3DUPd16Pseudo_UPD:
2973 case ARM::VLD3DUPd32Pseudo_UPD:
2974 case ARM::VLD4DUPd8Pseudo:
2975 case ARM::VLD4DUPd16Pseudo:
2976 case ARM::VLD4DUPd32Pseudo:
2977 case ARM::VLD4DUPd8Pseudo_UPD:
2978 case ARM::VLD4DUPd16Pseudo_UPD:
2979 case ARM::VLD4DUPd32Pseudo_UPD:
2980 case ARM::VLD2DUPq8EvenPseudo:
2981 case ARM::VLD2DUPq8OddPseudo:
2982 case ARM::VLD2DUPq16EvenPseudo:
2983 case ARM::VLD2DUPq16OddPseudo:
2984 case ARM::VLD2DUPq32EvenPseudo:
2985 case ARM::VLD2DUPq32OddPseudo:
2986 case ARM::VLD2DUPq8OddPseudoWB_fixed:
2987 case ARM::VLD2DUPq8OddPseudoWB_register:
2988 case ARM::VLD2DUPq16OddPseudoWB_fixed:
2989 case ARM::VLD2DUPq16OddPseudoWB_register:
2990 case ARM::VLD2DUPq32OddPseudoWB_fixed:
2991 case ARM::VLD2DUPq32OddPseudoWB_register:
2992 case ARM::VLD3DUPq8EvenPseudo:
2993 case ARM::VLD3DUPq8OddPseudo:
2994 case ARM::VLD3DUPq16EvenPseudo:
2995 case ARM::VLD3DUPq16OddPseudo:
2996 case ARM::VLD3DUPq32EvenPseudo:
2997 case ARM::VLD3DUPq32OddPseudo:
2998 case ARM::VLD3DUPq8OddPseudo_UPD:
2999 case ARM::VLD3DUPq16OddPseudo_UPD:
3000 case ARM::VLD3DUPq32OddPseudo_UPD:
3001 case ARM::VLD4DUPq8EvenPseudo:
3002 case ARM::VLD4DUPq8OddPseudo:
3003 case ARM::VLD4DUPq16EvenPseudo:
3004 case ARM::VLD4DUPq16OddPseudo:
3005 case ARM::VLD4DUPq32EvenPseudo:
3006 case ARM::VLD4DUPq32OddPseudo:
3007 case ARM::VLD4DUPq8OddPseudo_UPD:
3008 case ARM::VLD4DUPq16OddPseudo_UPD:
3009 case ARM::VLD4DUPq32OddPseudo_UPD:
3010 ExpandVLD(MBBI);
3011 return true;
3012
3013 case ARM::VST2q8Pseudo:
3014 case ARM::VST2q16Pseudo:
3015 case ARM::VST2q32Pseudo:
3016 case ARM::VST2q8PseudoWB_fixed:
3017 case ARM::VST2q16PseudoWB_fixed:
3018 case ARM::VST2q32PseudoWB_fixed:
3019 case ARM::VST2q8PseudoWB_register:
3020 case ARM::VST2q16PseudoWB_register:
3021 case ARM::VST2q32PseudoWB_register:
3022 case ARM::VST3d8Pseudo:
3023 case ARM::VST3d16Pseudo:
3024 case ARM::VST3d32Pseudo:
3025 case ARM::VST1d8TPseudo:
3026 case ARM::VST1d8TPseudoWB_fixed:
3027 case ARM::VST1d8TPseudoWB_register:
3028 case ARM::VST1d16TPseudo:
3029 case ARM::VST1d16TPseudoWB_fixed:
3030 case ARM::VST1d16TPseudoWB_register:
3031 case ARM::VST1d32TPseudo:
3032 case ARM::VST1d32TPseudoWB_fixed:
3033 case ARM::VST1d32TPseudoWB_register:
3034 case ARM::VST1d64TPseudo:
3035 case ARM::VST1d64TPseudoWB_fixed:
3036 case ARM::VST1d64TPseudoWB_register:
3037 case ARM::VST3d8Pseudo_UPD:
3038 case ARM::VST3d16Pseudo_UPD:
3039 case ARM::VST3d32Pseudo_UPD:
3040 case ARM::VST3q8Pseudo_UPD:
3041 case ARM::VST3q16Pseudo_UPD:
3042 case ARM::VST3q32Pseudo_UPD:
3043 case ARM::VST3q8oddPseudo:
3044 case ARM::VST3q16oddPseudo:
3045 case ARM::VST3q32oddPseudo:
3046 case ARM::VST3q8oddPseudo_UPD:
3047 case ARM::VST3q16oddPseudo_UPD:
3048 case ARM::VST3q32oddPseudo_UPD:
3049 case ARM::VST4d8Pseudo:
3050 case ARM::VST4d16Pseudo:
3051 case ARM::VST4d32Pseudo:
3052 case ARM::VST1d8QPseudo:
3053 case ARM::VST1d8QPseudoWB_fixed:
3054 case ARM::VST1d8QPseudoWB_register:
3055 case ARM::VST1d16QPseudo:
3056 case ARM::VST1d16QPseudoWB_fixed:
3057 case ARM::VST1d16QPseudoWB_register:
3058 case ARM::VST1d32QPseudo:
3059 case ARM::VST1d32QPseudoWB_fixed:
3060 case ARM::VST1d32QPseudoWB_register:
3061 case ARM::VST1d64QPseudo:
3062 case ARM::VST1d64QPseudoWB_fixed:
3063 case ARM::VST1d64QPseudoWB_register:
3064 case ARM::VST4d8Pseudo_UPD:
3065 case ARM::VST4d16Pseudo_UPD:
3066 case ARM::VST4d32Pseudo_UPD:
3067 case ARM::VST1q8HighQPseudo:
3068 case ARM::VST1q8LowQPseudo_UPD:
3069 case ARM::VST1q8HighTPseudo:
3070 case ARM::VST1q8LowTPseudo_UPD:
3071 case ARM::VST1q16HighQPseudo:
3072 case ARM::VST1q16LowQPseudo_UPD:
3073 case ARM::VST1q16HighTPseudo:
3074 case ARM::VST1q16LowTPseudo_UPD:
3075 case ARM::VST1q32HighQPseudo:
3076 case ARM::VST1q32LowQPseudo_UPD:
3077 case ARM::VST1q32HighTPseudo:
3078 case ARM::VST1q32LowTPseudo_UPD:
3079 case ARM::VST1q64HighQPseudo:
3080 case ARM::VST1q64LowQPseudo_UPD:
3081 case ARM::VST1q64HighTPseudo:
3082 case ARM::VST1q64LowTPseudo_UPD:
3083 case ARM::VST1q8HighTPseudo_UPD:
3084 case ARM::VST1q16HighTPseudo_UPD:
3085 case ARM::VST1q32HighTPseudo_UPD:
3086 case ARM::VST1q64HighTPseudo_UPD:
3087 case ARM::VST1q8HighQPseudo_UPD:
3088 case ARM::VST1q16HighQPseudo_UPD:
3089 case ARM::VST1q32HighQPseudo_UPD:
3090 case ARM::VST1q64HighQPseudo_UPD:
3091 case ARM::VST4q8Pseudo_UPD:
3092 case ARM::VST4q16Pseudo_UPD:
3093 case ARM::VST4q32Pseudo_UPD:
3094 case ARM::VST4q8oddPseudo:
3095 case ARM::VST4q16oddPseudo:
3096 case ARM::VST4q32oddPseudo:
3097 case ARM::VST4q8oddPseudo_UPD:
3098 case ARM::VST4q16oddPseudo_UPD:
3099 case ARM::VST4q32oddPseudo_UPD:
3100 ExpandVST(MBBI);
3101 return true;
3102
3103 case ARM::VLD1LNq8Pseudo:
3104 case ARM::VLD1LNq16Pseudo:
3105 case ARM::VLD1LNq32Pseudo:
3106 case ARM::VLD1LNq8Pseudo_UPD:
3107 case ARM::VLD1LNq16Pseudo_UPD:
3108 case ARM::VLD1LNq32Pseudo_UPD:
3109 case ARM::VLD2LNd8Pseudo:
3110 case ARM::VLD2LNd16Pseudo:
3111 case ARM::VLD2LNd32Pseudo:
3112 case ARM::VLD2LNq16Pseudo:
3113 case ARM::VLD2LNq32Pseudo:
3114 case ARM::VLD2LNd8Pseudo_UPD:
3115 case ARM::VLD2LNd16Pseudo_UPD:
3116 case ARM::VLD2LNd32Pseudo_UPD:
3117 case ARM::VLD2LNq16Pseudo_UPD:
3118 case ARM::VLD2LNq32Pseudo_UPD:
3119 case ARM::VLD3LNd8Pseudo:
3120 case ARM::VLD3LNd16Pseudo:
3121 case ARM::VLD3LNd32Pseudo:
3122 case ARM::VLD3LNq16Pseudo:
3123 case ARM::VLD3LNq32Pseudo:
3124 case ARM::VLD3LNd8Pseudo_UPD:
3125 case ARM::VLD3LNd16Pseudo_UPD:
3126 case ARM::VLD3LNd32Pseudo_UPD:
3127 case ARM::VLD3LNq16Pseudo_UPD:
3128 case ARM::VLD3LNq32Pseudo_UPD:
3129 case ARM::VLD4LNd8Pseudo:
3130 case ARM::VLD4LNd16Pseudo:
3131 case ARM::VLD4LNd32Pseudo:
3132 case ARM::VLD4LNq16Pseudo:
3133 case ARM::VLD4LNq32Pseudo:
3134 case ARM::VLD4LNd8Pseudo_UPD:
3135 case ARM::VLD4LNd16Pseudo_UPD:
3136 case ARM::VLD4LNd32Pseudo_UPD:
3137 case ARM::VLD4LNq16Pseudo_UPD:
3138 case ARM::VLD4LNq32Pseudo_UPD:
3139 case ARM::VST1LNq8Pseudo:
3140 case ARM::VST1LNq16Pseudo:
3141 case ARM::VST1LNq32Pseudo:
3142 case ARM::VST1LNq8Pseudo_UPD:
3143 case ARM::VST1LNq16Pseudo_UPD:
3144 case ARM::VST1LNq32Pseudo_UPD:
3145 case ARM::VST2LNd8Pseudo:
3146 case ARM::VST2LNd16Pseudo:
3147 case ARM::VST2LNd32Pseudo:
3148 case ARM::VST2LNq16Pseudo:
3149 case ARM::VST2LNq32Pseudo:
3150 case ARM::VST2LNd8Pseudo_UPD:
3151 case ARM::VST2LNd16Pseudo_UPD:
3152 case ARM::VST2LNd32Pseudo_UPD:
3153 case ARM::VST2LNq16Pseudo_UPD:
3154 case ARM::VST2LNq32Pseudo_UPD:
3155 case ARM::VST3LNd8Pseudo:
3156 case ARM::VST3LNd16Pseudo:
3157 case ARM::VST3LNd32Pseudo:
3158 case ARM::VST3LNq16Pseudo:
3159 case ARM::VST3LNq32Pseudo:
3160 case ARM::VST3LNd8Pseudo_UPD:
3161 case ARM::VST3LNd16Pseudo_UPD:
3162 case ARM::VST3LNd32Pseudo_UPD:
3163 case ARM::VST3LNq16Pseudo_UPD:
3164 case ARM::VST3LNq32Pseudo_UPD:
3165 case ARM::VST4LNd8Pseudo:
3166 case ARM::VST4LNd16Pseudo:
3167 case ARM::VST4LNd32Pseudo:
3168 case ARM::VST4LNq16Pseudo:
3169 case ARM::VST4LNq32Pseudo:
3170 case ARM::VST4LNd8Pseudo_UPD:
3171 case ARM::VST4LNd16Pseudo_UPD:
3172 case ARM::VST4LNd32Pseudo_UPD:
3173 case ARM::VST4LNq16Pseudo_UPD:
3174 case ARM::VST4LNq32Pseudo_UPD:
3175 ExpandLaneOp(MBBI);
3176 return true;
3177
3178 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
3179 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
3180 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
3181 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
3182
3183 case ARM::MQQPRLoad:
3184 case ARM::MQQPRStore:
3185 case ARM::MQQQQPRLoad:
3186 case ARM::MQQQQPRStore:
3187 ExpandMQQPRLoadStore(MBBI);
3188 return true;
3189
3190 case ARM::tCMP_SWAP_8:
3191 assert(STI->isThumb());
3192 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
3193 NextMBBI);
3194 case ARM::tCMP_SWAP_16:
3195 assert(STI->isThumb());
3196 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
3197 NextMBBI);
3198 case ARM::tCMP_SWAP_32:
3199 assert(STI->isThumb());
3200 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI);
3201
3202 case ARM::CMP_SWAP_8:
3203 assert(!STI->isThumb());
3204 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
3205 NextMBBI);
3206 case ARM::CMP_SWAP_16:
3207 assert(!STI->isThumb());
3208 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
3209 NextMBBI);
3210 case ARM::CMP_SWAP_32:
3211 assert(!STI->isThumb());
3212 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
3213
3214 case ARM::CMP_SWAP_64:
3215 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
3216
3217 case ARM::tBL_PUSHLR:
3218 case ARM::BL_PUSHLR: {
3219 const bool Thumb = Opcode == ARM::tBL_PUSHLR;
3220 Register Reg = MI.getOperand(0).getReg();
3221 assert(Reg == ARM::LR && "expect LR register!");
3222 MachineInstrBuilder MIB;
3223 if (Thumb) {
3224 // push {lr}
3225 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
3226 .add(predOps(ARMCC::AL))
3227 .addReg(Reg);
3228
3229 // bl __gnu_mcount_nc
3230 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
3231 } else {
3232 // stmdb sp!, {lr}
3233 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
3234 .addReg(ARM::SP, RegState::Define)
3235 .addReg(ARM::SP)
3236 .add(predOps(ARMCC::AL))
3237 .addReg(Reg);
3238
3239 // bl __gnu_mcount_nc
3240 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
3241 }
3242 MIB.cloneMemRefs(MI);
3243 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3244 MIB.add(MO);
3245 MI.eraseFromParent();
3246 return true;
3247 }
3248 case ARM::t2CALL_BTI: {
3249 MachineFunction &MF = *MI.getMF();
3250 MachineInstrBuilder MIB =
3251 BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL));
3252 MIB.cloneMemRefs(MI);
3253 for (unsigned i = 0; i < MI.getNumOperands(); ++i)
3254 MIB.add(MI.getOperand(i));
3255 if (MI.isCandidateForAdditionalCallInfo())
3256 MF.moveAdditionalCallInfo(&MI, MIB.getInstr());
3257 MIBundleBuilder Bundler(MBB, MI);
3258 Bundler.append(MIB);
3259 Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI)));
3260 finalizeBundle(MBB, Bundler.begin(), Bundler.end());
3261 MI.eraseFromParent();
3262 return true;
3263 }
3264 case ARM::LOADDUAL:
3265 case ARM::STOREDUAL: {
3266 Register PairReg = MI.getOperand(0).getReg();
3267
3268 MachineInstrBuilder MIB =
3269 BuildMI(MBB, MBBI, MI.getDebugLoc(),
3270 TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
3271 .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
3272 Opcode == ARM::LOADDUAL ? RegState::Define : 0)
3273 .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
3274 Opcode == ARM::LOADDUAL ? RegState::Define : 0);
3275 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3276 MIB.add(MO);
3277 MIB.add(predOps(ARMCC::AL));
3278 MIB.cloneMemRefs(MI);
3279 MI.eraseFromParent();
3280 return true;
3281 }
3282 }
3283 }
3284
ExpandMBB(MachineBasicBlock & MBB)3285 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
3286 bool Modified = false;
3287
3288 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3289 while (MBBI != E) {
3290 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
3291 Modified |= ExpandMI(MBB, MBBI, NMBBI);
3292 MBBI = NMBBI;
3293 }
3294
3295 return Modified;
3296 }
3297
runOnMachineFunction(MachineFunction & MF)3298 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
3299 STI = &MF.getSubtarget<ARMSubtarget>();
3300 TII = STI->getInstrInfo();
3301 TRI = STI->getRegisterInfo();
3302 AFI = MF.getInfo<ARMFunctionInfo>();
3303
3304 LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
3305 << "********** Function: " << MF.getName() << '\n');
3306
3307 bool Modified = false;
3308 for (MachineBasicBlock &MBB : MF)
3309 Modified |= ExpandMBB(MBB);
3310 if (VerifyARMPseudo)
3311 MF.verify(this, "After expanding ARM pseudo instructions.");
3312
3313 LLVM_DEBUG(dbgs() << "***************************************************\n");
3314 return Modified;
3315 }
3316
3317 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction
3318 /// expansion pass.
createARMExpandPseudoPass()3319 FunctionPass *llvm::createARMExpandPseudoPass() {
3320 return new ARMExpandPseudo();
3321 }
3322