1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the itinerary class data for the ARM Cortex A9 processors. 10// 11//===----------------------------------------------------------------------===// 12 13// ===---------------------------------------------------------------------===// 14// This section contains legacy support for itineraries. This is 15// required until SD and PostRA schedulers are replaced by MachineScheduler. 16 17// 18// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical 19// Reference Manual". 20// 21// Functional units 22def A9_Issue0 : FuncUnit; // Issue 0 23def A9_Issue1 : FuncUnit; // Issue 1 24def A9_Branch : FuncUnit; // Branch 25def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 26def A9_ALU1 : FuncUnit; // ALU pipeline 1 27def A9_AGU : FuncUnit; // Address generation unit for ld / st 28def A9_NPipe : FuncUnit; // NEON pipeline 29def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer 30def A9_LSUnit : FuncUnit; // L/S Unit 31def A9_DRegsVFP: FuncUnit; // FP register set, VFP side 32def A9_DRegsN : FuncUnit; // FP register set, NEON side 33 34// Bypasses 35def A9_LdBypass : Bypass; 36 37def CortexA9Itineraries : ProcessorItineraries< 38 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, 39 A9_LSUnit, A9_DRegsVFP, A9_DRegsN], 40 [A9_LdBypass], [ 41 // Two fully-pipelined integer ALU pipelines 42 43 // 44 // Move instructions, unconditional 45 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 47 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 48 InstrStage<1, 
[A9_ALU0, A9_ALU1]>], [1, 1]>, 49 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 50 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 51 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 52 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 53 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 54 InstrStage<1, [A9_ALU0, A9_ALU1]>, 55 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 56 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 57 InstrStage<1, [A9_ALU0, A9_ALU1]>, 58 InstrStage<1, [A9_ALU0, A9_ALU1]>, 59 InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, 60 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 61 InstrStage<1, [A9_ALU0, A9_ALU1]>, 62 InstrStage<1, [A9_ALU0, A9_ALU1]>, 63 InstrStage<1, [A9_MUX0], 0>, 64 InstrStage<1, [A9_AGU], 0>, 65 InstrStage<1, [A9_LSUnit]>], [5]>, 66 // 67 // MVN instructions 68 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 69 InstrStage<1, [A9_ALU0, A9_ALU1]>], 70 [1]>, 71 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 72 InstrStage<1, [A9_ALU0, A9_ALU1]>], 73 [1, 1], [NoBypass, A9_LdBypass]>, 74 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 75 InstrStage<2, [A9_ALU0, A9_ALU1]>], 76 [2, 1]>, 77 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 78 InstrStage<3, [A9_ALU0, A9_ALU1]>], 79 [3, 1, 1]>, 80 // 81 // No operand cycles 82 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 83 InstrStage<1, [A9_ALU0, A9_ALU1]>]>, 84 // 85 // Binary Instructions that produce a result 86 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 87 InstrStage<1, [A9_ALU0, A9_ALU1]>], 88 [1, 1], [NoBypass, A9_LdBypass]>, 89 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 90 InstrStage<1, [A9_ALU0, A9_ALU1]>], 91 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, 92 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 93 InstrStage<2, [A9_ALU0, A9_ALU1]>], 94 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, 95 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 96 InstrStage<2, [A9_ALU0, A9_ALU1]>], 97 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, 98 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 99 InstrStage<3, [A9_ALU0, A9_ALU1]>], 100 [3, 1, 1, 1], 101 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, 102 // 103 // Bitwise Instructions that produce a result 104 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 105 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 106 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 107 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 108 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 109 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 110 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 111 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 112 // 113 // Unary Instructions that produce a result 114 115 // CLZ, RBIT, etc. 
116 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 117 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 118 119 // BFC, BFI, UBFX, SBFX 120 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 121 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, 122 123 // 124 // Zero and sign extension instructions 125 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 126 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, 127 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 128 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, 129 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 130 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 131 // 132 // Compare instructions 133 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 134 InstrStage<1, [A9_ALU0, A9_ALU1]>], 135 [1], [A9_LdBypass]>, 136 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 137 InstrStage<1, [A9_ALU0, A9_ALU1]>], 138 [1, 1], [A9_LdBypass, A9_LdBypass]>, 139 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 140 InstrStage<2, [A9_ALU0, A9_ALU1]>], 141 [1, 1], [A9_LdBypass, NoBypass]>, 142 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 143 InstrStage<3, [A9_ALU0, A9_ALU1]>], 144 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, 145 // 146 // Test instructions 147 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 149 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 150 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 151 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 152 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, 153 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 154 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 155 // 156 // Move instructions, conditional 157 // FIXME: Correctly model the extra input dep on the destination. 
158 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 159 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 160 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 161 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 162 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 163 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 164 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 165 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 166 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 167 InstrStage<1, [A9_ALU0, A9_ALU1]>, 168 InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 169 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 170 171 // Integer multiply pipeline 172 // 173 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 174 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, 175 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 176 InstrStage<2, [A9_ALU0]>], 177 [3, 1, 1, 1]>, 178 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 179 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, 180 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 181 InstrStage<2, [A9_ALU0]>], 182 [4, 1, 1, 1]>, 183 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 184 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, 185 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 186 InstrStage<3, [A9_ALU0]>], 187 [4, 5, 1, 1]>, 188 // Integer load pipeline 189 // FIXME: The timings are some rough approximations 190 // 191 // Immediate offset 192 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 193 InstrStage<1, [A9_MUX0], 0>, 194 InstrStage<1, [A9_AGU], 0>, 195 InstrStage<1, [A9_LSUnit]>], 196 [3, 1], [A9_LdBypass]>, 197 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 198 InstrStage<1, [A9_MUX0], 0>, 199 InstrStage<2, [A9_AGU], 0>, 200 InstrStage<1, [A9_LSUnit]>], 201 [4, 1], [A9_LdBypass]>, 202 // 
FIXME: If address is 64-bit aligned, AGU cycles is 1. 203 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 204 InstrStage<1, [A9_MUX0], 0>, 205 InstrStage<2, [A9_AGU], 0>, 206 InstrStage<1, [A9_LSUnit]>], 207 [3, 3, 1], [A9_LdBypass]>, 208 // 209 // Register offset 210 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 211 InstrStage<1, [A9_MUX0], 0>, 212 InstrStage<1, [A9_AGU], 0>, 213 InstrStage<1, [A9_LSUnit]>], 214 [3, 1, 1], [A9_LdBypass]>, 215 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 216 InstrStage<1, [A9_MUX0], 0>, 217 InstrStage<2, [A9_AGU], 0>, 218 InstrStage<1, [A9_LSUnit]>], 219 [4, 1, 1], [A9_LdBypass]>, 220 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 221 InstrStage<1, [A9_MUX0], 0>, 222 InstrStage<2, [A9_AGU], 0>, 223 InstrStage<1, [A9_LSUnit]>], 224 [3, 3, 1, 1], [A9_LdBypass]>, 225 // 226 // Scaled register offset 227 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 228 InstrStage<1, [A9_MUX0], 0>, 229 InstrStage<1, [A9_AGU], 0>, 230 InstrStage<1, [A9_LSUnit], 0>], 231 [4, 1, 1], [A9_LdBypass]>, 232 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 233 InstrStage<1, [A9_MUX0], 0>, 234 InstrStage<2, [A9_AGU], 0>, 235 InstrStage<1, [A9_LSUnit]>], 236 [5, 1, 1], [A9_LdBypass]>, 237 // 238 // Immediate offset with update 239 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 240 InstrStage<1, [A9_MUX0], 0>, 241 InstrStage<1, [A9_AGU], 0>, 242 InstrStage<1, [A9_LSUnit]>], 243 [3, 2, 1], [A9_LdBypass]>, 244 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 245 InstrStage<1, [A9_MUX0], 0>, 246 InstrStage<2, [A9_AGU], 0>, 247 InstrStage<1, [A9_LSUnit]>], 248 [4, 3, 1], [A9_LdBypass]>, 249 // 250 // Register offset with update 251 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 252 InstrStage<1, [A9_MUX0], 0>, 253 InstrStage<1, [A9_AGU], 0>, 
254 InstrStage<1, [A9_LSUnit]>], 255 [3, 2, 1, 1], [A9_LdBypass]>, 256 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 257 InstrStage<1, [A9_MUX0], 0>, 258 InstrStage<2, [A9_AGU], 0>, 259 InstrStage<1, [A9_LSUnit]>], 260 [4, 3, 1, 1], [A9_LdBypass]>, 261 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 262 InstrStage<1, [A9_MUX0], 0>, 263 InstrStage<2, [A9_AGU], 0>, 264 InstrStage<1, [A9_LSUnit]>], 265 [3, 3, 1, 1], [A9_LdBypass]>, 266 // 267 // Scaled register offset with update 268 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 269 InstrStage<1, [A9_MUX0], 0>, 270 InstrStage<1, [A9_AGU], 0>, 271 InstrStage<1, [A9_LSUnit]>], 272 [4, 3, 1, 1], [A9_LdBypass]>, 273 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 274 InstrStage<1, [A9_MUX0], 0>, 275 InstrStage<2, [A9_AGU], 0>, 276 InstrStage<1, [A9_LSUnit]>], 277 [5, 4, 1, 1], [A9_LdBypass]>, 278 // 279 // Load multiple, def is the 5th operand. 280 // FIXME: This assumes 3 to 4 registers. 281 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 282 InstrStage<1, [A9_MUX0], 0>, 283 InstrStage<2, [A9_AGU], 1>, 284 InstrStage<2, [A9_LSUnit]>], 285 [1, 1, 1, 1, 3], 286 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 287 -1>, // dynamic uops 288 // 289 // Load multiple + update, defs are the 1st and 5th operands. 
290 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 291 InstrStage<1, [A9_MUX0], 0>, 292 InstrStage<2, [A9_AGU], 1>, 293 InstrStage<2, [A9_LSUnit]>], 294 [2, 1, 1, 1, 3], 295 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 296 -1>, // dynamic uops 297 // 298 // Load multiple plus branch 299 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 300 InstrStage<1, [A9_MUX0], 0>, 301 InstrStage<1, [A9_AGU], 1>, 302 InstrStage<2, [A9_LSUnit]>, 303 InstrStage<1, [A9_Branch]>], 304 [1, 2, 1, 1, 3], 305 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 306 -1>, // dynamic uops 307 // 308 // Pop, def is the 3rd operand. 309 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 310 InstrStage<1, [A9_MUX0], 0>, 311 InstrStage<2, [A9_AGU], 1>, 312 InstrStage<2, [A9_LSUnit]>], 313 [1, 1, 3], 314 [NoBypass, NoBypass, A9_LdBypass], 315 -1>, // dynamic uops 316 // 317 // Pop + branch, def is the 3rd operand. 318 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 319 InstrStage<1, [A9_MUX0], 0>, 320 InstrStage<2, [A9_AGU], 1>, 321 InstrStage<2, [A9_LSUnit]>, 322 InstrStage<1, [A9_Branch]>], 323 [1, 1, 3], 324 [NoBypass, NoBypass, A9_LdBypass], 325 -1>, // dynamic uops 326 // 327 // iLoadi + iALUr for t2LDRpci_pic. 
328 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 329 InstrStage<1, [A9_MUX0], 0>, 330 InstrStage<1, [A9_AGU], 0>, 331 InstrStage<1, [A9_LSUnit]>, 332 InstrStage<1, [A9_ALU0, A9_ALU1]>], 333 [2, 1]>, 334 335 // Integer store pipeline 336 /// 337 // Immediate offset 338 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 339 InstrStage<1, [A9_MUX0], 0>, 340 InstrStage<1, [A9_AGU], 0>, 341 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 342 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 343 InstrStage<1, [A9_MUX0], 0>, 344 InstrStage<2, [A9_AGU], 1>, 345 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 346 // FIXME: If address is 64-bit aligned, AGU cycles is 1. 347 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 348 InstrStage<1, [A9_MUX0], 0>, 349 InstrStage<2, [A9_AGU], 1>, 350 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 351 // 352 // Register offset 353 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 354 InstrStage<1, [A9_MUX0], 0>, 355 InstrStage<1, [A9_AGU], 0>, 356 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 357 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 358 InstrStage<1, [A9_MUX0], 0>, 359 InstrStage<2, [A9_AGU], 1>, 360 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 361 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 362 InstrStage<1, [A9_MUX0], 0>, 363 InstrStage<2, [A9_AGU], 1>, 364 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 365 // 366 // Scaled register offset 367 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 368 InstrStage<1, [A9_MUX0], 0>, 369 InstrStage<1, [A9_AGU], 0>, 370 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 371 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 372 InstrStage<1, [A9_MUX0], 0>, 373 InstrStage<2, [A9_AGU], 1>, 374 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 375 // 376 // Immediate offset with update 377 InstrItinData<IIC_iStore_iu , [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 378 InstrStage<1, [A9_MUX0], 0>, 379 InstrStage<1, [A9_AGU], 0>, 380 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>, 381 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 382 InstrStage<1, [A9_MUX0], 0>, 383 InstrStage<2, [A9_AGU], 1>, 384 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>, 385 // 386 // Register offset with update 387 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 388 InstrStage<1, [A9_MUX0], 0>, 389 InstrStage<1, [A9_AGU], 0>, 390 InstrStage<1, [A9_LSUnit]>], 391 [2, 1, 1, 1]>, 392 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 393 InstrStage<1, [A9_MUX0], 0>, 394 InstrStage<2, [A9_AGU], 1>, 395 InstrStage<1, [A9_LSUnit]>], 396 [3, 1, 1, 1]>, 397 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 398 InstrStage<1, [A9_MUX0], 0>, 399 InstrStage<2, [A9_AGU], 1>, 400 InstrStage<1, [A9_LSUnit]>], 401 [3, 1, 1, 1]>, 402 // 403 // Scaled register offset with update 404 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 405 InstrStage<1, [A9_MUX0], 0>, 406 InstrStage<1, [A9_AGU], 0>, 407 InstrStage<1, [A9_LSUnit]>], 408 [2, 1, 1, 1]>, 409 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 410 InstrStage<1, [A9_MUX0], 0>, 411 InstrStage<2, [A9_AGU], 1>, 412 InstrStage<1, [A9_LSUnit]>], 413 [3, 1, 1, 1]>, 414 // 415 // Store multiple 416 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 417 InstrStage<1, [A9_MUX0], 0>, 418 InstrStage<1, [A9_AGU], 0>, 419 InstrStage<2, [A9_LSUnit]>], 420 [], [], -1>, // dynamic uops 421 // 422 // Store multiple + update 423 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 424 InstrStage<1, [A9_MUX0], 0>, 425 InstrStage<1, [A9_AGU], 0>, 426 InstrStage<2, [A9_LSUnit]>], 427 [2], [], -1>, // dynamic uops 428 // 429 // Preload 430 InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, 431 432 // Branch 433 
// 434 // no delay slots, so the latency of a branch is unimportant 435 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>, 436 InstrStage<1, [A9_Issue1], 0>, 437 InstrStage<1, [A9_Branch]>]>, 438 439 // VFP and NEON share the same register file. This means that every VFP 440 // instruction should wait for full completion of the consecutive NEON 441 // instruction and vice-versa. We model this behavior with two artificial FUs: 442 // DRegsVFP and DRegsN. 443 // 444 // Every VFP instruction: 445 // - Acquires DRegsVFP resource for 1 cycle 446 // - Reserves DRegsN resource for the whole duration (including time to 447 // register file writeback!). 448 // Every NEON instruction does the same but with FUs swapped. 449 // 450 // Since the reserved FU cannot be acquired, this models precisely 451 // "cross-domain" stalls. 452 453 // VFP 454 // Issue through integer pipeline, and execute in NEON unit. 455 456 // FP Special Register to Integer Register File Move 457 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 458 InstrStage<1, [A9_MUX0], 0>, 459 InstrStage<1, [A9_DRegsVFP], 0, Required>, 460 InstrStage<2, [A9_DRegsN], 0, Reserved>, 461 InstrStage<1, [A9_NPipe]>], 462 [1]>, 463 // 464 // Single-precision FP Unary 465 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 466 InstrStage<1, [A9_MUX0], 0>, 467 InstrStage<1, [A9_DRegsVFP], 0, Required>, 468 // Extra latency cycles since wbck is 2 cycles 469 InstrStage<3, [A9_DRegsN], 0, Reserved>, 470 InstrStage<1, [A9_NPipe]>], 471 [1, 1]>, 472 // 473 // Double-precision FP Unary 474 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 475 InstrStage<1, [A9_MUX0], 0>, 476 InstrStage<1, [A9_DRegsVFP], 0, Required>, 477 // Extra latency cycles since wbck is 2 cycles 478 InstrStage<3, [A9_DRegsN], 0, Reserved>, 479 InstrStage<1, [A9_NPipe]>], 480 [1, 1]>, 481 482 // 483 // Single-precision FP Compare 484 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 485 InstrStage<1, [A9_MUX0], 0>, 486 InstrStage<1, [A9_DRegsVFP], 0, Required>, 487 // Extra latency cycles since wbck is 4 cycles 488 InstrStage<5, [A9_DRegsN], 0, Reserved>, 489 InstrStage<1, [A9_NPipe]>], 490 [1, 1]>, 491 // 492 // Double-precision FP Compare 493 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 494 InstrStage<1, [A9_MUX0], 0>, 495 InstrStage<1, [A9_DRegsVFP], 0, Required>, 496 // Extra latency cycles since wbck is 4 cycles 497 InstrStage<5, [A9_DRegsN], 0, Reserved>, 498 InstrStage<1, [A9_NPipe]>], 499 [1, 1]>, 500 // 501 // Single to Double FP Convert 502 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 503 InstrStage<1, [A9_MUX0], 0>, 504 InstrStage<1, [A9_DRegsVFP], 0, Required>, 505 InstrStage<5, [A9_DRegsN], 0, Reserved>, 506 InstrStage<1, [A9_NPipe]>], 507 [4, 1]>, 508 // 509 // Double to Single FP Convert 510 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 511 InstrStage<1, [A9_MUX0], 0>, 512 InstrStage<1, [A9_DRegsVFP], 0, Required>, 513 InstrStage<5, [A9_DRegsN], 0, Reserved>, 514 InstrStage<1, [A9_NPipe]>], 515 [4, 1]>, 516 517 // 518 // Single to Half FP Convert 519 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 520 InstrStage<1, [A9_MUX0], 0>, 521 InstrStage<1, [A9_DRegsVFP], 0, Required>, 522 InstrStage<5, [A9_DRegsN], 0, Reserved>, 523 InstrStage<1, [A9_NPipe]>], 524 [4, 1]>, 525 // 526 // Half to Single FP Convert 527 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 528 InstrStage<1, [A9_MUX0], 0>, 529 InstrStage<1, [A9_DRegsVFP], 0, Required>, 530 InstrStage<3, [A9_DRegsN], 0, Reserved>, 531 InstrStage<1, [A9_NPipe]>], 532 [2, 1]>, 533 534 // 535 // Single-Precision FP to Integer Convert 536 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 537 InstrStage<1, [A9_MUX0], 0>, 538 InstrStage<1, [A9_DRegsVFP], 0, Required>, 539 InstrStage<5, [A9_DRegsN], 0, Reserved>, 540 InstrStage<1, 
[A9_NPipe]>], 541 [4, 1]>, 542 // 543 // Double-Precision FP to Integer Convert 544 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 545 InstrStage<1, [A9_MUX0], 0>, 546 InstrStage<1, [A9_DRegsVFP], 0, Required>, 547 InstrStage<5, [A9_DRegsN], 0, Reserved>, 548 InstrStage<1, [A9_NPipe]>], 549 [4, 1]>, 550 // 551 // Integer to Single-Precision FP Convert 552 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 553 InstrStage<1, [A9_MUX0], 0>, 554 InstrStage<1, [A9_DRegsVFP], 0, Required>, 555 InstrStage<5, [A9_DRegsN], 0, Reserved>, 556 InstrStage<1, [A9_NPipe]>], 557 [4, 1]>, 558 // 559 // Integer to Double-Precision FP Convert 560 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 561 InstrStage<1, [A9_MUX0], 0>, 562 InstrStage<1, [A9_DRegsVFP], 0, Required>, 563 InstrStage<5, [A9_DRegsN], 0, Reserved>, 564 InstrStage<1, [A9_NPipe]>], 565 [4, 1]>, 566 // 567 // Single-precision FP ALU 568 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 569 InstrStage<1, [A9_MUX0], 0>, 570 InstrStage<1, [A9_DRegsVFP], 0, Required>, 571 InstrStage<5, [A9_DRegsN], 0, Reserved>, 572 InstrStage<1, [A9_NPipe]>], 573 [4, 1, 1]>, 574 // 575 // Double-precision FP ALU 576 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 577 InstrStage<1, [A9_MUX0], 0>, 578 InstrStage<1, [A9_DRegsVFP], 0, Required>, 579 InstrStage<5, [A9_DRegsN], 0, Reserved>, 580 InstrStage<1, [A9_NPipe]>], 581 [4, 1, 1]>, 582 // 583 // Single-precision FP Multiply 584 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 585 InstrStage<1, [A9_MUX0], 0>, 586 InstrStage<1, [A9_DRegsVFP], 0, Required>, 587 InstrStage<6, [A9_DRegsN], 0, Reserved>, 588 InstrStage<1, [A9_NPipe]>], 589 [5, 1, 1]>, 590 // 591 // Double-precision FP Multiply 592 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 593 InstrStage<1, [A9_MUX0], 0>, 594 InstrStage<1, [A9_DRegsVFP], 0, Required>, 595 InstrStage<7, 
[A9_DRegsN], 0, Reserved>, 596 InstrStage<2, [A9_NPipe]>], 597 [6, 1, 1]>, 598 // 599 // Single-precision FP MAC 600 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 601 InstrStage<1, [A9_MUX0], 0>, 602 InstrStage<1, [A9_DRegsVFP], 0, Required>, 603 InstrStage<9, [A9_DRegsN], 0, Reserved>, 604 InstrStage<1, [A9_NPipe]>], 605 [8, 1, 1, 1]>, 606 // 607 // Double-precision FP MAC 608 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 609 InstrStage<1, [A9_MUX0], 0>, 610 InstrStage<1, [A9_DRegsVFP], 0, Required>, 611 InstrStage<10, [A9_DRegsN], 0, Reserved>, 612 InstrStage<2, [A9_NPipe]>], 613 [9, 1, 1, 1]>, 614 // 615 // Single-precision Fused FP MAC 616 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 617 InstrStage<1, [A9_MUX0], 0>, 618 InstrStage<1, [A9_DRegsVFP], 0, Required>, 619 InstrStage<9, [A9_DRegsN], 0, Reserved>, 620 InstrStage<1, [A9_NPipe]>], 621 [8, 1, 1, 1]>, 622 // 623 // Double-precision Fused FP MAC 624 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 625 InstrStage<1, [A9_MUX0], 0>, 626 InstrStage<1, [A9_DRegsVFP], 0, Required>, 627 InstrStage<10, [A9_DRegsN], 0, Reserved>, 628 InstrStage<2, [A9_NPipe]>], 629 [9, 1, 1, 1]>, 630 // 631 // Single-precision FP DIV 632 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 633 InstrStage<1, [A9_MUX0], 0>, 634 InstrStage<1, [A9_DRegsVFP], 0, Required>, 635 InstrStage<16, [A9_DRegsN], 0, Reserved>, 636 InstrStage<10, [A9_NPipe]>], 637 [15, 1, 1]>, 638 // 639 // Double-precision FP DIV 640 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 641 InstrStage<1, [A9_MUX0], 0>, 642 InstrStage<1, [A9_DRegsVFP], 0, Required>, 643 InstrStage<26, [A9_DRegsN], 0, Reserved>, 644 InstrStage<20, [A9_NPipe]>], 645 [25, 1, 1]>, 646 // 647 // Single-precision FP SQRT 648 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 649 InstrStage<1, [A9_MUX0], 0>, 650 InstrStage<1, 
[A9_DRegsVFP], 0, Required>, 651 InstrStage<18, [A9_DRegsN], 0, Reserved>, 652 InstrStage<13, [A9_NPipe]>], 653 [17, 1]>, 654 // 655 // Double-precision FP SQRT 656 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 657 InstrStage<1, [A9_MUX0], 0>, 658 InstrStage<1, [A9_DRegsVFP], 0, Required>, 659 InstrStage<33, [A9_DRegsN], 0, Reserved>, 660 InstrStage<28, [A9_NPipe]>], 661 [32, 1]>, 662 663 // 664 // Integer to Single-precision Move 665 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 666 InstrStage<1, [A9_MUX0], 0>, 667 InstrStage<1, [A9_DRegsVFP], 0, Required>, 668 // Extra 1 latency cycle since wbck is 2 cycles 669 InstrStage<3, [A9_DRegsN], 0, Reserved>, 670 InstrStage<1, [A9_NPipe]>], 671 [1, 1]>, 672 // 673 // Integer to Double-precision Move 674 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 675 InstrStage<1, [A9_MUX0], 0>, 676 InstrStage<1, [A9_DRegsVFP], 0, Required>, 677 // Extra 1 latency cycle since wbck is 2 cycles 678 InstrStage<3, [A9_DRegsN], 0, Reserved>, 679 InstrStage<1, [A9_NPipe]>], 680 [1, 1, 1]>, 681 // 682 // Single-precision to Integer Move 683 // 684 // On A9 move-from-VFP is free to issue with no stall if other VFP 685 // operations are in flight. I assume it still can't dual-issue though. 686 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 687 InstrStage<1, [A9_MUX0], 0>], 688 [2, 1]>, 689 // 690 // Double-precision to Integer Move 691 // 692 // On A9 move-from-VFP is free to issue with no stall if other VFP 693 // operations are in flight. I assume it still can't dual-issue though. 
694 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 695 InstrStage<1, [A9_MUX0], 0>], 696 [2, 1, 1]>, 697 // 698 // Single-precision FP Load 699 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 700 InstrStage<1, [A9_MUX0], 0>, 701 InstrStage<1, [A9_DRegsVFP], 0, Required>, 702 InstrStage<2, [A9_DRegsN], 0, Reserved>, 703 InstrStage<1, [A9_NPipe], 0>, 704 InstrStage<1, [A9_LSUnit]>], 705 [1, 1]>, 706 // 707 // Double-precision FP Load 708 // FIXME: Result latency is 1 if address is 64-bit aligned. 709 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 710 InstrStage<1, [A9_MUX0], 0>, 711 InstrStage<1, [A9_DRegsVFP], 0, Required>, 712 InstrStage<2, [A9_DRegsN], 0, Reserved>, 713 InstrStage<1, [A9_NPipe], 0>, 714 InstrStage<1, [A9_LSUnit]>], 715 [2, 1]>, 716 // 717 // FP Load Multiple 718 // FIXME: assumes 2 doubles which requires 2 LS cycles. 719 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 720 InstrStage<1, [A9_MUX0], 0>, 721 InstrStage<1, [A9_DRegsVFP], 0, Required>, 722 InstrStage<2, [A9_DRegsN], 0, Reserved>, 723 InstrStage<1, [A9_NPipe], 0>, 724 InstrStage<2, [A9_LSUnit]>], 725 [1, 1, 1, 1], [], -1>, // dynamic uops 726 // 727 // FP Load Multiple + update 728 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
729 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 730 InstrStage<1, [A9_MUX0], 0>, 731 InstrStage<1, [A9_DRegsVFP], 0, Required>, 732 InstrStage<2, [A9_DRegsN], 0, Reserved>, 733 InstrStage<1, [A9_NPipe], 0>, 734 InstrStage<2, [A9_LSUnit]>], 735 [2, 1, 1, 1], [], -1>, // dynamic uops 736 // 737 // Single-precision FP Store 738 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 739 InstrStage<1, [A9_MUX0], 0>, 740 InstrStage<1, [A9_DRegsVFP], 0, Required>, 741 InstrStage<2, [A9_DRegsN], 0, Reserved>, 742 InstrStage<1, [A9_NPipe], 0>, 743 InstrStage<1, [A9_LSUnit]>], 744 [1, 1]>, 745 // 746 // Double-precision FP Store 747 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 748 InstrStage<1, [A9_MUX0], 0>, 749 InstrStage<1, [A9_DRegsVFP], 0, Required>, 750 InstrStage<2, [A9_DRegsN], 0, Reserved>, 751 InstrStage<1, [A9_NPipe], 0>, 752 InstrStage<1, [A9_LSUnit]>], 753 [1, 1]>, 754 // 755 // FP Store Multiple 756 // FIXME: assumes 2 doubles which requires 2 LS cycles. 757 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 758 InstrStage<1, [A9_MUX0], 0>, 759 InstrStage<1, [A9_DRegsVFP], 0, Required>, 760 InstrStage<2, [A9_DRegsN], 0, Reserved>, 761 InstrStage<1, [A9_NPipe], 0>, 762 InstrStage<2, [A9_LSUnit]>], 763 [1, 1, 1, 1], [], -1>, // dynamic uops 764 // 765 // FP Store Multiple + update 766 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
767 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 768 InstrStage<1, [A9_MUX0], 0>, 769 InstrStage<1, [A9_DRegsVFP], 0, Required>, 770 InstrStage<2, [A9_DRegsN], 0, Reserved>, 771 InstrStage<1, [A9_NPipe], 0>, 772 InstrStage<2, [A9_LSUnit]>], 773 [2, 1, 1, 1], [], -1>, // dynamic uops 774 // NEON 775 // VLD1 776 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 777 InstrStage<1, [A9_MUX0], 0>, 778 InstrStage<1, [A9_DRegsN], 0, Required>, 779 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 780 InstrStage<1, [A9_NPipe], 0>, 781 InstrStage<1, [A9_LSUnit]>], 782 [1, 1]>, 783 // VLD1x2 784 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 785 InstrStage<1, [A9_MUX0], 0>, 786 InstrStage<1, [A9_DRegsN], 0, Required>, 787 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 788 InstrStage<1, [A9_NPipe], 0>, 789 InstrStage<1, [A9_LSUnit]>], 790 [1, 1, 1]>, 791 // VLD1x3 792 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 793 InstrStage<1, [A9_MUX0], 0>, 794 InstrStage<1, [A9_DRegsN], 0, Required>, 795 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 796 InstrStage<2, [A9_NPipe], 0>, 797 InstrStage<2, [A9_LSUnit]>], 798 [1, 1, 2, 1]>, 799 // VLD1x4 800 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 801 InstrStage<1, [A9_MUX0], 0>, 802 InstrStage<1, [A9_DRegsN], 0, Required>, 803 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 804 InstrStage<2, [A9_NPipe], 0>, 805 InstrStage<2, [A9_LSUnit]>], 806 [1, 1, 2, 2, 1]>, 807 // VLD1u 808 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 809 InstrStage<1, [A9_MUX0], 0>, 810 InstrStage<1, [A9_DRegsN], 0, Required>, 811 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 812 InstrStage<1, [A9_NPipe], 0>, 813 InstrStage<1, [A9_LSUnit]>], 814 [1, 2, 1]>, 815 // VLD1x2u 816 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 817 InstrStage<1, [A9_MUX0], 0>, 818 InstrStage<1, [A9_DRegsN], 0, Required>, 819 InstrStage<7, [A9_DRegsVFP], 
0, Reserved>, 820 InstrStage<1, [A9_NPipe], 0>, 821 InstrStage<1, [A9_LSUnit]>], 822 [1, 1, 2, 1]>, 823 // VLD1x3u 824 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 825 InstrStage<1, [A9_MUX0], 0>, 826 InstrStage<1, [A9_DRegsN], 0, Required>, 827 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 828 InstrStage<2, [A9_NPipe], 0>, 829 InstrStage<2, [A9_LSUnit]>], 830 [1, 1, 2, 2, 1]>, 831 // VLD1x4u 832 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 833 InstrStage<1, [A9_MUX0], 0>, 834 InstrStage<1, [A9_DRegsN], 0, Required>, 835 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 836 InstrStage<2, [A9_NPipe], 0>, 837 InstrStage<2, [A9_LSUnit]>], 838 [1, 1, 2, 2, 2, 1]>, 839 // 840 // VLD1ln 841 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 842 InstrStage<1, [A9_MUX0], 0>, 843 InstrStage<1, [A9_DRegsN], 0, Required>, 844 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 845 InstrStage<2, [A9_NPipe], 0>, 846 InstrStage<2, [A9_LSUnit]>], 847 [3, 1, 1, 1]>, 848 // 849 // VLD1lnu 850 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 851 InstrStage<1, [A9_MUX0], 0>, 852 InstrStage<1, [A9_DRegsN], 0, Required>, 853 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 854 InstrStage<2, [A9_NPipe], 0>, 855 InstrStage<2, [A9_LSUnit]>], 856 [3, 2, 1, 1, 1, 1]>, 857 // 858 // VLD1dup 859 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 860 InstrStage<1, [A9_MUX0], 0>, 861 InstrStage<1, [A9_DRegsN], 0, Required>, 862 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 863 InstrStage<1, [A9_NPipe], 0>, 864 InstrStage<1, [A9_LSUnit]>], 865 [2, 1]>, 866 // 867 // VLD1dupu 868 InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 869 InstrStage<1, [A9_MUX0], 0>, 870 InstrStage<1, [A9_DRegsN], 0, Required>, 871 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 872 InstrStage<1, [A9_NPipe], 0>, 873 InstrStage<1, [A9_LSUnit]>], 874 [2, 2, 1, 1]>, 875 // 876 // VLD2 877 InstrItinData<IIC_VLD2, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 878 InstrStage<1, [A9_MUX0], 0>, 879 InstrStage<1, [A9_DRegsN], 0, Required>, 880 // Extra latency cycles since wbck is 7 cycles 881 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 882 InstrStage<1, [A9_NPipe], 0>, 883 InstrStage<1, [A9_LSUnit]>], 884 [2, 2, 1]>, 885 // 886 // VLD2x2 887 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 888 InstrStage<1, [A9_MUX0], 0>, 889 InstrStage<1, [A9_DRegsN], 0, Required>, 890 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 891 InstrStage<2, [A9_NPipe], 0>, 892 InstrStage<2, [A9_LSUnit]>], 893 [2, 3, 2, 3, 1]>, 894 // 895 // VLD2ln 896 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 897 InstrStage<1, [A9_MUX0], 0>, 898 InstrStage<1, [A9_DRegsN], 0, Required>, 899 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 900 InstrStage<2, [A9_NPipe], 0>, 901 InstrStage<2, [A9_LSUnit]>], 902 [3, 3, 1, 1, 1, 1]>, 903 // 904 // VLD2u 905 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 906 InstrStage<1, [A9_MUX0], 0>, 907 InstrStage<1, [A9_DRegsN], 0, Required>, 908 // Extra latency cycles since wbck is 7 cycles 909 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 910 InstrStage<1, [A9_NPipe], 0>, 911 InstrStage<1, [A9_LSUnit]>], 912 [2, 2, 2, 1, 1, 1]>, 913 // 914 // VLD2x2u 915 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 916 InstrStage<1, [A9_MUX0], 0>, 917 InstrStage<1, [A9_DRegsN], 0, Required>, 918 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 919 InstrStage<2, [A9_NPipe], 0>, 920 InstrStage<2, [A9_LSUnit]>], 921 [2, 3, 2, 3, 2, 1]>, 922 // 923 // VLD2lnu 924 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 925 InstrStage<1, [A9_MUX0], 0>, 926 InstrStage<1, [A9_DRegsN], 0, Required>, 927 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 928 InstrStage<2, [A9_NPipe], 0>, 929 InstrStage<2, [A9_LSUnit]>], 930 [3, 3, 2, 1, 1, 1, 1, 1]>, 931 // 932 // VLD2dup 933 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 934 
InstrStage<1, [A9_MUX0], 0>, 935 InstrStage<1, [A9_DRegsN], 0, Required>, 936 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 937 InstrStage<1, [A9_NPipe], 0>, 938 InstrStage<1, [A9_LSUnit]>], 939 [2, 2, 1]>, 940 // 941 // VLD2dupu 942 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 943 InstrStage<1, [A9_MUX0], 0>, 944 InstrStage<1, [A9_DRegsN], 0, Required>, 945 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 946 InstrStage<1, [A9_NPipe], 0>, 947 InstrStage<1, [A9_LSUnit]>], 948 [2, 2, 2, 1, 1]>, 949 // 950 // VLD3 951 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 952 InstrStage<1, [A9_MUX0], 0>, 953 InstrStage<1, [A9_DRegsN], 0, Required>, 954 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 955 InstrStage<3, [A9_NPipe], 0>, 956 InstrStage<3, [A9_LSUnit]>], 957 [3, 3, 4, 1]>, 958 // 959 // VLD3ln 960 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 961 InstrStage<1, [A9_MUX0], 0>, 962 InstrStage<1, [A9_DRegsN], 0, Required>, 963 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 964 InstrStage<5, [A9_NPipe], 0>, 965 InstrStage<5, [A9_LSUnit]>], 966 [5, 5, 6, 1, 1, 1, 1, 2]>, 967 // 968 // VLD3u 969 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 970 InstrStage<1, [A9_MUX0], 0>, 971 InstrStage<1, [A9_DRegsN], 0, Required>, 972 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 973 InstrStage<3, [A9_NPipe], 0>, 974 InstrStage<3, [A9_LSUnit]>], 975 [3, 3, 4, 2, 1]>, 976 // 977 // VLD3lnu 978 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 979 InstrStage<1, [A9_MUX0], 0>, 980 InstrStage<1, [A9_DRegsN], 0, Required>, 981 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 982 InstrStage<5, [A9_NPipe], 0>, 983 InstrStage<5, [A9_LSUnit]>], 984 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, 985 // 986 // VLD3dup 987 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 988 InstrStage<1, [A9_MUX0], 0>, 989 InstrStage<1, [A9_DRegsN], 0, Required>, 990 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 991 
InstrStage<3, [A9_NPipe], 0>, 992 InstrStage<3, [A9_LSUnit]>], 993 [3, 3, 4, 1]>, 994 // 995 // VLD3dupu 996 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 997 InstrStage<1, [A9_MUX0], 0>, 998 InstrStage<1, [A9_DRegsN], 0, Required>, 999 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1000 InstrStage<3, [A9_NPipe], 0>, 1001 InstrStage<3, [A9_LSUnit]>], 1002 [3, 3, 4, 2, 1, 1]>, 1003 // 1004 // VLD4 1005 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1006 InstrStage<1, [A9_MUX0], 0>, 1007 InstrStage<1, [A9_DRegsN], 0, Required>, 1008 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1009 InstrStage<3, [A9_NPipe], 0>, 1010 InstrStage<3, [A9_LSUnit]>], 1011 [3, 3, 4, 4, 1]>, 1012 // 1013 // VLD4ln 1014 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1015 InstrStage<1, [A9_MUX0], 0>, 1016 InstrStage<1, [A9_DRegsN], 0, Required>, 1017 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1018 InstrStage<4, [A9_NPipe], 0>, 1019 InstrStage<4, [A9_LSUnit]>], 1020 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 1021 // 1022 // VLD4u 1023 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1024 InstrStage<1, [A9_MUX0], 0>, 1025 InstrStage<1, [A9_DRegsN], 0, Required>, 1026 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1027 InstrStage<3, [A9_NPipe], 0>, 1028 InstrStage<3, [A9_LSUnit]>], 1029 [3, 3, 4, 4, 2, 1]>, 1030 // 1031 // VLD4lnu 1032 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1033 InstrStage<1, [A9_MUX0], 0>, 1034 InstrStage<1, [A9_DRegsN], 0, Required>, 1035 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1036 InstrStage<4, [A9_NPipe], 0>, 1037 InstrStage<4, [A9_LSUnit]>], 1038 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 1039 // 1040 // VLD4dup 1041 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1042 InstrStage<1, [A9_MUX0], 0>, 1043 InstrStage<1, [A9_DRegsN], 0, Required>, 1044 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1045 InstrStage<2, [A9_NPipe], 0>, 1046 InstrStage<2, [A9_LSUnit]>], 
1047 [2, 2, 3, 3, 1]>, 1048 // 1049 // VLD4dupu 1050 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1051 InstrStage<1, [A9_MUX0], 0>, 1052 InstrStage<1, [A9_DRegsN], 0, Required>, 1053 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1054 InstrStage<2, [A9_NPipe], 0>, 1055 InstrStage<2, [A9_LSUnit]>], 1056 [2, 2, 3, 3, 2, 1, 1]>, 1057 // 1058 // VST1 1059 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1060 InstrStage<1, [A9_MUX0], 0>, 1061 InstrStage<1, [A9_DRegsN], 0, Required>, 1062 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1063 InstrStage<1, [A9_NPipe], 0>, 1064 InstrStage<1, [A9_LSUnit]>], 1065 [1, 1, 1]>, 1066 // 1067 // VST1x2 1068 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1069 InstrStage<1, [A9_MUX0], 0>, 1070 InstrStage<1, [A9_DRegsN], 0, Required>, 1071 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1072 InstrStage<1, [A9_NPipe], 0>, 1073 InstrStage<1, [A9_LSUnit]>], 1074 [1, 1, 1, 1]>, 1075 // 1076 // VST1x3 1077 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1078 InstrStage<1, [A9_MUX0], 0>, 1079 InstrStage<1, [A9_DRegsN], 0, Required>, 1080 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1081 InstrStage<2, [A9_NPipe], 0>, 1082 InstrStage<2, [A9_LSUnit]>], 1083 [1, 1, 1, 1, 2]>, 1084 // 1085 // VST1x4 1086 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1087 InstrStage<1, [A9_MUX0], 0>, 1088 InstrStage<1, [A9_DRegsN], 0, Required>, 1089 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1090 InstrStage<2, [A9_NPipe], 0>, 1091 InstrStage<2, [A9_LSUnit]>], 1092 [1, 1, 1, 1, 2, 2]>, 1093 // 1094 // VST1u 1095 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1096 InstrStage<1, [A9_MUX0], 0>, 1097 InstrStage<1, [A9_DRegsN], 0, Required>, 1098 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1099 InstrStage<1, [A9_NPipe], 0>, 1100 InstrStage<1, [A9_LSUnit]>], 1101 [2, 1, 1, 1, 1]>, 1102 // 1103 // VST1x2u 1104 InstrItinData<IIC_VST1x2u, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1105 InstrStage<1, [A9_MUX0], 0>, 1106 InstrStage<1, [A9_DRegsN], 0, Required>, 1107 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1108 InstrStage<1, [A9_NPipe], 0>, 1109 InstrStage<1, [A9_LSUnit]>], 1110 [2, 1, 1, 1, 1, 1]>, 1111 // 1112 // VST1x3u 1113 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1114 InstrStage<1, [A9_MUX0], 0>, 1115 InstrStage<1, [A9_DRegsN], 0, Required>, 1116 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1117 InstrStage<2, [A9_NPipe], 0>, 1118 InstrStage<2, [A9_LSUnit]>], 1119 [2, 1, 1, 1, 1, 1, 2]>, 1120 // 1121 // VST1x4u 1122 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1123 InstrStage<1, [A9_MUX0], 0>, 1124 InstrStage<1, [A9_DRegsN], 0, Required>, 1125 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1126 InstrStage<2, [A9_NPipe], 0>, 1127 InstrStage<2, [A9_LSUnit]>], 1128 [2, 1, 1, 1, 1, 1, 2, 2]>, 1129 // 1130 // VST1ln 1131 InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1132 InstrStage<1, [A9_MUX0], 0>, 1133 InstrStage<1, [A9_DRegsN], 0, Required>, 1134 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1135 InstrStage<1, [A9_NPipe], 0>, 1136 InstrStage<1, [A9_LSUnit]>], 1137 [1, 1, 1]>, 1138 // 1139 // VST1lnu 1140 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1141 InstrStage<1, [A9_MUX0], 0>, 1142 InstrStage<1, [A9_DRegsN], 0, Required>, 1143 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1144 InstrStage<1, [A9_NPipe], 0>, 1145 InstrStage<1, [A9_LSUnit]>], 1146 [2, 1, 1, 1, 1]>, 1147 // 1148 // VST2 1149 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1150 InstrStage<1, [A9_MUX0], 0>, 1151 InstrStage<1, [A9_DRegsN], 0, Required>, 1152 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1153 InstrStage<1, [A9_NPipe], 0>, 1154 InstrStage<1, [A9_LSUnit]>], 1155 [1, 1, 1, 1]>, 1156 // 1157 // VST2x2 1158 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1159 InstrStage<1, [A9_MUX0], 0>, 1160 InstrStage<1, 
[A9_DRegsN], 0, Required>, 1161 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1162 InstrStage<3, [A9_NPipe], 0>, 1163 InstrStage<3, [A9_LSUnit]>], 1164 [1, 1, 1, 1, 2, 2]>, 1165 // 1166 // VST2u 1167 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1168 InstrStage<1, [A9_MUX0], 0>, 1169 InstrStage<1, [A9_DRegsN], 0, Required>, 1170 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1171 InstrStage<1, [A9_NPipe], 0>, 1172 InstrStage<1, [A9_LSUnit]>], 1173 [2, 1, 1, 1, 1, 1]>, 1174 // 1175 // VST2x2u 1176 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1177 InstrStage<1, [A9_MUX0], 0>, 1178 InstrStage<1, [A9_DRegsN], 0, Required>, 1179 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1180 InstrStage<3, [A9_NPipe], 0>, 1181 InstrStage<3, [A9_LSUnit]>], 1182 [2, 1, 1, 1, 1, 1, 2, 2]>, 1183 // 1184 // VST2ln 1185 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1186 InstrStage<1, [A9_MUX0], 0>, 1187 InstrStage<1, [A9_DRegsN], 0, Required>, 1188 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1189 InstrStage<1, [A9_NPipe], 0>, 1190 InstrStage<1, [A9_LSUnit]>], 1191 [1, 1, 1, 1]>, 1192 // 1193 // VST2lnu 1194 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1195 InstrStage<1, [A9_MUX0], 0>, 1196 InstrStage<1, [A9_DRegsN], 0, Required>, 1197 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1198 InstrStage<1, [A9_NPipe], 0>, 1199 InstrStage<1, [A9_LSUnit]>], 1200 [2, 1, 1, 1, 1, 1]>, 1201 // 1202 // VST3 1203 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1204 InstrStage<1, [A9_MUX0], 0>, 1205 InstrStage<1, [A9_DRegsN], 0, Required>, 1206 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1207 InstrStage<2, [A9_NPipe], 0>, 1208 InstrStage<2, [A9_LSUnit]>], 1209 [1, 1, 1, 1, 2]>, 1210 // 1211 // VST3u 1212 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1213 InstrStage<1, [A9_MUX0], 0>, 1214 InstrStage<1, [A9_DRegsN], 0, Required>, 1215 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1216 
InstrStage<2, [A9_NPipe], 0>, 1217 InstrStage<2, [A9_LSUnit]>], 1218 [2, 1, 1, 1, 1, 1, 2]>, 1219 // 1220 // VST3ln 1221 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1222 InstrStage<1, [A9_MUX0], 0>, 1223 InstrStage<1, [A9_DRegsN], 0, Required>, 1224 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1225 InstrStage<3, [A9_NPipe], 0>, 1226 InstrStage<3, [A9_LSUnit]>], 1227 [1, 1, 1, 1, 2]>, 1228 // 1229 // VST3lnu 1230 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1231 InstrStage<1, [A9_MUX0], 0>, 1232 InstrStage<1, [A9_DRegsN], 0, Required>, 1233 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1234 InstrStage<3, [A9_NPipe], 0>, 1235 InstrStage<3, [A9_LSUnit]>], 1236 [2, 1, 1, 1, 1, 1, 2]>, 1237 // 1238 // VST4 1239 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1240 InstrStage<1, [A9_MUX0], 0>, 1241 InstrStage<1, [A9_DRegsN], 0, Required>, 1242 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1243 InstrStage<2, [A9_NPipe], 0>, 1244 InstrStage<2, [A9_LSUnit]>], 1245 [1, 1, 1, 1, 2, 2]>, 1246 // 1247 // VST4u 1248 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1249 InstrStage<1, [A9_MUX0], 0>, 1250 InstrStage<1, [A9_DRegsN], 0, Required>, 1251 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1252 InstrStage<2, [A9_NPipe], 0>, 1253 InstrStage<2, [A9_LSUnit]>], 1254 [2, 1, 1, 1, 1, 1, 2, 2]>, 1255 // 1256 // VST4ln 1257 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1258 InstrStage<1, [A9_MUX0], 0>, 1259 InstrStage<1, [A9_DRegsN], 0, Required>, 1260 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1261 InstrStage<2, [A9_NPipe], 0>, 1262 InstrStage<2, [A9_LSUnit]>], 1263 [1, 1, 1, 1, 2, 2]>, 1264 // 1265 // VST4lnu 1266 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1267 InstrStage<1, [A9_MUX0], 0>, 1268 InstrStage<1, [A9_DRegsN], 0, Required>, 1269 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1270 InstrStage<2, [A9_NPipe], 0>, 1271 InstrStage<2, [A9_LSUnit]>], 1272 [2, 
1, 1, 1, 1, 1, 2, 2]>, 1273 1274 // 1275 // Double-register Integer Unary 1276 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1277 InstrStage<1, [A9_MUX0], 0>, 1278 InstrStage<1, [A9_DRegsN], 0, Required>, 1279 // Extra latency cycles since wbck is 6 cycles 1280 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1281 InstrStage<1, [A9_NPipe]>], 1282 [4, 2]>, 1283 // 1284 // Quad-register Integer Unary 1285 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1286 InstrStage<1, [A9_MUX0], 0>, 1287 InstrStage<1, [A9_DRegsN], 0, Required>, 1288 // Extra latency cycles since wbck is 6 cycles 1289 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1290 InstrStage<1, [A9_NPipe]>], 1291 [4, 2]>, 1292 // 1293 // Double-register Integer Q-Unary 1294 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1295 InstrStage<1, [A9_MUX0], 0>, 1296 InstrStage<1, [A9_DRegsN], 0, Required>, 1297 // Extra latency cycles since wbck is 6 cycles 1298 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1299 InstrStage<1, [A9_NPipe]>], 1300 [4, 1]>, 1301 // 1302 // Quad-register Integer Q-Unary 1303 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1304 InstrStage<1, [A9_MUX0], 0>, 1305 InstrStage<1, [A9_DRegsN], 0, Required>, 1306 // Extra latency cycles since wbck is 6 cycles 1307 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1308 InstrStage<1, [A9_NPipe]>], 1309 [4, 1]>, 1310 // 1311 // Double-register Integer Binary 1312 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1313 InstrStage<1, [A9_MUX0], 0>, 1314 InstrStage<1, [A9_DRegsN], 0, Required>, 1315 // Extra latency cycles since wbck is 6 cycles 1316 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1317 InstrStage<1, [A9_NPipe]>], 1318 [3, 2, 2]>, 1319 // 1320 // Quad-register Integer Binary 1321 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1322 InstrStage<1, [A9_MUX0], 0>, 1323 InstrStage<1, [A9_DRegsN], 0, Required>, 1324 // Extra latency
cycles since wbck is 6 cycles 1325 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1326 InstrStage<1, [A9_NPipe]>], 1327 [3, 2, 2]>, 1328 // 1329 // Double-register Integer Subtract 1330 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1331 InstrStage<1, [A9_MUX0], 0>, 1332 InstrStage<1, [A9_DRegsN], 0, Required>, 1333 // Extra latency cycles since wbck is 6 cycles 1334 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1335 InstrStage<1, [A9_NPipe]>], 1336 [3, 2, 1]>, 1337 // 1338 // Quad-register Integer Subtract 1339 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1340 InstrStage<1, [A9_MUX0], 0>, 1341 InstrStage<1, [A9_DRegsN], 0, Required>, 1342 // Extra latency cycles since wbck is 6 cycles 1343 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1344 InstrStage<1, [A9_NPipe]>], 1345 [3, 2, 1]>, 1346 // 1347 // Double-register Integer Shift 1348 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1349 InstrStage<1, [A9_MUX0], 0>, 1350 InstrStage<1, [A9_DRegsN], 0, Required>, 1351 // Extra latency cycles since wbck is 6 cycles 1352 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1353 InstrStage<1, [A9_NPipe]>], 1354 [3, 1, 1]>, 1355 // 1356 // Quad-register Integer Shift 1357 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1358 InstrStage<1, [A9_MUX0], 0>, 1359 InstrStage<1, [A9_DRegsN], 0, Required>, 1360 // Extra latency cycles since wbck is 6 cycles 1361 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1362 InstrStage<1, [A9_NPipe]>], 1363 [3, 1, 1]>, 1364 // 1365 // Double-register Integer Shift (4 cycle) 1366 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1367 InstrStage<1, [A9_MUX0], 0>, 1368 InstrStage<1, [A9_DRegsN], 0, Required>, 1369 // Extra latency cycles since wbck is 6 cycles 1370 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1371 InstrStage<1, [A9_NPipe]>], 1372 [4, 1, 1]>, 1373 // 1374 // Quad-register Integer Shift (4 cycle) 1375 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1376 InstrStage<1, [A9_MUX0], 0>, 1377 InstrStage<1, [A9_DRegsN], 0, Required>, 1378 // Extra latency cycles since wbck is 6 cycles 1379 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1380 InstrStage<1, [A9_NPipe]>], 1381 [4, 1, 1]>, 1382 // 1383 // Double-register Integer Binary (4 cycle) 1384 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1385 InstrStage<1, [A9_MUX0], 0>, 1386 InstrStage<1, [A9_DRegsN], 0, Required>, 1387 // Extra latency cycles since wbck is 6 cycles 1388 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1389 InstrStage<1, [A9_NPipe]>], 1390 [4, 2, 2]>, 1391 // 1392 // Quad-register Integer Binary (4 cycle) 1393 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1394 InstrStage<1, [A9_MUX0], 0>, 1395 InstrStage<1, [A9_DRegsN], 0, Required>, 1396 // Extra latency cycles since wbck is 6 cycles 1397 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1398 InstrStage<1, [A9_NPipe]>], 1399 [4, 2, 2]>, 1400 // 1401 // Double-register Integer Subtract (4 cycle) 1402 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1403 InstrStage<1, [A9_MUX0], 0>, 1404 InstrStage<1, [A9_DRegsN], 0, Required>, 1405 // Extra latency cycles since wbck is 6 cycles 1406 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1407 InstrStage<1, [A9_NPipe]>], 1408 [4, 2, 1]>, 1409 // 1410 // Quad-register Integer Subtract (4 cycle) 1411 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1412 InstrStage<1, [A9_MUX0], 0>, 1413 InstrStage<1, [A9_DRegsN], 0, Required>, 1414 // Extra latency cycles since wbck is 6 cycles 1415 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1416 InstrStage<1, [A9_NPipe]>], 1417 [4, 2, 1]>, 1418 1419 // 1420 // Double-register Integer Count 1421 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1422 InstrStage<1, [A9_MUX0], 0>, 1423 InstrStage<1, [A9_DRegsN], 0, Required>, 1424 // Extra latency cycles since wbck is 6 cycles 1425 InstrStage<7, [A9_DRegsVFP], 0, 
Reserved>, 1426 InstrStage<1, [A9_NPipe]>], 1427 [3, 2, 2]>, 1428 // 1429 // Quad-register Integer Count 1430 // Result written in N3, but that is relative to the last cycle of multicycle, 1431 // so we use 4 for those cases 1432 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1433 InstrStage<1, [A9_MUX0], 0>, 1434 InstrStage<1, [A9_DRegsN], 0, Required>, 1435 // Extra latency cycles since wbck is 7 cycles 1436 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1437 InstrStage<2, [A9_NPipe]>], 1438 [4, 2, 2]>, 1439 // 1440 // Double-register Absolute Difference and Accumulate 1441 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1442 InstrStage<1, [A9_MUX0], 0>, 1443 InstrStage<1, [A9_DRegsN], 0, Required>, 1444 // Extra latency cycles since wbck is 6 cycles 1445 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1446 InstrStage<1, [A9_NPipe]>], 1447 [6, 3, 2, 1]>, 1448 // 1449 // Quad-register Absolute Difference and Accumulate 1450 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1451 InstrStage<1, [A9_MUX0], 0>, 1452 InstrStage<1, [A9_DRegsN], 0, Required>, 1453 // Extra latency cycles since wbck is 6 cycles 1454 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1455 InstrStage<2, [A9_NPipe]>], 1456 [6, 3, 2, 1]>, 1457 // 1458 // Double-register Integer Pair Add Long 1459 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1460 InstrStage<1, [A9_MUX0], 0>, 1461 InstrStage<1, [A9_DRegsN], 0, Required>, 1462 // Extra latency cycles since wbck is 6 cycles 1463 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1464 InstrStage<1, [A9_NPipe]>], 1465 [6, 3, 1]>, 1466 // 1467 // Quad-register Integer Pair Add Long 1468 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1469 InstrStage<1, [A9_MUX0], 0>, 1470 InstrStage<1, [A9_DRegsN], 0, Required>, 1471 // Extra latency cycles since wbck is 6 cycles 1472 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1473 InstrStage<2, [A9_NPipe]>], 1474 [6, 3, 1]>, 1475 1476 // 
1477 // Double-register Integer Multiply (.8, .16) 1478 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1479 InstrStage<1, [A9_MUX0], 0>, 1480 InstrStage<1, [A9_DRegsN], 0, Required>, 1481 // Extra latency cycles since wbck is 6 cycles 1482 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1483 InstrStage<1, [A9_NPipe]>], 1484 [6, 2, 2]>, 1485 // 1486 // Quad-register Integer Multiply (.8, .16) 1487 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1488 InstrStage<1, [A9_MUX0], 0>, 1489 InstrStage<1, [A9_DRegsN], 0, Required>, 1490 // Extra latency cycles since wbck is 7 cycles 1491 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1492 InstrStage<2, [A9_NPipe]>], 1493 [7, 2, 2]>, 1494 1495 // 1496 // Double-register Integer Multiply (.32) 1497 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1498 InstrStage<1, [A9_MUX0], 0>, 1499 InstrStage<1, [A9_DRegsN], 0, Required>, 1500 // Extra latency cycles since wbck is 7 cycles 1501 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1502 InstrStage<2, [A9_NPipe]>], 1503 [7, 2, 1]>, 1504 // 1505 // Quad-register Integer Multiply (.32) 1506 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1507 InstrStage<1, [A9_MUX0], 0>, 1508 InstrStage<1, [A9_DRegsN], 0, Required>, 1509 // Extra latency cycles since wbck is 9 cycles 1510 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1511 InstrStage<4, [A9_NPipe]>], 1512 [9, 2, 1]>, 1513 // 1514 // Double-register Integer Multiply-Accumulate (.8, .16) 1515 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1516 InstrStage<1, [A9_MUX0], 0>, 1517 InstrStage<1, [A9_DRegsN], 0, Required>, 1518 // Extra latency cycles since wbck is 6 cycles 1519 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1520 InstrStage<1, [A9_NPipe]>], 1521 [6, 3, 2, 2]>, 1522 // 1523 // Double-register Integer Multiply-Accumulate (.32) 1524 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1525 InstrStage<1, [A9_MUX0], 
0>, 1526 InstrStage<1, [A9_DRegsN], 0, Required>, 1527 // Extra latency cycles since wbck is 7 cycles 1528 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1529 InstrStage<2, [A9_NPipe]>], 1530 [7, 3, 2, 1]>, 1531 // 1532 // Quad-register Integer Multiply-Accumulate (.8, .16) 1533 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1534 InstrStage<1, [A9_MUX0], 0>, 1535 InstrStage<1, [A9_DRegsN], 0, Required>, 1536 // Extra latency cycles since wbck is 7 cycles 1537 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1538 InstrStage<2, [A9_NPipe]>], 1539 [7, 3, 2, 2]>, 1540 // 1541 // Quad-register Integer Multiply-Accumulate (.32) 1542 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1543 InstrStage<1, [A9_MUX0], 0>, 1544 InstrStage<1, [A9_DRegsN], 0, Required>, 1545 // Extra latency cycles since wbck is 9 cycles 1546 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1547 InstrStage<4, [A9_NPipe]>], 1548 [9, 3, 2, 1]>, 1549 1550 // 1551 // Move 1552 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1553 InstrStage<1, [A9_MUX0], 0>, 1554 InstrStage<1, [A9_DRegsN], 0, Required>, 1555 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1556 InstrStage<1, [A9_NPipe]>], 1557 [1,1]>, 1558 // 1559 // Move Immediate 1560 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1561 InstrStage<1, [A9_MUX0], 0>, 1562 InstrStage<1, [A9_DRegsN], 0, Required>, 1563 // Extra latency cycles since wbck is 6 cycles 1564 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1565 InstrStage<1, [A9_NPipe]>], 1566 [3]>, 1567 // 1568 // Double-register Permute Move 1569 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1570 InstrStage<1, [A9_MUX0], 0>, 1571 InstrStage<1, [A9_DRegsN], 0, Required>, 1572 // Extra latency cycles since wbck is 6 cycles 1573 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1574 InstrStage<1, [A9_NPipe]>], 1575 [2, 1]>, 1576 // 1577 // Quad-register Permute Move 1578 InstrItinData<IIC_VMOVQ, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1579 InstrStage<1, [A9_MUX0], 0>, 1580 InstrStage<1, [A9_DRegsN], 0, Required>, 1581 // Extra latency cycles since wbck is 6 cycles 1582 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1583 InstrStage<1, [A9_NPipe]>], 1584 [2, 1]>, 1585 // 1586 // Integer to Single-precision Move 1587 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1588 InstrStage<1, [A9_MUX0], 0>, 1589 InstrStage<1, [A9_DRegsN], 0, Required>, 1590 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1591 InstrStage<1, [A9_NPipe]>], 1592 [1, 1]>, 1593 // 1594 // Integer to Double-precision Move 1595 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1596 InstrStage<1, [A9_MUX0], 0>, 1597 InstrStage<1, [A9_DRegsN], 0, Required>, 1598 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1599 InstrStage<1, [A9_NPipe]>], 1600 [1, 1, 1]>, 1601 // 1602 // Single-precision to Integer Move 1603 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1604 InstrStage<1, [A9_MUX0], 0>, 1605 InstrStage<1, [A9_DRegsN], 0, Required>, 1606 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1607 InstrStage<1, [A9_NPipe]>], 1608 [2, 1]>, 1609 // 1610 // Double-precision to Integer Move 1611 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1612 InstrStage<1, [A9_MUX0], 0>, 1613 InstrStage<1, [A9_DRegsN], 0, Required>, 1614 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1615 InstrStage<1, [A9_NPipe]>], 1616 [2, 2, 1]>, 1617 // 1618 // Integer to Lane Move 1619 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1620 InstrStage<1, [A9_MUX0], 0>, 1621 InstrStage<1, [A9_DRegsN], 0, Required>, 1622 InstrStage<4, [A9_DRegsVFP], 0, Reserved>, 1623 InstrStage<2, [A9_NPipe]>], 1624 [3, 1, 1]>, 1625 1626 // 1627 // Vector narrow move 1628 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1629 InstrStage<1, [A9_MUX0], 0>, 1630 InstrStage<1, [A9_DRegsN], 0, Required>, 1631 // Extra latency cycles since wbck is 6 cycles 1632 
InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1633 InstrStage<1, [A9_NPipe]>], 1634 [3, 1]>, 1635 // 1636 // Double-register FP Unary 1637 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1638 InstrStage<1, [A9_MUX0], 0>, 1639 InstrStage<1, [A9_DRegsN], 0, Required>, 1640 // Extra latency cycles since wbck is 6 cycles 1641 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1642 InstrStage<1, [A9_NPipe]>], 1643 [5, 2]>, 1644 // 1645 // Quad-register FP Unary 1646 // Result written in N5, but that is relative to the last cycle of multicycle, 1647 // so we use 6 for those cases 1648 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1649 InstrStage<1, [A9_MUX0], 0>, 1650 InstrStage<1, [A9_DRegsN], 0, Required>, 1651 // Extra latency cycles since wbck is 7 cycles 1652 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1653 InstrStage<2, [A9_NPipe]>], 1654 [6, 2]>, 1655 // 1656 // Double-register FP Binary 1657 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1658 // optimistic. 1659 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1660 InstrStage<1, [A9_MUX0], 0>, 1661 InstrStage<1, [A9_DRegsN], 0, Required>, 1662 // Extra latency cycles since wbck is 6 cycles 1663 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1664 InstrStage<1, [A9_NPipe]>], 1665 [5, 2, 2]>, 1666 1667 // 1668 // VPADD, etc. 
1669 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1670 InstrStage<1, [A9_MUX0], 0>, 1671 InstrStage<1, [A9_DRegsN], 0, Required>, 1672 // Extra latency cycles since wbck is 6 cycles 1673 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1674 InstrStage<1, [A9_NPipe]>], 1675 [5, 1, 1]>, 1676 // 1677 // Double-register FP VMUL 1678 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1679 InstrStage<1, [A9_MUX0], 0>, 1680 InstrStage<1, [A9_DRegsN], 0, Required>, 1681 // Extra latency cycles since wbck is 6 cycles 1682 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1683 InstrStage<1, [A9_NPipe]>], 1684 [5, 2, 1]>, 1685 // 1686 // Quad-register FP Binary 1687 // Result written in N5, but that is relative to the last cycle of multicycle, 1688 // so we use 6 for those cases 1689 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1690 // optimistic. 1691 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1692 InstrStage<1, [A9_MUX0], 0>, 1693 InstrStage<1, [A9_DRegsN], 0, Required>, 1694 // Extra latency cycles since wbck is 7 cycles 1695 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1696 InstrStage<2, [A9_NPipe]>], 1697 [6, 2, 2]>, 1698 // 1699 // Quad-register FP VMUL 1700 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1701 InstrStage<1, [A9_MUX0], 0>, 1702 InstrStage<1, [A9_DRegsN], 0, Required>, 1703 // Extra latency cycles since wbck is 7 cycles 1704 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1705 InstrStage<1, [A9_NPipe]>], 1706 [6, 2, 1]>, 1707 // 1708 // Double-register FP Multiply-Accumulate 1709 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1710 InstrStage<1, [A9_MUX0], 0>, 1711 InstrStage<1, [A9_DRegsN], 0, Required>, 1712 // Extra latency cycles since wbck is 7 cycles 1713 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1714 InstrStage<2, [A9_NPipe]>], 1715 [6, 3, 2, 1]>, 1716 // 1717 // Quad-register FP Multiply-Accumulate 1718 // Result written in N9,
but that is relative to the last cycle of multicycle, 1719 // so we use 10 for those cases 1720 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1721 InstrStage<1, [A9_MUX0], 0>, 1722 InstrStage<1, [A9_DRegsN], 0, Required>, 1723 // Extra latency cycles since wbck is 9 cycles 1724 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1725 InstrStage<4, [A9_NPipe]>], 1726 [8, 4, 2, 1]>, 1727 // 1728 // Double-register Fused FP Multiple-Accumulate 1729 InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1730 InstrStage<1, [A9_MUX0], 0>, 1731 InstrStage<1, [A9_DRegsN], 0, Required>, 1732 // Extra latency cycles since wbck is 7 cycles 1733 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1734 InstrStage<2, [A9_NPipe]>], 1735 [6, 3, 2, 1]>, 1736 // 1737 // Quad-register Fused FP Multiple-Accumulate 1738 // Result written in N9, but that is relative to the last cycle of multicycle, 1739 // so we use 10 for those cases 1740 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1741 InstrStage<1, [A9_MUX0], 0>, 1742 InstrStage<1, [A9_DRegsN], 0, Required>, 1743 // Extra latency cycles since wbck is 9 cycles 1744 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1745 InstrStage<4, [A9_NPipe]>], 1746 [8, 4, 2, 1]>, 1747 // 1748 // Double-register Reciprical Step 1749 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1750 InstrStage<1, [A9_MUX0], 0>, 1751 InstrStage<1, [A9_DRegsN], 0, Required>, 1752 // Extra latency cycles since wbck is 10 cycles 1753 InstrStage<11, [A9_DRegsVFP], 0, Reserved>, 1754 InstrStage<1, [A9_NPipe]>], 1755 [9, 2, 2]>, 1756 // 1757 // Quad-register Reciprical Step 1758 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1759 InstrStage<1, [A9_MUX0], 0>, 1760 InstrStage<1, [A9_DRegsN], 0, Required>, 1761 // Extra latency cycles since wbck is 11 cycles 1762 InstrStage<12, [A9_DRegsVFP], 0, Reserved>, 1763 InstrStage<2, [A9_NPipe]>], 1764 [10, 2, 2]>, 1765 // 1766 // Double-register 
Permute 1767 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1768 InstrStage<1, [A9_MUX0], 0>, 1769 InstrStage<1, [A9_DRegsN], 0, Required>, 1770 // Extra latency cycles since wbck is 6 cycles 1771 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1772 InstrStage<1, [A9_NPipe]>], 1773 [2, 2, 1, 1]>, 1774 // 1775 // Quad-register Permute 1776 // Result written in N2, but that is relative to the last cycle of multicycle, 1777 // so we use 3 for those cases 1778 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1779 InstrStage<1, [A9_MUX0], 0>, 1780 InstrStage<1, [A9_DRegsN], 0, Required>, 1781 // Extra latency cycles since wbck is 7 cycles 1782 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1783 InstrStage<2, [A9_NPipe]>], 1784 [3, 3, 1, 1]>, 1785 // 1786 // Quad-register Permute (3 cycle issue) 1787 // Result written in N2, but that is relative to the last cycle of multicycle, 1788 // so we use 4 for those cases 1789 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1790 InstrStage<1, [A9_MUX0], 0>, 1791 InstrStage<1, [A9_DRegsN], 0, Required>, 1792 // Extra latency cycles since wbck is 8 cycles 1793 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1794 InstrStage<3, [A9_NPipe]>], 1795 [4, 4, 1, 1]>, 1796 1797 // 1798 // Double-register VEXT 1799 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1800 InstrStage<1, [A9_MUX0], 0>, 1801 InstrStage<1, [A9_DRegsN], 0, Required>, 1802 // Extra latency cycles since wbck is 6 cycles 1803 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1804 InstrStage<1, [A9_NPipe]>], 1805 [2, 1, 1]>, 1806 // 1807 // Quad-register VEXT 1808 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1809 InstrStage<1, [A9_MUX0], 0>, 1810 InstrStage<1, [A9_DRegsN], 0, Required>, 1811 // Extra latency cycles since wbck is 7 cycles 1812 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1813 InstrStage<2, [A9_NPipe]>], 1814 [3, 1, 2]>, 1815 // 1816 // VTB 1817 InstrItinData<IIC_VTB1, 
[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1818 InstrStage<1, [A9_MUX0], 0>, 1819 InstrStage<1, [A9_DRegsN], 0, Required>, 1820 // Extra latency cycles since wbck is 7 cycles 1821 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1822 InstrStage<2, [A9_NPipe]>], 1823 [3, 2, 1]>, 1824 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1825 InstrStage<1, [A9_MUX0], 0>, 1826 InstrStage<2, [A9_DRegsN], 0, Required>, 1827 // Extra latency cycles since wbck is 7 cycles 1828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1829 InstrStage<2, [A9_NPipe]>], 1830 [3, 2, 2, 1]>, 1831 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1832 InstrStage<1, [A9_MUX0], 0>, 1833 InstrStage<2, [A9_DRegsN], 0, Required>, 1834 // Extra latency cycles since wbck is 8 cycles 1835 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1836 InstrStage<3, [A9_NPipe]>], 1837 [4, 2, 2, 3, 1]>, 1838 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1839 InstrStage<1, [A9_MUX0], 0>, 1840 InstrStage<1, [A9_DRegsN], 0, Required>, 1841 // Extra latency cycles since wbck is 8 cycles 1842 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1843 InstrStage<3, [A9_NPipe]>], 1844 [4, 2, 2, 3, 3, 1]>, 1845 // 1846 // VTBX 1847 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1848 InstrStage<1, [A9_MUX0], 0>, 1849 InstrStage<1, [A9_DRegsN], 0, Required>, 1850 // Extra latency cycles since wbck is 7 cycles 1851 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1852 InstrStage<2, [A9_NPipe]>], 1853 [3, 1, 2, 1]>, 1854 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1855 InstrStage<1, [A9_MUX0], 0>, 1856 InstrStage<1, [A9_DRegsN], 0, Required>, 1857 // Extra latency cycles since wbck is 7 cycles 1858 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1859 InstrStage<2, [A9_NPipe]>], 1860 [3, 1, 2, 2, 1]>, 1861 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1862 InstrStage<1, [A9_MUX0], 0>, 1863 InstrStage<1, [A9_DRegsN], 0, Required>, 1864 // Extra 
latency cycles since wbck is 8 cycles 1865 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1866 InstrStage<3, [A9_NPipe]>], 1867 [4, 1, 2, 2, 3, 1]>, 1868 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1869 InstrStage<1, [A9_MUX0], 0>, 1870 InstrStage<1, [A9_DRegsN], 0, Required>, 1871 // Extra latency cycles since wbck is 8 cycles 1872 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1873 InstrStage<2, [A9_NPipe]>], 1874 [4, 1, 2, 2, 3, 3, 1]> 1875]>; 1876 1877// ===---------------------------------------------------------------------===// 1878// The following definitions describe the simpler per-operand machine model. 1879// This works with MachineScheduler and will eventually replace itineraries. 1880 1881class A9WriteLMOpsListType<list<WriteSequence> writes> { 1882 list <WriteSequence> Writes = writes; 1883 SchedMachineModel SchedModel = ?; 1884} 1885 1886// Cortex-A9 machine model for scheduling and other instruction cost heuristics. 1887def CortexA9Model : SchedMachineModel { 1888 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. 1889 let MicroOpBufferSize = 56; // Based on available renamed registers. 1890 let LoadLatency = 2; // Optimistic load latency assuming bypass. 1891 // This is overriden by OperandCycles if the 1892 // Itineraries are queried instead. 1893 let MispredictPenalty = 8; // Based on estimate of pipeline depth. 1894 1895 let Itineraries = CortexA9Itineraries; 1896 1897 // FIXME: Many vector operations were never given an itinerary. We 1898 // haven't mapped these to the new model either. 1899 let CompleteModel = 0; 1900 1901 // FIXME: Remove when all errors have been fixed. 1902 let FullInstRWOverlapCheck = 0; 1903} 1904 1905//===----------------------------------------------------------------------===// 1906// Define each kind of processor resource and number available. 1907// 1908// The AGU unit has BufferSize=1 so that the latency between operations 1909// that use it are considered to stall other operations. 
//
// The FP unit has BufferSize=0 so that it is a hard dispatch
// hazard. No instruction may be dispatched while the unit is reserved.

let SchedModel = CortexA9Model in {

def A9UnitALU : ProcResource<2>;
// The multiplier names A9UnitALU as its Super resource.
def A9UnitMul : ProcResource<1> {
  let Super = A9UnitALU;
}
def A9UnitAGU : ProcResource<1> {
  let BufferSize = 1;
}
def A9UnitLS  : ProcResource<1>;
def A9UnitFP  : ProcResource<1> {
  let BufferSize = 0;
}
def A9UnitB   : ProcResource<1>;

//===----------------------------------------------------------------------===//
// Define scheduler read/write types with their resources and latency on A9.

// Consume an issue slot, but no processor resources. This is useful when all
// other writes associated with the operand have NumMicroOps = 0.
def A9WriteIssue : SchedWriteRes<[]> {
  let Latency = 0;
}

// Write an integer register.
def A9WriteI : SchedWriteRes<[A9UnitALU]>;
// Write an integer shifted-by register.
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 2;
}

// Basic ALU.
def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate.
def : WriteRes<WriteALUsi, [A9UnitALU]> {
  let Latency = 2;
}
// ALU with operand shifted by register.
def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 3;
}

// Multiplication
// The *Hi writes describe the upper result half and add no micro-op.
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> {
  let Latency = 4;
}
def A9WriteMHi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 5;
  let NumMicroOps = 0;
}
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> {
  let Latency = 3;
}
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 4;
  let NumMicroOps = 0;
}
def : SchedAlias<WriteMUL16,   A9WriteM16>;
def : SchedAlias<WriteMUL32,   A9WriteM>;
def : SchedAlias<WriteMUL64Lo, A9WriteM>;
def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
def : SchedAlias<WriteMAC16,   A9WriteM16>;
def : SchedAlias<WriteMAC32,   A9WriteM>;
def : SchedAlias<WriteMAC64Lo, A9WriteM>;
def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
def : ReadAdvance<ReadMUL, 0>;
def : ReadAdvance<ReadMAC, 0>;

// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
// by having FP instructions consume the AGU resource.
def A9WriteF      : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
def A9WriteFMov   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
def A9WriteFMulS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }

def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }

// NEON has an odd mix of latencies. Simply name the write types by latency.
def A9WriteV1  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
def A9WriteV2  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
def A9WriteV3  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
def A9WriteV4  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
def A9WriteV5  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
def A9WriteV6  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
def A9WriteV7  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
def A9WriteV9  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }

// The generic vector load/store write types are given no resources here.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

// Reserve A9UnitFP for 2 consecutive cycles.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
  let ResourceCycles = [2, 1];
}

// Branches don't have a def operand but still consume resources.
def A9WriteB : SchedWriteRes<[A9UnitB]>;

// Address generation.
def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> {
  let NumMicroOps = 0;
}

// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> {
  let Latency = 3;
}
def : SchedAlias<WriteLd, A9WriteL>;
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> {
  let Latency = 3;
  let NumMicroOps = 0;
}
// Offset shifted by register.
def A9WriteLsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}
// Load (and zero extend) a byte.
def A9WriteLb : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}
def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 5;
}

// Load or Store Float, aligned.
def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
}

// Store Integer.
def A9WriteS : SchedWriteRes<[A9UnitLS]>;

//===----------------------------------------------------------------------===//
// Define resources dynamically for load multiple variants.

// Define helpers for extra latency without consuming resources.
// A9WriteCycle1 is the one-cycle unit; A9WriteCycle2-8 repeat it.
def A9WriteCycle1 : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
foreach NumCycles = 2-8 in {
  def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles

// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {

  // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
  // latency for instructions that generate multiple loads or stores.
  def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;

  // Define a predicate to select the LDM based on number of memory addresses.
  def A9LMAdr#NumAddr#Pred :
    SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;

} // foreach NumAddr

// Fall-back for unknown LDMs.
def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;

// LDM/VLDM/VLDn address generation latency & resources.
// Dynamically select the A9WriteAdrN sequence using a predicate.
def A9WriteLMAdr : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
  // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
  SchedVar<A9LMUnknownPred, [A9WriteAdr2]>
]>;

// Define LDM Resources.
// These take no issue resource, so they can be combined with other
// writes like WriteB.
// A9WriteLMLo takes a single LS resource and 2 cycles.
def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMHi : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Each A9WriteL#N variant adds N cycles of latency without consuming
// additional resources.
foreach NumAddr = 1-8 in {
  def A9WriteL#NumAddr : WriteSequence<
    [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  def A9WriteL#NumAddr#Hi : WriteSequence<
    [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
}

//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.

// Lo/Hi write pairs in address order; A9WriteLM takes prefixes of this list.
def A9WriteLMOpsList : A9WriteLMOpsListType<
  [A9WriteL1, A9WriteL1Hi,
   A9WriteL2, A9WriteL2Hi,
   A9WriteL3, A9WriteL3Hi,
   A9WriteL4, A9WriteL4Hi,
   A9WriteL5, A9WriteL5Hi,
   A9WriteL6, A9WriteL6Hi,
   A9WriteL7, A9WriteL7Hi,
   A9WriteL8, A9WriteL8Hi]>;

// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources.
  SchedVar<A9LMUnknownPred,
           [A9WriteL1,   A9WriteL1Hi,
            A9WriteL2,   A9WriteL2Hi,
            A9WriteL3Hi, A9WriteL3Hi,
            A9WriteL4Hi, A9WriteL4Hi,
            A9WriteL5Hi, A9WriteL5Hi,
            A9WriteL6Hi, A9WriteL6Hi,
            A9WriteL7Hi, A9WriteL7Hi,
            A9WriteL8Hi, A9WriteL8Hi]>
]> {
  let Variadic = 1;
}

//===----------------------------------------------------------------------===//
// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.

// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources,
// so it can be used in WriteSequences in single-issue instructions that
// encapsulate multiple loads.
def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
  let NumMicroOps = 0;
}

foreach NumAddr = 1-8 in {

  // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
  def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;

  // A9WriteLfp1-8 definitions are statically expanded into a sequence of
  // A9WriteLfpOps with additive latency that takes a single issue slot.
  // Used directly to describe NEON VLDn.
  def A9WriteLfp#NumAddr : WriteSequence<
    [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

  // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
  // permuting loaded values.
  def A9WriteLfp#NumAddr#Mov : WriteSequence<
    [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

} // foreach NumAddr

// Define VLDM/VSTM PreRA resources.
// A9WriteLMfpPreRA is dynamically expanded into the correct
// A9WriteLfp1-8 sequence based on a predicate. This supports the
// preRA VLDM variants in which all 64-bit loads are written to the
// same tuple of either single or double precision registers.
def A9WriteLMfpPreRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
  // For unknown VLDM/VSTM PreRA, assume 2xS registers.
  SchedVar<A9LMUnknownPred, [A9WriteLfp2]>
]>;

// Define VLDM/VSTM PostRA Resources.
// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 0;
}

foreach NumAddr = 1-8 in {

  // Each A9WriteLMfp#N variant adds N cycles of latency without consuming
  // additional resources.
  def A9WriteLMfp#NumAddr : WriteSequence<
    [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

  // Assuming aligned access, the upper half of each pair is free with
  // the same latency.
  def A9WriteLMfp#NumAddr#Hi : WriteSequence<
    [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

} // foreach NumAddr

// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last A9WriteLMfpNHi is naturally ignored because
// the instruction has no following def operands.

// The trailing comments give each row's index range within Writes; the
// A9WriteLMfpPostRA variants below select from the list by those indices.
def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
  [A9WriteLMfp1,   A9WriteLMfp2,     // 0-1
   A9WriteLMfp3,   A9WriteLMfp4,     // 2-3
   A9WriteLMfp5,   A9WriteLMfp6,     // 4-5
   A9WriteLMfp7,   A9WriteLMfp8,     // 6-7
   A9WriteLMfp1Hi,                   // 8-8
   A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
   A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
   A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
   A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
   A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
   A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
   A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22

def A9WriteLMfpPostRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources. We are optimizing for the case
  // where the operands are DPRs, and this determines the first eight
  // types. The remaining eight types are filled to cover the case
  // where the operands are SPRs.
  SchedVar<A9LMUnknownPred,
           [A9WriteLMfp1,   A9WriteLMfp2,
            A9WriteLMfp3Hi, A9WriteLMfp4Hi,
            A9WriteLMfp5Hi, A9WriteLMfp6Hi,
            A9WriteLMfp7Hi, A9WriteLMfp8Hi,
            A9WriteLMfp5Hi, A9WriteLMfp5Hi,
            A9WriteLMfp6Hi, A9WriteLMfp6Hi,
            A9WriteLMfp7Hi, A9WriteLMfp7Hi,
            A9WriteLMfp8Hi, A9WriteLMfp8Hi]>
]> {
  let Variadic = 1;
}

// Distinguish between our multiple MI-level forms of the same
// VLDM/VSTM instructions.
def A9PreRA : SchedPredicate<
  "MI->getOperand(0).getReg().isVirtual()">;
def A9PostRA : SchedPredicate<
  "MI->getOperand(0).getReg().isPhysical()">;

// VLDM represents all destination registers as a single register
// tuple, unlike LDM. So the number of write operands is not variadic.
def A9WriteLMfp : SchedWriteVariant<[
  SchedVar<A9PreRA,  [A9WriteLMfpPreRA]>,
  SchedVar<A9PostRA, [A9WriteLMfpPostRA]>
]>;

//===----------------------------------------------------------------------===//
// Resources for other (non-LDM/VLDM) Variants.

// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2   : WriteSequence<[A9WriteI, A9WriteI]>;
def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;

// Some ALU operations can read loaded integer values one cycle early.
def A9ReadALU : SchedReadAdvance<1,
  [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
   A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
   A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
   A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
   A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;

// Read types for operands that are unconditionally read in cycle N
// after the instruction issues, decreases producer latency by N-1.
def A9Read2 : SchedReadAdvance<1>;
def A9Read3 : SchedReadAdvance<2>;
def A9Read4 : SchedReadAdvance<3>;

//===----------------------------------------------------------------------===//
// Map itinerary classes to scheduler read/write resources per operand.
//
// For ARM, we piggyback scheduler resources on the Itinerary classes
// to avoid perturbing the existing instruction definitions.

// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.

def : ItinRW<[WriteALU],
             [IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
              IIC_iMVNi, IIC_iMVNsi,
              IIC_iCMOVi, IIC_iCMOVr, IIC_iCMOVsi]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iMVNr]>;
def : ItinRW<[A9WriteIsr], [IIC_iMOVsr, IIC_iMVNsr, IIC_iCMOVsr]>;

def : ItinRW<[A9WriteI2],   [IIC_iMOVix2, IIC_iCMOVix2]>;
def : ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def : ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;

def : ItinRW<[WriteALU],
             [IIC_iBITi, IIC_iBITr, IIC_iUNAr, IIC_iTSTi, IIC_iTSTr]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
def : ItinRW<[WriteALU, A9ReadALU, A9ReadALU], [IIC_iALUr, IIC_iCMPr]>;
def : ItinRW<[WriteALUsi],
             [IIC_iBITsi, IIC_iUNAsi, IIC_iEXTr, IIC_iTSTsi]>;
def : ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
def : ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
def : ItinRW<[A9WriteALUsr],
             [IIC_iBITsr, IIC_iTSTsr, IIC_iEXTAr, IIC_iEXTAsr]>;
def : ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr, IIC_iCMPsr]>;

// A9WriteMHi is ignored for MUL32.
def : ItinRW<[A9WriteM, A9WriteMHi],
             [IIC_iMUL32, IIC_iMAC32,
              IIC_iMUL64, IIC_iMAC64]>;
// FIXME: SMLALxx needs itin classes
def : ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16, IIC_iMAC16]>;

// TODO: For floating-point ops, we model the pipeline forwarding
// latencies here. WAW latencies are sometimes longer.

def : ItinRW<[A9WriteFMov],
             [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
              IIC_fpUNA32, IIC_fpUNA64,
              IIC_fpCMP32, IIC_fpCMP64]>;
def : ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
def : ItinRW<[A9WriteF],
             [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
              IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
              IIC_fpALU32, IIC_fpALU64]>;
def : ItinRW<[A9WriteFMulS],  [IIC_fpMUL32]>;
def : ItinRW<[A9WriteFMulD],  [IIC_fpMUL64]>;
def : ItinRW<[A9WriteFMAS],   [IIC_fpMAC32]>;
def : ItinRW<[A9WriteFMAD],   [IIC_fpMAC64]>;
def : ItinRW<[A9WriteFDivS],  [IIC_fpDIV32]>;
def : ItinRW<[A9WriteFDivD],  [IIC_fpDIV64]>;
def : ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
def : ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;

def : ItinRW<[A9WriteB], [IIC_Br]>;

// A9 PLD is processed in a dedicated unit.
def : ItinRW<[], [IIC_Preload]>;

// Note: We must assume that loads are aligned, since the machine
// model cannot know this statically and A9 ignores alignment hints.

// A9WriteAdr consumes AGU regardless of address writeback, but its
// latency is only relevant for users of an updated address.
def : ItinRW<[A9WriteL, A9WriteAdr],
             [IIC_iLoad_i, IIC_iLoad_r,
              IIC_iLoad_iu, IIC_iLoad_ru]>;
def : ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si, IIC_iLoad_siu]>;
def : ItinRW<[A9WriteLb, A9WriteAdr2],
             [IIC_iLoad_bh_i, IIC_iLoad_bh_r,
              IIC_iLoad_bh_iu, IIC_iLoad_bh_ru]>;
def : ItinRW<[A9WriteLbsi, A9WriteAdr2],
             [IIC_iLoad_bh_si, IIC_iLoad_bh_siu]>;
def : ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr],
             [IIC_iLoad_d_i, IIC_iLoad_d_r,
              IIC_iLoad_d_ru]>;
// Store either has no def operands, or the one def for address writeback.
def : ItinRW<[A9WriteAdr, A9WriteS],
             [IIC_iStore_i, IIC_iStore_r,
              IIC_iStore_iu, IIC_iStore_ru,
              IIC_iStore_d_i, IIC_iStore_d_r,
              IIC_iStore_d_ru]>;
def : ItinRW<[A9WriteAdr2, A9WriteS],
             [IIC_iStore_si, IIC_iStore_siu,
              IIC_iStore_bh_i, IIC_iStore_bh_r,
              IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
def : ItinRW<[A9WriteAdr3, A9WriteS],
             [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;

// A9WriteLM will be expanded into a separate write for each def
// operand. Address generation consumes resources, but A9WriteLMAdr
// is listed after all def operands, so has no effective latency.
//
// Note: A9WriteLM expands into an even number of def operands. The
// actual number of def operands may be less by one.
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue],
             [IIC_iLoad_m, IIC_iPop]>;

// Load multiple with address writeback has an extra def operand in
// front of the loaded registers.
//
// Reuse the load-multiple variants for store-multiple because the
// resources are identical. For stores only the address writeback
// has a def operand so the WriteL latencies are unused.
def : ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue],
             [IIC_iLoad_mu,
              IIC_iStore_m,
              IIC_iStore_mu]>;
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB],
             [IIC_iLoad_mBr, IIC_iPop_Br]>;
def : ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;

def : ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;

def : ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
def : ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
def : ItinRW<[A9WriteAdr, A9WriteLSfp],
             [IIC_fpStore32, IIC_fpStore64,
              IIC_fpStore_m, IIC_fpStore_mu]>;

// Note: Unlike VLDM, VLD1 expects the writeback operand after the
// normal writes.
def : ItinRW<[A9WriteLfp1, A9WriteAdr1],
             [IIC_VLD1, IIC_VLD1u,
              IIC_VLD1x2, IIC_VLD1x2u]>;
def : ItinRW<[A9WriteLfp2, A9WriteAdr2],
             [IIC_VLD1x3, IIC_VLD1x3u,
              IIC_VLD1x4, IIC_VLD1x4u,
              IIC_VLD4dup, IIC_VLD4dupu]>;
def : ItinRW<[A9WriteLfp1Mov, A9WriteAdr1],
             [IIC_VLD1dup, IIC_VLD1dupu,
              IIC_VLD2, IIC_VLD2u,
              IIC_VLD2dup, IIC_VLD2dupu]>;
// NOTE(review): this is the only mapping here that pairs an A9WriteLfpN*
// write with a different A9WriteAdrN count (2 vs. 1) -- confirm intended.
def : ItinRW<[A9WriteLfp2Mov, A9WriteAdr1],
             [IIC_VLD1ln, IIC_VLD1lnu,
              IIC_VLD2x2, IIC_VLD2x2u,
              IIC_VLD2ln, IIC_VLD2lnu]>;
def : ItinRW<[A9WriteLfp3Mov, A9WriteAdr3],
             [IIC_VLD3, IIC_VLD3u,
              IIC_VLD3dup, IIC_VLD3dupu]>;
def : ItinRW<[A9WriteLfp4Mov, A9WriteAdr4],
             [IIC_VLD4, IIC_VLD4u,
              IIC_VLD4ln, IIC_VLD4lnu]>;
def : ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;

// Vector stores use similar resources to vector loads, so use the
// same write types. The address write must be first for stores with
// address writeback.
def : ItinRW<[A9WriteAdr1, A9WriteLfp1],
             [IIC_VST1, IIC_VST1u,
              IIC_VST1x2, IIC_VST1x2u,
              IIC_VST1ln, IIC_VST1lnu,
              IIC_VST2, IIC_VST2u,
              IIC_VST2x2, IIC_VST2x2u,
              IIC_VST2ln, IIC_VST2lnu]>;
def : ItinRW<[A9WriteAdr2, A9WriteLfp2],
             [IIC_VST1x3, IIC_VST1x3u,
              IIC_VST1x4, IIC_VST1x4u,
              IIC_VST3, IIC_VST3u,
              IIC_VST3ln, IIC_VST3lnu,
              IIC_VST4, IIC_VST4u,
              IIC_VST4ln, IIC_VST4lnu]>;

// NEON moves.
def : ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
def : ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
def : ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;

// NEON integer arithmetic
//
// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
def : ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
// VSUB/VMVN/VCLSD/VCLZD/VCNTD
def : ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def : ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;

// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def : ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
// VABS
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
// VPADD/VPADDL are mapped later under IIC_SHLi.
// ...
// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
def : ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
// VMOVimm/VMVNimm/VORRimm/VBICimm
def : ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
def : ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;

// NEON integer multiply
//
// Note: these don't quite match the timing docs, but they do match
// the original A9 itinerary.
// Plain multiplies: 16-bit element forms, then 32-bit element forms
// (the latter use the A9Write2V* write types).
def : ItinRW<[A9WriteV6,  A9Read2, A9Read2], [IIC_VMULi16D]>;
def : ItinRW<[A9WriteV7,  A9Read2, A9Read2], [IIC_VMULi16Q]>;
def : ItinRW<[A9Write2V7, A9Read2],          [IIC_VMULi32D]>;
def : ItinRW<[A9Write2V9, A9Read2],          [IIC_VMULi32Q]>;
// Multiply-accumulate: same writes as above, with a leading A9Read3.
def : ItinRW<[A9WriteV6,  A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
def : ItinRW<[A9WriteV7,  A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
def : ItinRW<[A9Write2V7, A9Read3, A9Read2],          [IIC_VMACi32D]>;
def : ItinRW<[A9Write2V9, A9Read3, A9Read2],          [IIC_VMACi32Q]>;

// NEON integer shift
// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
def : ItinRW<[A9WriteV3], [IIC_VSHLiD,  IIC_VSHLiQ]>;
def : ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;

// NEON permute
def : ItinRW<[A9WriteV2, A9WriteV2],
             [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
def : ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
             [IIC_VPERMQ3, IIC_VEXTQ]>;
// Table lookups (VTB1..VTB4).
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VTB1]>;
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VTB2]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3],
             [IIC_VTB3]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTB4]>;
// Table lookup extensions (VTBX1..VTBX4): as above, but with a ReadDefault
// placed ahead of the A9Read* entries.
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2],
             [IIC_VTBX1]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2],
             [IIC_VTBX2]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3],
             [IIC_VTBX3]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTBX4]>;

// NEON floating-point
def : ItinRW<[A9WriteV5,  A9Read2, A9Read2], [IIC_VBIND]>;
def : ItinRW<[A9WriteV6,  A9Read2, A9Read2], [IIC_VBINQ]>;
def : ItinRW<[A9WriteV5,  A9Read2],          [IIC_VUNAD, IIC_VFMULD]>;
def : ItinRW<[A9WriteV6,  A9Read2],          [IIC_VUNAQ, IIC_VFMULQ]>;
def : ItinRW<[A9WriteV9,  A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def : ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def : ItinRW<[A9WriteV9,  A9Read2, A9Read2], [IIC_VRECSD]>;
def : ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;

// Map SchedRWs that are identical for cortexa9 to existing resources.
def : SchedAlias<WriteALU,    A9WriteALU>;
def : SchedAlias<WriteALUsr,  A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU,     A9ReadALU>;
def : SchedAlias<ReadALUsr,   A9ReadALU>;
def : SchedAlias<WriteST,     A9WriteS>;

// ===---------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrite to processor specific ones
//

// FP conversion: uses A9UnitFP and A9UnitAGU, with a latency of 4.
def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def : SchedAlias<WriteFPMOV, A9WriteFMov>;

// Both FP ALU widths share A9WriteF.
def : SchedAlias<WriteFPALU32, A9WriteF>;
def : SchedAlias<WriteFPALU64, A9WriteF>;

// Multiply and multiply-accumulate have separate 32-bit and 64-bit writes.
def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;

def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;

// Divide and square root.
def : SchedAlias<WriteFPDIV32,  A9WriteFDivS>;
def : SchedAlias<WriteFPDIV64,  A9WriteFDivD>;
def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;

// The generic FP multiply / multiply-accumulate reads get a ReadAdvance of 0.
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;

// ===---------------------------------------------------------------------===//
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
//
// Note on matching: `instregex` patterns match opcode names by prefix, which
// is why a few patterns here carry an explicit `$` anchor (e.g. "SMLAL$").
// `instrs` names opcodes exactly.

// Bitwise logic, immediate and register forms.
def : InstRW<[WriteALU],
      (instregex "ANDri", "ORRri", "EORri", "BICri",
                 "ANDrr", "ORRrr", "EORrr", "BICrr")>;
// Bitwise logic with a shifted-register operand (shift by immediate / by
// register).
def : InstRW<[WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
def : InstRW<[WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;


// All compare flavours map to the plain A9 ALU write.
def : SchedAlias<WriteCMP,   A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;

// Moves.
def : InstRW<[A9WriteIsr],
      (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", "MOVCCsr")>;
def : InstRW<[WriteALU, A9ReadALU], (instregex "MVNr")>;
def : InstRW<[A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm")>;
// Anchored with `$` so that this pattern does not also capture
// MOV_ga_pcrel_ldr, which has its own mapping (A9WriteI2ld) directly below.
// Unanchored, "MOV_ga_pcrel" is a prefix of "MOV_ga_pcrel_ldr" and the two
// InstRW definitions overlap.
def : InstRW<[A9WriteI2pc], (instregex "MOV_ga_pcrel$")>;
def : InstRW<[A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[WriteALU], (instregex "SEL")>;

// Bitfield clear/insert/extract.
def : InstRW<[WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;

// Multiplies producing a single result.
def : InstRW<[A9WriteM],
      (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
                 "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Multiplies with two writes (A9WriteM + A9WriteMHi). "SMLAL$" is anchored so
// it does not swallow the longer SMLAL* opcodes that are listed explicitly.
def : InstRW<[A9WriteM, A9WriteMHi],
      (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
                 "UMAAL", "SMLALv5", "UMLALv5", "SMLALBB", "SMLALBT",
                 "SMLALTB", "SMLALTT")>;
// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
def : InstRW<[A9WriteM, A9WriteMHi],
      (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
                 "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;

// 16-bit multiplies and multiply-accumulates share the A9WriteM16 /
// A9WriteM16Hi pair.
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;

// Loads. instregex patterns are prefix matches, hence the `$` on "PICLDR$"
// to keep it from also matching the PICLDRH/PICLDRB/... opcodes mapped below.
def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
def : InstRW<[A9WriteLb],
      (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
                 "LDRH", "LDRSH", "LDRSB")>;
// Byte load with a shifted-register offset. This previously repeated "LDRrs",
// which is already mapped to A9WriteLsi above; the parallel with
// LDRi12/LDRBi12 indicates that LDRBrs was intended.
def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;

// WriteDIV is given no processor resources and zero latency in this model.
def : WriteRes<WriteDIV, []> {
  let Latency = 0;
}

// All branch flavours use A9UnitB.
def : WriteRes<WriteBr,    [A9UnitB]>;
def : WriteRes<WriteBrL,   [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
// Preload consumes no modelled resources.
def : WriteRes<WritePreLd, []>;
// No-ops: no resources, zero latency, zero micro-ops.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
} // SchedModel = CortexA9Model