//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order, pipelined, superscalar microprocessor with
// an 8-stage pipeline. It can issue at most two instructions per cycle.
// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
// A number of forwarding paths allow the result of a computation to feed a
// subsequent operation before it has been written back to the register file.
// This scheduler is a MachineScheduler. See TargetSchedule.td for details.

def CortexR52Model : SchedMachineModel {
  // In-order core: no micro-op buffering.
  let MicroOpBufferSize = 0;
  // Up to 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Optimistic load latency, assuming no cache misses.
  let LoadLatency = 1;
  // Cost of a branch direction mispredict, including the PFU.
  let MispredictPenalty = 8;
  // Only the instructions applicable to Cortex-R52 are described.
  let CompleteModel = 0;
}


//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

// Each pipeline is modelled as a ProcResource with BufferSize = 0 because
// Cortex-R52 is an in-order processor.

// Pipeline resources. All of them are unbuffered.
let BufferSize = 0 in {
def R52UnitALU   : ProcResource<2>; // Int ALU
def R52UnitMAC   : ProcResource<1>; // Int MAC
def R52UnitDiv   : ProcResource<1>; // Int Division
def R52UnitLd    : ProcResource<1>; // Load/Store
def R52UnitB     : ProcResource<1>; // Branch
def R52UnitFPALU : ProcResource<2>; // FP ALU
def R52UnitFPMUL : ProcResource<2>; // FP MUL
def R52UnitFPDIV : ProcResource<1>; // FP DIV
}

// Cortex-R52 specific SchedReads, one per pipeline stage in which an operand
// may be read.
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;
def R52Read_F0  : SchedRead; // F0 maps to ISS stage of integer pipe
def R52Read_F1  : SchedRead;
def R52Read_F2  : SchedRead;


//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.

let SchedModel = CortexR52Model in {

// ALU - the result is written in late EX2, whether or not a shift was needed.
let Latency = 3 in {
def : WriteRes<WriteALU,    [R52UnitALU]>;
def : WriteRes<WriteALUsi,  [R52UnitALU]>;
def : WriteRes<WriteALUsr,  [R52UnitALU]>;
def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}

// Compares
let Latency = 0 in {
def : WriteRes<WriteCMP,   [R52UnitALU]>;
def : WriteRes<WriteCMPsi, [R52UnitALU]>;
def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}

// Multiply - aliased to sub-target specific writes later.

// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2).
// The divider is not pipelined, so it stays busy for the whole latency.
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8;
  let ReleaseAtCycles = [8];
}

// Branches - LR is written in late EX2.
let Latency = 0 in {
def : WriteRes<WriteBr,    [R52UnitB]>;
def : WriteRes<WriteBrL,   [R52UnitB]>;
def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// Misc
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}

// Integer pipeline by-passes
def : ReadAdvance<ReadALU,   1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;

// Floating-point. Map target-defined SchedReadWrites to the subtarget.
let Latency = 6 in {
def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// A MAC is internally two instructions (MUL then ADD).
let Latency = 11 in {
def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                              R52UnitFPALU, R52UnitFPALU]>;
}

// FP divide takes a fixed number of cycles and is not pipelined.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;
  let ReleaseAtCycles = [7];
}

def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let Latency = 17;
  let ReleaseAtCycles = [17];
}

def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }

// Overridden via InstRW for this processor.
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.

// Forwarding information - the advance is set by the stage in which the
// operand is read.
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;


// Cortex-R52 specific SchedWrites for use with InstRW
let Latency = 4 in {
def R52WriteMAC   : SchedWriteRes<[R52UnitMAC]>;
def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { let NumMicroOps = 0; }
}
// The integer divider is not pipelined.
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8;
  let ReleaseAtCycles = [8];
}
let Latency = 4 in {
def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
def R52WriteST : SchedWriteRes<[R52UnitLd]>;
}
let Latency = 0 in {
def R52WriteAdr : SchedWriteRes<[]>;
def R52WriteCC  : SchedWriteRes<[]>;
}
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }

def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }

// Alias the generic writes to the sub-target specific ones.
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;

// FP writes. The "2" variants occupy two units of the same resource.
let Latency = 4 in {
def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 5 in {
def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 6 in {
def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}
// A MAC is internally two instructions (MUL then ADD).
let Latency = 11 in {
def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                       R52UnitFPALU, R52UnitFPALU]>;
}

let Latency = 5 in {
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}

//===----------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrites to processor specific ones
//
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;

//===----------------------------------------------------------------------===//
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
//
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extend.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
                        "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
                        "t2UXTB16")>;

// Immediate and global-address moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd, R52Read_ISS],
             (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SEL", "t2SEL")>;

// Bitfield clear/insert/extract.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
             (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
                        "(t|t2)UBFX", "(t|t2)SBFX")>;

// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
             (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16",
                        "USAT", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX",
                        "QSAX", "UQADD8", "UQADD16", "UQSUB8", "UQSUB16",
                        "UQASX", "UQSAX", "t2QADD", "t2QSUB", "t2QDADD",
                        "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT", "t2QADD8",
                        "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
                        "t2UQADD8", "t2UQADD16", "t2UQSUB8", "t2UQSUB16",
                        "t2UQASX", "t2UQSAX", "t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
                        "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
                        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX",
                        "t2SSAX", "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                        "t2UASX", "t2USAX")>;

// Halving parallel arithmetic and extend-and-add.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX",
                        "SHSAX", "SXTAB", "SXTAB16", "SXTAH", "UHADD8",
                        "UHADD16", "UHSUB8", "UHSUB16", "UHASX", "UHSAX",
                        "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
                        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX",
                        "t2SXTAB", "t2SXTAB16", "t2SXTAH", "t2UHADD8",
                        "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
                        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;

// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
             (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT", "SMULTB",
                        "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX",
                        "t2MUL", "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT",
                        "t2SMULTB", "t2SMULTT", "t2SMULWB", "t2SMULWT",
                        "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit (long) accumulation, the single MAC unit is used, not the
// ALUs. The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
                        "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
                        "t2SMMLSR",
                        "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
                        "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD",
                        "SMLSDX", "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT",
                        "t2SMLATB", "t2SMLATT", "t2SMLSD", "t2SMLSDX",
                        "t2SMLAWB", "t2SMLAWT",
                        "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
                        "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
                        "SMLAL", "UMLAL", "SMLALBT", "SMLALTB", "SMLALTT",
                        "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
                        "UMAAL", "t2SMLAL", "t2UMLAL",
                        "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD",
                        "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide.
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
             (instregex "t2SDIV", "t2UDIV")>;

// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// That is non-trivial to express here, so all loads use one uniform latency.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
             (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
                        "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
                        "tLDR[BH](r|i|spi|pci|pciASM)",
                        "tLDR(r|i|spi|pci|pciASM)",
                        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
                        "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
                        "LDA", "t2LDA")>;
// Pre/post-indexed loads also define the updated base address.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
             (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)",
                        "LDRH(_PRE|_POST)",
                        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)",
                        "LDRHT(i|r)",
                        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                        "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
                        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
                        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
             (instregex "MOVT", "t2MOVT")>;

// ALU, register/immediate forms.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ", "EORri",
                        "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
                        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri", "t2CLZ",
                        "t2EORri", "t2MVN", "t2ORRri", "t2RSBS?ri",
                        "t2SBCri")>;

// ALU, register/register forms.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC", "EORrr",
                        "ORRrr", "RSBrr", "RSCrr", "SBCrr",
                        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC",
                        "t2EORrr", "t2SBCrr")>;

// ALU, shifted-by-immediate register forms.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi", "EORrsi",
                        "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
                        "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs",
                        "t2ORRrs", "t2RSBrs", "t2SBCrs")>;

// ALU, shifted-by-register forms.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr",
                        "MVNS?sr", "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;

def : InstRW<[R52WriteALU_EX1],
             (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

// Compares.
def : InstRW<[R52WriteCC, R52Read_EX1],
             (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
             (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
             (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "CMPrsr", "CMNzrsr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
             (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

// Status register moves.
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;

// Integer Load, Multiple.
//
// For every latency two writes are generated: R52WriteILDM<Lat>Cy, which uses
// the load/store unit, and R52WriteILDM<Lat>CyNo, which has the same latency
// but uses no resource and no micro-op.
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = Lat;
  }
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let Latency = Lat;
    let NumMicroOps = 0;
  }
}
// One predicate per possible number of LDM addresses (1..16).
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;

// Variadic write for LDM: the N-th loaded register gets the N-th write of the
// selected variant, so each additional register is available one cycle later.
// Variants are tried in order, so every predicate must appear exactly once.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy]>,
  // 16 addresses. This entry used to repeat R52ILDMAddr15Pred, which made it
  // unreachable and sent 16-register loads to the fallback below.
  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  // NOTE(review): the last two entries are the resource-consuming "Cy" writes,
  // not "CyNo", which does not match the sentence above - confirm intent.
  SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                              R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                              R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                              R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                              R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                              R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>
]> { let Variadic=1; }

// Integer Store, Multiple
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
// R52WriteISTM<N> is R52WriteIStIncAddr repeated N times.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,      [R52WriteISTM2]>
]>;

def : InstRW<[R52WriteILDM, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
      "(t|sys)LDM(IA|DA|DB|IB)$")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;

// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
      "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
      "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
      "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
      "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
      "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
      "tPUSH")>;

// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
      (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;



//===----------------------------------------------------------------------===//
// VFP, Floating Point Support
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;

def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;


//===----------------------------------------------------------------------===//
// Neon Support

// Vector multiple load/stores.
// One predicate per operand count of the MachineInstr (1..16).
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr :
    SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}
// Resource-free writes that only carry a latency.
foreach Lat = 1-32 in {
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
    let Latency = Lat;
  }
}
// Reserve the LdSt resource for Num cycles, no dual-issue.
foreach Num = 1-32 in {
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = 0;
    let NumMicroOps = Num;
    let ReleaseAtCycles = [Num];
  }
}

// Variadic write for VLDM. Operand counts are handled in pairs (2k-1, 2k),
// each pair standing for k D registers: the k-th register has latency 4+k and
// the LdSt unit is reserved for 4+k cycles. Variants are tried in order, so
// every predicate must appear exactly once.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,

  // 3 D reg
  // The first entry used to reserve R52ReserveLd4Cy, unlike its twin and
  // every other pair in this table.
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,

  // 8 D reg
  // These two entries used to test predicates 14 and 15, so the first one was
  // shadowed by the 7-register entry above and 16 operands hit the fallback.
  // R52WriteSTM pairs 15/16 in the same way.
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd5Cy]>
]> { let Variadic=1;}

// Variable stores. Cannot dual-issue.
// R52WriteSTM<L> has latency L, 2*(L-4) micro-ops, and holds the LdSt unit
// for L-4 cycles.
def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1];
}
def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2];
}
def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 6;
  let ReleaseAtCycles = [3];
}
def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [4];
}
def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10;
  let NumMicroOps = 12;
  let ReleaseAtCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12;
  let NumMicroOps = 16;
  let ReleaseAtCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13;
  let NumMicroOps = 18;
  let ReleaseAtCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14;
  let NumMicroOps = 20;
  let ReleaseAtCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15;
  let NumMicroOps = 22;
  let ReleaseAtCycles = [11];
}

def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;

// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
def : WriteRes<WriteVLD2, [R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2];
  let SingleIssue = 1;
}
def : WriteRes<WriteVLD3, [R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [3];
  let SingleIssue = 1;
}
def : WriteRes<WriteVLD4, [R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [4];
  let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2];
}
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [3];
}
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [4];
}
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [5];
}


def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// NOTE(review): unlike VABAL above, this list has v16i8 and no v2i64 -
// confirm which VABDL element types are meant to be covered.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;

def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;

//---
// VSTx. Vector Stores
//---
// 1-element structure store
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;

def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;

def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS,
R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>; 851def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>; 852def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>; 853 854def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>; 855def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>; 856def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>; 857 858// 2-element structure store 859def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>; 860def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>; 861def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>; 862 863def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>; 864def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>; 865def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>; 866def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>; 867def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>; 868def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>; 869 870def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>; 871def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>; 872def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>; 873 874def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>; 875def : 
InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>; 876def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>; 877def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>; 878def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>; 879def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>; 880 881// 3-element structure store 882def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>; 883def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>; 884def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>; 885 886def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>; 887def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>; 888def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>; 889def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>; 890def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>; 891def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>; 892 893def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>; 894def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>; 895def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; 896 897def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>; 
// Remaining 3-element single-lane stores with address writeback. All of
// them share one read/write list, so they are expressed as a single InstRW
// with several opcode patterns.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)",
                 "VST3LNd(8|16|32)Pseudo_UPD$",
                 "VST3LNq(16|32)_UPD$",
                 "VST3LNqWB_(fixed|register)_Asm_(16|32)$",
                 "VST3LNq(16|32)Pseudo_UPD$")>;

// 4-element structure store
// Whole-structure forms without writeback use R52WriteVST5Mem.
// NOTE(review): the pseudo pattern here only accepts the "VST4d" prefix,
// while the _UPD pseudo pattern below accepts "VST4(d|q)" with
// "(oddP|P)seudo" - confirm the asymmetry is intended.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)(8|16|32)$",
                 "VST4(d|q)Asm_(8|16|32)$",
                 "VST4d(8|16|32)Pseudo$")>;

// Single-lane forms without writeback use R52WriteVST2Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)$",
                 "VST4LNdAsm_(8|16|32)$",
                 "VST4LNd(8|16|32)Pseudo$",
                 "VST4LNq(16|32)$",
                 "VST4LNqAsm_(16|32)$",
                 "VST4LNq(16|32)Pseudo$")>;

// Whole-structure forms with writeback: R52WriteAdr is listed as a second
// write after the memory write.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)(8|16|32)_UPD",
                 "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                 "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane d-register form with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)_UPD")>;
// Remaining 4-element single-lane stores with address writeback. They all
// use the same read/write list, so one InstRW carries every opcode pattern.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)",
                 "VST4LNd(8|16|32)Pseudo_UPD",
                 "VST4LNq(16|32)_UPD",
                 "VST4LNqWB_(fixed|register)_Asm_(16|32)",
                 "VST4LNq(16|32)Pseudo_UPD")>;

} // R52 SchedModel