//=- AArch64SchedNeoverseN1.td - NeoverseN1 Scheduling Model -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse N1 processors.
//
// References:
// - "Arm Neoverse N1 Software Optimization Guide"
// - https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1
//
//===----------------------------------------------------------------------===//

// Top-level machine model. Every resource, write type and instruction mapping
// in this file is attached to it through the `let SchedModel = ...` scope
// opened further down.
def NeoverseN1Model : SchedMachineModel {
  let IssueWidth            =   8; // Maximum micro-ops dispatch rate.
  let MicroOpBufferSize     = 128; // NOTE: Copied from Cortex-A76.
  let LoadLatency           =   4; // Optimistic load latency.
  let MispredictPenalty     =  11; // Cycle cost of a mispredicted branch.
  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
  let CompleteModel         =   1;

  // Features this model does not describe.
  list<Predicate> UnsupportedFeatures =
      !listconcat(PAUnsupported.F,
                  SMEUnsupported.F,
                  SVEUnsupported.F,
                  [HasMTE, HasCSSC]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse N1.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs). From there, the MOPs proceed through register renaming and dispatch
// stages. A MOP can be split into one or more micro-ops further down the
// pipeline, after the decode stage. Once dispatched, micro-ops wait for their
// operands and issue out-of-order to one of the issue pipelines. Each issue
// pipeline can accept one micro-op per cycle.

let SchedModel = NeoverseN1Model in {

// Define the issue ports.
def N1UnitB   : ProcResource<1>;  // Branch
def N1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
def N1UnitM   : ProcResource<1>;  // Integer multicycle
def N1UnitL   : ProcResource<2>;  // Load/Store 0/1
def N1UnitD   : ProcResource<2>;  // Store data 0/1
def N1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
def N1UnitV1  : ProcResource<1>;  // FP/ASIMD 1

def N1UnitI : ProcResGroup<[N1UnitS, N1UnitM]>;    // Integer units
def N1UnitV : ProcResGroup<[N1UnitV0, N1UnitV1]>;  // FP/ASIMD units

// Define commonly used read types.

// No generic forwarding is provided for these types.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }


//===----------------------------------------------------------------------===//
// Generic write types.
//
// Naming convention used by every definition below:
//
//   N1Write_<latency>c[<release cycles>]_<count><unit>[_<count><unit>...]
//
// e.g. N1Write_4c3_1M has Latency = 4, holds its unit for 3 cycles
// (ReleaseAtCycles = [3]) and issues one micro-op to N1UnitM.
// The `Latency` value of a definition must always equal the <latency> encoded
// in its name; users of these types rely on the name alone.

//===----------------------------------------------------------------------===//
// Define generic 0 micro-op types

let Latency = 0, NumMicroOps = 0 in
def N1Write_0c_0Z : SchedWriteRes<[]>;

//===----------------------------------------------------------------------===//
// Define generic 1 micro-op types

def N1Write_1c_1B     : SchedWriteRes<[N1UnitB]>  { let Latency = 1; }
def N1Write_1c_1I     : SchedWriteRes<[N1UnitI]>  { let Latency = 1; }
def N1Write_2c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 2; }
def N1Write_3c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 3; }
def N1Write_4c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 4;
                                                    let ReleaseAtCycles = [3]; }
def N1Write_5c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 5;
                                                    let ReleaseAtCycles = [3]; }
def N1Write_12c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 12;
                                                    let ReleaseAtCycles = [5]; }
def N1Write_20c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 20;
                                                    let ReleaseAtCycles = [5]; }
def N1Write_4c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 4; }
def N1Write_5c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 5; }
def N1Write_7c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 7; }
def N1Write_2c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 2; }
def N1Write_3c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 3; }
def N1Write_4c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 4; }
def N1Write_5c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 5; }
def N1Write_2c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
def N1Write_3c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
def N1Write_4c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
def N1Write_7c7_1V0   : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
                                                    let ReleaseAtCycles = [7]; }
def N1Write_10c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
                                                    let ReleaseAtCycles = [7]; }
def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
                                                    let ReleaseAtCycles = [10]; }
def N1Write_15c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
                                                    let ReleaseAtCycles = [7]; }
def N1Write_17c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
                                                    let ReleaseAtCycles = [7]; }
def N1Write_2c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
def N1Write_3c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
def N1Write_4c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

let Latency = 1, NumMicroOps = 2 in
def N1Write_1c_1B_1I   : SchedWriteRes<[N1UnitB, N1UnitI]>;
let Latency = 3, NumMicroOps = 2 in
def N1Write_3c_1I_1M   : SchedWriteRes<[N1UnitI, N1UnitM]>;
let Latency = 2, NumMicroOps = 2 in
def N1Write_2c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
let Latency = 5, NumMicroOps = 2 in
def N1Write_5c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
let Latency = 6, NumMicroOps = 2 in
def N1Write_6c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
let Latency = 7, NumMicroOps = 2 in
def N1Write_7c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
let Latency = 5, NumMicroOps = 2 in
def N1Write_5c_1M_1V   : SchedWriteRes<[N1UnitM, N1UnitV]>;
let Latency = 6, NumMicroOps = 2 in
def N1Write_6c_1M_1V0  : SchedWriteRes<[N1UnitM, N1UnitV0]>;
let Latency = 5, NumMicroOps = 2 in
def N1Write_5c_2L      : SchedWriteRes<[N1UnitL, N1UnitL]>;
let Latency = 1, NumMicroOps = 2 in
def N1Write_1c_1L_1D   : SchedWriteRes<[N1UnitL, N1UnitD]>;
let Latency = 2, NumMicroOps = 2 in
def N1Write_2c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
let Latency = 4, NumMicroOps = 2 in
def N1Write_4c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
let Latency = 7, NumMicroOps = 2 in
def N1Write_7c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
let Latency = 4, NumMicroOps = 2 in
def N1Write_4c_1V0_1V1 : SchedWriteRes<[N1UnitV0, N1UnitV1]>;
let Latency = 4, NumMicroOps = 2 in
def N1Write_4c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
let Latency = 5, NumMicroOps = 2 in
def N1Write_5c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
let Latency = 6, NumMicroOps = 2 in
def N1Write_6c_2V1     : SchedWriteRes<[N1UnitV1, N1UnitV1]>;
let Latency = 5, NumMicroOps = 2 in
def N1Write_5c_1V1_1V  : SchedWriteRes<[N1UnitV1, N1UnitV]>;

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

// Latency is 2, as the name states (it is used for a 2-cycle vector store).
let Latency = 2, NumMicroOps = 3 in
def N1Write_2c_1I_1L_1V : SchedWriteRes<[N1UnitI, N1UnitL, N1UnitV]>;
let Latency = 1, NumMicroOps = 3 in
def N1Write_1c_2L_1D    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitD]>;
let Latency = 2, NumMicroOps = 3 in
def N1Write_2c_1L_2V    : SchedWriteRes<[N1UnitL, N1UnitV, N1UnitV]>;
let Latency = 6, NumMicroOps = 3 in
def N1Write_6c_3L       : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL]>;
let Latency = 4, NumMicroOps = 3 in
def N1Write_4c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
let Latency = 6, NumMicroOps = 3 in
def N1Write_6c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
let Latency = 8, NumMicroOps = 3 in
def N1Write_8c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

let Latency = 2, NumMicroOps = 4 in
def N1Write_2c_2I_2L : SchedWriteRes<[N1UnitI, N1UnitI, N1UnitL, N1UnitL]>;
let Latency = 6, NumMicroOps = 4 in
def N1Write_6c_4L    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL]>;
let Latency = 2, NumMicroOps = 4 in
def N1Write_2c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
// Latency is 3, as the name states; with 2 this type would be
// indistinguishable from N1Write_2c_2L_2V.
let Latency = 3, NumMicroOps = 4 in
def N1Write_3c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
let Latency = 5, NumMicroOps = 4 in
def N1Write_5c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
let Latency = 7, NumMicroOps = 4 in
def N1Write_7c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
let Latency = 4, NumMicroOps = 4 in
def N1Write_4c_4V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
let Latency = 6, NumMicroOps = 4 in
def N1Write_6c_4V0   : SchedWriteRes<[N1UnitV0, N1UnitV0, N1UnitV0, N1UnitV0]>;

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

let Latency = 3, NumMicroOps = 5 in
def N1Write_3c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
                                      N1UnitV, N1UnitV, N1UnitV]>;
let Latency = 7, NumMicroOps = 5 in
def N1Write_7c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
                                      N1UnitV, N1UnitV, N1UnitV]>;
let Latency = 6, NumMicroOps = 5 in
def N1Write_6c_5V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;

208//===----------------------------------------------------------------------===// 209// Define generic 6 micro-op types 210 211let Latency = 3, NumMicroOps = 6 in 212def N1Write_3c_4L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL, 213 N1UnitV, N1UnitV]>; 214let Latency = 4, NumMicroOps = 6 in 215def N1Write_4c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 216 N1UnitV, N1UnitV, N1UnitV]>; 217let Latency = 5, NumMicroOps = 6 in 218def N1Write_5c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 219 N1UnitV, N1UnitV, N1UnitV]>; 220let Latency = 6, NumMicroOps = 6 in 221def N1Write_6c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 222 N1UnitV, N1UnitV, N1UnitV]>; 223let Latency = 7, NumMicroOps = 6 in 224def N1Write_7c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 225 N1UnitV, N1UnitV, N1UnitV]>; 226let Latency = 8, NumMicroOps = 6 in 227def N1Write_8c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 228 N1UnitV, N1UnitV, N1UnitV]>; 229 230//===----------------------------------------------------------------------===// 231// Define generic 7 micro-op types 232 233let Latency = 8, NumMicroOps = 7 in 234def N1Write_8c_3L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 235 N1UnitV, N1UnitV, N1UnitV, N1UnitV]>; 236 237//===----------------------------------------------------------------------===// 238// Define generic 8 micro-op types 239 240let Latency = 5, NumMicroOps = 8 in 241def N1Write_5c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL, 242 N1UnitV, N1UnitV, N1UnitV, N1UnitV]>; 243let Latency = 6, NumMicroOps = 8 in 244def N1Write_6c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL, 245 N1UnitV, N1UnitV, N1UnitV, N1UnitV]>; 246let Latency = 8, NumMicroOps = 8 in 247def N1Write_8c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL, 248 N1UnitV, N1UnitV, N1UnitV, N1UnitV]>; 249let Latency = 10, NumMicroOps = 8 in 250def N1Write_10c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL, 251 N1UnitV, N1UnitV, N1UnitV, 
N1UnitV]>; 252 253//===----------------------------------------------------------------------===// 254// Define generic 12 micro-op types 255 256let Latency = 9, NumMicroOps = 12 in 257def N1Write_9c_6L_6V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, 258 N1UnitL, N1UnitL, N1UnitL, 259 N1UnitV, N1UnitV, N1UnitV, 260 N1UnitV, N1UnitV, N1UnitV]>; 261 262 263// Miscellaneous Instructions 264// ----------------------------------------------------------------------------- 265 266def : InstRW<[WriteI], (instrs COPY)>; 267 268// Convert floating-point condition flags 269// Flag manipulation instructions 270def : WriteRes<WriteSys, []> { let Latency = 1; } 271 272 273// Branch Instructions 274// ----------------------------------------------------------------------------- 275 276// Branch, immed 277// Compare and branch 278def : SchedAlias<WriteBr, N1Write_1c_1B>; 279 280// Branch, register 281def : SchedAlias<WriteBrReg, N1Write_1c_1B>; 282 283// Branch and link, immed 284// Branch and link, register 285def : InstRW<[N1Write_1c_1B_1I], (instrs BL, BLR)>; 286 287// Compare and branch 288def : InstRW<[N1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>; 289 290 291// Arithmetic and Logical Instructions 292// ----------------------------------------------------------------------------- 293 294// ALU, basic 295// ALU, basic, flagset 296// Conditional compare 297// Conditional select 298// Logical, basic 299// Address generation 300// Count leading 301// Reverse bits/bytes 302// Move immediate 303def : SchedAlias<WriteI, N1Write_1c_1I>; 304 305// ALU, extend and shift 306def : SchedAlias<WriteIEReg, N1Write_2c_1M>; 307 308// Arithmetic, LSL shift, shift <= 4 309// Arithmetic, flagset, LSL shift, shift <= 4 310// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 311def N1WriteISReg : SchedWriteVariant<[ 312 SchedVar<IsCheapLSL, [N1Write_1c_1I]>, 313 SchedVar<NoSchedPred, [N1Write_2c_1M]>]>; 314def : SchedAlias<WriteISReg, N1WriteISReg>; 315 316// Logical, shift, no flagset 317def : 
InstRW<[N1Write_1c_1I], 318 (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 319 320// Logical, shift, flagset 321def : InstRW<[N1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>; 322 323 324// Divide and multiply instructions 325// ----------------------------------------------------------------------------- 326 327// Divide 328def : SchedAlias<WriteID32, N1Write_12c5_1M>; 329def : SchedAlias<WriteID64, N1Write_20c5_1M>; 330 331// Multiply accumulate 332// Multiply accumulate, long 333def : SchedAlias<WriteIM32, N1Write_2c_1M>; 334def : SchedAlias<WriteIM64, N1Write_4c3_1M>; 335 336// Multiply high 337def : InstRW<[N1Write_5c3_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>; 338 339 340// Miscellaneous data-processing instructions 341// ----------------------------------------------------------------------------- 342 343// Bitfield extract, one reg 344// Bitfield extract, two regs 345def N1WriteExtr : SchedWriteVariant<[ 346 SchedVar<IsRORImmIdiomPred, [N1Write_1c_1I]>, 347 SchedVar<NoSchedPred, [N1Write_3c_1I_1M]>]>; 348def : SchedAlias<WriteExtr, N1WriteExtr>; 349 350// Bitfield move, basic 351// Variable shift 352def : SchedAlias<WriteIS, N1Write_1c_1I>; 353 354// Bitfield move, insert 355def : InstRW<[N1Write_2c_1M], (instregex "^BFM[WX]ri$")>; 356 357// Move immediate 358def : SchedAlias<WriteImm, N1Write_1c_1I>; 359 360// Load instructions 361// ----------------------------------------------------------------------------- 362 363// Load register, immed offset 364def : SchedAlias<WriteLD, N1Write_4c_1L>; 365 366// Load register, immed offset, index 367def : SchedAlias<WriteLDIdx, N1Write_4c_1L>; 368def : SchedAlias<WriteAdr, N1Write_1c_1I>; 369 370// Load pair, immed offset 371def : SchedAlias<WriteLDHi, N1Write_4c_1L>; 372 373// Load pair, immed offset, W-form 374def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>; 375 376// Load pair, signed immed offset, signed words 377def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>; 378 
// Load pair, immed post or pre-index, signed words
def : InstRW<[WriteAdr, N1Write_5c_1I_1L, N1Write_0c_0Z],
             (instrs LDPSWpost, LDPSWpre)>;


// Store instructions
// -----------------------------------------------------------------------------

// Store register, immed offset
def : SchedAlias<WriteST, N1Write_1c_1L_1D>;

// Store register, immed offset, index
def : SchedAlias<WriteSTIdx, N1Write_1c_1L_1D>;

// Store pair, immed offset
def : SchedAlias<WriteSTP, N1Write_1c_2L_1D>;

// Store pair, immed offset, W-form
def : InstRW<[N1Write_1c_1L_1D], (instrs STPWi)>;


// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF, N1Write_2c_1V>;

// FP compare
def : SchedAlias<WriteFCmp, N1Write_2c_1V0>;

// FP divide
// FP square root
// Default for WriteFDiv; the per-form InstRW entries below refine it.
def : SchedAlias<WriteFDiv, N1Write_10c7_1V0>;

// FP divide, H-form
// FP square root, H-form
def : InstRW<[N1Write_7c7_1V0], (instrs FDIVHrr, FSQRTHr)>;

// FP divide, S-form
// FP square root, S-form
def : InstRW<[N1Write_10c7_1V0], (instrs FDIVSrr, FSQRTSr)>;

// FP divide, D-form
def : InstRW<[N1Write_15c7_1V0], (instrs FDIVDrr)>;

// FP square root, D-form
def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTDr)>;

// FP multiply
def : SchedAlias<WriteFMul, N1Write_3c_1V>;

// FP multiply accumulate
def : InstRW<[N1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
                                          "^FRINT(32|64)[XZ][SD]r$")>;


// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from vec to vec reg
// FP convert, Javascript from vec to gen reg
def : SchedAlias<WriteFCvt, N1Write_3c_1V>;

// FP convert, from gen to vec reg
def : InstRW<[N1Write_6c_1M_1V0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[N1Write_4c_1V0_1V1], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP move, immed
def : SchedAlias<WriteFImm, N1Write_2c_1V>;

// FP move, register
def : InstRW<[N1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
// FP transfer, from gen to high half of vec reg
def : InstRW<[N1Write_3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                                      FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, N1Write_2c_1V1>;


// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
                                                      "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, N1Write_5c_1L],
             (instregex "^LDR[BHSDQ](post|pre)$")>;

// Load vector reg, unsigned immed
def : InstRW<[N1Write_5c_1I_1L], (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N1Write_5c_1I_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[N1Write_6c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[N1Write_5c_1I_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
// Both the temporal (LDP) and non-temporal (LDNP) Q-form pair loads are
// listed explicitly, mirroring the S/D-form entry above which covers
// LDP and LDNP via "^LDN?P[SD]i$".
def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, N1Write_5c_1L, WriteLDHi],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, N1Write_7c_1L, WriteLDHi],
             (instrs LDPQpost, LDPQpre)>;


// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D-form
def : InstRW<[N1Write_2c_1I_1L], (instregex "^STUR[BHSD]i$")>;

// Store vector reg, unscaled immed, Q-form
def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed pre-index, B/H/S/D-form
def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instregex "^STR[BHSD](pre|post)$")>;

// Store vector reg, immed pre-index, Q-form
// Store vector reg, immed post-index, Q-form
def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STRQpre, STRQpost)>;

// Store vector reg, unsigned immed, B/H/S/D-form
def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;

// Store vector reg, unsigned immed, Q-form
def : InstRW<[N1Write_2c_2L_2V], (instrs STRQui)>;

// Store vector reg, register offset, basic, B/S/D-form
// Store vector reg, register offset, scale, B/S/D-form
// Store vector reg, register offset, extend, B/S/D-form
// Store vector reg, register offset, extend, scale, B/S/D-form
def : InstRW<[N1Write_2c_1L_1V, ReadAdrBase], (instregex "^STR[BSD]ro[WX]$")>;

// Store vector reg, register offset, basic, H-form
// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, extend, H-form
// Store vector reg, register offset, extend, scale, H-form
def : InstRW<[N1Write_2c_1I_1L_1V, ReadAdrBase],
             (instregex "^STRHro[WX]$")>;

// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, Q-form
def : InstRW<[N1Write_2c_2L_2V, ReadAdrBase],
             (instregex "^STRQro[WX]$")>;

// Store vector pair, immed offset, S-form
def : InstRW<[N1Write_2c_1L_1V],
             (instrs STPSi, STNPSi)>;

// Store vector pair, immed offset, D-form
def : InstRW<[N1Write_2c_2L_2V],
             (instrs STPDi, STNPDi)>;

// Store vector pair, immed offset, Q-form
def : InstRW<[N1Write_3c_4L_2V],
             (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed pre-index, S-form
def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
             (instrs STPSpre, STPSpost)>;

// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, N1Write_2c_2L_2V],
             (instrs STPDpre, STPDpost)>;

// Store vector pair, immed post-index, Q-form
// Store vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, N1Write_3c_4L_2V],
             (instrs STPQpre, STPQpost)>;


// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
// These two aliases are the default for every D-form/Q-form vector operation
// that has no more specific InstRW entry below.
def : SchedAlias<WriteVd, N1Write_2c_1V>;
def : SchedAlias<WriteVq, N1Write_2c_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[N1Write_4c_1V1],
             (instregex "^[SU]ABAL?v")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[N1Write_3c_1V1],
             (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[N1Write_5c_1V1_1V],
             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[N1Write_6c_2V1],
             (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;

// ASIMD max/min, reduce, 4H/4S
def : InstRW<[N1Write_3c_1V1],
             (instregex "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;

// ASIMD max/min, reduce, 8B/8H
def : InstRW<[N1Write_5c_1V1_1V],
             (instregex "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;

// ASIMD max/min, reduce, 16B
def : InstRW<[N1Write_6c_2V1],
             (instregex "[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD multiply, D-form
// ASIMD multiply accumulate, D-form
// ASIMD multiply accumulate high, D-form
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD multiply accumulate long
def : InstRW<[N1Write_4c_1V0],
             (instregex "^MUL(v[14]i16|v[12]i32)$",
                        "^ML[AS](v[14]i16|v[12]i32)$",
                        "^SQ(R)?DMULH(v[14]i16|v[12]i32)$",
                        "^SQRDML[AS]H(v[14]i16|v[12]i32)$",
                        "^SQDML[AS]Lv",
                        "^([SU]|SQD)MULLv",
                        "^[SU]ML[AS]Lv")>;

// ASIMD multiply, Q-form
// ASIMD multiply accumulate, Q-form
// ASIMD multiply accumulate high, Q-form
def : InstRW<[N1Write_5c_2V0],
             (instregex "^MUL(v8i16|v4i32)$",
                        "^ML[AS](v8i16|v4i32)$",
                        "^SQ(R)?DMULH(v8i16|v4i32)$",
                        "^SQRDML[AS]H(v8i16|v4i32)$")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
def : InstRW<[N1Write_3c_1V0],
             (instrs PMULv8i8, PMULLv8i8)>;

// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[N1Write_4c_2V0],
             (instrs PMULv16i8, PMULLv16i8)>;

// ASIMD pairwise add and accumulate long
def : InstRW<[N1Write_4c_1V1],
             (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[N1Write_4c_1V1],
             (instregex "^[SU]R?SRAv")>;

// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic
// ASIMD shift by register, basic
def : InstRW<[N1Write_2c_1V1],
             (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
                        "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;

// ASIMD shift by immed, complex
// ASIMD shift by register, complex
def : InstRW<[N1Write_4c_1V1],
             (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;


// ASIMD FP instructions
// -----------------------------------------------------------------------------

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Covered by "SchedAlias (WriteV[dq]...)" above

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[N1Write_4c_2V0],
             (instregex "^FCVTL(v4|v8)i16$")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[N1Write_3c_1V0],
             (instregex "^FCVTL(v2|v4)i32$")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[N1Write_4c_2V0],
             (instregex "^FCVTN(v4|v8)i16$")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[N1Write_3c_1V0],
             (instregex "^FCVTN(v2|v4)i32$",
                        "^FCVTXN(v2|v4)f32$")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[N1Write_3c_1V0],
             (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
                        "^[SU]CVTFv2f(32|64)$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[N1Write_4c_2V0],
             (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
                        "^[SU]CVTFv4f(16|32)$")>;

// ASIMD FP convert, other, Q-form F16
def : InstRW<[N1Write_6c_4V0],
             (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
                        "^[SU]CVTFv8f16$")>;

// ASIMD FP divide, D-form, F16
// ASIMD FP square root, D-form, F16
def : InstRW<[N1Write_7c7_1V0],
             (instrs FDIVv4f16, FSQRTv4f16)>;

// ASIMD FP divide, D-form, F32
// ASIMD FP square root, D-form, F32
def : InstRW<[N1Write_10c7_1V0],
             (instrs FDIVv2f32, FSQRTv2f32)>;

// ASIMD FP divide, Q-form, F16
// ASIMD FP square root, Q-form, F16
def : InstRW<[N1Write_13c10_1V0],
             (instrs FDIVv8f16, FSQRTv8f16)>;

// ASIMD FP divide, Q-form, F32
// ASIMD FP square root, Q-form, F32
def : InstRW<[N1Write_10c7_1V0],
             (instrs FDIVv4f32, FSQRTv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[N1Write_15c7_1V0],
             (instrs FDIVv2f64)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[N1Write_17c7_1V0],
             (instrs FSQRTv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[N1Write_5c_1V],
             (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[N1Write_8c_3V],
             (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[N1Write_3c_1V],
             (instregex "^FMULX?v")>;

// ASIMD FP multiply accumulate
def : InstRW<[N1Write_4c_1V],
             (instregex "^FML[AS]v")>;

// ASIMD FP multiply accumulate long
def : InstRW<[N1Write_5c_1V],
             (instregex "^FML[AS]L2?v")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[N1Write_3c_1V0],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[N1Write_4c_2V0],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[N1Write_6c_4V0],
             (instregex "^FRINT[AIMNPXZ]v8f16$")>;


// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
// ASIMD transfer, element to gen reg
// ASIMD transpose
// ASIMD unzip/zip
// Covered by "SchedAlias (WriteV[dq]...)" above

// ASIMD duplicate, gen reg
def : InstRW<[N1Write_3c_1M],
             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;

// ASIMD extract narrow, saturating
def : InstRW<[N1Write_4c_1V1],
             (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form F32 and F64
def : InstRW<[N1Write_3c_1V0],
             (instrs FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
                     FRECPXv1i32, FRECPXv1i64,
                     URECPEv2i32,
                     FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64,
                     URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[N1Write_4c_2V0],
             (instrs FRECPEv1f16, FRECPEv4f16, FRECPEv4f32,
                     FRECPXv1f16,
                     URECPEv4i32,
                     FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32,
                     URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[N1Write_6c_4V0],
             (instrs FRECPEv8f16,
                     FRSQRTEv8f16)>;

// ASIMD reciprocal step
def : InstRW<[N1Write_4c_1V],
             (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
                        "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;

// ASIMD table lookup, 3 table regs
// ASIMD table lookup extension, 2 table reg
def : InstRW<[N1Write_4c_4V],
             (instrs TBLv8i8Three, TBLv16i8Three,
                     TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[N1Write_4c_3V],
             (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[N1Write_6c_3V],
             (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[N1Write_6c_5V],
             (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[N1Write_2c_1V1],
             (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
                        "^UMOVvi(8|16|32|64)$")>;

// ASIMD transfer, gen reg to element
def : InstRW<[N1Write_5c_1M_1V],
             (instregex "^INSvi(8|16|32|64)gpr$")>;


// ASIMD load instructions
// -----------------------------------------------------------------------------
// Each plain form is followed by its post-indexed (_POST) twin, which adds a
// leading WriteAdr for the base-register write-back.

// ASIMD load, 1 element, multiple, 1 reg
def : InstRW<[N1Write_5c_1L],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_5c_1L],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg
def : InstRW<[N1Write_5c_2L],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_5c_2L],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg
def : InstRW<[N1Write_6c_3L],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_6c_3L],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg
def : InstRW<[N1Write_6c_4L],
             (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_6c_4L],
             (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, one lane
// ASIMD load, 1 element, all lanes
def : InstRW<[N1Write_7c_1L_1V],
             (instregex "LD1(i|Rv)(8|16|32|64)$",
                        "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_7c_1L_1V],
             (instregex "LD1i(8|16|32|64)_POST$",
                        "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 2 element, multiple
// ASIMD load, 2 element, one lane
// ASIMD load, 2 element, all lanes
def : InstRW<[N1Write_7c_2L_2V],
             (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
                        "LD2i(8|16|32|64)$",
                        "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_7c_2L_2V],
             (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
                        "LD2i(8|16|32|64)_POST$",
                        "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 3 element, multiple
def : InstRW<[N1Write_8c_3L_3V],
             (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_8c_3L_3V],
             (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane
// ASIMD load, 3 element, all lanes
def : InstRW<[N1Write_7c_2L_3V],
             (instregex "LD3i(8|16|32|64)$",
                        "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_7c_2L_3V],
             (instregex "LD3i(8|16|32|64)_POST$",
                        "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form
def : InstRW<[N1Write_8c_3L_4V],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N1Write_8c_3L_4V],
             (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form
def : InstRW<[N1Write_10c_4L_4V],
             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_10c_4L_4V],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane
// ASIMD load, 4 element, all lanes
def : InstRW<[N1Write_8c_4L_4V],
             (instregex "LD4i(8|16|32|64)$",
                        "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, N1Write_8c_4L_4V],
             (instregex "LD4i(8|16|32|64)_POST$",
                        "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;


// ASIMD store instructions
// -----------------------------------------------------------------------------

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N1Write_2c_1L_1V],
             (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
             (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N1Write_2c_1L_1V],
             (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
             (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N1Write_2c_1L_2V],
             (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N1Write_2c_1L_2V],
             (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// Remaining ASIMD store categories. Each comes as a pair: the plain form,
// then the "_POST" (post-indexed) form, which lists WriteAdr ahead of the
// same data write.
// NOTE(review): WriteAdr presumably models the base-register writeback;
// confirm against its definition elsewhere in this file.

// ASIMD store, 1 element, multiple, 2 regs, Q-form
def : InstRW<[N1Write_3c_2L_2V],
    (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
    (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 regs, D-form
def : InstRW<[N1Write_3c_2L_3V],
    (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N1Write_3c_2L_3V],
    (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 regs, Q-form
def : InstRW<[N1Write_4c_3L_3V],
    (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
    (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 regs, D-form
def : InstRW<[N1Write_3c_2L_2V],
    (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
    (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 regs, Q-form
def : InstRW<[N1Write_5c_4L_4V],
    (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_5c_4L_4V],
    (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
    (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
    (instregex "ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_4c_1L_1V],
    (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
    (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form
def : InstRW<[N1Write_5c_2L_2V],
    (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
    (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
    (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
    (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_5c_2L_2V],
    (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
    (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form
def : InstRW<[N1Write_6c_3L_3V],
    (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, N1Write_6c_3L_3V],
    (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H/S
def : InstRW<[N1Write_4c_3L_3V],
    (instregex "ST3i(8|16|32)$")>;
def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
    (instregex "ST3i(8|16|32)_POST$")>;

// ASIMD store, 3 element, one lane, D
def : InstRW<[N1Write_5c_3L_3V],
    (instrs ST3i64)>;
def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
    (instrs ST3i64_POST)>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_7c_3L_3V],
    (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, N1Write_7c_3L_3V],
    (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N1Write_9c_6L_6V],
    (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, N1Write_9c_6L_6V],
    (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N1Write_6c_4L_4V],
    (instrs ST4Fourv2d)>;
def : InstRW<[WriteAdr, N1Write_6c_4L_4V],
    (instrs ST4Fourv2d_POST)>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N1Write_5c_3L_3V],
    (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
    (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[N1Write_4c_3L_3V],
    (instrs ST4i64)>;
def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
    (instrs ST4i64_POST)>;


// Cryptography extensions
// -----------------------------------------------------------------------------

// Crypto AES ops
//
// N1WriteVC wraps N1Write_2c_1V0 in a WriteSequence so that it can be named as
// a producer by N1ReadVC, which advances the read by 2 cycles for values
// written through N1WriteVC. AESD/AESE write through N1WriteVC and
// AESMC/AESIMC read through N1ReadVC, so only that producer/consumer pairing
// gets the reduced latency.
// NOTE(review): assuming N1Write_2c_1V0 has the 2-cycle latency its name
// suggests, the advance cancels it entirely for the paired case; confirm
// against its definition.
def N1WriteVC : WriteSequence<[N1Write_2c_1V0]>;
def N1ReadVC  : SchedReadAdvance<2, [N1WriteVC]>;
def : InstRW<[N1WriteVC],
    (instrs AESDrr, AESErr)>;
def : InstRW<[N1Write_2c_1V0, N1ReadVC],
    (instrs AESMCrr, AESIMCrr)>;

// Crypto polynomial (64x64) multiply long
// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration ops
def : InstRW<[N1Write_2c_1V0],
    (instregex "^PMULLv[12]i64$",
               "^SHA1(H|SU0|SU1)rr",
               "^SHA256SU[01]rr")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[N1Write_4c_1V0],
    (instregex "^SHA1[CMP]rrr$",
               "^SHA256H2?rrr$")>;


// CRC
// -----------------------------------------------------------------------------

// CRC checksum ops (CRC32 and CRC32C; B/H/W/X operand sizes)
def : InstRW<[N1Write_2c_1M],
    (instregex "^CRC32C?[BHWX]rr$")>;


}