1//=- AArch64SchedOryon.td - Qualcomm Oryon CPU 001 ---*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for Qualcomm Oryon 10// family of processors. 11// 12//===----------------------------------------------------------------------===// 13 14//===----------------------------------------------------------------------===// 15// Pipeline Description. 16 17def OryonModel : SchedMachineModel { 18 let IssueWidth = 14; 19 let MicroOpBufferSize = 376; 20 let LoadLatency = 4; 21 let MispredictPenalty = 13; // 13 cycles for mispredicted branch. 22 let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer 23 let PostRAScheduler = 1; // Using PostRA sched. 24 let CompleteModel = 1; 25 26 list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, 27 SMEUnsupported.F, 28 MTEUnsupported.F, 29 PAUnsupported.F, 30 [HasPAuth, HasCSSC]); 31} 32 33let SchedModel = OryonModel in { 34 35// Issue ports. 36// IXU has 6 ports p0 ~ p5 37// LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3 38// VXU has 4 ports p12 ~ p15 39 40// cross IXU/LSU/VXU resource group for FMOV P41 of VXU 41// I2V 42def ORYONI4FP0 : ProcResource<1>; 43def ORYONI5FP1 : ProcResource<1>; 44// V2I 45def ORYONFP0I4 : ProcResource<1>; 46def ORYONFP1I5 : ProcResource<1>; 47 48// store 1 for normal store instructions 49def ORYONST0 : ProcResource<1>; 50// store 2 for normal store instructions 51def ORYONST1 : ProcResource<1>; 52 53// Port 0: ALU/Indirect/Direct Branch. 54def ORYONP0 : ProcResource<1>; 55 56// Port 1: ALU/Direct Branch. 57def ORYONP1 : ProcResource<1>; 58 59// Port 2: ALU. 60def ORYONP2 : ProcResource<1>; 61 62// Port 3: ALU. 63def ORYONP3 : ProcResource<1>; 64 65// Port 4: ALU. 66def ORYONP4 : ProcResource<1> { 67 let Super = ORYONI4FP0; 68 let Super = ORYONFP0I4; } 69 70// Port 5: ALU. 71def ORYONP5 : ProcResource<1> { 72 let Super = ORYONI5FP1; 73 let Super = ORYONFP1I5; } 74 75// Port 6: Load/Store. LS0 76def ORYONP6 : ProcResource<1> { 77 let Super = ORYONST0; } 78 79// Port 7: Load/store. LS1 80def ORYONP7 : ProcResource<1> { 81 let Super = ORYONST0; } 82 83// Port 8: Load/Store. LS2 84def ORYONP8 : ProcResource<1> { 85 let Super = ORYONST1; } 86 87// Port 9: Load/store. LS3 88def ORYONP9 : ProcResource<1> { 89 let Super = ORYONST1; } 90 91// Port 10: Load/Store. STD0 92def ORYONP10SD0 : ProcResource<1> { 93 let Super = ORYONST0; } 94 95// Port 11: Load/store. STD1 96def ORYONP11SD1 : ProcResource<1> { 97 let Super = ORYONST1; } 98 99// Port 12: FP/Neon/SIMD/Crypto. 100def ORYONP12FP0 : ProcResource<1> { 101 let Super = ORYONI4FP0; 102 let Super = ORYONFP0I4; } 103 104// Port 13: FP/Neon/SIMD/Crypto. 105def ORYONP13FP1 : ProcResource<1> { 106 let Super = ORYONI5FP1; 107 let Super = ORYONFP1I5; } 108 109// Port 14: FP/Neon/SIMD/Crypto. 110def ORYONP14FP2 : ProcResource<1>; 111 112// Port 15: FP/Neon/SIMD/Crypto. 113def ORYONP15FP3 : ProcResource<1>; 114 115// Define groups for the functional units on each issue port. Each group 116// created will be used by a WriteRes. 117 118// Integer add/shift/logical/misc. instructions on port I0/I1/I2/I3/I4/I5. 119def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2, 120 ORYONP3, ORYONP4, ORYONP5]> { 121 let BufferSize = 120; 122} 123 124// Direct Conditional Branch instructions on ports I0/I1. 125def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]> { 126 let BufferSize = 40; 127} 128 129// Indirect/crypto Conditional Branch instructions on ports I0. 130def ORYONI0 : ProcResGroup<[ORYONP0]> { 131 let BufferSize = 20; 132} 133 134// Crypto/CRC/PAU instructions on ports I2. 135def ORYONI2 : ProcResGroup<[ORYONP2]> { 136 let BufferSize = 20; 137} 138 139// Multiply/Multiply-ADD instructions on ports I4/I5. 140def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]> { 141 let BufferSize = 40; 142} 143 144// Divide instructions on ports I5. 145def ORYONI5 : ProcResGroup<[ORYONP5]> { 146 let BufferSize = 20; 147} 148 149// Comparison instructions on ports I0/I1/I2/I3. 150def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1, 151 ORYONP2, ORYONP3]> { 152 let BufferSize = 80; 153} 154 155// Load instructions on ports P6/P7/P8/P9. 156def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]> { 157 let BufferSize = 64; 158} 159 160// Store instructions on combo of STA/STD pipes 161def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]> { 162 let BufferSize = 64; 163} 164 165// Arithmetic and CRYP-AED ASIMD/FP instructions on ports FP0/FP1/FP2/FP3. 166def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1, 167 ORYONP14FP2, ORYONP15FP3]> { 168 let BufferSize = 192; 169} 170 171// FP Comparison and F/I move instructions on ports FP0/FP1. 172def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]> { 173 let BufferSize = 96; 174} 175 176// FDIV instructions on ports FP3. 177def ORYONFP3 : ProcResGroup<[ORYONP15FP3]> { 178 let BufferSize = 48; 179} 180 181// CRYP-SHA instructions on ports FP1. 182def ORYONFP1 : ProcResGroup<[ORYONP14FP2]> { 183 let BufferSize = 48; 184} 185 186def ORYONFP2 : ProcResGroup<[ORYONP14FP2]> { 187 let BufferSize = 48; 188} 189 190// Reciprocal, Squre root on FP0. 191def ORYONFP0 : ProcResGroup<[ORYONP12FP0]> { 192 let BufferSize = 48; 193} 194 195// cross IXU/LSU/VXU resource group for FMOV P41 of VXU 196// I2V 197def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]> { 198 let BufferSize = 40; 199} 200 201// V2I 202def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]> { 203 let BufferSize = 96; 204} 205 206// Define commonly used write types for InstRW specializations. 207// All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>. 208 209// Because of the complexity of Oryon CPU, we skip the following 210// generic definitions and define each instruction specifically 211 212// These WriteRes entries are not used in the Falkor sched model. 213def : WriteRes<WriteImm, []> { let Unsupported = 1; } 214def : WriteRes<WriteI, []> { let Unsupported = 1; } 215def : WriteRes<WriteISReg, []> { let Unsupported = 1; } 216def : WriteRes<WriteIEReg, []> { let Unsupported = 1; } 217def : WriteRes<WriteExtr, []> { let Unsupported = 1; } 218def : WriteRes<WriteIS, []> { let Unsupported = 1; } 219def : WriteRes<WriteID32, []> { let Unsupported = 1; } 220def : WriteRes<WriteID64, []> { let Unsupported = 1; } 221def : WriteRes<WriteIM32, []> { let Unsupported = 1; } 222def : WriteRes<WriteIM64, []> { let Unsupported = 1; } 223def : WriteRes<WriteBr, []> { let Unsupported = 1; } 224def : WriteRes<WriteBrReg, []> { let Unsupported = 1; } 225def : WriteRes<WriteLD, []> { let Unsupported = 1; } 226def : WriteRes<WriteST, []> { let Unsupported = 1; } 227def : WriteRes<WriteSTP, []> { let Unsupported = 1; } 228def : WriteRes<WriteAdr, []> { let Unsupported = 1; } 229def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; } 230def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; } 231def : WriteRes<WriteF, []> { let Unsupported = 1; } 232def : WriteRes<WriteFCmp, []> { let Unsupported = 1; } 233def : WriteRes<WriteFCvt, []> { let Unsupported = 1; } 234def : WriteRes<WriteFCopy, []> { let Unsupported = 1; } 235def : WriteRes<WriteFImm, []> { let Unsupported = 1; } 236def : WriteRes<WriteFMul, []> { let Unsupported = 1; } 237def : WriteRes<WriteFDiv, []> { let Unsupported = 1; } 238def : WriteRes<WriteVd, []> { let Unsupported = 1; } 239def : WriteRes<WriteVq, []> { let Unsupported = 1; } 240def : WriteRes<WriteVLD, []> { let Unsupported = 1; } 241def : WriteRes<WriteVST, []> { let Unsupported = 1; } 242def : WriteRes<WriteSys, []> { let Unsupported = 1; } 243def : WriteRes<WriteBarrier, []> { let Unsupported = 1; } 244def : WriteRes<WriteHint, []> { let Unsupported = 1; } 245def : WriteRes<WriteLDHi, []> { let Unsupported = 1; } 246def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 247 248// These ReadAdvance entries will be defined in later implementation 249def : ReadAdvance<ReadI, 0>; 250def : ReadAdvance<ReadISReg, 0>; 251def : ReadAdvance<ReadIEReg, 0>; 252def : ReadAdvance<ReadIM, 0>; 253def : ReadAdvance<ReadIMA, 0>; 254def : ReadAdvance<ReadID, 0>; 255def : ReadAdvance<ReadExtrHi, 0>; 256def : ReadAdvance<ReadAdrBase, 0>; 257def : ReadAdvance<ReadVLD, 0>; 258def : ReadAdvance<ReadST, 0>; 259 260 261//IXU resource definition 262// 1 cycles NO pipe 263def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>; 264 265// 1 cycles on I01. 266def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>; 267 268def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]> { 269 let NumMicroOps = 2; 270} 271 272def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>; 273 274// 7 cycles on I2. PAC*/AUT* instructions 275def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]> { 276 let Latency = 7; 277} 278 279// 7 cycles on I2. PAC*/AUT* instructions 280def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> { 281 let Latency = 7; 282 let NumMicroOps = 3; 283} 284 285// 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions 286// these instructions are broken down to three uops 287// a. PtrAuth on pipe 2 taking 7 cycles 288// b. Link Register Update on pipes 0 and 1 taking 1 cycle 289// c. Indirect branch on pipe 0 taking 1 cycle 290 291def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]> { 292 let Latency = 9; 293 let NumMicroOps = 3; 294} 295 296// 3 cycles on I2. CRC32 and CRC32C instructions 297def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]> { 298 let Latency = 3; 299} 300 301// 1 cycle on I012345 302def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>; 303 304// 1 cycle on I0123 305def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>; 306 307// 1 cycle on 2 of I012345 308def ORYONWrite_1Cyc_I012345_I012345 : 309SchedWriteRes<[ORYONI012345, ORYONI012345]> ; 310 311// 2 cycle on 2 of I0123 with ReleaseAtCycles 312def ORYONWrite_2Cyc_I0123_I0123_RC : 313SchedWriteRes<[ORYONI0123, ORYONI0123]> { 314 let Latency = 2; 315 let ReleaseAtCycles = [2,2]; 316} 317 318// 2 cycle on 2 of I012345 319def ORYONWrite_2Cyc_I012345_I012345_RC : 320SchedWriteRes<[ORYONI012345, ORYONI012345]> { 321 let Latency = 2; 322 let ReleaseAtCycles = [2,2]; 323} 324 325// 3 cycle on 2 of I45 326def ORYONWrite_3Cyc_I45_I45_RC : 327SchedWriteRes<[ORYONI45, ORYONI45]> { 328 let Latency = 3; 329 let ReleaseAtCycles = [2,2]; 330} 331 332// 3 cycle on I45 333def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]> { 334 let Latency = 3; 335} 336 337// 7 cycle on I2 32-bit integer division 338def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { 339 let Latency = 7; 340 let ReleaseAtCycles = [2]; 341} 342 343// 9 cycle on I2 64-bit integer division 344def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { 345 let Latency = 9; 346 let ReleaseAtCycles = [2]; 347} 348 349// LSU resource definition 350// need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX 351// 4 cycle on LS(P6789) 352def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]> { 353 let Latency = 4; 354} 355 356// 4 cycle for Post/Pre inc/dec access, also covers all pair loads Post/Pre 357def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 358 let Latency = 4; 359} 360 361// 5 (4+1) for VXU SIMD access/could also include FP 362// resource might not be correct, as VXU resource not included 363def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]> { 364 let Latency = 5; 365} 366 367def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]> { 368 let Latency = 5; 369 let NumMicroOps = 2; 370} 371 372def ORYONWrite_5Cyc_3Uops_LD : SchedWriteRes<[ORYONLD]> { 373 let Latency = 5; 374 let NumMicroOps = 3; 375} 376 377def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]> { 378 let Latency = 5; 379 let NumMicroOps = 4; 380} 381 382def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]> { 383 let Latency = 5; 384 let NumMicroOps = 5; 385} 386 387def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]> { 388 let Latency = 5; 389 let NumMicroOps = 6; 390} 391 392def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]> { 393 let Latency = 5; 394 let NumMicroOps = 8; 395} 396 397def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]> { 398 let Latency = 5; 399 let NumMicroOps = 10; 400} 401 402// 6 cycle for Post/Pre inc/dec access 403def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 404 let Latency = 5; 405} 406 407def ORYONWrite_5Cyc_2Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 408 let Latency = 5; 409 let NumMicroOps = 2; 410} 411 412def ORYONWrite_5Cyc_3Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 413 let Latency = 5; 414 let NumMicroOps = 3; 415} 416 417def ORYONWrite_5Cyc_4Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 418 let Latency = 5; 419 let NumMicroOps = 4; 420} 421 422def ORYONWrite_5Cyc_5Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 423 let Latency = 5; 424 let NumMicroOps = 5; 425} 426 427def ORYONWrite_5Cyc_6Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 428 let Latency = 5; 429 let NumMicroOps = 6; 430} 431 432def ORYONWrite_5Cyc_8Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 433 let Latency = 5; 434 let NumMicroOps = 8; 435} 436 437def ORYONWrite_5Cyc_10Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { 438 let Latency = 5; 439 let NumMicroOps = 10; 440} 441 442// 1 cycle for all generic stores 443def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>; 444 445def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]> { 446 let NumMicroOps = 2; 447} 448 449def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]> { 450 let NumMicroOps = 3; 451} 452 453def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]> { 454 let NumMicroOps = 4; 455} 456 457def ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]> { 458 let NumMicroOps = 5; 459} 460 461def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]> { 462 let NumMicroOps = 6; 463} 464 465def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]> { 466 let NumMicroOps = 8; 467} 468 469def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]> { 470 let NumMicroOps = 10; 471} 472 473// 1 cycle for neon write: float + ASIMD with Post/Pre Inc/Dec access 474// also includes Pair store until further informed 475def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 476 let NumMicroOps = 3; 477} 478 479def ORYONWrite_1Cyc_2Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 480 let NumMicroOps = 2; 481} 482 483def ORYONWrite_1Cyc_3Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 484 let NumMicroOps = 3; 485} 486 487def ORYONWrite_1Cyc_4Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 488 let NumMicroOps = 4; 489} 490 491def ORYONWrite_1Cyc_5Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 492 let NumMicroOps = 5; 493} 494 495def ORYONWrite_1Cyc_6Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 496 let NumMicroOps = 6; 497} 498 499def ORYONWrite_1Cyc_8Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 500 let NumMicroOps = 8; 501} 502 503def ORYONWrite_1Cyc_10Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { 504 let NumMicroOps = 10; 505} 506 507// VXU resource definition 508 509// I2V instruction has 1 uOp 510// I2v with convert has 2 uOps 511// all I2V, V2I's throughputs are 2 512// On VXU doc, p37 -- latencies and throughput 513// P41, resource taken, P42, uOps 514def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]> { 515 let Latency = 4; 516} 517 518// inline a FCVT, so add one more uOp 519def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]> { 520 let Latency = 7; 521 let NumMicroOps = 2; 522} 523 524// V2I move instruction has 1/2 uOps, P42 in VXU doc 525// Latency is 3, FCVT is also 3 cycle 526// move + convert is 6 (3+3) cycles 527// throughput is 2 528def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { 529 let Latency = 3; 530} 531 532// inline a FCVT, so add one more uOp 533def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { 534 let Latency = 6; 535 let NumMicroOps = 2; 536} 537 538def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { 539 let Latency = 2; 540} 541 542def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { 543 let Latency = 3; 544} 545 546def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]> { 547 let Latency = 6; 548 let NumMicroOps = 3; 549} 550 551def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { 552 let Latency = 4; 553} 554 555def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]> { 556 let Latency = 3; 557} 558 559def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { 560 let Latency = 3; 561} 562 563def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]> { 564 let Latency = 3; 565 let NumMicroOps = 2; 566} 567 568def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { 569 let Latency = 2; 570} 571 572def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]> { 573 let Latency = 2; 574} 575 576// 2 cycle on FP1 577def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { 578 let Latency = 2; 579} 580 581// 3 cycle on FP1 582def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { 583 let Latency = 3; 584} 585 586// 4 cycle , 0.5 throughput on FP1 587def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]> { 588 let Latency = 4; 589 let ReleaseAtCycles = [4]; 590} 591 592// 5 cycle , 1 throughput on FP1 593def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { 594 let Latency = 5; 595} 596 597// 8 cycle , 2 throughput on FP0123 598def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]> { 599 let Latency = 8; 600 let ReleaseAtCycles = [2]; 601} 602 603def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { 604 let Latency = 6; 605} 606 607def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { 608 let Latency = 7; 609} 610 611def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { 612 let Latency = 8; 613} 614 615def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { 616 let Latency = 9; 617} 618 619def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { 620 let Latency = 10; 621} 622 623def ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { 624 let Latency = 8; 625 let ReleaseAtCycles = [2]; 626} 627 628def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { 629 let Latency = 10; 630 let ReleaseAtCycles = [2]; 631} 632 633def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { 634 let Latency = 13; 635 let ReleaseAtCycles = [2]; 636} 637 638def ORYONWrite_4Cyc_FP0123_RC : 639SchedWriteRes<[ORYONFP0123]> { 640 let Latency = 4; 641 let ReleaseAtCycles = [2]; 642} 643 644def ORYONWrite_4Cyc_FP0123_FP0123_RC : 645SchedWriteRes<[ORYONFP0123, ORYONFP0123]> { 646 let Latency = 4; 647 let NumMicroOps = 2; 648 let ReleaseAtCycles = [2,2]; 649} 650 651def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC : 652SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]> { 653 let Latency = 4; 654 let NumMicroOps = 3; 655 let ReleaseAtCycles = [3,3,3]; 656} 657 658def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC : 659SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> { 660 let Latency = 6; 661 let NumMicroOps = 4; 662 let ReleaseAtCycles = [6,6,6,6]; 663} 664 665//===----------------------------------------------------------------------===// 666// Instruction Tables in IXU 667//===----------------------------------------------------------------------===// 668 669//--- 670// Arithmetic Instructions 671//--- 672 673//1, 1, 6 674def : InstRW<[ORYONWrite_1Cyc_I012345], 675 (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>; 676 677//2,2,3 678def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], 679 (instregex "^ADD(W|X)rs", "^SUB(W|X)rs")>; 680 681//1,1,4 alias CMP, CMN on page 75 682def : InstRW<[ORYONWrite_1Cyc_I0123], 683 (instregex "^ADDS(W|X)r(i|r|x)(64)?", "^SUBS(W|X)r(i|r|x)")>; 684 685//2,2,2 alias CMP, CMN on page 75 686def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], 687 (instregex "^ADDS(W|X)rs", "^SUBS(W|X)rs")>; 688 689//1,1,4 690def : InstRW<[ORYONWrite_1Cyc_I0123], 691 (instregex "^ADC(W|X)r","^SBC(W|X)r", 692 "^ADCS(W|X)r","^SBCS(W|X)r")>; 693 694//1,1,2 695def : InstRW<[ORYONWrite_1Cyc_2Uops_I01], 696 (instrs ADR,ADRP)>; 697 698//1,1,4 699def : InstRW<[ORYONWrite_1Cyc_I0123], 700 (instregex "^CSEL(W|X)r", "^CSINV(W|X)r", 701 "^CSNEG(W|X)r", "^CSINC(W|X)r")>; 702 703//--- 704//Compare Instruciton 705//--- 706 707// We have CCMP, CCMN as LLVM DAG node 708// CMP is an alias of SUBS as above 709// CMN is an alias of ADDS as above 710// We also have no way to get shift compare node in LLVM 711//2,2,1.5 CMP, CMN 712 713//1,1,4 714def : InstRW<[ORYONWrite_1Cyc_I0123], 715 (instregex "^CCMP(W|X)(i|r)", "^CCMN(W|X)(i|r)")>; 716 717//--- 718// Branch 719//--- 720 721def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>; 722def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>; 723def : InstRW<[ORYONWrite_1Cyc_I01], 724 (instrs Bcc, CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; 725def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>; 726def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>; 727 728// 3 uOp, 1 cycle for branch, 7 cycle for Authentication, 729// 1 cycle for updating link register 730// V8.3a PAC 731def : InstRW<[ORYONWrite_9Cyc_I012], 732 (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, 733 BRAA, BRAAZ, BRAB, BRABZ)>; 734def : InstRW<[ORYONWrite_9Cyc_I012], (instrs RETAA, RETAB, ERETAA, ERETAB)>; 735 736def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>; 737 738// Logical Instructions 739//--- 740 741//1,1,4 TST is an alias of ANDS 742def : InstRW<[ORYONWrite_1Cyc_I0123], 743 (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>; 744 745//2,2,2 TST shift is an alias 746def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], 747 (instregex "^ANDS(W|X)rs", "^BICS(W|X)rs")>; 748 749//1,1,6 750def : InstRW<[ORYONWrite_1Cyc_I012345], 751 (instregex "^AND(W|X)r(i|r|x)", "^EOR(W|X)r(i|r|x)", 752 "^ORR(W|X)r(i|r|x)", "^BIC(W|X)r(i|r|x)", 753 "^EON(W|X)r(i|r|x)", "^ORN(W|X)r(i|r|x)")>; 754 755//2,2,3 756def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], 757 (instregex "^AND(W|X)rs", "^EOR(W|X)rs", "^ORR(W|X)rs", 758 "^BIC(W|X)rs", "^EON(W|X)rs", "^ORN(W|X)rs")>; 759 760 761//--- 762// Shift Instructions 763//--- 764 765//1,1,6 766def : InstRW<[ORYONWrite_1Cyc_I012345], 767 (instregex "^ASRV(W|X)r", "^LSLV(W|X)r", 768 "^LSRV(W|X)r", "^RORV(W|X)r", 769 "RMIF")>; 770 771//--- 772// Move-Data Bit-field and Sign_Extension Instructions 773//--- 774 775//1,1,6 776def : InstRW<[ORYONWrite_1Cyc_I012345], 777 (instregex "^MOVK(W|X)i", "^MOVN(W|X)i", 778 "^MOVZ(W|X)i", "^SBFM(W|X)ri", 779 "^UBFM(W|X)ri", "^BFM(W|X)ri", 780 "^SXT(W|B|H|X)", "^UXT(H|B)")>; 781 782// COPY instruction is an LLVM internal DAG node, needs further study 783def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>; 784 785//--- 786// Reverse Instructions 787//--- 788 789//1,1,6 790def : InstRW<[ORYONWrite_1Cyc_I012345], 791 (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>; 792 793 794//--- 795// Flag Manipulate Instructions 796//--- 797 798//1,1,4 799def : InstRW<[ORYONWrite_1Cyc_I0123], 800 (instregex "^SETF8", "^SETF16", "^CFINV")>; 801 802//--- 803// Miscellaneous Instructions 804//--- 805 806//1,1,6 807def : InstRW<[ORYONWrite_1Cyc_I012345], 808 (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>; 809 810 811//--- 812// Multiply Instructions 813//--- 814 815//1,3,2 816def : InstRW<[ORYONWrite_3Cyc_I45], 817 (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr", 818 "^(S|U)MADDLrrr", "^(S|U)MSUBLrrr", 819 "^(S|U)MULHrr")>; 820 821//--- 822// Divide Instructions 823//--- 824 825def : InstRW<[ORYONWrite_7Cyc_I2_RC], 826 (instregex "^(S|U)DIVWr")>; 827 828def : InstRW<[ORYONWrite_9Cyc_I2_RC], 829 (instregex "^(S|U)DIVXr")>; 830 831 832//--- 833// Cryptgraphy Instructions 834// 835//1,3,1 on I2 836def : InstRW<[ORYONWrite_3Cyc_I2], 837 (instregex "^CRC32(B|H|W|X)rr", "^CRC32C(B|H|W|X)rr")>; 838 839//--- 840// PAU instructions 841//--- 842 843// on p47 of IXU document, we have 7 cycles for all PAU instructions 844// here we just assume all signing and pauth instructions are 7 cycles 845// assume all are 7 cycles here 846 847// signing instrucitons 848def : InstRW<[ORYONWrite_7Cyc_I2], (instrs PACIA, PACIB, 849 PACDA, PACDB, 850 PACIZA, PACIZB, 851 PACDZA, PACDZB, 852 PACGA)>; 853// authentication instrucitons 854def : InstRW<[ORYONWrite_7Cyc_I2], (instrs AUTIA, AUTIB, 855 AUTDA, AUTDB, 856 AUTIZA, AUTIZB, 857 AUTDZA, AUTDZB)>; 858def : InstRW<[ORYONWrite_7Cyc_I2], (instrs XPACI, XPACD)>; 859 860//===----------------------------------------------------------------------===// 861// Instruction Tables in LSU 862//===----------------------------------------------------------------------===// 863 864// 4 cycle Load-to-use from L1D$ 865// Neon load with 5 cycle 866// 6 cycle to STA ? 867// STD cycle ? 868// NEON STD + 2 869 870// Load Instructions 871// FP Load Instructions 872 873// Load pair, immed pre-index, normal 874// Load pair, immed pre-index, signed words 875// Load pair, immed post-index, normal 876// Load pair, immed post-index, signed words 877// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 878 879def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPDi)>; 880def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPQi)>; 881def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPSi)>; 882def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPWi)>; 883def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPXi)>; 884 885def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPDi)>; 886def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPQi)>; 887def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSi)>; 888def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSWi)>; 889def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPWi)>; 890def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPXi)>; 891 892def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBui)>; 893def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDui)>; 894def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHui)>; 895def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQui)>; 896def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSui)>; 897 898def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDl)>; 899def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQl)>; 900def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWl)>; 901def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXl)>; 902 903def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRBi)>; 904def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRHi)>; 905def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRWi)>; 906def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRXi)>; 907 908def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBWi)>; 909def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBXi)>; 910def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHWi)>; 911def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHXi)>; 912def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSWi)>; 913 914def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 915 (instrs LDPDpre)>; 916def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 917 (instrs LDPQpre)>; 918def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 919 (instrs LDPSpre)>; 920def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 921 (instrs LDPWpre)>; 922 923def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpre)>; 924def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpre)>; 925def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpre)>; 926def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpre)>; 927def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpre)>; 928def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpre)>; 929def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpre)>; 930 931def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpre)>; 932def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpre)>; 933def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpost)>; 934def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpost)>; 935 936def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpre)>; 937def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpre)>; 938def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpost)>; 939def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpost)>; 940 941def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpre)>; 942def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpost)>; 943 944def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpre)>; 945def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpost)>; 946 947def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 948 (instrs LDPDpost)>; 949def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 950 (instrs LDPQpost)>; 951def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 952 (instrs LDPSpost)>; 953def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 954 (instrs LDPWpost)>; 955def : InstRW<[ORYONWrite_4Cyc_LD_I012345], 956 (instrs LDPXpost)>; 957 958def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpost)>; 959def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpost)>; 960def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpost)>; 961def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpost)>; 962def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpost)>; 963def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpost)>; 964def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpost)>; 965 966def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroW)>; 967def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroW)>; 968def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroW)>; 969def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroW)>; 970def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroW)>; 971def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroW)>; 972def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroW)>; 973def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroW)>; 974def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroW)>; 975def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroW)>; 976 977def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroX)>; 978def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroX)>; 979def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroX)>; 980def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroX)>; 981def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroX)>; 982def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroX)>; 983def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroX)>; 984def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroX)>; 985def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroX)>; 986def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroX)>; 987 988def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBi)>; 989def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBBi)>; 990def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURDi)>; 991def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHi)>; 992def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHHi)>; 993def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURQi)>; 994def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSi)>; 995def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURXi)>; 996def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBWi)>; 997def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBXi)>; 998def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHWi)>; 999def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHXi)>; 1000def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSWi)>; 1001 1002 1003 1004// Store register, immed post-index 1005// NOTE: Handled by WriteST, ReadAdrBase 1006 1007// Store register, immed pre-index 1008// NOTE: Handled by WriteST 1009 1010// Store pair, immed post-index, W-form 1011// Store pair, immed post-indx, X-form 1012// Store pair, immed pre-index, W-form 1013// Store pair, immed pre-index, X-form 1014// NOTE: Handled by WriteSTP. 1015 1016def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBi)>; 1017def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBBi)>; 1018def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURDi)>; 1019def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHi)>; 1020def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHHi)>; 1021def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURQi)>; 1022def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURSi)>; 1023def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURWi)>; 1024def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURXi)>; 1025 1026def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRBi)>; 1027def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRHi)>; 1028def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRWi)>; 1029def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRXi)>; 1030 1031def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPDi)>; 1032def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPQi)>; 1033def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPXi)>; 1034def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPWi)>; 1035 1036def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPDi)>; 1037def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPQi)>; 1038def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPXi)>; 1039def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPWi)>; 1040 1041def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRBui)>; 1042def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRDui)>; 1043def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRHui)>; 1044def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRQui)>; 1045def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRXui)>; 1046def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRWui)>; 1047 1048def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1049 (instrs STPDpre, STPDpost)>; 1050def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1051 (instrs STPSpre, STPSpost)>; 1052def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1053 (instrs STPWpre, STPWpost)>; 1054def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1055 (instrs STPXpre, STPXpost)>; 1056 1057def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1058 (instrs STRBpre, STRBpost)>; 1059def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1060 (instrs STRBBpre, STRBBpost)>; 1061def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1062 (instrs STRDpre, STRDpost)>; 1063def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1064 (instrs STRHpre, STRHpost)>; 1065def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1066 (instrs STRHHpre, STRHHpost)>; 1067def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1068 (instrs STRQpre, STRQpost)>; 1069def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1070 (instrs STRSpre, STRSpost)>; 1071def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1072 (instrs STRWpre, STRWpost)>; 1073def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1074 (instrs STRXpre, STRXpost)>; 1075 1076def : InstRW<[ORYONWrite_1Cyc_ST], 1077 (instrs STRBroW, STRBroX)>; 1078def : InstRW<[ORYONWrite_1Cyc_ST], 1079 (instrs STRDroW, STRDroX)>; 1080def : InstRW<[ORYONWrite_1Cyc_ST], 1081 (instrs STRHroW, STRHroX)>; 1082def : InstRW<[ORYONWrite_1Cyc_ST], 1083 (instrs STRHHroW, STRHHroX)>; 1084def : InstRW<[ORYONWrite_1Cyc_ST], 1085 (instrs STRQroW, STRQroX)>; 1086def : InstRW<[ORYONWrite_1Cyc_ST], 1087 (instrs STRSroW, STRSroX)>; 1088def : InstRW<[ORYONWrite_1Cyc_ST], 1089 (instrs STRWroW, STRWroX)>; 1090def : InstRW<[ORYONWrite_1Cyc_ST], 1091 (instrs STRXroW, STRXroX)>; 1092 1093// ASIMD Load instructions, 4 cycle access + 2 cycle NEON access 1094// ASIMD load, 1 element, multiple, 1 reg, D-form 1uOps 1095// ASIMD load, 1 element, multiple, 1 reg, Q-form 1uOps 1096def : InstRW<[ORYONWrite_5Cyc_LD], 1097 (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1098 1099def : InstRW<[ORYONWrite_5Cyc_LD_I012345], 1100 (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1101 1102// ASIMD load, 1 element, multiple, 2 reg, D-form 3 uOps 1103// ASIMD load, 1 element, multiple, 2 reg, Q-form 2 uOps 1104def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], 1105 (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1106 1107def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], 1108 (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1109 1110def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], 1111 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1112 1113def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], 1114 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1115 1116// ASIMD load, 1 element, multiple, 3 reg, D-form 4 uOps 1117// ASIMD load, 1 element, multiple, 3 reg, Q-form 3 uOps 1118def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], 1119 (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1120 1121def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], 1122 (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1123 1124def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], 1125 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1126 1127def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], 1128 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1129 1130// ASIMD load, 1 element, multiple, 4 reg, D-form 6 uOps 1131// ASIMD load, 1 element, multiple, 4 reg, Q-form 4 uOps 1132def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], 1133 (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1134def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], 1135 (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1136 1137def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], 1138 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1139def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], 1140 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1141 1142// ASIMD load, 1 element, one lane, B/H/S 2uOps 1143// ASIMD load, 1 element, one lane, D 2UOps 1144def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], (instregex "^LD1i(8|16|32|64)$")>; 1145def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], 1146 (instregex "^LD1i(8|16|32|64)_POST$")>; 1147 1148// ASIMD load, 1 element, all lanes, D-form, B/H/S 2uOps 1149// ASIMD load, 1 element, all lanes, D-form, D 2uOps 1150// ASIMD load, 1 element, all lanes, Q-form 2uOps 1151def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], 1152 (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1153def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], 1154 (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1155 1156// ASIMD load, 2 element, multiple, D-form, B/H/S 3 uOps 1157// ASIMD load, 2 element, multiple, Q-form, D 4 uOps 1158def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], 1159 (instregex "^LD2Twov(8b|4h|2s)$")>; 1160def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], 1161 (instregex "^LD2Twov(16b|8h|4s|2d)$")>; 1162def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], 1163 (instregex "^LD2Twov(8b|4h|2s)_POST$")>; 1164def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], 1165 (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; 1166 1167// ASIMD load, 2 element, one lane, B/H 3 uOps 1168// ASIMD load, 2 element, one lane, S 3 uOps 1169// ASIMD load, 2 element, one lane, D 3 uOps 1170def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], (instregex "^LD2i(8|16|32|64)$")>; 1171def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], 1172 (instregex "^LD2i(8|16|32|64)_POST$")>; 1173 1174// ASIMD load, 2 element, all lanes, D-form, B/H/S 3 uOps 1175// ASIMD load, 2 element, all lanes, D-form, D 3 uOps 1176// ASIMD load, 2 element, all lanes, Q-form 3 uOps 1177def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], 1178 (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1179def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], 1180 (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1181 1182// ASIMD load, 3 element, multiple, D-form, B/H/S 5 uOps 1183// ASIMD load, 3 element, multiple, Q-form, B/H/S 6 uOps 1184// ASIMD load, 3 element, multiple, Q-form, D 6 uOps 1185def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], 1186 (instregex "^LD3Threev(8b|4h|2s)$")>; 1187def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], 1188 (instregex "^LD3Threev(16b|8h|4s|2d)$")>; 1189def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], 1190 (instregex "^LD3Threev(8b|4h|2s)_POST$")>; 1191def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], 1192 (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; 1193 1194// ASIMD load, 3 element, one lone, B/H 4 uOps 1195// ASIMD load, 3 element, one lane, S 4 uOps 1196// ASIMD load, 3 element, one lane, D 5 uOps 1197def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], (instregex "^LD3i(8|16|32)$")>; 1198def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD3i(64)$")>; 1199def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], 1200 (instregex "^LD3i(8|16|32)_POST$")>; 1201def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], 1202 (instregex "^LD3i(64)_POST$")>; 1203 1204// ASIMD load, 3 element, all lanes, D-form, B/H/S 4 uOps 1205// ASIMD load, 3 element, all lanes, D-form, D 5 uOps 1206// ASIMD load, 3 element, all lanes, Q-form, B/H/S 4 uOps 1207// ASIMD load, 3 element, all lanes, Q-form, D 5 uOps 1208def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], 1209 (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>; 1210def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], 1211 (instregex "^LD3Rv(1d|2d)$")>; 1212def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], 1213 (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>; 1214def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], 1215 (instregex "^LD3Rv(1d|2d)_POST$")>; 1216 1217// ASIMD load, 4 element, multiple, D-form, B/H/S 6 uOps 1218// ASIMD load, 4 element, multiple, Q-form, B/H/S 10 uOps 1219// ASIMD load, 4 element, multiple, Q-form, D 8 uOps 1220def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], 1221 (instregex "^LD4Fourv(8b|4h|2s)$")>; 1222def : InstRW<[ORYONWrite_5Cyc_10Uops_LD], 1223 (instregex "^LD4Fourv(16b|8h|4s)$")>; 1224def : InstRW<[ORYONWrite_5Cyc_8Uops_LD], 1225 (instregex "^LD4Fourv(2d)$")>; 1226def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], 1227 (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; 1228def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345], 1229 (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; 1230def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345], 1231 (instregex "^LD4Fourv(2d)_POST$")>; 1232 1233// ASIMD load, 4 element, one lane, B/H 5 uOps 1234// ASIMD load, 4 element, one lane, S 5 uOps 1235// ASIMD load, 4 element, one lane, D 6 uOps 1236def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD4i(8|16|32)$")>; 1237def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], (instregex "^LD4i(64)$")>; 1238def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], 1239 (instregex "^LD4i(8|16|32)_POST$")>; 1240def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], 1241 (instregex "^LD4i(64)_POST$")>; 1242 1243// ASIMD load, 4 element, all lanes, D-form, B/H/S 5 uOps 1244// ASIMD load, 4 element, all lanes, D-form, D 6 uOps 1245// ASIMD load, 4 element, all lanes, Q-form, B/H/S 5 uOps 1246// ASIMD load, 4 element, all lanes, Q-form, D 6 uOps 1247def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], 1248 (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>; 1249def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], 1250 (instregex "^LD4Rv(1d|2d)$")>; 1251def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], 1252 (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>; 1253def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], 1254 (instregex "^LD4Rv(1d|2d)_POST$")>; 1255 1256// ASIMD Store Instructions 1257// ASIMD store, 1 element, multiple, 1 reg, D-form 1 uOps 1258// ASIMD store, 1 element, multiple, 1 reg, Q-form 1 uops 1259def : InstRW<[ORYONWrite_1Cyc_ST], 1260 (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1261def : InstRW<[ORYONWrite_1Cyc_ST_I012345], 1262 (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1263 1264// ASIMD store, 1 element, multiple, 2 reg, D-form 2 uOps 1265// ASIMD store, 1 element, multiple, 2 reg, Q-form 2 uOps 1266def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], 1267 (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1268def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], 1269 (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1270 1271// ASIMD store, 1 element, multiple, 3 reg, D-form 3 uOps 1272// ASIMD store, 1 element, multiple, 3 reg, Q-form 3 uOps 1273def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], 1274 (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1275def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], 1276 (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1277 1278// ASIMD store, 1 element, multiple, 4 reg, D-form 4 uOps 1279// ASIMD store, 1 element, multiple, 4 reg, Q-form 4 uOps 1280def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], 1281 (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; 1282def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], 1283 (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; 1284 1285// ASIMD store, 1 element, one lane, B/H/S 2 uOps 1286// ASIMD store, 1 element, one lane, D 2 uOps 1287def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], 1288 (instregex "^ST1i(8|16|32|64)$")>; 1289def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], 1290 (instregex "^ST1i(8|16|32|64)_POST$")>; 1291 1292// ASIMD store, 2 element, multiple, D-form, B/H/S 2 uOps 1293// ASIMD store, 2 element, multiple, Q-form, B/H/S 4 uOps 1294// ASIMD store, 2 element, multiple, Q-form, D 4 uOps 1295def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], 1296 (instregex "^ST2Twov(8b|4h|2s)$")>; 1297def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], 1298 (instregex "^ST2Twov(16b|8h|4s|2d)$")>; 1299def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], 1300 (instregex "^ST2Twov(8b|4h|2s)_POST$")>; 1301def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], 1302 (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>; 1303 1304// ASIMD store, 2 element, one lane, B/H/S 2 uOps 1305// ASIMD store, 2 element, one lane, D 2 uOps 1306def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], 1307 (instregex "^ST2i(8|16|32|64)$")>; 1308def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], 1309 (instregex "^ST2i(8|16|32|64)_POST$")>; 1310 1311// ASIMD store, 3 element, multiple, D-form, B/H/S 4 uOps 1312// ASIMD store, 3 element, multiple, Q-form, B/H/S 6 uOps 1313// ASIMD store, 3 element, multiple, Q-form, D 6 uOps 1314def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], 1315 (instregex "^ST3Threev(8b|4h|2s)$")>; 1316def : InstRW<[ORYONWrite_1Cyc_6Uops_ST], 1317 (instregex "^ST3Threev(16b|8h|4s|2d)$")>; 1318def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], 1319 (instregex "^ST3Threev(8b|4h|2s)_POST$")>; 1320def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345], 1321 (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; 1322 1323// ASIMD store, 3 element, one lane, B/H 2 uOps 1324// ASIMD store, 3 element, one lane, S 2 uOps 1325// ASIMD store, 3 element, one lane, D 4 uOps 1326def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], (instregex "^ST3i(8|16|32)$")>; 1327def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST3i(64)$")>; 1328def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], 1329 (instregex "^ST3i(8|16|32)_POST$")>; 1330def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], 1331 (instregex "^ST3i(64)_POST$")>; 1332 1333 1334// ASIMD store, 4 element, multiple, D-form, B/H/S 5 uOps 1335// ASIMD store, 4 element, multiple, Q-form, B/H/S 10 uOps 1336// ASIMD store, 4 element, multiple, Q-form, D 8 uOps 1337def : InstRW<[ORYONWrite_1Cyc_5Uops_ST], 1338 (instregex "^ST4Fourv(8b|4h|2s)$")>; 1339def : InstRW<[ORYONWrite_1Cyc_10Uops_ST], 1340 (instregex "^ST4Fourv(16b|8h|4s)$")>; 1341def : InstRW<[ORYONWrite_1Cyc_8Uops_ST], 1342 (instregex "^ST4Fourv(2d)$")>; 1343def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345], 1344 (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; 1345def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345], 1346 (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; 1347def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345], 1348 (instregex "^ST4Fourv(2d)_POST$")>; 1349 1350// ASIMD store, 4 element, one lane, B/H 3 uOps 1351// ASIMD store, 4 element, one lane, S 3 uOps 1352// ASIMD store, 4 element, one lane, D 4 uOps 1353def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], (instregex "^ST4i(8|16|32)$")>; 1354def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST4i(64)$")>; 1355def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], 1356 (instregex "^ST4i(8|16|32)_POST$")>; 1357def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], 1358 (instregex "^ST4i(64)_POST$")>; 1359 1360 1361//===----------------------------------------------------------------------===// 1362// Instruction Tables in VXU 1363//===----------------------------------------------------------------------===// 1364// all uOps are not clearly written in the VXU document 1365 1366// I2V 1367def : InstRW<[ORYONWrite_I2V_4Cyc_I45], (instregex "^FMOV[HSD][WX]r", "^FMOVDXHighr")>; 1368 1369// I2V with convert 1370def : InstRW<[ORYONWrite_I2V_7Cyc_I45], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; 1371 1372// V2I 1373def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], (instregex "^FMOV[WX][HSD]r", "FMOVXDHighr")>; 1374 1375// V2I with convert 2nd [SU] necessary? 1376def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; 1377 1378// float to float move immediate, row 7 in big chart 1379def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]r")>; 1380def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]i")>; 1381 1382// float to float conversion within VXU, precision conversion 1383def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>; 1384def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r", 1385 "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; 1386 1387// floating comparison write to NZCV 1388def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>; 1389def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>; 1390 1391// floating point conditional select 1392def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>; 1393 1394// floating multiply-add 1395def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB")>; 1396 1397// floating unary, cycle/throughput? xls row14 1398def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>; 1399 1400//floating division/square root 1401def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVHrr")>; 1402def : InstRW<[ORYONWrite_8Cyc_FP3], (instregex "^FDIVSrr")>; 1403def : InstRW<[ORYONWrite_10Cyc_FP3], (instregex "^FDIVDrr")>; 1404 1405def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTHr")>; 1406def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTSr")>; 1407def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTDr")>; 1408 1409//========== 1410// SIMD move instructions 1411//========== 1412 1413// ASIMD DUP element 1414def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^DUPv.+lane")>; 1415// ASIMD DUP general thoughput undecided, 3? FP0123 1416// VXU doc, p42, 2 uOps 1417def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^DUPv.+gpr")>; 1418 1419// ASIMD insert, element to element 1420def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^INSv.+lane")>; 1421// ASIMD insert, gen reg 3? FP0123? 1422def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^INSv.+gpr")>; 1423 1424// ASIMD move, FP immed 1425def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv")>; 1426 1427// ASIMD transfer, element to gen reg 1428def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^[SU]MOVv")>; 1429 1430//========== 1431// SIMD arithmetic instructions 1432//========== 1433def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDv", "^SUBv", 1434 "^BIFv", "^BITv", "^BSLv", 1435 "^ANDv", "^BICv", "^EORv", 1436 "^ORRv", "^ORNv")>; 1437 1438 1439def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FABDv", "^FADDv", "^FSUBv")>; 1440 1441// floating division 1442def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>; 1443def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>; 1444def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>; 1445 1446def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v", 1447 "^FRECPSv", "^FRSQRTSv")>; 1448 1449def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv", 1450 "^PMULv", "UABAv")>; 1451 1452def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "SABAv", "SABDv", 1453 "^(SH|UH)(ADD|SUB)v", 1454 "^S(MAX|MIN)v", 1455 "^(SQ|UQ)(ADD|SUB)v", 1456 "^(SQ|SQR|UQ|UQR)SHLv", 1457 "^(SR|UR)HADDv", 1458 "^(SR|UR)SHLv", 1459 "^UABDv", 1460 "^U(MAX|MIN)v")>; 1461// IMAX or UMAX in the above line 1462//========== 1463// SIMD compare instructions 1464//========== 1465 1466def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^CMEQv","^CMGEv","^CMGTv", 1467 "^CMLEv","^CMLTv", "^CMHIv", 1468 "^CMHSv", 1469 "^FCMEQv", "^FCMGEv", 1470 "^FCMGTv", "^FCMLEv", 1471 "^FCMLTv", 1472 "^FACGEv", "^FACGTv")>; 1473 1474//========== 1475// SIMD widening and narrowing arithmetic instructions 1476//========== 1477// NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished 1478// from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32). 1479def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDHNv", 1480 "^SUBHNv", 1481 "^RADDHNv", 1482 "^RSUBHNv", 1483 "^SABD(L|L2)v", "^UABD(L|L2)v", 1484 "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>; 1485 1486def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^PMUL(L|L2)v","^SABA(L|L2)v", 1487 "^(S|U|SQ)(MLA|MSL|MUL)(L|L2)v")>; 1488 1489//========== 1490// SIMD unary arithmetic instructions 1491//========== 1492//^MVNv is an alias of ^NOTv 1493def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CNTv", 1494 "^NEGv", "^NOTv", 1495 "^RBITv", "^REV(16|32|64)v", 1496 "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v", 1497 "^(SU|US)QADDv", 1498 "^UQXT(N|N2)v", "^XTN2?v")>; 1499 1500def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v", 1501 "^FRINT[AIMNPXZ]v", 1502 "^FRSQRTEv", 1503 "^(S|U)ADALPv", 1504 "^(S|U)ADDLPv")>; 1505 1506 1507def : InstRW<[ORYONWrite_3Cyc_FP0], (instregex "^URECPEv", "^URSQRTEv", 1508 "^FRECPEv", "^FRECPXv")>; 1509 1510def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTv.*16$")>; 1511def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTv.*32$")>; 1512def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>; 1513 1514//========== 1515// SIMD binary elememt arithmetic instructions 1516//========== 1517 1518def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv")>; 1519 1520def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^SQDMULHv", 1521 "^SQRD(MLA|MLS|MUL)Hv")>; 1522 1523//========== 1524// SIMD permute instructions 1525//========== 1526 1527def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v", 1528 "^UZP(1|2)v", "^ZIP(1|2)v")>; 1529 1530//========== 1531// SIMD immediate instructions 1532//========== 1533 1534def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^MOVIv", "^MVNIv")>; 1535 1536//========== 1537// SIMD shift(immediate) instructions 1538//========== 1539def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv", 1540 "^(SHL|SHR)(N|N2)v", 1541 "^SLIv", 1542 "^(SQ|SQR)SHR(U)?(N|N2)v", 1543 "^(UQ|UQR)SHR(N|N2)v", 1544 "^SQSHLUv", 1545 "^SRIv", 1546 "^(S|SR|U|UR)SHRv", 1547 "^(S|SR|U|UR)SRAv", 1548 "^(S|U)SHL(L|L2)v")>; 1549 1550//========== 1551// SIMD floating-point and integer conversion instructions 1552//========== 1553// same as above conversion 1554 1555//========== 1556// SIMD reduce (acoss vector lanes) instructions 1557//========== 1558 1559def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDVv", 1560 "^(FMAX|FMIN)(V|NMV)v", 1561 "^(S|U)ADDLVv", 1562 "^(S|U)(MAX|MIN)Vv")>; 1563//========== 1564// SIMD pairwise arithmetic instructions 1565//========== 1566 1567def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDPv", "^FADDPv", 1568 "^(FMAX|FMIN)(NMP|P)v", 1569 "^(S|U)(MIN|MAX)Pv")>; 1570//========== 1571// SIMD dot prodcut instructions 1572//========== 1573 1574def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(U|S)DOTv")>; 1575 1576//========== 1577// SIMD table lookup instructions 1578//========== 1579// TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2 1580def : InstRW<[ORYONWrite_2Cyc_FP0123], (instrs TBLv8i8One, TBLv16i8One, 1581 TBXv8i8One, TBXv16i8One, 1582 TBLv8i8Two, TBLv16i8Two)>; 1583 1584// TBL 3-reg/4-reg, 3uops, throughtput=4/3=1.33 latency=4 1585def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC], 1586 (instrs TBLv8i8Three, TBLv16i8Three, 1587 TBLv8i8Four, TBLv16i8Four)>; 1588 1589 1590// TBX 2-reg 2 uOps, throughput=2 latency=4 1591def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], (instrs TBXv8i8Two, TBXv16i8Two)>; 1592 1593// TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6 1594def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC], 1595 (instrs TBXv8i8Three, TBXv16i8Three, 1596 TBXv8i8Four, TBXv16i8Four)>; 1597 1598 1599//========== 1600// SIMD complex number arithmetic instructions 1601//========== 1602 1603def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>; 1604 1605//========== 1606// SIMD cryptographic instructions 1607//========== 1608// 3,4 on IMLA, CRYP 1609def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]", 1610 "^SM3(TT1|TT2)(A|B)")>; 1611 1612// 2,4 on CRYP 1613def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^AESI?MC", 1614 "^EOR3", 1615 "^RAX1", 1616 "^XAR", 1617 "^BCAX", 1618 "^SM3SS1", 1619 "^SM3PART(W1|W2)")>; 1620// 5,1 on CRYP 1621def : InstRW<[ORYONWrite_5Cyc_FP1], (instregex "^SM4E", 1622 "^SM4EKEY")>; 1623 1624// 2,1 on CRYP 1625def : InstRW<[ORYONWrite_2Cyc_FP1], (instregex "^SHA1(H|SU0|SU1)", 1626 "^SHA256SU0", 1627 "^SHA512(SU0|SU1)")>; 1628 1629// 3,1 on CRYP 1630def : InstRW<[ORYONWrite_3Cyc_FP1], (instregex "^SHA256SU1", 1631 "^SHA512(H|H2)")>; 1632 1633// 4,0.25 on CRYP 1634def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)", 1635 "^SHA256(H|H2)")>; 1636 1637//========== 1638// SIMD v8.6 instructions 1639//========== 1640// 4,2 on IMLA 1641def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>; 1642 1643// 4,0.5 on IMLA 1644def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>; 1645 1646// 4,0.5 on IMLA 1647def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>; 1648 1649// 3,4 1650def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(US|SU)DOTv")>; 1651 1652// 3,1 1653def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^BF(16)?DOTv")>; 1654 1655// 3,4 1656def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^BFCVT(N|N2)?$")>; 1657 1658 1659} // SchedModel = OryonModel 1660