//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record; every resource and write definition inside
// the `let SchedModel = BdVer2Model in { ... }` scope below is attached to it.
def BdVer2Model : SchedMachineModel {
  let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
  let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
  let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
  let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
  let HighLatency = 25; // FIXME: any better choice?
  let MispredictPenalty = 20; // Minimum branch misdirection penalty.

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.
44 45//===----------------------------------------------------------------------===// 46// Integer execution pipes 47// 48 49// Two EX (ALU) pipes. 50def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0 51def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1 52def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>; 53 54// Two AGLU pipes, identical. 55def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23] 56 57//===----------------------------------------------------------------------===// 58// Floating point execution pipes 59// 60 61// Four FPU pipes. 62 63def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0 64def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1 65def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2 66def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3 67 68// FPU grouping 69def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>; 70def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>; 71 72 73//===----------------------------------------------------------------------===// 74// RCU 75//===----------------------------------------------------------------------===// 76 77// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle. 78// On the other hand, the RCU reorder buffer size for Piledriver does not 79// seem be specified in any trustworthy source. 80// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had 81// RCU reorder buffer size of 128. So that is a good guess for now. 82def PdRCU : RetireControlUnit<128, 4>; 83 84 85//===----------------------------------------------------------------------===// 86// Pipelines 87//===----------------------------------------------------------------------===// 88 89// There are total of two pipelines, each one with it's own scheduler. 90 91//===----------------------------------------------------------------------===// 92// Integer Pipeline Scheduling 93// 94 95// There is one Integer Scheduler per core. 96 97// Integer physical register file has 96 registers of 64-bit. 
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA  : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT  : ProcResource<1>; // PdFPU0

// One unit for shuffles, packs, permutes, shifts.
def PdFPXBR  : ProcResource<1>; // PdFPU1

// Two 128-bit packed integer units.
def PdFPMAL  : ProcResource<2>; // PdFPU2, PdFPU3

// One FP store unit.
def PdFPSTO  : ProcResource<1>; // PdFPU3


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Defines a single WriteRes for SchedRW on ExePorts with the given latency,
// per-resource cycle counts and micro-op count.
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Defines both the register form (SchedRW) and the folded-load form
// (SchedRW.Folded) of a write. The folded form is derived from the register
// form as follows:
//  * PdLoad is prepended to ExePorts;
//  * LoadLat is added to the latency;
//  * LoadUOps is added to the micro-op count;
//  * resource cycles stay empty only when Res is empty and LoadRes is 1;
//    otherwise LoadRes is prepended to Res, where an empty Res is first
//    expanded to one cycle per entry of ExePorts.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                      [],
                      !listconcat([LoadRes],
                        !if(!empty(Res),
                          !listsplat(1, !size(ExePorts)),
                          Res))),
                    !add(UOps, LoadUOps)>;
}

// Register/folded-load pair using a load latency of 4 and 3 PdLoad cycles.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Register/folded-load pair using a load latency of 5 and 3 PdLoad cycles.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Same load parameters as PdWriteResXMMPair, but Lat has no default and the
// default micro-op count is 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
244//===----------------------------------------------------------------------===// 245 246// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers 247// needn't be available until 4 cycles after the memory operand. 248def : ReadAdvance<ReadAfterLd, 4>; 249 250// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available 251// until 5 cycles after the memory operand. 252def : ReadAdvance<ReadAfterVecLd, 5>; 253def : ReadAdvance<ReadAfterVecXLd, 5>; 254def : ReadAdvance<ReadAfterVecYLd, 5>; 255 256// Transfer from int domain to ivec domain incurs additional latency of 8..10cy 257// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller 258// and Excavator pipeline", "Data delay between different execution domains" 259def : ReadAdvance<ReadInt2Fpu, -10>; 260 261// A folded store needs a cycle on the PdStore for the store data. 262def : WriteRes<WriteRMW, [PdStore]>; 263 264//////////////////////////////////////////////////////////////////////////////// 265// Loads, stores, and moves, not folded with other operations. 266//////////////////////////////////////////////////////////////////////////////// 267 268def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; } 269def : WriteRes<WriteStore, [PdStore]>; 270def : WriteRes<WriteStoreNT, [PdStore]>; 271def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; } 272 273// Load/store MXCSR. 274// FIXME: These are copy and pasted from WriteLoad/Store. 275def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; } 276def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; } 277 278// Treat misc copies as a move. 279def : InstRW<[WriteMove], (instrs COPY)>; 280 281//////////////////////////////////////////////////////////////////////////////// 282// Idioms that clear a register, like xorps %xmm0, %xmm0. 283// These can often bypass execution ports completely. 
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem,     [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteFence,      [PdStore]>;

def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ResourceCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ResourceCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                     TZMSK32rr, TZMSK64rr)>;

def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
                     TZMSK32rm, TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32, [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64, [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [3], 5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;

def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

def PdWriteXADD : SchedWriteRes<[PdEX1]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ResourceCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX

defm : PdWriteResExPair<WriteDiv8,    [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16,   [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32,   [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64,   [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteIDiv8,   [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16,  [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32,  [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64,  [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}

// Selects PdWriteCMOVm when immediate operand 7 is one of the listed condition
// codes; every other condition code uses the generic folded CMOV write.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

// Selects the write above when immediate operand 5 is COND_GE, COND_G,
// COND_LE or COND_L; every other condition code uses WriteSETCCStore.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],     1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2,  3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7,  2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],     2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1,  1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1,  1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }

// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

def PdWriteLEA : SchedWriteVariant<[
  SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
  SchedVar<NoSchedPred,        [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
defm : PdWriteResExPair<WriteBSF,    [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR,    [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
defm : PdWriteResExPair<WriteLZCNT,  [PdEX0],  2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT,  [PdEX0],  2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS,   [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI,  [PdEX01]>;

def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ResourceCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ResourceCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ResourceCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri,  [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;

def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri,  [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;

defm : PdWriteRes<WriteFLoad,  [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

defm : PdWriteRes<WriteFMaskedLoad,  [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

defm : PdWriteRes<WriteFStore,  [PdStore, PdFPU23, PdFPSTO], 2, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
  let Latency = 2;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

defm : PdWriteRes<WriteFStoreNT,  [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;

defm : PdWriteRes<WriteFMaskedStore32,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

defm : PdWriteRes<WriteFMove,  [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

defm : PdWriteResXMMPair<WriteFAdd,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
                                      SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

defm : PdWriteResXMMPair<WriteFAdd64,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

defm : PdWriteResXMMPair<WriteFCmp,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : PdWriteResXMMPair<WriteFCom,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

defm : PdWriteResXMMPair<WriteFMul,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

defm : PdWriteResXMMPair<WriteFMA,  [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;


defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS,  [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;

def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ResourceCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

defm : PdWriteResXMMPair<WriteFRcp,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt,  [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

defm : PdWriteResXMMPair<WriteFDiv,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ResourceCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m, DIV_F64m,
                                      DIVR_F32m, DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

defm : PdWriteResXMMPair<WriteFSqrt,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign,   [PdFPU1, PdFPFMA], 1, [1, 4]>;

defm : PdWriteResXMMPair<WriteFRnd,  [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

defm : PdWriteResXMMPair<WriteFLogic,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

defm : PdWriteResXMMPair<WriteFTest,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

defm : PdWriteResXMMPair<WriteFShuffle,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ResourceCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;

defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA], 3, [1, 2]>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA],
2, [1, 3]>; 952defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>; 953defm : X86WriteResPairUnsupported<WriteFBlendZ>; 954 955defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>; 956defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>; 957defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; 958 959defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>; 960defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; 961 962def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 963 let Latency = 2; 964 let ResourceCycles = [1, 2]; 965} 966def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>; 967 968def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 969 let Latency = 7; 970 let ResourceCycles = [1, 4]; 971 let NumMicroOps = 2; 972} 973def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>; 974 975def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 976 let Latency = 4; 977 let ResourceCycles = [1, 6]; 978 let NumMicroOps = 8; 979} 980def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>; 981 982def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> { 983 let Latency = 8; // 4 + 4 984 let ResourceCycles = [1, 8]; 985 let NumMicroOps = 10; 986} 987def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>; 988 989//////////////////////////////////////////////////////////////////////////////// 990// Conversions. 
////////////////////////////////////////////////////////////////////////////////

// FP -> integer.
defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Explicit class for MMX_CVTTPD2PIirr.
def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;

// Integer -> FP.
// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// Explicit class for the register forms of CVTSI2SS/CVTSI2SD listed below.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs
    CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

// FP <-> FP precision changes.
defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// Explicit classes for the MMX conversion opcodes named below.
def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs
    MMX_CVTPD2PIirr,
    MMX_CVTPI2PDirr)>;

def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;

// Half-precision conversions.
defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

// Loads.
defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

// Stores.
defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

// Explicit class for VMOVDQUYmr; only the micro-op count is overridden.
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;

defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

// Register moves.
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;

// Explicit class for MOVDQArr; the body is intentionally left empty, so the
// SchedWriteRes defaults apply.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

// ALU and shifts.
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

// Multiplies.
defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

// Explicit class for the listed VPMACS* opcodes.
def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs
    VPMACSDQHrr, VPMACSDQLrr,
    VPMACSSDQHrr, VPMACSSDQLrr)>;

// Sum of absolute differences.
defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

// Explicit class for VMPSADBWrri.
def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 8;
  let ResourceCycles = [1, 4];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;

defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

// Shuffles.
defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

// Explicit class for the VPPERM register forms.
def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 2;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

// Blends, logic and test.
defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

// Explicit class for EXTRQ / EXTRQI.
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// Implicit-length string compares.
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;

// Explicit-length string compares; these additionally list PdStore, PdLoad
// and PdFPMAL among their resources.
defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// Floating-point horizontal add/sub.
defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

// Integer horizontal add/sub.
defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Bind the listed register-register opcodes to WritePHAdd ...
def : InstRW<[WritePHAdd], (instrs
    PHADDDrr, PHSUBDrr,
    PHADDWrr, PHSUBWrr,
    PHADDSWrr, PHSUBSWrr,
    VPHADDDrr, VPHSUBDrr,
    VPHADDWrr, VPHSUBWrr,
    VPHADDSWrr, VPHSUBSWrr)>;

// ... and the listed *rm opcodes to its .Folded counterpart.
def : InstRW<[WritePHAdd.Folded], (instrs
    PHADDDrm, PHSUBDrm,
    PHADDWrm, PHSUBWrm,
    PHADDSWrm, PHSUBSWrm,
    VPHADDDrm, VPHSUBDrm,
    VPHADDWrm, VPHSUBWrm,
    VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// Explicit class for VPCLMULQDQrr.
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ResourceCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// INSERTQ and INSERTQI get separate classes; they differ only in the second
// ResourceCycles entry (2 vs. 3).
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Explicit class for the listed VBROADCASTS{S,D}Yrm opcodes; the InstRW also
// lists ReadAfterLd.
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ResourceCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs
    VBROADCASTSDYrm,
    VBROADCASTSSYrm)>;

// VZEROALL / VZEROUPPER: modelled with an empty resource list.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Write with no resources and zero latency; used as the zero-idiom arm of
// every variant below.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each variant below has two arms, in this order: ZeroIdiomPredicate selects
// PdWriteZeroLatency, otherwise TruePred selects the regular write class.

// GPR: falls back to WriteALU.
def PdWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs
    SUB32rr, SUB64rr,
    XOR32rr, XOR64rr)>;

// FP logic: falls back to WriteFLogic.
def PdWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs
    XORPSrr, VXORPSrr,
    XORPDrr, VXORPDrr,
    ANDNPSrr, VANDNPSrr,
    ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

// MMX logic: falls back to WriteVecLogic.
def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;

// XMM integer logic: falls back to WriteVecLogicX.
def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs
    PXORrr, VPXORrr,
    PANDNrr, VPANDNrr)>;

// MMX ALU: falls back to WriteVecALU.
def PdWriteVZeroIdiomALU : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs
    MMX_PSUBBirr, MMX_PSUBDirr,
    MMX_PSUBQirr, MMX_PSUBWirr,
    MMX_PCMPGTBirr,
    MMX_PCMPGTDirr,
    MMX_PCMPGTWirr)>;

// XMM integer ALU: falls back to WriteVecALUX.
def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs
    PSUBBrr, VPSUBBrr,
    PSUBDrr, VPSUBDrr,
    PSUBQrr, VPSUBQrr,
    PSUBWrr, VPSUBWrr,
    PCMPGTBrr, VPCMPGTBrr,
    PCMPGTDrr, VPCMPGTDrr,
    PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

// Opcodes registered as zero-idioms; every class here uses ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
    MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
    MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
    MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

// Opcodes registered as dependency-breaking. CMP32rr/CMP64rr use
// CheckSameRegOperand<0, 1>; all other classes use ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;


} // SchedModel