//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record. Every resource and write definition that
// follows is attached to it through the enclosing `let SchedModel = ... in`.
def BdVer2Model : SchedMachineModel {
  let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
  let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
  let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
  let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
  let HighLatency = 25; // FIXME: any better choice?
  let MispredictPenalty = 20; // Minimum branch misdirection penalty.

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.
44 45//===----------------------------------------------------------------------===// 46// Integer execution pipes 47// 48 49// Two EX (ALU) pipes. 50def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0 51def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1 52def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>; 53 54// Two AGLU pipes, identical. 55def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23] 56 57//===----------------------------------------------------------------------===// 58// Floating point execution pipes 59// 60 61// Four FPU pipes. 62 63def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0 64def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1 65def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2 66def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3 67 68// FPU grouping 69def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>; 70def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>; 71 72 73//===----------------------------------------------------------------------===// 74// RCU 75//===----------------------------------------------------------------------===// 76 77// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle. 78// On the other hand, the RCU reorder buffer size for Piledriver does not 79// seem be specified in any trustworthy source. 80// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had 81// RCU reorder buffer size of 128. So that is a good guess for now. 82def PdRCU : RetireControlUnit<128, 4>; 83 84 85//===----------------------------------------------------------------------===// 86// Pipelines 87//===----------------------------------------------------------------------===// 88 89// There are total of two pipelines, each one with it's own scheduler. 90 91//===----------------------------------------------------------------------===// 92// Integer Pipeline Scheduling 93// 94 95// There is one Integer Scheduler per core. 96 97// Integer physical register file has 96 registers of 64-bit. 
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Scheduler capacity, in entries.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Scheduler capacity, in entries.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA  : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT  : ProcResource<1>; // PdFPU0

// One unit for shuffles, packs, permutes, shifts.
def PdFPXBR  : ProcResource<1>; // PdFPU1

// Two 128-bit packed integer units.
def PdFPMAL  : ProcResource<2>; // PdFPU2, PdFPU3

// One FP store unit.
def PdFPSTO  : ProcResource<1>; // PdFPU3


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Defines a single WriteRes for SchedRW.
//   ExePorts - resources consumed by the write.
//   Lat      - value assigned to Latency (default 1).
//   Res      - value assigned to ResourceCycles (default: empty list).
//   UOps     - value assigned to NumMicroOps (default 1).
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Defines both the register form (SchedRW) and the load-folded form
// (SchedRW.Folded) of a write.
// The folded form is derived from the register form as follows:
//   * PdLoad is prepended to ExePorts;
//   * LoadLat is added to Lat, and LoadUOps is added to UOps;
//   * ResourceCycles stays empty when Res is empty and LoadRes is 1;
//     otherwise it is LoadRes followed by Res, or by one '1' per entry of
//     ExePorts when Res is empty.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                      [],
                      !listconcat([LoadRes],
                        !if(!empty(Res),
                          !listsplat(1, !size(ExePorts)),
                          Res))),
                    !add(UOps, LoadUOps)>;
}

// Register/folded pair whose folded form adds 4 cycles of latency and
// 3 PdLoad resource cycles.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Register/folded pair whose folded form adds 5 cycles of latency and
// 3 PdLoad resource cycles.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Same load adjustment as PdWriteResXMMPair, but Lat has no default and
// UOps defaults to 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
244//===----------------------------------------------------------------------===// 245 246// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers 247// needn't be available until 4 cycles after the memory operand. 248def : ReadAdvance<ReadAfterLd, 4>; 249 250// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available 251// until 5 cycles after the memory operand. 252def : ReadAdvance<ReadAfterVecLd, 5>; 253def : ReadAdvance<ReadAfterVecXLd, 5>; 254def : ReadAdvance<ReadAfterVecYLd, 5>; 255 256// Transfer from int domain to ivec domain incurs additional latency of 8..10cy 257// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller 258// and Excavator pipeline", "Data delay between different execution domains" 259def : ReadAdvance<ReadInt2Fpu, -10>; 260 261// A folded store needs a cycle on the PdStore for the store data. 262def : WriteRes<WriteRMW, [PdStore]>; 263 264//////////////////////////////////////////////////////////////////////////////// 265// Loads, stores, and moves, not folded with other operations. 266//////////////////////////////////////////////////////////////////////////////// 267 268def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; } 269def : WriteRes<WriteStore, [PdStore]>; 270def : WriteRes<WriteStoreNT, [PdStore]>; 271def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; } 272defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>; 273 274// Load/store MXCSR. 275// FIXME: These are copy and pasted from WriteLoad/Store. 276def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; } 277def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; } 278 279// Treat misc copies as a move. 280def : InstRW<[WriteMove], (instrs COPY)>; 281 282//////////////////////////////////////////////////////////////////////////////// 283// Idioms that clear a register, like xorps %xmm0, %xmm0. 
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem,     [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteFence,      [PdStore]>;

def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

// Shared by the register forms of both LAR and LSL.
def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ResourceCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
// NOTE(review): the text above says '1' while ResourceCycles below is [2];
// presumably '1' refers to the latency, which is not overridden here - confirm.
def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ResourceCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

// Register forms of the instructions listed below.
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                     TZMSK32rr, TZMSK64rr)>;

// Memory forms of the same instructions; PdLoad is consumed as well.
def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
                     TZMSK32rm, TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32,    [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64,    [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG,    [PdEX1], 3, [3], 5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG,       [PdEX1], 1, [], 2>;

def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

def PdWriteXADD : SchedWriteRes<[PdEX1]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ResourceCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;

// BMI2 MULX
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResUnsupported<WriteIMulHLd>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : PdWriteResExPair<WriteDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteIDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}

// Picks PdWriteCMOVm when the immediate in operand 7 is one of the listed
// condition codes; every other case falls back to WriteCMOV.Folded.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

// Picks the write above when the immediate in operand 5 is GE, G, LE or L;
// every other case falls back to WriteSETCCStore.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],    1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2, 3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7, 2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],    2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }

// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Slow LEAs use PdWrite3OpsLEA; all others use the plain WriteLEA.
def PdWriteLEA : SchedWriteVariant<[
  SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
  SchedVar<NoSchedPred,        [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
defm : PdWriteResExPair<WriteBSF,    [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR,    [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
defm : PdWriteResExPair<WriteLZCNT,  [PdEX0],  2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT,  [PdEX0],  2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS,   [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI,  [PdEX01]>;

def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Per-instruction overrides for the rotate-through-carry instructions.

def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ResourceCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ResourceCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ResourceCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri,  [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;

def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri,  [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;

defm : PdWriteRes<WriteFLoad,  [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

defm : PdWriteRes<WriteFMaskedLoad,  [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

defm : PdWriteRes<WriteFStore,  [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
  let Latency = 2;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

defm : PdWriteRes<WriteFStoreNT,  [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;

defm : PdWriteRes<WriteFMaskedStore32,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

defm : PdWriteRes<WriteFMove,  [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

defm : PdWriteResXMMPair<WriteFAdd,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m,  ADD_FI32m,  ADD_F32m,  ADD_F64m,
                                      SUB_FI16m,  SUB_FI32m,  SUB_F32m,  SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

defm : PdWriteResXMMPair<WriteFAdd64,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

defm : PdWriteResXMMPair<WriteFCmp,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : PdWriteResXMMPair<WriteFCom,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

defm : PdWriteResXMMPair<WriteFMul,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

defm : PdWriteResXMMPair<WriteFMA,  [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;


defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS,  [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;

def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ResourceCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

defm : PdWriteResXMMPair<WriteFRcp,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt,  [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

defm : PdWriteResXMMPair<WriteFDiv,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ResourceCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m,  DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m,   DIV_F64m,
                                      DIVR_F32m,  DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

defm : PdWriteResXMMPair<WriteFSqrt,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign,   [PdFPU1, PdFPFMA], 1, [1, 4]>;

defm : PdWriteResXMMPair<WriteFRnd,  [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

defm : PdWriteResXMMPair<WriteFLogic,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

defm : PdWriteResXMMPair<WriteFTest,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

defm : PdWriteResXMMPair<WriteFShuffle,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ResourceCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;

defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA], 3, [1, 2]>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

defm : 
PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>; 959defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>; 960defm : X86WriteResPairUnsupported<WriteFBlendZ>; 961 962defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>; 963defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>; 964defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; 965 966defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>; 967defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; 968 969def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 970 let Latency = 2; 971 let ResourceCycles = [1, 2]; 972} 973def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>; 974 975def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 976 let Latency = 7; 977 let ResourceCycles = [1, 4]; 978 let NumMicroOps = 2; 979} 980def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>; 981 982def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 983 let Latency = 4; 984 let ResourceCycles = [1, 6]; 985 let NumMicroOps = 8; 986} 987def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>; 988 989def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> { 990 let Latency = 8; // 4 + 4 991 let ResourceCycles = [1, 8]; 992 let NumMicroOps = 10; 993} 994def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>; 995 996//////////////////////////////////////////////////////////////////////////////// 997// Conversions. 
////////////////////////////////////////////////////////////////////////////////

// FP -> integer conversions.
defm : PdWriteResXMMPair<WriteCvtSS2I,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4,
    [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Per-instruction override for MMX_CVTTPD2PIrr.
def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs MMX_CVTTPD2PIrr)>;

// Integer -> FP conversions.
// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4,
    [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// Per-instruction override for the register forms of CVTSI2SD/CVTSI2SS.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr
    : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 3, 1];
  let Latency = 13;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs
    CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8,
    [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

// FP <-> FP conversions.
defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4,
    [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8,
    [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4,
    [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// Per-instruction overrides for the MMX conversion register forms.
def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr
    : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr], (instrs
    MMX_CVTPD2PIrr,
    MMX_CVTPI2PDrr)>;

def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs MMX_CVTPI2PSrr)>;

// Half-precision conversions.
defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8,
    [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8,
    [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt,
    [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt,
    [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

// Vector loads.
defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6,
    [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6,
    [3, 2, 4], 2>;

// Vector stores.
defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1,
    [2, 36, 2], 4>;

def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2,
    [2, 2, 2], 4>;

defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

// Vector register moves.
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// No field overrides: only the resource list differs from the defaults.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]>;
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

// Vector integer ALU and shifts.
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

// Vector integer multiplies.
defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD,
    [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs
    VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
    VPMACSSDQLrr)>;

// Sum-of-absolute-differences classes.
defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let NumMicroOps = 10;
  let ResourceCycles = [1, 4];
  let Latency = 8;
}
def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;

defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

// Vector integer shuffles.
defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let ResourceCycles = [1, 3];
  let Latency = 2;
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

// Vector integer blends, logic and test.
defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1,
    [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVPMOV256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6,
    [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12,
    [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13,
    [2, 1, 1], 2>;

def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let ResourceCycles = [1, 3];
  let Latency = 3;
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// Implicit-length string compares.
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11,
    [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7,
    [1, 8, 1], 7, 2>;

// Explicit-length string compares.
defm : PdWriteResXMMPair<WritePCmpEStrI,
    [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14,
    [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM,
    [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10,
    [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// FP horizontal add/sub classes.
defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

// Integer horizontal add/sub classes.
defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Bind the register forms listed below to WritePHAdd.
def : InstRW<[WritePHAdd], (instrs
    PHADDDrr, PHSUBDrr,
    PHADDWrr, PHSUBWrr,
    PHADDSWrr, PHSUBSWrr,
    VPHADDDrr, VPHSUBDrr,
    VPHADDWrr, VPHSUBWrr,
    VPHADDSWrr, VPHSUBSWrr)>;

// Bind the memory forms listed below to WritePHAdd.Folded.
def : InstRW<[WritePHAdd.Folded], (instrs
    PHADDDrm, PHSUBDrm,
    PHADDWrm, PHSUBWrm,
    PHADDSWrm, PHSUBSWrm,
    VPHADDDrm, VPHSUBDrm,
    VPHADDWrm, VPHSUBWrm,
    VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// Per-instruction override for VPCLMULQDQrr.
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let NumMicroOps = 6;
  let ResourceCycles = [1, 7];
  let Latency = 12;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// INSERTQ and INSERTQI differ only in their second resource-cycle count.
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let ResourceCycles = [1, 2];
  let Latency = 3;
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let ResourceCycles = [1, 3];
  let Latency = 3;
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Per-instruction override for the YMM broadcast-from-memory forms.
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2, 4];
  let Latency = 6;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs
    VBROADCASTSDYrm,
    VBROADCASTSSYrm)>;

// VZEROALL / VZEROUPPER are modelled with an empty resource list.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let NumMicroOps = 32;
  let Latency = 90;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let NumMicroOps = 16;
  let Latency = 46;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Zero-latency write with no resources; selected by the variants below when
// ZeroIdiomPredicate holds.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each variant below lists the ZeroIdiomPredicate case first and a TruePred
// fallback to the regular write class second; the order is kept as-is.

def PdWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs
    SUB32rr, SUB64rr,
    XOR32rr, XOR64rr)>;

def PdWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs
    XORPSrr, VXORPSrr,
    XORPDrr, VXORPDrr,
    ANDNPSrr, VANDNPSrr,
    ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;

def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs
    PXORrr, VPXORrr,
    PANDNrr, VPANDNrr)>;

def PdWriteVZeroIdiomALU : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs
    MMX_PSUBBrr, MMX_PSUBDrr,
    MMX_PSUBQrr, MMX_PSUBWrr,
    MMX_PCMPGTBrr,
    MMX_PCMPGTDrr,
    MMX_PCMPGTWrr)>;

def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs
    PSUBBrr, VPSUBBrr,
    PSUBDrr, VPSUBDrr,
    PSUBQrr, VPSUBQrr,
    PSUBWrr, VPSUBWrr,
    PCMPGTBrr, VPCMPGTBrr,
    PCMPGTDrr, VPCMPGTDrr,
    PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

// Opcodes treated as zero-idioms, grouped by register class / ISA extension.
def : IsZeroIdiomFunction<[
    // GPR Zero-idioms.
    DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ],
                     ZeroIdiomPredicate>,

    // MMX Zero-idioms.
    DepBreakingClass<[
        MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
        MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
        MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
        MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
    ], ZeroIdiomPredicate>,

    // SSE Zero-idioms.
    DepBreakingClass<[
        // fp variants.
        XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

        // int variants.
        PXORrr, PANDNrr,
        PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
        PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
        PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
    ], ZeroIdiomPredicate>,

    // AVX Zero-idioms.
    DepBreakingClass<[
        // xmm fp variants.
        VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

        // xmm int variants.
        VPXORrr, VPANDNrr,
        VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
        VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
        VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

        // ymm variants.
        VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
    ], ZeroIdiomPredicate>
]>;

// Opcodes treated as dependency-breaking. The CMP entry uses its own
// same-register-operand check instead of ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
    // GPR
    DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
    DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

    // MMX
    DepBreakingClass<[
        MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
    ], ZeroIdiomPredicate>,

    // SSE
    DepBreakingClass<[
        PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
        // But not PCMPEQQrr.
    ], ZeroIdiomPredicate>,

    // AVX
    DepBreakingClass<[
        VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
        // But not VPCMPEQQrr.
    ], ZeroIdiomPredicate>
]>;


} // SchedModel