//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record; every definition that follows is attached
// to it through the `let SchedModel = BdVer2Model in { ... }` scope below.
def BdVer2Model : SchedMachineModel {
  // Up to 4 IPC can be decoded, issued, retired.
  let IssueWidth = 4;
  // RCU reorder buffer size, which is unconfirmed.
  let MicroOpBufferSize = 128;
  // There does not seem to be a loop buffer.
  let LoopMicroOpBufferSize = -1;
  // L1 data cache has a 4-cycle load-to-use latency.
  let LoadLatency = 4;
  // FIXME: any better choice?
  let HighLatency = 25;
  // Minimum branch misdirection penalty.
  let MispredictPenalty = 20;

  // Enable Post RegAlloc Scheduler pass.
  let PostRAScheduler = 1;

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.

//===----------------------------------------------------------------------===//
// Integer execution pipes
//

// Two EX (ALU) pipes.
// ALU, Integer Pipe0
def PdEX0 : ProcResource<1>;
// ALU, Integer Pipe1
def PdEX1 : ProcResource<1>;
// Either of the two ALU pipes.
def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;

// Two AGLU pipes, identical.
// AGU, Integer Pipe[23]
def PdAGLU01 : ProcResource<2>;

//===----------------------------------------------------------------------===//
// Floating point execution pipes
//

// Four FPU pipes.

// Vector/FPU Pipe0
def PdFPU0 : ProcResource<1>;
// Vector/FPU Pipe1
def PdFPU1 : ProcResource<1>;
// Vector/FPU Pipe2
def PdFPU2 : ProcResource<1>;
// Vector/FPU Pipe3
def PdFPU3 : ProcResource<1>;

// FPU grouping
def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
// On the other hand, the RCU reorder buffer size for Piledriver does not
// seem to be specified in any trustworthy source.
// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
// RCU reorder buffer size of 128. So that is a good guess for now.
def PdRCU : RetireControlUnit<128, 4>;


//===----------------------------------------------------------------------===//
// Pipelines
//===----------------------------------------------------------------------===//

// There are a total of two pipelines, each one with its own scheduler.

//===----------------------------------------------------------------------===//
// Integer Pipeline Scheduling
//

// There is one Integer Scheduler per core.

// Integer physical register file has 96 registers of 64-bit.
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries,
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

// Load unit; declared as a sub-resource of the AGLU pipes.
let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

// Store unit; declared as a sub-resource of the AGLU pipes.
let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

// PdEX0; unpipelined integer division
def PdDiv : ProcResource<1>;
// PdEX0; POPCNT, LZCNT
def PdCount : ProcResource<1>;

// PdEX1; integer multiplication
def PdMul : ProcResource<1>;
// PdEX1; JMP, fused branches
def PdBranch : ProcResource<1>;

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
// PdFPU0, PdFPU1
def PdFPFMA : ProcResource<2>;

// One 128-bit integer multiply-accumulate unit.
// PdFPU0
def PdFPMMA : ProcResource<1>;

// One fp conversion unit.
// PdFPU0
def PdFPCVT : ProcResource<1>;

// One unit for shuffles, packs, permutes, shifts.
// PdFPU1
def PdFPXBR : ProcResource<1>;

// Two 128-bit packed integer units.
// PdFPU2, PdFPU3
def PdFPMAL : ProcResource<2>;

// One FP store unit.
// PdFPU3
def PdFPSTO : ProcResource<1>;


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Emits a single anonymous WriteRes for `SchedRW` on `ExePorts`, forwarding
// `Lat`, `Res` and `UOps` to Latency, ResourceCycles and NumMicroOps.
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Emits the register form of `SchedRW` plus its folded-load form
// (`SchedRW.Folded`). For the folded form:
//  * PdLoad is prepended to `ExePorts`;
//  * the latency is `Lat + LoadLat`;
//  * the resource cycles stay empty when `Res` is empty and `LoadRes` is 1;
//    otherwise they are `LoadRes` followed by `Res`, or by one cycle per
//    entry of `ExePorts` when `Res` is empty;
//  * the micro-op count is `UOps + LoadUOps`.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                        [],
                        !listconcat([LoadRes],
                                    !if(!empty(Res),
                                        !listsplat(1, !size(ExePorts)),
                                        Res))),
                    !add(UOps, LoadUOps)>;
}

// Register/folded-load pair using LoadLat = 4 and LoadRes = 3.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Register/folded-load pair using LoadLat = 5 and LoadRes = 3.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Register/folded-load pair using LoadLat = 5 and LoadRes = 3. Unlike the
// XMM variant, `Lat` has no default and `UOps` defaults to 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
// needn't be available until 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;

// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
// and Excavator pipeline", "Data delay between different execution domains"
def : ReadAdvance<ReadInt2Fpu, -10>;

// A folded store needs a cycle on the PdStore for the store data.
def : WriteRes<WriteRMW, [PdStore]>;

////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLoad, [PdLoad]> {
  let Latency = 5;
  let ResourceCycles = [2];
}
def : WriteRes<WriteStore, [PdStore]>;
def : WriteRes<WriteStoreNT, [PdStore]>;
def : WriteRes<WriteMove, [PdEX01]> {
  let ResourceCycles = [2];
}
defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;

// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [PdLoad]> {
  let Latency = 5;
}
def : WriteRes<WriteSTMXCSR, [PdStore]> {
  let NumMicroOps = 2;
  let ResourceCycles = [18];
}

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem, [PdEX01]> {
  let Latency = 100;
}
def : WriteRes<WriteMicrocoded, [PdEX01]> {
  let Latency = 100;
}
def : WriteRes<WriteFence, [PdStore]>;

def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ResourceCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
// NOTE(review): ResourceCycles below is [2], not 1 -- confirm which value the
// comment above is meant to describe.
def : WriteRes<WriteNop, [PdEX01]> {
  let ResourceCycles = [2];
}

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

// Read-modify-write ALU operation: load, ALU and store resources.
def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ResourceCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

// Register forms of the instructions listed in the InstRW below.
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr,
                     BLCI32rr,    BLCI64rr,
                     BLCIC32rr,   BLCIC64rr,
                     BLCMSK32rr,  BLCMSK64rr,
                     BLCS32rr,    BLCS64rr,
                     BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr,   BLSIC64rr,
                     T1MSKC32rr,  T1MSKC64rr,
                     TZMSK32rr,   TZMSK64rr)>;

// Memory forms of the same instructions.
def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm,
                     BLCI32rm,    BLCI64rm,
                     BLCIC32rm,   BLCIC64rm,
                     BLCMSK32rm,  BLCMSK64rm,
                     BLCS32rm,    BLCS64rm,
                     BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm,   BLSIC64rm,
                     T1MSKC32rm,  T1MSKC64rm,
                     TZMSK32rm,   TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32,    [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64,    [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG,    [PdEX1],                  3, [3],        5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG,       [PdEX1],                  1, [],         2>;

def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

def PdWriteXADD : SchedWriteRes<[PdEX1]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ResourceCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

// Integer multiplication.
defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX

// Unsigned integer division.
defm : PdWriteResExPair<WriteDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

// Signed integer division.
defm : PdWriteResExPair<WriteIDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}

// Selects PdWriteCMOVm when immediate operand 7 is one of the listed
// condition codes; every other case falls back to WriteCMOV.Folded.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

// Selects PdWriteSETGEmSETGmSETLEmSETLm when immediate operand 5 is one of
// GE/G/LE/L; every other case falls back to WriteSETCCStore.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],    1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2, 3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7, 2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],    2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [PdEX01]> {
  let ResourceCycles = [2];
}

// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Slow LEAs use PdWrite3OpsLEA; all others use the default WriteLEA.
def PdWriteLEA : SchedWriteVariant<[
  SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
  SchedVar<NoSchedPred,        [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
defm : PdWriteResExPair<WriteBSF,    [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR,    [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
defm : PdWriteResExPair<WriteLZCNT,  [PdEX0],  2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT,  [PdEX0],  2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS,   [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI,  [PdEX01]>;

def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Per-instruction overrides for rotate-through-carry (RCL/RCR).

def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ResourceCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ResourceCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ResourceCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri,  [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8], (instrs SHLD32rri8, SHRD16rri8)>;

def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri,  [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

// x87 constant loads.
defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;

// FP/vector loads.
defm : PdWriteRes<WriteFLoad,  [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

defm : PdWriteRes<WriteFMaskedLoad,  [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

// FP/vector stores.
defm : PdWriteRes<WriteFStore,  [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
  let Latency = 2;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

defm : PdWriteRes<WriteFStoreNT,  [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;

defm : PdWriteRes<WriteFMaskedStore32,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

// FP/vector register moves.
defm : PdWriteRes<WriteFMove,  [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

// FP addition.
defm : PdWriteResXMMPair<WriteFAdd,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

def PdWriteX87Add : SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m,  ADD_FI32m,  ADD_F32m,  ADD_F64m,
                                      SUB_FI16m,  SUB_FI32m,  SUB_F32m,  SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

defm : PdWriteResXMMPair<WriteFAdd64,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

// FP compares.
defm : PdWriteResXMMPair<WriteFCmp,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : PdWriteResXMMPair<WriteFCom,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

// FP multiplication.
defm : PdWriteResXMMPair<WriteFMul,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

def PdWriteX87Mul : SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

// Fused multiply-add.
defm : PdWriteResXMMPair<WriteFMA,  [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;


// Dot products.
defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS,  [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;

def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ResourceCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

// Reciprocal and reciprocal square root.
defm : PdWriteResXMMPair<WriteFRcp,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt,  [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

// FP division.
defm : PdWriteResXMMPair<WriteFDiv,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

def PdWriteX87Div : SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ResourceCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m,  DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m,   DIV_F64m,
                                      DIVR_F32m,  DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

// FP square root.
defm : PdWriteResXMMPair<WriteFSqrt,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign,   [PdFPU1, PdFPFMA], 1, [1, 4]>;

// FP rounding.
defm : PdWriteResXMMPair<WriteFRnd,  [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// Per-instruction overrides for VFRCZ*.
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

// FP logic, test and shuffle.
defm : PdWriteResXMMPair<WriteFLogic,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

defm : PdWriteResXMMPair<WriteFTest,  [PdFPU0, PdFPFMA, PdEX0],  1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

defm : PdWriteResXMMPair<WriteFShuffle,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ResourceCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;

defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA], 3, [1, 2]>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 
2, [1, 3]>; 953defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>; 954defm : X86WriteResPairUnsupported<WriteFBlendZ>; 955 956defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>; 957defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>; 958defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; 959 960defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>; 961defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; 962 963def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 964 let Latency = 2; 965 let ResourceCycles = [1, 2]; 966} 967def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>; 968 969def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 970 let Latency = 7; 971 let ResourceCycles = [1, 4]; 972 let NumMicroOps = 2; 973} 974def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>; 975 976def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { 977 let Latency = 4; 978 let ResourceCycles = [1, 6]; 979 let NumMicroOps = 8; 980} 981def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>; 982 983def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> { 984 let Latency = 8; // 4 + 4 985 let ResourceCycles = [1, 8]; 986 let NumMicroOps = 10; 987} 988def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>; 989 990//////////////////////////////////////////////////////////////////////////////// 991// Conversions. 
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments after the resource list appear to be
// <latency, per-resource cycles, micro-op count>. The multiclasses are defined
// outside this chunk - confirm against their declarations.
defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Explicit override for the single opcode named in the InstRW that follows.
def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;

// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// NOTE(review): in the PdWriteRes* instantiations below, the positional
// arguments after the resource list appear to be <latency, per-resource cycles,
// micro-op count[, load micro-op count]>. The multiclasses are defined outside
// this chunk - confirm against their declarations.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
                                                              MMX_CVTPI2PDirr)>;

def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;

defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;

defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;

// No fields are overridden here; the record only names the resources used.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
                                      VPMACSSDQLrr)>;

defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 8;
  let ResourceCycles = [1, 4];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;

defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 2;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVPMOV256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments after the resource list appear to be
// <latency, per-resource cycles, micro-op count[, load micro-op count]>. The
// multiclasses are defined outside this chunk - confirm against their
// declarations.
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;

defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments after the resource list appear to be
// <latency, per-resource cycles, micro-op count, load micro-op count>. The
// multiclasses are defined outside this chunk - confirm against their
// declarations.
defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Explicitly bind the listed register-register opcodes to WritePHAdd ...
def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
                                   PHADDWrr, PHSUBWrr,
                                   PHADDSWrr, PHSUBSWrr,
                                   VPHADDDrr, VPHSUBDrr,
                                   VPHADDWrr, VPHSUBWrr,
                                   VPHADDSWrr, VPHSUBSWrr)>;

// ... and their memory-operand counterparts to its folded-load variant.
def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
                                          PHADDWrm, PHSUBWrm,
                                          PHADDSWrm, PHSUBSWrm,
                                          VPHADDDrm, VPHSUBDrm,
                                          VPHADDWrm, VPHSUBWrm,
                                          VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ResourceCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// Each SchedWriteRes below is bound to its opcodes by the InstRW that follows.
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ResourceCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                          VBROADCASTSSYrm)>;

// VZEROALL / VZEROUPPER name no processor resources (empty resource list);
// only latency and micro-op count are modelled.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Write with zero latency and no resources, used as the "is a zero idiom"
// alternative in the variants below.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each variant selects PdWriteZeroLatency when ZeroIdiomPredicate matches and
// otherwise falls through (TruePred) to the generic write named second.
def PdWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

def PdWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                          XORPDrr, VXORPDrr,
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;

def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                PANDNrr, VPANDNrr)>;

def PdWriteVZeroIdiomALU : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
                                             MMX_PSUBQirr, MMX_PSUBWirr,
                                             MMX_PCMPGTBirr,
                                             MMX_PCMPGTDirr,
                                             MMX_PCMPGTWirr)>;

def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
    MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
    MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
    MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;


} // SchedModel