//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling ---*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record. Every definition inside the
// `let SchedModel = BdVer2Model in { ... }` scope opened below is attached
// to this model.
def BdVer2Model : SchedMachineModel {
  let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
  let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
  let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
  let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
  let HighLatency = 25; // FIXME: any better choice?
  let MispredictPenalty = 20; // Minimum branch misdirection penalty.

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.

//===----------------------------------------------------------------------===//
// Integer execution pipes
//

// Two EX (ALU) pipes.
def PdEX0  : ProcResource<1>; // ALU, Integer Pipe0
def PdEX1  : ProcResource<1>; // ALU, Integer Pipe1
def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;

// Two AGLU pipes, identical.
def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]

//===----------------------------------------------------------------------===//
// Floating point execution pipes
//

// Four FPU pipes.

def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3

// FPU grouping
def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
// On the other hand, the RCU reorder buffer size for Piledriver does not
// seem to be specified in any trustworthy source.
// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
// RCU reorder buffer size of 128. So that is a good guess for now.
def PdRCU : RetireControlUnit<128, 4>;


//===----------------------------------------------------------------------===//
// Pipelines
//===----------------------------------------------------------------------===//

// There are a total of two pipelines, each one with its own scheduler.

//===----------------------------------------------------------------------===//
// Integer Pipeline Scheduling
//

// There is one Integer Scheduler per core.

// Integer physical register file has 96 registers of 64-bit.
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA  : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT  : ProcResource<1>; // PdFPU0

// One unit for shuffles, packs, permutes, shifts.
def PdFPXBR  : ProcResource<1>; // PdFPU1

// Two 128-bit packed integer units.
def PdFPMAL  : ProcResource<2>; // PdFPU2, PdFPU3

// One FP store unit.
def PdFPSTO  : ProcResource<1>; // PdFPU3


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Defines a single anonymous WriteRes for SchedRW on ExePorts.
//   Lat  - value assigned to Latency (defaults to 1).
//   Res  - value assigned to ReleaseAtCycles; it is passed through unchanged,
//          so the default empty list leaves the choice to WriteRes.
//   UOps - value assigned to NumMicroOps (defaults to 1).
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Defines both variants of a foldable SchedWrite:
//   * SchedRW itself, exactly as PdWriteRes would define it;
//   * SchedRW.Folded, which prepends PdLoad to ExePorts, adds LoadLat to the
//     latency and LoadUOps to the micro-op count.
// ReleaseAtCycles of the folded variant:
//   * stays empty when Res is empty and LoadRes is 1;
//   * otherwise is LoadRes followed by Res, or, when Res is empty, followed
//     by one `1` per entry of ExePorts.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                      [],
                      !listconcat([LoadRes],
                        !if(!empty(Res),
                          !listsplat(1, !size(ExePorts)),
                          Res))),
                    !add(UOps, LoadUOps)>;
}

// Pair helper that folds the load with LoadLat = 4 and LoadRes = 3.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Pair helper that folds the load with LoadLat = 5 and LoadRes = 3.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Same load folding as PdWriteResXMMPair (LoadLat = 5, LoadRes = 3), but Lat
// has no default and UOps defaults to 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
244//===----------------------------------------------------------------------===// 245 246// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers 247// needn't be available until 4 cycles after the memory operand. 248def : ReadAdvance<ReadAfterLd, 4>; 249 250// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available 251// until 5 cycles after the memory operand. 252def : ReadAdvance<ReadAfterVecLd, 5>; 253def : ReadAdvance<ReadAfterVecXLd, 5>; 254def : ReadAdvance<ReadAfterVecYLd, 5>; 255 256// Transfer from int domain to ivec domain incurs additional latency of 8..10cy 257// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller 258// and Excavator pipeline", "Data delay between different execution domains" 259def : ReadAdvance<ReadInt2Fpu, -10>; 260 261// A folded store needs a cycle on the PdStore for the store data. 262def : WriteRes<WriteRMW, [PdStore]>; 263 264//////////////////////////////////////////////////////////////////////////////// 265// Loads, stores, and moves, not folded with other operations. 266//////////////////////////////////////////////////////////////////////////////// 267 268def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ReleaseAtCycles = [2]; } 269def : WriteRes<WriteStore, [PdStore]>; 270def : WriteRes<WriteStoreNT, [PdStore]>; 271def : WriteRes<WriteMove, [PdEX01]> { let ReleaseAtCycles = [2]; } 272defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>; 273 274// Load/store MXCSR. 275// FIXME: These are copy and pasted from WriteLoad/Store. 276def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; } 277def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ReleaseAtCycles = [18]; } 278 279// Treat misc copies as a move. 280def : InstRW<[WriteMove], (instrs COPY)>; 281 282//////////////////////////////////////////////////////////////////////////////// 283// Idioms that clear a register, like xorps %xmm0, %xmm0. 
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem,     [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteFence,      [PdStore]>;

// Per-instruction override for XLAT.
def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

// Per-instruction override shared by the register forms of LAR and LSL.
def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ReleaseAtCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [PdEX01]> { let ReleaseAtCycles = [2]; }

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

// Read-modify-write ALU operations: load, execute, then store.
def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ReleaseAtCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

// Register forms of the bit-manipulation instructions listed below.
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                     TZMSK32rr, TZMSK64rr)>;

// Memory forms of the same instructions.
def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
                     TZMSK32rm, TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32,    [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64,    [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG,    [PdEX1],                  3, [3],        5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG,       [PdEX1],                  1, [],         2>;

def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ReleaseAtCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

// Integer multiplication.
defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;

// BMI2 MULX
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResUnsupported<WriteIMulHLd>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

// Unsigned integer division.
defm : PdWriteResExPair<WriteDiv8,   [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16,  [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32,  [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64,  [PdEX1, PdDiv], 14, [1, 14], 2>;

// Signed integer division.
defm : PdWriteResExPair<WriteIDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ReleaseAtCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 3];
  let NumMicroOps = 2;
}

// Memory-form CMOVs whose operand 7 equals COND_BE, COND_A, COND_L, COND_GE,
// COND_LE or COND_G use PdWriteCMOVm; every other condition falls back to the
// generic folded WriteCMOV.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

// SETCCm whose operand 5 equals COND_GE, COND_G, COND_LE or COND_L uses the
// write above; every other condition falls back to WriteSETCCStore.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],    1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2, 3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7, 2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],    2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [PdEX01]> { let ReleaseAtCycles = [2]; }

// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Slow LEAs use PdWrite3OpsLEA; all others use the generic WriteLEA.
def PdWriteLEA : SchedWriteVariant<[
  SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
  SchedVar<NoSchedPred,        [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
defm : PdWriteResExPair<WriteBSF,    [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR,    [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
defm : PdWriteResExPair<WriteLZCNT,  [PdEX0],  2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT,  [PdEX0],  2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS,   [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI,  [PdEX01]>;

def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Per-instruction overrides for the register forms of RCL/RCR.

def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ReleaseAtCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ReleaseAtCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ReleaseAtCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri,  [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri,  [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
720//////////////////////////////////////////////////////////////////////////////// 721 722defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>; 723defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>; 724defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>; 725 726defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>; 727defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>; 728defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>; 729 730defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>; 731defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>; 732 733defm : PdWriteRes<WriteFStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>; 734defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>; 735defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>; 736 737def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> { 738 let Latency = 2; 739 let ReleaseAtCycles = [1, 3, 1]; 740 let NumMicroOps = 2; 741} 742def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>; 743 744def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { 745 let NumMicroOps = 8; 746} 747def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>; 748 749defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>; 750defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>; 751defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>; 752 753defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; 754defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; 755defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; 756defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; 757 758defm : PdWriteRes<WriteFMove, [PdFPU01, 
PdFPFMA]>;
// NOTE(review): the positional arguments that follow each resource list are
// presumably (latency, ReleaseAtCycles, micro-ops[, extra load micro-ops]);
// confirm against the PdWriteRes* multiclasses defined earlier in this file.
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

// --- FP add (32-bit element classes) ---
defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

// Dedicated write for the x87 ADD/SUB/SUBR forms listed in the InstRW below;
// its resource list also names PdLoad.
def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add],
             (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
                     SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
                     SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

// --- FP add (64-bit element classes) ---
defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

// --- FP compare ---
defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

// The WriteFCom* classes additionally list the integer pipe PdEX0.
defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

// No overrides: every SchedWriteRes field keeps its default.
def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

// --- FP multiply ---
defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

// Dedicated write for the x87 MUL forms listed in the InstRW below; its
// resource list also names PdLoad.
def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

// --- Fused multiply-add ---
defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;

// --- Dot products ---
defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;

def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ReleaseAtCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

// --- Reciprocal / reciprocal square root ---
defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

// --- FP divide ---
defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

// Dedicated write for the x87 DIV/DIVR forms listed in the InstRW below; its
// resource list also names PdLoad.
def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ReleaseAtCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div],
             (instrs DIV_FI16m, DIV_FI32m,
                     DIVR_FI16m, DIVR_FI32m,
                     DIV_F32m, DIV_F64m,
                     DIVR_F32m, DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

// --- FP square root ---
defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA], 1, [1, 4]>;

// --- FP rounding ---
defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// --- VFRCZ*: one write per operand form, bound by the InstRW after each ---
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm],
             (instrs VFRCZPDrm, VFRCZPSrm,
                     VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

// --- FP logic / test ---
defm : PdWriteResXMMPair<WriteFLogic, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU23, PdFPMAL], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

// --- FP shuffles and blends ---
defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128rm)>;

defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU1, PdFPXBR], 3, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

defm : PdWriteResXMMPair<WriteFBlend, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU23, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;

defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU1, PdFPXBR], 2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;

defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

// --- 128-bit lane extract / permute: per-instruction writes ---
def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;

def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;

def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 4;
  let ReleaseAtCycles = [1, 6];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;

def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 8; // 4 + 4
  let ReleaseAtCycles = [1, 8];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments that follow each resource list are
// presumably (latency, ReleaseAtCycles, micro-ops[, extra load micro-ops]);
// confirm against the PdWriteRes* multiclasses defined earlier in this file.

// --- FP -> integer ---
defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Per-instruction write bound to MMX_CVTTPD2PIrr by the InstRW below.
def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs MMX_CVTTPD2PIrr)>;

// --- integer -> FP ---
// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// Per-instruction write for the four CVTSI2SS/CVTSI2SD forms named in the
// InstRW below.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ReleaseAtCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr],
             (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

// --- FP <-> FP precision changes ---
defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// --- MMX conversions: per-instruction writes ---
def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr],
             (instrs MMX_CVTPD2PIrr,
                     MMX_CVTPI2PDrr)>;

def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs MMX_CVTPI2PSrr)>;

// --- Half-precision conversions ---
defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

// The *St classes append PdStore to the resource list.
defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

// --- Vector loads ---
defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

// --- Vector stores ---
defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

// Only the micro-op count is overridden; the other fields keep their defaults.
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;

defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

// --- Vector register moves ---
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// Empty body: every SchedWriteRes field keeps its default.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

// --- Vector integer ALU / shifts / multiplies ---
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS],
             (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
                     VPMACSSDQLrr)>;

// FIXME: Investigate RR vs RM differences.
defm : PdWriteRes<WriteMPSAD, [PdFPU0, PdFPMMA], 8, [1, 4], 8>;
defm : PdWriteRes<WriteMPSADLd, [PdFPU0, PdFPMMA, PdLoad], 14, [1, 4, 3], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

// --- Vector integer shuffles / blends / logic ---
defm : PdWriteResXMMPair<WriteShuffle, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU1, PdFPXBR], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

def PdWriteVPPERM : SchedWriteRes<[PdFPU1, PdFPXBR]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 1];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

// Variant of PdWriteVPPERM whose resource list also names PdLoad.
def PdWriteVPPERMLd : SchedWriteRes<[PdFPU1, PdFPXBR, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 1, 3];
}
def : InstRW<[PdWriteVPPERMLd], (instrs VPPERMrrm, VPPERMrmr)>;

defm : PdWriteResXMMPair<WriteBlend, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

// These three pass only the resource list, so all remaining multiclass
// arguments take their defaults.
defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVPMOV256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments that follow each resource list are
// presumably (latency, ReleaseAtCycles, micro-ops[, extra load micro-ops]);
// confirm against the PdWriteRes* multiclasses defined earlier in this file.

// Implicit-length string compares.
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;

// Explicit-length string compares; both list the same six resources and
// differ only in the latency argument.
defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// Floating-point horizontal add/sub.
defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

// Integer horizontal add/sub.
defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Explicitly bind the instructions below to WritePHAdd ...
def : InstRW<[WritePHAdd],
             (instrs PHADDDrr, PHSUBDrr,
                     PHADDWrr, PHSUBWrr,
                     PHADDSWrr, PHSUBSWrr,
                     VPHADDDrr, VPHSUBDrr,
                     VPHADDWrr, VPHSUBWrr,
                     VPHADDSWrr, VPHSUBSWrr)>;

// ... and their counterparts below to WritePHAdd.Folded.
def : InstRW<[WritePHAdd.Folded],
             (instrs PHADDDrm, PHSUBDrm,
                     PHADDWrm, PHSUBWrm,
                     PHADDSWrm, PHSUBSWrm,
                     VPHADDDrm, VPHSUBDrm,
                     VPHADDWrm, VPHSUBWrm,
                     VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// Per-instruction write for VPCLMULQDQrr: same latency and ReleaseAtCycles
// arguments as the WriteCLMul line above, but 6 micro-ops instead of 5.
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ReleaseAtCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// INSERTQ and INSERTQI get separate writes; they differ only in the second
// ReleaseAtCycles entry (2 vs 3).
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Write for the two ymm broadcast-from-memory forms listed below; the InstRW
// pairs it with ReadAfterLd.
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ReleaseAtCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd],
             (instrs VBROADCASTSDYrm,
                     VBROADCASTSSYrm)>;

// VZEROALL / VZEROUPPER: empty resource lists, only latency and micro-op
// counts are given.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Zero-latency write with an empty resource list. Each variant below selects
// it when ZeroIdiomPredicate holds.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Every variant in this section has the same two arms: PdWriteZeroLatency
// under ZeroIdiomPredicate, otherwise (TruePred) the ordinary write class.

// GPR: falls back to WriteALU.
def PdWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom],
             (instrs SUB32rr, SUB64rr,
                     XOR32rr, XOR64rr)>;

// FP logic: falls back to WriteFLogic.
def PdWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom],
             (instrs XORPSrr, VXORPSrr,
                     XORPDrr, VXORPDrr,
                     ANDNPSrr, VANDNPSrr,
                     ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

// MMX logic: falls back to WriteVecLogic.
def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;

// xmm logic: falls back to WriteVecLogicX.
def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX],
             (instrs PXORrr, VPXORrr,
                     PANDNrr, VPANDNrr)>;

// MMX ALU: falls back to WriteVecALU.
def PdWriteVZeroIdiomALU : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU],
             (instrs MMX_PSUBBrr, MMX_PSUBDrr,
                     MMX_PSUBQrr, MMX_PSUBWrr,
                     MMX_PCMPGTBrr,
                     MMX_PCMPGTDrr,
                     MMX_PCMPGTWrr)>;

// xmm ALU: falls back to WriteVecALUX.
def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX],
             (instrs PSUBBrr, VPSUBBrr,
                     PSUBDrr, VPSUBDrr,
                     PSUBQrr, VPSUBQrr,
                     PSUBWrr, VPSUBWrr,
                     PCMPGTBrr, VPCMPGTBrr,
                     PCMPGTDrr, VPCMPGTDrr,
                     PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
    MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
    MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
    MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;


} // SchedModel