//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level description of the Piledriver core used by the schedulers.
def BdVer2Model : SchedMachineModel {
  // Up to 4 IPC can be decoded, issued, retired.
  let IssueWidth = 4;
  // RCU reorder buffer size, which is unconfirmed.
  let MicroOpBufferSize = 128;
  // There does not seem to be a loop buffer.
  let LoopMicroOpBufferSize = -1;
  // L1 data cache has a 4-cycle load-to-use latency.
  let LoadLatency = 4;
  // FIXME: any better choice?
  let HighLatency = 25;
  // Minimum branch misdirection penalty.
  let MispredictPenalty = 20;

  // Enable Post RegAlloc Scheduler pass.
  let PostRAScheduler = 1;

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

// Everything that follows is attached to BdVer2Model.
let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.

//===----------------------------------------------------------------------===//
// Integer execution pipes
//

// Two EX (ALU) pipes, plus a group that matches either of them.
def PdEX0  : ProcResource<1>; // ALU, Integer Pipe0
def PdEX1  : ProcResource<1>; // ALU, Integer Pipe1
def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;

// Two AGLU pipes, identical, so they are modelled as one resource with two
// units.
def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]

//===----------------------------------------------------------------------===//
// Floating point execution pipes
//

// Four FPU pipes.

def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3

// FPU grouping: pipes {0,1} and pipes {2,3}.
def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
// On the other hand, the RCU reorder buffer size for Piledriver does not
// seem to be specified in any trustworthy source.
// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
// an RCU reorder buffer size of 128. So that is a good guess for now.
def PdRCU : RetireControlUnit<128, 4>;


//===----------------------------------------------------------------------===//
// Pipelines
//===----------------------------------------------------------------------===//

// There are a total of two pipelines, each one with its own scheduler.

//===----------------------------------------------------------------------===//
// Integer Pipeline Scheduling
//

// There is one Integer Scheduler per core.

// Integer physical register file has 96 registers of 64-bit.
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Entries in the unified integer/memory scheduler.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs, hence the
// cost of 2 for VR256.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Entries in the unified FP scheduler.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

// Load unit: two units, declared as a sub-resource of the AGLU pipes.
let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

// Store unit: one unit, declared as a sub-resource of the AGLU pipes.
let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT : ProcResource<1>; // PdFPU0

// One unit for shuffles, packs, permutes, shifts.
def PdFPXBR : ProcResource<1>; // PdFPU1

// Two 128-bit packed integer units.
def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3

// One FP store unit.
def PdFPSTO : ProcResource<1>; // PdFPU3


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Defines one anonymous WriteRes record for SchedRW.
//   SchedRW  - the scheduling write being described.
//   ExePorts - processor resources consumed by the write.
//   Lat      - latency in cycles (default 1).
//   Res      - per-resource cycle counts, parallel to ExePorts; the default
//              [] is passed through unchanged to ResourceCycles.
//   UOps     - number of micro-ops (default 1).
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency        = Lat;
    let ResourceCycles = Res;
    let NumMicroOps    = UOps;
  }
}

// Defines both the register form (SchedRW) and the folded-load form
// (SchedRW.Folded) of a foldable write.
//
// The folded form is derived from the register form as follows:
//   * PdLoad is prepended to ExePorts;
//   * latency is Lat + LoadLat;
//   * micro-op count is UOps + LoadUOps;
//   * resource cycles stay [] when Res is empty and LoadRes is 1; otherwise
//     they are LoadRes followed by Res, or by one '1' per entry of ExePorts
//     when Res is empty.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  // Register form.
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  // Folded-load form.
  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                        [],
                        !listconcat([LoadRes],
                                    !if(!empty(Res),
                                        !listsplat(1, !size(ExePorts)),
                                        Res))),
                    !add(UOps, LoadUOps)>;
}

// Pair for integer writes: the folded load adds 4 cycles of latency and holds
// PdLoad for 3 cycles.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Pair for XMM writes: the folded load adds 5 cycles of latency and holds
// PdLoad for 3 cycles.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Pair for YMM writes: same load parameters as the XMM pair, but Lat has no
// default and UOps defaults to 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
// needn't be available until 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd,  5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;

// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
// and Excavator pipeline", "Data delay between different execution domains"
def : ReadAdvance<ReadInt2Fpu, -10>;

// A folded store needs a cycle on the PdStore for the store data.
def : WriteRes<WriteRMW, [PdStore]>;

////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLoad, [PdLoad]> {
  let Latency = 5;
  let ResourceCycles = [2];
}
def : WriteRes<WriteStore,   [PdStore]>;
def : WriteRes<WriteStoreNT, [PdStore]>;
def : WriteRes<WriteMove, [PdEX01]> {
  let ResourceCycles = [2];
}

// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [PdLoad]> {
  let Latency = 5;
}
def : WriteRes<WriteSTMXCSR, [PdStore]> {
  let NumMicroOps = 2;
  let ResourceCycles = [18];
}

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem, [PdEX01]> {
  let Latency = 100;
}
def : WriteRes<WriteMicrocoded, [PdEX01]> {
  let Latency = 100;
}
def : WriteRes<WriteFence, [PdStore]>;

// XLAT override.
def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

// LAR/LSL register forms override.
def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ResourceCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [PdEX01]> {
  let ResourceCycles = [2];
}

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

// Read-modify-write ALU operations.
def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ResourceCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

// LXADD override.
def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

// BLC*/BLS*/T1MSKC/TZMSK, register forms.
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                     TZMSK32rr, TZMSK64rr)>;

// BLC*/BLS*/T1MSKC/TZMSK, memory forms.
def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
                     TZMSK32rm, TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

// ADC/SBB with a 64-bit destination and 32-bit immediate.
def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32,    [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64,    [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG,    [PdEX1], 3, [3], 5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG,       [PdEX1], 1, [], 2>;

// Per-instruction CMPXCHG overrides.
def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ResourceCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

// XADD, register and memory forms.
def PdWriteXADD : SchedWriteRes<[PdEX1]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ResourceCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

// Integer multiplication.
defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX

// Unsigned integer division.
defm : PdWriteResExPair<WriteDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

// Signed integer division.
defm : PdWriteResExPair<WriteIDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

// Per-instruction CRC32 overrides.
def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ResourceCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

// Cost used for the memory forms of CMOV selected by PdWriteCMOVmVar below.
def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ResourceCycles = [3, 3];
  let NumMicroOps = 2;
}

// Picks PdWriteCMOVm when immediate operand 7 is one of BE/A/L/GE/LE/G;
// every other condition falls back to WriteCMOV.Folded.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

// Cost used for the SETCCm conditions selected by the variant below.
def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

// Picks the cost above when immediate operand 5 is one of GE/G/LE/L;
// every other condition falls back to WriteSETCCStore.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

// LAHF and SAHF each get their own override.
def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

// Bit test, and bit test-and-set.
defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],    1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2, 3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7, 2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],    2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ResourceCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
// FIXME: SAGU 3-operand LEA
def : WriteRes<WriteLEA, [PdEX01]> {
  let NumMicroOps = 2;
}

// Bit counts.
defm : PdWriteResExPair<WriteBSF,    [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR,    [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
defm : PdWriteResExPair<WriteLZCNT,  [PdEX0],  2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT,  [PdEX0],  2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS,   [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI,  [PdEX01]>;

// BEXTRI, register-source forms.
def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

// BEXTRI, memory-source forms.
def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ResourceCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Per-instruction overrides for the register forms of RCL/RCR.
def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ResourceCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ResourceCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ResourceCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ResourceCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ResourceCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ResourceCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ResourceCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri,  [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8], (instrs SHLD32rri8, SHRD16rri8)>;

def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL],
             (instrs SHLD16rrCL, SHLD32rrCL, SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri,  [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

// x87 constant loads.
defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;

// FP/vector loads.
defm : PdWriteRes<WriteFLoad,  [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

defm : PdWriteRes<WriteFMaskedLoad,  [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

// FP/vector stores.
defm : PdWriteRes<WriteFStore,  [PdStore, PdFPU23, PdFPSTO], 2, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
  let Latency = 2;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

// Non-temporal stores.
defm : PdWriteRes<WriteFStoreNT,  [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;

// Masked stores.
defm : PdWriteRes<WriteFMaskedStore32,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64,  [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

// Register moves.
defm : PdWriteRes<WriteFMove,  [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

// Addition.
defm : PdWriteResXMMPair<WriteFAdd,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

// x87 add/sub with a memory operand.
def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m,  ADD_FI32m,  ADD_F32m,  ADD_F64m,
                                      SUB_FI16m,  SUB_FI32m,  SUB_F32m,  SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

defm : PdWriteResXMMPair<WriteFAdd64,  [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

// Comparison.
defm : PdWriteResXMMPair<WriteFCmp,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64,  [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

// Multiplication.
defm : PdWriteResXMMPair<WriteFMul,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

// x87 multiply with a memory operand.
def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ResourceCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

// Fused multiply-add.
defm : PdWriteResXMMPair<WriteFMA,  [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;


// Dot products.
defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS,  [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;

def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ResourceCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

// Reciprocal and reciprocal square root.
defm : PdWriteResXMMPair<WriteFRcp,  [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt,  [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

// Division.
defm : PdWriteResXMMPair<WriteFDiv,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

// x87 divide with a memory operand.
def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ResourceCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m,  DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m,   DIV_F64m,
                                      DIVR_F32m,  DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

// Square root.
defm : PdWriteResXMMPair<WriteFSqrt,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64,  [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign,   [PdFPU1, PdFPFMA], 1, [1, 4]>;

// Rounding.
defm : PdWriteResXMMPair<WriteFRnd,  [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// VFRCZ* overrides.
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ResourceCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

// Logic operations.
defm : PdWriteResXMMPair<WriteFLogic,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

// Test operations.
defm : PdWriteResXMMPair<WriteFTest,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

// Shuffles.
defm : PdWriteResXMMPair<WriteFShuffle,  [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ResourceCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;

defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA], 3, [1, 2]>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

// Blends.
defm : PdWriteResXMMPair<WriteFBlend,  [PdFPU01, PdFPFMA], 2, [1, 3]>;
defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;

defm : PdWriteResXMMPair<WriteFVarBlend,  [PdFPU01, PdFPFMA], 2, [1, 3]>;
defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;

// 256-bit shuffles and 128-bit lane operations.
defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 2;
  let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;

def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ResourceCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;

def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 4;
  let ResourceCycles = [1, 6];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;

def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 8; // 4 + 4
  let ResourceCycles = [1, 8];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments after the resource list are
// presumably latency, per-resource cycles, micro-op count and extra load
// micro-ops - confirm against the Pd* multiclass definitions earlier in
// this file.

defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// MMX_CVTTPD2PIirr is bound to its own write (latency 6, 2 micro-ops).
def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;

// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// The register-form integer-to-scalar-FP conversions listed in the InstRW
// below are bound to a dedicated write with latency 13.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ResourceCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr],
             (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
                                                              MMX_CVTPI2PDirr)>;

def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;

defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

// VMOVDQUYmr override: only the micro-op count is set here; the remaining
// fields keep the SchedWriteRes defaults.
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;

defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;

defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;

// MOVDQArr override with an intentionally empty body: every field keeps the
// SchedWriteRes default.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
                                      VPMACSSDQLrr)>;

defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 8;
  let ResourceCycles = [1, 4];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;

defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 2;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments after the resource list are
// presumably latency, per-resource cycles, micro-op count and extra load
// micro-ops - confirm against the Pd* multiclass definitions earlier in
// this file.

defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;

defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// The register-form opcodes below are explicitly bound to WritePHAdd.
def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
                                   PHADDWrr, PHSUBWrr,
                                   PHADDSWrr, PHSUBSWrr,
                                   VPHADDDrr, VPHSUBDrr,
                                   VPHADDWrr, VPHSUBWrr,
                                   VPHADDSWrr, VPHSUBSWrr)>;

// ... and their `rm` counterparts to the folded variant of the same write.
def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
                                          PHADDWrm, PHSUBWrm,
                                          PHADDSWrm, PHSUBSWrm,
                                          VPHADDDrm, VPHSUBDrm,
                                          VPHADDWrm, VPHSUBWrm,
                                          VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// VPCLMULQDQrr override: same latency and resource cycles as the WriteCLMul
// instantiation above, but 6 micro-ops.
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ResourceCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// INSERTQ and INSERTQI get separate writes; they differ only in the second
// ResourceCycles entry (2 vs. 3).
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ResourceCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                          VBROADCASTSSYrm)>;

// VZEROALL / VZEROUPPER list no processor resources; only latency and
// micro-op count are specified.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Resource-free, zero-latency write; it is the first alternative of every
// SchedWriteVariant below, selected when ZeroIdiomPredicate holds.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each variant below resolves to PdWriteZeroLatency when ZeroIdiomPredicate
// holds and otherwise (TruePred) to the ordinary write named in its second
// SchedVar.
def PdWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

def PdWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                          XORPDrr, VXORPDrr,
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;

def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                PANDNrr, VPANDNrr)>;

def PdWriteVZeroIdiomALU : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
                                             MMX_PSUBQirr, MMX_PSUBWirr,
                                             MMX_PCMPGTBirr,
                                             MMX_PCMPGTDirr,
                                             MMX_PCMPGTWirr)>;

def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

// Opcode classes registered as zero idioms, each guarded by
// ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
    MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
    MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
    MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

// Opcode classes registered as dependency-breaking. CMP32rr/CMP64rr use
// CheckSameRegOperand<0, 1>; every other class uses ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;


} // SchedModel