//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record for bdver2 (Piledriver).
def BdVer2Model : SchedMachineModel {
  // Up to 4 IPC can be decoded, issued, retired.
  let IssueWidth = 4;

  // RCU reorder buffer size, which is unconfirmed.
  let MicroOpBufferSize = 128;

  // There does not seem to be a loop buffer.
  let LoopMicroOpBufferSize = -1;

  // L1 data cache has a 4-cycle load-to-use latency.
  let LoadLatency = 4;

  // FIXME: any better choice?
  let HighLatency = 25;

  // Minimum branch misdirection penalty.
  let MispredictPenalty = 20;

  // Enable Post RegAlloc Scheduler pass.
  let PostRAScheduler = 1;

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {

//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.

//===----------------------------------------------------------------------===//
// Integer execution pipes
//

// Two EX (ALU) pipes, plus a group covering both of them.
def PdEX0  : ProcResource<1>;               // ALU, Integer Pipe0
def PdEX1  : ProcResource<1>;               // ALU, Integer Pipe1
def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;

// Two AGLU pipes, identical, modelled as a single resource with two units.
def PdAGLU01 : ProcResource<2>;             // AGU, Integer Pipe[23]

//===----------------------------------------------------------------------===//
// Floating point execution pipes
//

// Four FPU pipes.
def PdFPU0 : ProcResource<1>;               // Vector/FPU Pipe0
def PdFPU1 : ProcResource<1>;               // Vector/FPU Pipe1
def PdFPU2 : ProcResource<1>;               // Vector/FPU Pipe2
def PdFPU3 : ProcResource<1>;               // Vector/FPU Pipe3

// FPU grouping: pipes {0,1} and pipes {2,3}.
def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;

//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
// On the other hand, the RCU reorder buffer size for Piledriver does not
// seem to be specified in any trustworthy source.
// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
// an RCU reorder buffer size of 128. So that is a good guess for now.
def PdRCU : RetireControlUnit<128, 4>;

//===----------------------------------------------------------------------===//
// Pipelines
//===----------------------------------------------------------------------===//

// There are a total of two pipelines, each one with its own scheduler.

//===----------------------------------------------------------------------===//
// Integer Pipeline Scheduling
//

// There is one Integer Scheduler per core.

// Integer physical register file has 96 registers of 64-bit.
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Number of entries in the unified integer/memory scheduler.
  let BufferSize = 40;
}

//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs, hence the
// per-class cost of 2 for VR256.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Number of entries in the unified FP scheduler.
  let BufferSize = 64;
}

//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

// LoadQueue requires the processor resource that models the queue as its
// template argument; tie it to the load unit defined just above.
def PdLoadQueue : LoadQueue<PdLoad>;

let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

// StoreQueue likewise requires its backing processor resource.
def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA  : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT : ProcResource<1>; // PdFPU0 // One unit for shuffles, packs, permutes, shifts. def PdFPXBR : ProcResource<1>; // PdFPU1 // Two 128-bit packed integer units. def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3 // One FP store unit. def PdFPSTO : ProcResource<1>; // PdFPU3 //===----------------------------------------------------------------------===// // Basic helper classes. //===----------------------------------------------------------------------===// // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when dispatched by the schedulers. // This multiclass defines the resource usage for variants with and without // folded loads. multiclass PdWriteRes ExePorts, int Lat = 1, list Res = [], int UOps = 1> { def : WriteRes { let Latency = Lat; let ResourceCycles = Res; let NumMicroOps = UOps; } } multiclass __pdWriteResPair ExePorts, int Lat, list Res, int UOps, int LoadLat, int LoadRes, int LoadUOps> { defm : PdWriteRes; defm : PdWriteRes; } multiclass PdWriteResExPair ExePorts, int Lat = 1, list Res = [], int UOps = 1, int LoadUOps = 0> { defm : __pdWriteResPair; } multiclass PdWriteResXMMPair ExePorts, int Lat = 1, list Res = [], int UOps = 1, int LoadUOps = 0> { defm : __pdWriteResPair; } multiclass PdWriteResYMMPair ExePorts, int Lat, list Res = [], int UOps = 2, int LoadUOps = 0> { defm : __pdWriteResPair; } //===----------------------------------------------------------------------===// // Here be dragons. //===----------------------------------------------------------------------===// // L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers // needn't be available until 4 cycles after the memory operand. def : ReadAdvance; // Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available // until 5 cycles after the memory operand. 
def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; // Transfer from int domain to ivec domain incurs additional latency of 8..10cy // Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller // and Excavator pipeline", "Data delay between different execution domains" def : ReadAdvance; // A folded store needs a cycle on the PdStore for the store data. def : WriteRes; //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. //////////////////////////////////////////////////////////////////////////////// def : WriteRes { let Latency = 5; let ResourceCycles = [2]; } def : WriteRes; def : WriteRes; def : WriteRes { let ResourceCycles = [2]; } // Load/store MXCSR. // FIXME: These are copy and pasted from WriteLoad/Store. def : WriteRes { let Latency = 5; } def : WriteRes { let NumMicroOps = 2; let ResourceCycles = [18]; } // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; //////////////////////////////////////////////////////////////////////////////// // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. //////////////////////////////////////////////////////////////////////////////// def : WriteRes; //////////////////////////////////////////////////////////////////////////////// // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResExPair; //////////////////////////////////////////////////////////////////////////////// // Special case scheduling classes. 
//////////////////////////////////////////////////////////////////////////////// def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } def : WriteRes; def PdWriteXLAT : SchedWriteRes<[PdEX01]> { let Latency = 6; } def : InstRW<[PdWriteXLAT], (instrs XLAT)>; def PdWriteLARrr : SchedWriteRes<[PdEX01]> { let Latency = 184; let ResourceCycles = [375]; let NumMicroOps = 45; } def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr", "LSL(16|32|64)rr")>; // Nops don't have dependencies, so there's no actual latency, but we set this // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. def : WriteRes { let ResourceCycles = [2]; } //////////////////////////////////////////////////////////////////////////////// // Arithmetic. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResExPair; def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> { let Latency = 6; let ResourceCycles = [3, 2, 1]; let NumMicroOps = 1; } def : SchedAlias; def PdWriteLXADD : SchedWriteRes<[PdEX01]> { let Latency = 6; let ResourceCycles = [88]; let NumMicroOps = 4; } def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>; def PdWriteBMI1 : SchedWriteRes<[PdEX01]> { let Latency = 2; let ResourceCycles = [2]; let NumMicroOps = 2; } def : InstRW<[PdWriteBMI1], (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr, BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr, BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr, BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr, TZMSK32rr, TZMSK64rr)>; def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> { let Latency = 6; let ResourceCycles = [3, 3]; let NumMicroOps = 2; } def : InstRW<[PdWriteBMI1m], (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm, BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm, BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm, BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm, TZMSK32rm, TZMSK64rm)>; defm : PdWriteResExPair; def PdWriteADCSBB64ri32 : 
SchedWriteRes<[PdEX01]> { let ResourceCycles = [3]; } def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> { let Latency = 3; let ResourceCycles = [3]; let NumMicroOps = 3; } def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>; def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> { let Latency = 3; let ResourceCycles = [23]; let NumMicroOps = 5; } def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>; def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> { let Latency = 3; let ResourceCycles = [21]; let NumMicroOps = 6; } def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>; def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> { let Latency = 3; let ResourceCycles = [26]; let NumMicroOps = 18; } def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>; def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> { let Latency = 3; let ResourceCycles = [69]; let NumMicroOps = 22; } def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>; def PdWriteXADD : SchedWriteRes<[PdEX1]> { let Latency = 1; let ResourceCycles = [1]; let NumMicroOps = 2; } def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>; def PdWriteXADDm : SchedWriteRes<[PdEX1]> { let Latency = 6; let ResourceCycles = [20]; let NumMicroOps = 4; } def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : X86WriteResUnsupported; // BMI2 MULX defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; 
defm : PdWriteResExPair; defm : PdWriteResExPair; def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> { let Latency = 5; let ResourceCycles = [10]; let NumMicroOps = 5; } def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>; def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> { let Latency = 6; let ResourceCycles = [12]; let NumMicroOps = 7; } def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>; def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> { let Latency = 10; let ResourceCycles = [17]; let NumMicroOps = 11; } def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>; defm : PdWriteResExPair; // Conditional move. def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> { let Latency = 5; let ResourceCycles = [3, 3]; let NumMicroOps = 2; } def PdWriteCMOVmVar : SchedWriteVariant<[ SchedVar>, [PdWriteCMOVm]>, SchedVar>, [PdWriteCMOVm]>, SchedVar>, [PdWriteCMOVm]>, SchedVar>, [PdWriteCMOVm]>, SchedVar>, [PdWriteCMOVm]>, SchedVar>, [PdWriteCMOVm]>, SchedVar ]>; def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; defm : PdWriteRes; // x87 conditional move. def : WriteRes; // Setcc. 
def : WriteRes; def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> { let ResourceCycles = [2]; let NumMicroOps = 2; } def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[ SchedVar>, [PdWriteSETGEmSETGmSETLEmSETLm]>, SchedVar>, [PdWriteSETGEmSETGmSETLEmSETLm]>, SchedVar>, [PdWriteSETGEmSETGmSETLEmSETLm]>, SchedVar>, [PdWriteSETGEmSETGmSETLEmSETLm]>, SchedVar ]>; def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>; defm : PdWriteRes; def PdWriteLAHF : SchedWriteRes<[PdEX01]> { let Latency = 2; let ResourceCycles = [4]; let NumMicroOps = 4; } def : InstRW<[PdWriteLAHF], (instrs LAHF)>; def PdWriteSAHF : SchedWriteRes<[PdEX01]> { let Latency = 2; let ResourceCycles = [2]; let NumMicroOps = 2; } def : InstRW<[PdWriteSAHF], (instrs SAHF)>; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> { let Latency = 7; let ResourceCycles = [42, 1]; let NumMicroOps = 4; } def : SchedAlias; def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> { let Latency = 7; let ResourceCycles = [44, 1]; let NumMicroOps = 10; } def : SchedAlias; // This is for simple LEAs with one or two input operands. // FIXME: SAGU 3-operand LEA def : WriteRes { let NumMicroOps = 2; } // Bit counts. defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; // BMI1 BEXTR, BMI2 BZHI defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> { let Latency = 2; let ResourceCycles = [4]; let NumMicroOps = 2; } def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>; def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> { let Latency = 2; let ResourceCycles = [5]; let NumMicroOps = 2; } def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>; //////////////////////////////////////////////////////////////////////////////// // Integer shifts and rotates. 
//////////////////////////////////////////////////////////////////////////////// defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; defm : PdWriteResExPair; def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> { let Latency = 12; let ResourceCycles = [24]; let NumMicroOps = 26; } def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>; def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> { let Latency = 12; let ResourceCycles = [23]; let NumMicroOps = 23; } def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>; def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> { let Latency = 11; let ResourceCycles = [22]; let NumMicroOps = 24; } def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>; def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> { let Latency = 10; let ResourceCycles = [20]; let NumMicroOps = 22; } def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>; def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> { let Latency = 10; let ResourceCycles = [19]; let NumMicroOps = 19; } def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>; def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> { let Latency = 7; let ResourceCycles = [14]; let NumMicroOps = 17; } def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>; def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> { let Latency = 7; let ResourceCycles = [13]; let NumMicroOps = 16; } def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>; def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> { let Latency = 7; let ResourceCycles = [14]; let NumMicroOps = 15; } def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>; def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> { let Latency = 9; let ResourceCycles = [18]; let NumMicroOps = 20; } def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>; def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> { let Latency = 11; let ResourceCycles = [21]; let NumMicroOps = 21; } def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>; def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> { let Latency = 8; let ResourceCycles = [15]; let 
NumMicroOps = 16; } def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>; def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> { let Latency = 13; let ResourceCycles = [25]; let NumMicroOps = 25; } def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>; // SHLD/SHRD. defm : PdWriteRes; defm : PdWriteRes; def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> { let Latency = 3; let ResourceCycles = [6]; let NumMicroOps = 6; } def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>; def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> { let Latency = 3; let ResourceCycles = [6]; let NumMicroOps = 7; } def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHRD32rrCL)>; defm : PdWriteRes; defm : PdWriteRes; //////////////////////////////////////////////////////////////////////////////// // Floating point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> { let Latency = 2; let ResourceCycles = [1, 3, 1]; let NumMicroOps = 2; } def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>; def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { let NumMicroOps = 8; } def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> { let 
Latency = 5; let ResourceCycles = [3, 1, 10]; } def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m, SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m, SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> { let Latency = 6; } def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>; def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>; def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> { let Latency = 5; let ResourceCycles = [3, 1, 10]; } def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> { let Latency = 27; let ResourceCycles = [1, 14]; let NumMicroOps = 17; } def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; 
defm : X86WriteResPairUnsupported; def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> { let Latency = 9; let ResourceCycles = [3, 1, 18]; } def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m, DIVR_FI16m, DIVR_FI32m, DIV_F32m, DIV_F64m, DIVR_F32m, DIVR_F64m)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 10; let ResourceCycles = [2, 1]; let NumMicroOps = 2; } def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>; def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 10; let ResourceCycles = [10, 1]; let NumMicroOps = 2; } def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>; def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 15; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm, VFRCZSDrm, VFRCZSSrm)>; def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 10; let ResourceCycles = [3, 1]; let NumMicroOps = 4; } def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>; def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 15; let ResourceCycles = [4, 1]; let NumMicroOps = 8; } def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def 
PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> { let Latency = 7; let ResourceCycles = [1, 3]; let NumMicroOps = 2; } def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { let Latency = 2; let ResourceCycles = [1, 2]; } def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>; def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> { let Latency = 7; let ResourceCycles = [1, 4]; let NumMicroOps = 2; } def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>; def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { let Latency = 4; let ResourceCycles = [1, 6]; let NumMicroOps = 8; } def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>; def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> { let Latency = 8; // 4 + 4 let ResourceCycles = [1, 8]; let NumMicroOps = 10; } def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>; //////////////////////////////////////////////////////////////////////////////// // Conversions. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>; // FIXME: f+3 ST, LD+STC latency defm : PdWriteResXMMPair; // FIXME: .Folded version is one NumMicroOp *less*.. 
defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; // FIXME: .Folded version is one NumMicroOp *less*.. def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> { let Latency = 13; let ResourceCycles = [1, 3, 1]; let NumMicroOps = 2; } def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr, MMX_CVTPI2PDirr)>; def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> { let Latency = 4; let NumMicroOps = 2; } def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteRes; defm : PdWriteRes; defm : X86WriteResUnsupported; defm : PdWriteRes; defm : PdWriteRes; defm : X86WriteResUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector integer operations. 
//////////////////////////////////////////////////////////////////////////////// defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { let NumMicroOps = 8; } def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> { } def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>; def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> { let Latency = 4; } def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> { let Latency = 4; } def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr, VPMACSSDQLrr)>; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> { let Latency = 8; let ResourceCycles = [1, 4]; let NumMicroOps = 10; } def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; 
defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> { let Latency = 2; let ResourceCycles = [1, 3]; } def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> { let Latency = 3; let ResourceCycles = [1, 3]; } def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>; //////////////////////////////////////////////////////////////////////////////// // SSE42 String instructions. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; //////////////////////////////////////////////////////////////////////////////// // MOVMSK Instructions. 
//////////////////////////////////////////////////////////////////////////////// defm : PdWriteRes; defm : PdWriteRes; defm : X86WriteResUnsupported; // defm : X86WriteResUnsupported; defm : PdWriteRes; //////////////////////////////////////////////////////////////////////////////// // AES Instructions. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; //////////////////////////////////////////////////////////////////////////////// // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr, PHADDWrr, PHSUBWrr, PHADDSWrr, PHSUBSWrr, VPHADDDrr, VPHSUBDrr, VPHADDWrr, VPHSUBWrr, VPHADDSWrr, VPHSUBSWrr)>; def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm, PHADDWrm, PHSUBWrm, PHADDSWrm, PHSUBSWrm, VPHADDDrm, VPHSUBDrm, VPHADDWrm, VPHSUBWrm, VPHADDSWrm, VPHSUBSWrm)>; //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// defm : PdWriteResXMMPair; def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> { let Latency = 12; let ResourceCycles = [1, 7]; let NumMicroOps = 6; } def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>; //////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. 
////////////////////////////////////////////////////////////////////////////////

// INSERTQ and INSERTQI get separate writes that differ only in the second
// ResourceCycles entry (2 vs. 3); both use Latency 3 on [PdFPU01, PdFPMAL].
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Write for VBROADCASTSDYrm / VBROADCASTSSYrm: Latency 6, NumMicroOps 2,
// ResourceCycles [1, 2, 4] listed against [PdLoad, PdFPU01, PdFPFMA].
// The InstRW pairs it with ReadAfterLd.
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ResourceCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                         VBROADCASTSSYrm)>;

// VZEROALL: empty resource list, Latency 90, NumMicroOps 32.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

// VZEROUPPER: empty resource list, Latency 46, NumMicroOps 16.
def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Write with Latency 0 and an empty resource list; it is the first
// alternative in each SchedWriteVariant below.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// NOTE(review): each `SchedVar` entry below has a closing `>` with no matching
// `<`, i.e. its predicate argument is missing from this text (presumably lost
// in extraction). Which condition selects PdWriteZeroLatency vs. the fallback
// write cannot be read from here -- restore from upstream; TODO confirm.
def PdWriteZeroIdiom : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

def PdWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                          XORPDrr, VXORPDrr,
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.
// Vector zero-idiom variants. Each one lists PdWriteZeroLatency first and a
// regular vector write (WriteVecLogic[X] / WriteVecALU[X]) second, and is then
// bound to specific opcodes via InstRW.
// NOTE(review): every `SchedVar` entry has a closing `>` with no matching `<`,
// i.e. its predicate argument is missing from this text (presumably lost in
// extraction) -- restore from upstream before building; TODO confirm.
def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;

def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                PANDNrr, VPANDNrr)>;

def PdWriteVZeroIdiomALU : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
                                             MMX_PSUBQirr, MMX_PSUBWirr,
                                             MMX_PCMPGTBirr,
                                             MMX_PCMPGTDirr,
                                             MMX_PCMPGTWirr)>;

def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar, [PdWriteZeroLatency]>,
    SchedVar, [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!

// Opcode classes registered as zero-idioms; every class here is guarded by
// ZeroIdiomPredicate.
// NOTE(review): the saturating-subtract opcodes (PSUBSB/PSUBSW/PSUBUSB/PSUBUSW
// in their MMX, SSE and V-prefixed forms) and VPCMPGTQrr appear in this list
// but in none of the PdWriteVZeroIdiom* InstRW lists earlier in this block --
// confirm that the difference is intentional.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
    MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
    MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
    MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

// Opcode classes registered as dependency-breaking. All classes use
// ZeroIdiomPredicate except CMP32rr/CMP64rr, which use
// CheckSameRegOperand<0, 1>.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;

// Closes the `let SchedModel = BdVer2Model in {` scope opened near the top of
// the file.
} // SchedModel