//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.

// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd     : SchedRead;
def ReadAfterVecLd  : SchedRead;
def ReadAfterVecXLd : SchedRead;
def ReadAfterVecYLd : SchedRead;

// Instructions that move data between general purpose registers and vector
// registers may be subject to extra latency due to data bypass delays.
// This SchedRead describes a bypass delay caused by data being moved from the
// integer unit to the floating point unit.
def ReadInt2Fpu : SchedRead;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;

// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
//   SchedRW  - the SchedWrite being described.
//   ExePorts - processor resources passed through to WriteRes.
//   Lat      - value assigned to Latency.
//   Res      - value assigned to ResourceCycles.
//   UOps     - value assigned to NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
                       list<ProcResourceKind> ExePorts,
                       int Lat, list<int> Res, int UOps> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: With and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
  // The SchedWrite to use when a load is folded into the instruction.
  SchedWrite Folded;
  // The SchedRead to tag register operands that don't need to be ready
  // until the folded load has completed.
  SchedRead ReadAfterFold;
}

// Multiclass that produces a linked pair of SchedWrites.
// For `defm Foo : X86SchedWritePair<R>` this defines `FooLd` (a plain
// SchedWrite) and `Foo` (an X86FoldableSchedWrite whose Folded field refers to
// `FooLd` and whose ReadAfterFold field is R).
multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
  // Register-Memory operation.
  def Ld : SchedWrite;
  // Register-Register operation.
  def NAME : X86FoldableSchedWrite {
    let Folded = !cast<SchedWrite>(NAME#"Ld");
    let ReadAfterFold = ReadAfter;
  }
}

// Helpers to mark SchedWrites as unsupported.
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
  }
}
// Same as above, but marks both the register form and its folded-load form.
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
    def : WriteRes<SchedRW.Folded, []>;
  }
}

// Class that wraps X86FoldableSchedWrite for each vector width.
// Note that the scalar write (sScl) is used for both the Scl and MMX fields.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
                          X86FoldableSchedWrite s128,
                          X86FoldableSchedWrite s256,
                          X86FoldableSchedWrite s512> {
  X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
  X86FoldableSchedWrite MMX = sScl; // MMX operations.
  X86FoldableSchedWrite XMM = s128; // XMM operations.
  X86FoldableSchedWrite YMM = s256; // YMM operations.
  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}

// Class that wraps X86SchedWriteWidths for each fp vector type.
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
                         X86SchedWriteWidths sPD> {
  X86SchedWriteWidths PS = sPS;
  X86SchedWriteWidths PD = sPD;
}

// Class that wraps move/load/store triple for a vector width.
class X86SchedWriteMoveLS<SchedWrite MoveRR,
                          SchedWrite LoadRM,
                          SchedWrite StoreMR> {
  SchedWrite RR = MoveRR;
  SchedWrite RM = LoadRM;
  SchedWrite MR = StoreMR;
}

// Class that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
                                X86SchedWriteMoveLS s128,
                                X86SchedWriteMoveLS s256,
                                X86SchedWriteMoveLS s512> {
  X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
  X86SchedWriteMoveLS MMX = sScl; // MMX operations.
  X86SchedWriteMoveLS XMM = s128; // XMM operations.
  X86SchedWriteMoveLS YMM = s256; // YMM operations.
  X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
}

// Loads, stores, and moves, not folded with other operations.
def WriteLoad    : SchedWrite;
def WriteStore   : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove    : SchedWrite;
def WriteCopy    : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy

// Arithmetic.
defm WriteALU   : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC   : X86SchedWritePair; // Integer ALU + flags op.
// Read-modify-write forms: the folded-load write followed by WriteRMW.
def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>;
def WriteLEA    : SchedWrite;        // LEA instructions can't fold loads.

// Integer multiplication
defm WriteIMul8     : X86SchedWritePair; // Integer 8-bit multiplication.
defm WriteIMul16    : X86SchedWritePair; // Integer 16-bit multiplication.
defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
defm WriteIMul32    : X86SchedWritePair; // Integer 32-bit multiplication.
defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
defm WriteIMul64    : X86SchedWritePair; // Integer 64-bit multiplication.
defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.

def  WriteBSWAP32    : SchedWrite;        // Byte Order (Endianness) 32-bit Swap.
def  WriteBSWAP64    : SchedWrite;        // Byte Order (Endianness) 64-bit Swap.
defm WriteCMPXCHG    : X86SchedWritePair; // Compare and set, compare and swap.
def  WriteCMPXCHGRMW : SchedWrite;        // Compare and set, compare and swap.
def  WriteXCHG       : SchedWrite;        // Compare+Exchange - TODO RMW support.

// Integer division.
defm WriteDiv8   : X86SchedWritePair;
defm WriteDiv16  : X86SchedWritePair;
defm WriteDiv32  : X86SchedWritePair;
defm WriteDiv64  : X86SchedWritePair;
defm WriteIDiv8  : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;

defm WriteBSF        : X86SchedWritePair; // Bit scan forward.
defm WriteBSR        : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT     : X86SchedWritePair; // Bit population count.
defm WriteLZCNT      : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT      : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV       : X86SchedWritePair; // Conditional move.
def  WriteFCMOV      : SchedWrite;        // X87 conditional move.
def  WriteSETCC      : SchedWrite;        // Set register based on condition code.
def  WriteSETCCStore : SchedWrite;
def  WriteLAHFSAHF   : SchedWrite;        // Load/Store flags in AH.

def WriteBitTest      : SchedWrite; // Bit Test
def WriteBitTestImmLd : SchedWrite;
def WriteBitTestRegLd : SchedWrite;

def WriteBitTestSet       : SchedWrite; // Bit Test + Set
def WriteBitTestSetImmLd  : SchedWrite;
def WriteBitTestSetRegLd  : SchedWrite;
def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;

// Integer shifts and rotates.
defm WriteShift    : X86SchedWritePair;
defm WriteShiftCL  : X86SchedWritePair;
defm WriteRotate   : X86SchedWritePair;
defm WriteRotateCL : X86SchedWritePair;

// Double shift instructions.
def WriteSHDrri  : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri  : SchedWrite;
def WriteSHDmrcl : SchedWrite;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBLS   : X86SchedWritePair;
defm WriteBZHI  : X86SchedWritePair;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;

// Floating point. This covers both scalar and vector operations.
def WriteFLD0          : SchedWrite;
def WriteFLD1          : SchedWrite;
def WriteFLDC          : SchedWrite;
def WriteFLoad         : SchedWrite;
def WriteFLoadX        : SchedWrite;
def WriteFLoadY        : SchedWrite;
def WriteFMaskedLoad   : SchedWrite;
def WriteFMaskedLoadY  : SchedWrite;
def WriteFStore        : SchedWrite;
def WriteFStoreX       : SchedWrite;
def WriteFStoreY       : SchedWrite;
def WriteFStoreNT      : SchedWrite;
def WriteFStoreNTX     : SchedWrite;
def WriteFStoreNTY     : SchedWrite;
def WriteFMaskedStore  : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove         : SchedWrite;
def WriteFMoveX        : SchedWrite;
def WriteFMoveY        : SchedWrite;

defm WriteFAdd   : X86SchedWritePair<ReadAfterVecLd>;  // Floating point add/sub.
defm WriteFAddX  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
defm WriteFAddY  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
defm WriteFAddZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
defm WriteFCmp    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare.
defm WriteFCmpX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
defm WriteFCmpY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
defm WriteFCmpZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
defm WriteFCmp64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
defm WriteFCom    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare to flags.
defm WriteFMul    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point multiplication.
defm WriteFMulX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
defm WriteFMulY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
defm WriteFMulZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
defm WriteFMul64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
defm WriteFDiv     : X86SchedWritePair<ReadAfterVecLd>;  // Floating point division.
defm WriteFDivX    : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
defm WriteFDivY    : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
defm WriteFDivZ    : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
defm WriteFDiv64   : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double division.
defm WriteFDiv64X  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
defm WriteFDiv64Y  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
defm WriteFDiv64Z  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
defm WriteFSqrt    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point square root.
defm WriteFSqrtX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
defm WriteFSqrtY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
defm WriteFSqrtZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
defm WriteFSqrt64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
defm WriteFSqrt80  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point long double square root.
defm WriteFRcp     : X86SchedWritePair<ReadAfterVecLd>;  // Floating point reciprocal estimate.
defm WriteFRcpX    : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY    : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point reciprocal square root estimate.
defm WriteFRsqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA     : X86SchedWritePair<ReadAfterVecLd>;  // Fused Multiply Add.
defm WriteFMAX    : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
defm WriteFMAY    : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
defm WriteFMAZ    : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
defm WriteDPPD    : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
defm WriteDPPS    : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
defm WriteDPPSY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
defm WriteDPPSZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
defm WriteFSign   : X86SchedWritePair<ReadAfterVecLd>;  // Floating point fabs/fchs.
defm WriteFRnd    : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
defm WriteFRndY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
defm WriteFRndZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
defm WriteFLogic  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest        : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
defm WriteFTestY       : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
defm WriteFTestZ       : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
defm WriteFShuffle     : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
defm WriteFShuffleY    : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ    : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend       : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
defm WriteFBlendY      : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
defm WriteFBlendZ      : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
defm WriteFVarBlend    : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
defm WriteFVarBlendY   : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ   : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).

// FMA Scheduling helper class.
// Holds a single X86FoldableSchedWrite field, Sched, which defaults to
// WriteFAdd.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Horizontal Add/Sub (float and integer)
defm WriteFHAdd  : X86SchedWritePair<ReadAfterVecXLd>;
defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
defm WritePHAdd  : X86SchedWritePair<ReadAfterVecLd>;
defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;

// Vector integer operations.
def WriteVecLoad         : SchedWrite;
def WriteVecLoadX        : SchedWrite;
def WriteVecLoadY        : SchedWrite;
def WriteVecLoadNT       : SchedWrite;
def WriteVecLoadNTY      : SchedWrite;
def WriteVecMaskedLoad   : SchedWrite;
def WriteVecMaskedLoadY  : SchedWrite;
def WriteVecStore        : SchedWrite;
def WriteVecStoreX       : SchedWrite;
def WriteVecStoreY       : SchedWrite;
def WriteVecStoreNT      : SchedWrite;
def WriteVecStoreNTY     : SchedWrite;
def WriteVecMaskedStore  : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove         : SchedWrite;
def WriteVecMoveX        : SchedWrite;
def WriteVecMoveY        : SchedWrite;
def WriteVecMoveToGpr    : SchedWrite;
def WriteVecMoveFromGpr  : SchedWrite;

defm WriteVecALU    : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer ALU op, no logicals.
defm WriteVecALUX   : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY   : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ   : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic  : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ    : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest      : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
defm WriteVecTestY     : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ     : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
defm WriteVecShift     : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer shifts (default).
defm WriteVecShiftX    : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
defm WriteVecShiftY    : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
defm WriteVecShiftZ    : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
defm WriteVecShiftImm  : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer immediate shifts (default).
defm WriteVecShiftImmX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul      : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer multiply (default).
defm WriteVecIMulX     : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
defm WriteVecIMulY     : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
defm WriteVecIMulZ     : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
defm WritePMULLD       : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
defm WritePMULLDY      : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
defm WritePMULLDZ      : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
defm WriteShuffle      : X86SchedWritePair<ReadAfterVecLd>;  // Vector shuffles.
defm WriteShuffleX     : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
defm WriteShuffleY     : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
defm WriteShuffleZ     : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
defm WriteVarShuffle   : X86SchedWritePair<ReadAfterVecLd>;  // Vector variable shuffles.
defm WriteVarShuffleX  : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
defm WriteVarShuffleY  : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ  : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
defm WriteBlend        : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
defm WriteBlendY       : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
defm WriteBlendZ       : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
defm WriteVarBlend     : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
defm WriteVarBlendY    : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
defm WriteVarBlendZ    : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
defm WritePSADBW       : X86SchedWritePair<ReadAfterVecLd>;  // Vector PSADBW.
defm WritePSADBWX      : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
defm WritePSADBWY      : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
defm WritePSADBWZ      : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
defm WriteMPSAD        : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
defm WriteMPSADY       : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
defm WriteMPSADZ       : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
defm WritePHMINPOS     : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.

// Vector insert/extract operations.
defm WriteVecInsert    : X86SchedWritePair; // Insert gpr to vector element.
def  WriteVecExtract   : SchedWrite;        // Extract vector element to gpr.
def  WriteVecExtractSt : SchedWrite;        // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK    : SchedWrite;
def WriteVecMOVMSK  : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK  : SchedWrite;

// Conversion between integer and float.
defm WriteCvtSD2I  : X86SchedWritePair<ReadAfterVecLd>;  // Double -> Integer.
defm WriteCvtPD2I  : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).

defm WriteCvtSS2I  : X86SchedWritePair<ReadAfterVecLd>;  // Float -> Integer.
defm WriteCvtPS2I  : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).

defm WriteCvtI2SD  : X86SchedWritePair<ReadAfterVecLd>;  // Integer -> Double.
defm WriteCvtI2PD  : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).

defm WriteCvtI2SS  : X86SchedWritePair<ReadAfterVecLd>;  // Integer -> Float.
defm WriteCvtI2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).

defm WriteCvtSS2SD  : X86SchedWritePair<ReadAfterVecLd>;  // Float -> Double size conversion.
defm WriteCvtPS2PD  : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).

defm WriteCvtSD2SS  : X86SchedWritePair<ReadAfterVecLd>;  // Double -> Float size conversion.
defm WriteCvtPD2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).

defm WriteCvtPH2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).

def WriteCvtPS2PH    : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY   : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ   : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt  : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).

// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;

// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;

// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
defm WriteAESIMC    : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.

// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;

// EMMS/FEMMS
def WriteEMMS : SchedWrite;

// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;

// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;

// AVX2.
defm WriteFShuffle256    : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
defm WriteShuffle256     : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
defm WriteVarShuffle256  : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
defm WriteVarVecShift    : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
defm WriteVarVecShiftY   : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ   : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).

// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;

// Fence instructions.
def WriteFence : SchedWrite;

// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;

// Move/Load/Store wrappers.
// Each X86SchedWriteMoveLS bundles a <move, load, store> triple; each
// X86SchedWriteMoveLSWidths bundles the triples for <Scl/MMX, XMM, YMM, ZMM>.
// In every *Widths def below the YMM triple is reused for the ZMM slot.
def WriteFMoveLS
 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
 : X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
  : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
                              WriteFMoveLSY, WriteFMoveLSY>;

def WriteFMoveLSNT
 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
// NOTE(review): the XMM non-temporal triple pairs WriteFStoreNTX with the
// scalar WriteFMove/WriteFLoad rather than WriteFMoveX/WriteFLoadX (compare
// WriteFMoveLSX above) - confirm this is intentional.
def WriteFMoveLSNTX
 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
  : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
                              WriteFMoveLSNTY, WriteFMoveLSNTY>;

def WriteVecMoveLS
 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
  : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
                              WriteVecMoveLSY, WriteVecMoveLSY>;

def WriteVecMoveLSNT
 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
  : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
                              WriteVecMoveLSNTY, WriteVecMoveLSNTY>;

// Vector width wrappers.
// Template arguments are, in order: scalar (also used for MMX), XMM, YMM, ZMM.
// Where no dedicated XMM write exists, the same write is passed for both the
// scalar and the XMM slot.
def SchedWriteFAdd
 : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
 : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
 : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
 : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
 : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
 : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
 : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
 : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
 : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
 : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
 : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
 : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
 : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
                       WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
 : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
                       WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
 : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
 : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
 : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
 : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
 : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;

def SchedWriteFShuffle
 : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
                       WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
 : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
                       WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
 : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
 : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
                       WriteFVarBlendY, WriteFVarBlendZ>;

def SchedWriteCvtDQ2PD
 : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
                       WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
 : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
                       WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
 : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
                       WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
 : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
                       WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
 : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
                       WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
 : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
                       WriteCvtPD2PSY, WriteCvtPD2PSZ>;

def SchedWriteVecALU
 : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
 : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
 : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
                       WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
 : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
                       WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
 : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
                       WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
 : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
                       WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
 : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
                       WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
 : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
                       WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
 : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
                       WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
 : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
                       WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
 : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
                       WritePSADBWY, WritePSADBWZ>;

def SchedWriteShuffle
 : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
                       WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
 : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
                       WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
 : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
 : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
                       WriteVarBlendY, WriteVarBlendZ>;

// Vector size wrappers.
// Template arguments are, in order: the PS (sPS) and PD (sPD) width wrappers.
// FLogic and FShuffle pass the same width wrapper for both.
def SchedWriteFAddSizes
 : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
 : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
 : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
 : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
 : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
 : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
 : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.

// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a reasonably arbitrary
// number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
  // Front end: issue width mirrors the 4 decoders described above.
  let IssueWidth = 4;
  // Reasonably arbitrary number of in-flight instructions.
  let MicroOpBufferSize = 32;

  // Latency estimates; HighLatency = 10 is optimistic.
  let LoadLatency = 4;
  let HighLatency = 10;

  // No instruction schedules are attached to this model, so it is not
  // marked complete, and the post-RA scheduler is left off.
  let CompleteModel = 0;
  let PostRAScheduler = 0;
}

// Plain generic model: uses the GenericX86Model settings unchanged.
def GenericModel : GenericX86Model;

// Same generic model, but with the PostRAScheduler enabled.
let PostRAScheduler = 1 in
def GenericPostRAModel : GenericX86Model;