//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Skylake Server to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record; every resource and write definition below
// is attached to it through the enclosing `let SchedModel = ... in` scope.
def SkylakeServerModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and Skylake can
  // decode 6 instructions per cycle.
  let IssueWidth = 6;
  let MicroOpBufferSize = 224; // Based on the reorder buffer.
  let LoadLatency = 5;
  let MispredictPenalty = 14;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  // This flag is set to allow the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SkylakeServerModel in {

// Skylake Server can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
def SKXPort0 : ProcResource<1>;
def SKXPort1 : ProcResource<1>;
def SKXPort2 : ProcResource<1>;
def SKXPort3 : ProcResource<1>;
def SKXPort4 : ProcResource<1>;
def SKXPort5 : ProcResource<1>;
def SKXPort6 : ProcResource<1>;
def SKXPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def SKXPort01   : ProcResGroup<[SKXPort0, SKXPort1]>;
def SKXPort23   : ProcResGroup<[SKXPort2, SKXPort3]>;
def SKXPort237  : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
def SKXPort04   : ProcResGroup<[SKXPort0, SKXPort4]>;
def SKXPort05   : ProcResGroup<[SKXPort0, SKXPort5]>;
def SKXPort06   : ProcResGroup<[SKXPort0, SKXPort6]>;
def SKXPort15   : ProcResGroup<[SKXPort1, SKXPort5]>;
def SKXPort16   : ProcResGroup<[SKXPort1, SKXPort6]>;
def SKXPort56   : ProcResGroup<[SKXPort5, SKXPort6]>;
def SKXPort015  : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
def SKXPort056  : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
def SKXPort0156 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;

// Integer division issued on port 0.
def SKXDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SKXFPDivider : ProcResource<1>;

// 60 Entry Unified Scheduler
def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
                               SKXPort5, SKXPort6, SKXPort7]> {
  let BufferSize = 60;
}

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be
// available until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Parameters:
//   SchedRW  - the foldable write; SchedRW.Folded names its load variant.
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - ReleaseAtCycles for ExePorts (defaults to [1]).
//   UOps     - micro-op count of the register form (defaults to 1).
//   LoadLat  - cycles added to Lat for the folded-load form (defaults to 5).
//   LoadUOps - micro-ops added to UOps for the folded-load form (defaults to 1).
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 5, int LoadUOps = 1> {
  // Register variant: uses ExePorts exactly as given.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant: prepends one cycle on port 2/3 and adds LoadLat cycles to
  // the latency (default = 5).
  def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
    let NumMicroOps = !add(UOps, LoadUOps);
  }
}

// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;

// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.

// Integer multiplication.
defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;

// WriteIMulH consumes no resources and only carries latency; the load form
// adds the model-wide LoadLatency on top of the register form's latency.
def SKXWriteIMulH : WriteRes<WriteIMulH, []> {
  let Latency = 4;
}
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}

// Byte swap, compare-and-exchange and exchange.
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;

// TODO: Why isn't the SKXDivider used?
// Unsigned integer division.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;

// Signed integer division.
defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;

defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;

// LEA instructions can't fold loads.
def : WriteRes<WriteLEA, [SKXPort15]>;

defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
// Setcc with a store destination.
def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
}
defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;

// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;

// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteBSR, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;

// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER
// operation. Does not cost anything by itself, only has latency, matching
// that of the WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;

// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;

defm : X86WriteRes<WriteFMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;

defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;

defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).

defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.

defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.

defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;

// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;

defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.

// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;

// Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;

// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
// MOVH/MOVL PD/PS loads (with optional V prefix) reuse the insert-with-load write.
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;

def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;

defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;

// Single <-> double precision conversions.
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort5,SKXPort01], 5, [1,1], 2, 5>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort5,SKXPort01], 5, [1,1], 2, 4>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort5,SKXPort05], 7, [1,1], 2, 7>;

// Half -> single precision conversions (register and folded-load forms).
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;

// Single -> half precision conversions (register and store forms).
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;

// Strings instructions.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156]> {
  let Latency = 19;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156]> {
  let Latency = 25;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [4,3,1,1,1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKXPort0,SKXPort5,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
  let Latency = 24;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4,3,1,1];
}

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSKY, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }

// AES instructions.
// Decryption, encryption.
def : WriteRes<WriteAESDecEnc, [SKXPort0]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}

// InvMixColumn.
def : WriteRes<WriteAESIMC, [SKXPort0]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}

// Key Generation.
def : WriteRes<WriteAESKeyGen, [SKXPort0,SKXPort5,SKXPort015]> {
  let Latency = 20;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3,6,2];
}
def : WriteRes<WriteAESKeyGenLd, [SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 25;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3,6,1,1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKXPort5]> {
  let Latency = 6;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}

// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;

// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVPMOV256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.

// Old microcoded instructions that nobody uses.
// (def WriteMicrocoded : SchedWrite;)
def : WriteRes<WriteMicrocoded, [SKXPort0156]> {
  let Latency = 100;
}

// Fence instructions.
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>;

// Load/store MXCSR.
def : WriteRes<WriteLDMXCSR, [SKXPort0, SKXPort23, SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def : WriteRes<WriteSTMXCSR, [SKXPort4, SKXPort5, SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}

// Nop: not very useful, except that it provides a model for nops.
// It is given an empty resource list.
def : WriteRes<WriteNop, []>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : SKXWriteResPair<WriteFHAdd,  [SKXPort5, SKXPort015], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5, SKXPort015], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd,  [SKXPort5, SKXPort05],  3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5, SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5, SKXPort015], 3, [2,1], 3, 7>;

// Remaining instrs.

// Single micro-op, latency 1, port 0 only: mask-register logic, MMX
// saturating add/sub, average, compare, min/max, and vector-to-mask moves.
def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex
    "KAND(B|D|Q|W)rr",
    "KANDN(B|D|Q|W)rr",
    "KMOV(B|D|Q|W)kk",
    "KNOT(B|D|Q|W)rr",
    "KOR(B|D|Q|W)rr",
    "KXNOR(B|D|Q|W)rr",
    "KXOR(B|D|Q|W)rr",
    "KSET0(B|D|Q|W)", // Same as KXOR
    "KSET1(B|D|Q|W)", // Same as KXNOR
    "MMX_PADDS(B|W)rr",
    "MMX_PADDUS(B|W)rr",
    "MMX_PAVG(B|W)rr",
    "MMX_PCMPEQ(B|D|W)rr",
    "MMX_PCMPGT(B|D|W)rr",
    "MMX_P(MAX|MIN)SWrr",
    "MMX_P(MAX|MIN)UBrr",
    "MMX_PSUBS(B|W)rr",
    "MMX_PSUBUS(B|W)rr",
    "VPMOVB2M(Z|Z128|Z256)rr",
    "VPMOVD2M(Z|Z128|Z256)rr",
    "VPMOVQ2M(Z|Z128|Z256)rr",
    "VPMOVW2M(Z|Z128|Z256)rr")>;

// Single micro-op, latency 1, port 5 only.
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex
    "COM(P?)_FST0r",
    "KMOV(B|D|Q|W)kr",
    "UCOM_F(P?)r")>;

// Single micro-op, latency 1, port 6 only.
def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;

// Single micro-op, latency 1, port 0 or 5.
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;

// Single micro-op, latency 1, port 0 or 6.
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;

// Single micro-op, latency 1, port 1 or 5.
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;

// Single micro-op, latency 1, port 0, 1 or 5.
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
// NOTE(review): VPSUB is matched only with the "rrk" suffix below while VPADD
// uses "rr" -- confirm that the asymmetry is intentional.
def: InstRW<[SKXWriteResGroup9], (instregex
    "VBLENDMPD(Z128|Z256)rr",
    "VBLENDMPS(Z128|Z256)rr",
    "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
    "(V?)PADD(B|D|Q|W)rr",
    "VPBLENDD(Y?)rri",
    "VPBLENDMB(Z128|Z256)rr",
    "VPBLENDMD(Z128|Z256)rr",
    "VPBLENDMQ(Z128|Z256)rr",
    "VPBLENDMW(Z128|Z256)rr",
    "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
    "VPTERNLOGD(Z|Z128|Z256)rri",
    "VPTERNLOGQ(Z|Z128|Z256)rri")>;

def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs
    SGDT64m,
    SIDT64m,
    SMSW16m,
    STRm,
    SYSCALL)>;

def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4, SKXPort237]> {
  let Latency = 1; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKXWriteResGroup11], (instregex
    "KMOV(B|D|Q|W)mk",
    "ST_FP(32|64|80)m")>;

def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;

def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup14], (instrs
    FDECSTP,
    MMX_MOVDQ2Qrr)>;

def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup17], (instrs
    LFENCE,
    WAIT,
    XGETBV)>;

def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6, SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;

def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237, SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup21], (instrs SFENCE)>;

def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06, SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup23], (instrs
    CWD,
    JCXZ, JECXZ, JRCXZ,
    ADC8i8, SBB8i8,
    ADC16i16, SBB16i16,
    ADC32i32, SBB32i32,
    ADC64i32, SBB64i32)>;

def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4, SKXPort6, SKXPort237]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>;

def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort15]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;

def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup28], (instrs
    PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
    STOSB, STOSL, STOSQ, STOSW)>;
def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;

def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort15]> {
  let Latency = 2; let NumMicroOps = 5; let ReleaseAtCycles = [2,2,1];
}
def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;

def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex
    "KMOV(B|D|Q|W)rk",
    "KORTEST(B|D|Q|W)rr",
    "KTEST(B|D|Q|W)rr")>;

def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex
    "PDEP(32|64)rr",
    "PEXT(32|64)rr")>;

def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex
    "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
    "VALIGND(Z|Z128|Z256)rri",
    "VALIGNQ(Z|Z128|Z256)rri",
    "VPBROADCAST(B|W)rr",
    "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;

def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup33], (instregex
    "KADD(B|D|Q|W)rr",
    "KSHIFTL(B|D|Q|W)ri",
    "KSHIFTR(B|D|Q|W)ri",
    "KUNPCK(BW|DQ|WD)rr",
    "VCMPPD(Z|Z128|Z256)rri",
    "VCMPPS(Z|Z128|Z256)rri",
    "VCMP(SD|SS)Zrr",
    "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
    "VFPCLASS(SD|SS)Zrr",
    "VPCMPB(Z|Z128|Z256)rri",
    "VPCMPD(Z|Z128|Z256)rri",
    "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPQ(Z|Z128|Z256)rri",
    "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
    "VPCMPW(Z|Z128|Z256)rri",
    "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;

def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;

def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0, SKXPort5]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;

def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5, SKXPort01]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;

def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup41], (instrs
    MMX_PACKSSDWrr,
    MMX_PACKSSWBrr,
    MMX_PACKUSWBrr)>;

def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>;

def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>;

// Rotate-through-carry by one, register forms.
def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06, SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup44], (instrs
    RCL8r1, RCL16r1, RCL32r1, RCL64r1,
    RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// Rotate-through-carry by immediate; RCR and RCL differ only in latency.
def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 8; let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 8; let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;

def SKXWriteResGroup47
    : SchedWriteRes<[SKXPort4, SKXPort6, SKXPort237, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;

def SKXWriteResGroup48
    : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort06, SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>;

def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;

// NOTE(review): VCVTPS2DQ lists a "Y" form below but VCVTTPS2DQ does not --
// confirm that the difference is intentional.
def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex
    "VCVTPD2QQ(Z128|Z256)rr",
    "VCVTPD2UQQ(Z128|Z256)rr",
    "VCVTPS2DQ(Y|Z128|Z256)rr",
    "(V?)CVTPS2DQrr",
    "VCVTPS2UDQ(Z128|Z256)rr",
    "VCVTTPD2QQ(Z128|Z256)rr",
    "VCVTTPD2UQQ(Z128|Z256)rr",
    "VCVTTPS2DQ(Z128|Z256)rr",
    "(V?)CVTTPS2DQrr",
    "VCVTTPS2UDQ(Z128|Z256)rr")>;

// The "Z" (non-128/256) forms of the conversions above use ports 0/5 instead
// of ports 0/1.
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs
    VCVTPD2QQZrr,
    VCVTPD2UQQZrr,
    VCVTPS2DQZrr,
    VCVTPS2UDQZrr,
    VCVTTPD2QQZrr,
    VCVTTPD2UQQZrr,
    VCVTTPS2DQZrr,
    VCVTTPS2UDQZrr)>;

// NOTE(review): the store-form list in SKXWriteResGroup66 includes VPMOVUSQW,
// but this register-form list does not -- confirm whether that is intentional.
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup51], (instregex
    "VEXPANDPD(Z|Z128|Z256)rr",
    "VEXPANDPS(Z|Z128|Z256)rr",
    "VPEXPANDD(Z|Z128|Z256)rr",
    "VPEXPANDQ(Z|Z128|Z256)rr",
    "VPMOVDB(Z|Z128|Z256)rr",
    "VPMOVDW(Z|Z128|Z256)rr",
    "VPMOVQB(Z|Z128|Z256)rr",
    "VPMOVQW(Z|Z128|Z256)rr",
    "VPMOVSDB(Z|Z128|Z256)rr",
    "VPMOVSDW(Z|Z128|Z256)rr",
    "VPMOVSQB(Z|Z128|Z256)rr",
    "VPMOVSQD(Z|Z128|Z256)rr",
    "VPMOVSQW(Z|Z128|Z256)rr",
    "VPMOVSWB(Z|Z128|Z256)rr",
    "VPMOVUSDB(Z|Z128|Z256)rr",
    "VPMOVUSDW(Z|Z128|Z256)rr",
    "VPMOVUSQB(Z|Z128|Z256)rr",
    "VPMOVUSQD(Z|Z128|Z256)rr",
    "VPMOVUSWB(Z|Z128|Z256)rr",
    "VPMOVWB(Z|Z128|Z256)rr")>;

def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4, SKXPort5, SKXPort237]> {
  let Latency = 4; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup54], (instregex
    "IST(T?)_FP(16|32|64)m",
    "IST_F(16|32)m",
    "VPMOVQD(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [4];
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;

// VZEROUPPER: four micro-ops, but no port resources and zero latency.
def SKXWriteResGroup56 : SchedWriteRes<[]> {
  let Latency = 0; let NumMicroOps = 4; let ReleaseAtCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;

def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1, SKXPort6, SKXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;

def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5, SKXPort01]> {
  let Latency = 5; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup61], (instregex
    "MMX_CVT(T?)PD2PIrr",
    "MMX_CVT(T?)PS2PIrr",
    "VCVTDQ2PDZ128rr",
    "VCVTPD2DQZ128rr",
    "(V?)CVT(T?)PD2DQrr",
    "VCVTPD2UDQZ128rr",
    "VCVTPS2PDZ128rr",
    "(V?)CVTPS2PDrr",
    "VCVTPS2QQZ128rr",
    "VCVTPS2UQQZ128rr",
    "VCVTQQ2PSZ128rr",
    "(V?)CVTSI(64)?2SDrr",
    "VCVTSI2SSZrr",
    "(V?)CVTSI2SSrr",
    "VCVTSI(64)?2SDZrr",
    "VCVTSS2SDZrr",
    "(V?)CVTSS2SDrr",
    "VCVTTPD2DQZ128rr",
    "VCVTTPD2UDQZ128rr",
    "VCVTTPS2QQZ128rr",
    "VCVTTPS2UQQZ128rr",
    "VCVTUDQ2PDZ128rr",
    "VCVTUQQ2PSZ128rr",
    "VCVTUSI2SSZrr",
    "VCVTUSI(64)?2SDZrr")>;

def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5, SKXPort015]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;

def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1, SKXPort6, SKXPort06]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;

def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort01]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup65], (instregex
    "VCVTPS2PHZ128mr(b?)",
    "VCVTPS2PHZ256mr(b?)",
    "VCVTPS2PHZmr(b?)")>;

def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4, SKXPort5, SKXPort237]> {
  let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup66], (instregex
    "VPMOVDB(Z|Z128|Z256)mr(b?)",
    "VPMOVDW(Z|Z128|Z256)mr(b?)",
    "VPMOVQB(Z|Z128|Z256)mr(b?)",
    "VPMOVQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVWB(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06, SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 5; let ReleaseAtCycles = [1,4];
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;

def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 6; let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>;

def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
  let Latency = 6; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup71], (instrs
    VBROADCASTSSrm,
    VPBROADCASTDrm,
    VPBROADCASTQrm)>;
def: InstRW<[SKXWriteResGroup71], (instregex
    "(V?)MOVSHDUPrm",
    "(V?)MOVSLDUPrm",
    "(V?)MOVDDUPrm")>;

def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex
    "VCOMPRESSPD(Z|Z128|Z256)rr",
    "VCOMPRESSPS(Z|Z128|Z256)rr",
    "VPCOMPRESSD(Z|Z128|Z256)rr",
    "VPCOMPRESSQ(Z|Z128|Z256)rr",
    "VPERMW(Z|Z128|Z256)rr")>;

// Memory-operand forms of the MMX instructions listed in SKXWriteResGroup1.
def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0, SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup73], (instrs
    MMX_PADDSBrm,
    MMX_PADDSWrm,
    MMX_PADDUSBrm,
    MMX_PADDUSWrm,
    MMX_PAVGBrm,
    MMX_PAVGWrm,
    MMX_PCMPEQBrm,
    MMX_PCMPEQDrm,
    MMX_PCMPEQWrm,
    MMX_PCMPGTBrm,
    MMX_PCMPGTDrm,
    MMX_PCMPGTWrm,
    MMX_PMAXSWrm,
    MMX_PMAXUBrm,
    MMX_PMINSWrm,
    MMX_PMINUBrm,
    MMX_PSUBSBrm,
    MMX_PSUBSWrm,
    MMX_PSUBUSBrm,
    MMX_PSUBUSWrm)>;

def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6, SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;

def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23, SKXPort15]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex
    "ANDN(32|64)rm",
    "MOVBE(16|32|64)rm")>;

def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23, SKXPort015]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;

def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23, SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;

def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5, SKXPort01]> {
  let Latency = 6; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup82], (instregex
    "(V?)CVTSI642SSrr",
    "VCVTSI642SSZrr",
    "VCVTUSI642SSZrr")>;

def SKXWriteResGroup84
    : SchedWriteRes<[SKXPort1, SKXPort6, SKXPort06, SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;

def SKXWriteResGroup86
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort06]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup86], (instregex
    "SAR(8|16|32|64)m(1|i)",
    "SHL(8|16|32|64)m(1|i)",
    "SHR(8|16|32|64)m(1|i)")>;

def SKXWriteResGroup87
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup87], (instregex
    "POP(16|32|64)rmm",
    "PUSH(16|32|64)rmm")>;

def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6, SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 6; let ReleaseAtCycles = [1,5];
}
def: InstRW<[SKXWriteResGroup88], (instrs STD)>;

def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
  let Latency = 7; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
def: InstRW<[SKXWriteResGroup89], (instrs
    VBROADCASTF128rm,
    VBROADCASTI128rm,
    VBROADCASTSDYrm,
    VBROADCASTSSYrm,
    VMOVDDUPYrm,
    VMOVSHDUPYrm,
    VMOVSLDUPYrm,
    VPBROADCASTDYrm,
    VPBROADCASTQYrm)>;

def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01, SKXPort5]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;

def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5, SKXPort23]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex
    "VMOVSDZrm(b?)",
    "VMOVSSZrm(b?)")>;

def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5, SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92a], (instregex
    "(V?)PMOV(SX|ZX)BDrm",
    "(V?)PMOV(SX|ZX)BQrm",
    "(V?)PMOV(SX|ZX)BWrm",
    "(V?)PMOV(SX|ZX)DQrm",
    "(V?)PMOV(SX|ZX)WDrm",
    "(V?)PMOV(SX|ZX)WQrm")>;

def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5, SKXPort01]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex
    "VCVTDQ2PDZ256rr",
    "VCVTPD2DQ(Y|Z256)rr",
    "VCVTPD2UDQZ256rr",
    "VCVTPS2PD(Y|Z256)rr",
    "VCVTPS2QQZ256rr",
    "VCVTPS2UQQZ256rr",
    "VCVTQQ2PSZ256rr",
    "VCVTTPD2DQ(Y|Z256)rr",
    "VCVTTPD2UDQZ256rr",
    "VCVTTPS2QQZ256rr",
    "VCVTTPS2UQQZ256rr",
    "VCVTUDQ2PDZ256rr",
    "VCVTUQQ2PSZ256rr")>;

// The "Z" (non-128/256) forms of the conversions above use ports 0/5 instead
// of ports 0/1 for the second resource.
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5, SKXPort05]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs
    VCVTDQ2PDZrr,
    VCVTPD2DQZrr,
    VCVTPD2UDQZrr,
    VCVTPS2PDZrr,
    VCVTPS2QQZrr,
    VCVTPS2UQQZrr,
    VCVTQQ2PSZrr,
    VCVTTPD2DQZrr,
    VCVTTPD2UDQZrr,
    VCVTTPS2QQZrr,
    VCVTTPS2UQQZrr,
    VCVTUDQ2PDZrr,
    VCVTUQQ2PSZrr)>;

def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23, SKXPort015]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup95], (instrs
    VMOVNTDQAZ128rm,
    VPBLENDDrmi)>;
// These entries additionally carry the ReadAfterVecXLd read.
def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd], (instregex
    "VBLENDMPDZ128rm(b?)",
    "VBLENDMPSZ128rm(b?)",
    "VBROADCASTI32X2Z128rm(b?)",
    "VBROADCASTSSZ128rm(b?)",
    "VINSERT(F|I)128rm",
    "VMOVAPDZ128rm(b?)",
    "VMOVAPSZ128rm(b?)",
    "VMOVDDUPZ128rm(b?)",
    "VMOVDQA32Z128rm(b?)",
    "VMOVDQA64Z128rm(b?)",
    "VMOVDQU16Z128rm(b?)",
    "VMOVDQU32Z128rm(b?)",
    "VMOVDQU64Z128rm(b?)",
    "VMOVDQU8Z128rm(b?)",
    "VMOVSHDUPZ128rm(b?)",
    "VMOVSLDUPZ128rm(b?)",
    "VMOVUPDZ128rm(b?)",
    "VMOVUPSZ128rm(b?)",
    "VPADD(B|D|Q|W)Z128rm(b?)",
    "(V?)PADD(B|D|Q|W)rm",
    "VPBLENDM(B|D|Q|W)Z128rm(b?)",
    "VPBROADCASTDZ128rm(b?)",
    "VPBROADCASTQZ128rm(b?)",
    "VPSUB(B|D|Q|W)Z128rm(b?)",
    "(V?)PSUB(B|D|Q|W)rm",
    "VPTERNLOGDZ128rm(b?)i",
    "VPTERNLOGQZ128rm(b?)i")>;

def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5, SKXPort23]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup96], (instrs
    MMX_PACKSSDWrm,
    MMX_PACKSSWBrm,
    MMX_PACKUSWBrm)>;

def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5, SKXPort015]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup97], (instregex
    "VPERMI2WZ128rr",
    "VPERMI2WZ256rr",
    "VPERMI2WZrr",
    "VPERMT2WZ128rr",
    "VPERMT2WZ256rr",
    "VPERMT2WZrr")>;

def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup99], (instrs
    LEAVE, LEAVE64,
    SCASB, SCASL, SCASQ, SCASW)>;

def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0, SKXPort5, SKXPort01]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex
    "(V?)CVT(T?)SS2SI64(Z?)rr",
    "VCVT(T?)SS2USI64Zrr")>;

def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort05]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>;

def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5, SKXPort23, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;

def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6, SKXPort23, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;

def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4, SKXPort5, SKXPort237]> {
  let Latency = 7; let NumMicroOps = 4; let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup106], (instregex
    "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
    "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
    "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
    "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup107
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort06]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup107], (instregex
    "ROL(8|16|32|64)m(1|i)",
    "ROR(8|16|32|64)m(1|i)")>;

// Rotate by one, register forms.
def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup107_1], (instrs
    ROL8r1, ROL16r1, ROL32r1, ROL64r1,
    ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;

def SKXWriteResGroup108
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;

def SKXWriteResGroup109
    : SchedWriteRes<[SKXPort4, SKXPort6, SKXPort23, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;

// Scatter groups 110/112/113 share one port list; only the micro-op count and
// the port 4 / port 237 cycle counts differ between the Z128, Z256 and Z forms.
def SKXWriteResGroup110
    : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 7; let ReleaseAtCycles = [1,2,2,2];
}
def: InstRW<[SKXWriteResGroup110], (instrs
    VPSCATTERDQZ128mr,
    VPSCATTERQQZ128mr,
    VSCATTERDPDZ128mr,
    VSCATTERQPDZ128mr)>;

def SKXWriteResGroup111
    : SchedWriteRes<[SKXPort6, SKXPort06, SKXPort15, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 7; let ReleaseAtCycles = [1,3,1,2];
}
def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>;

def SKXWriteResGroup112
    : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 11; let ReleaseAtCycles = [1,4,4,2];
}
def: InstRW<[SKXWriteResGroup112], (instrs
    VPSCATTERDQZ256mr,
    VPSCATTERQQZ256mr,
    VSCATTERDPDZ256mr,
    VSCATTERQPDZ256mr)>;

def SKXWriteResGroup113
    : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 19; let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs
    VPSCATTERDQZmr,
    VPSCATTERQQZmr,
    VSCATTERDPDZmr,
    VSCATTERQPDZmr)>;

def SKXWriteResGroup114
    : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5, SKXPort237, SKXPort0156]> {
  let Latency = 7; let NumMicroOps = 36; let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;

def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1, SKXPort23]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup118], (instregex
    "PDEP(32|64)rm",
    "PEXT(32|64)rm")>;

def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5, SKXPort23]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex
    "FCOM(P?)(32|64)m",
    "VPBROADCASTB(Z|Z256)rm(b?)",
    "VPBROADCASTW(Z|Z256)rm(b?)")>;
def: InstRW<[SKXWriteResGroup119], (instrs
    VPBROADCASTBYrm,
    VPBROADCASTWYrm,
    VPMOVSXBDYrm,
    VPMOVSXBQYrm,
    VPMOVSXWQYrm)>;

def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23, SKXPort015]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup121], (instrs
    VMOVNTDQAZ256rm,
    VPBLENDDYrmi)>;
// These entries additionally carry the ReadAfterVecYLd read.
def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd], (instregex
    "VBLENDMPD(Z|Z256)rm(b?)",
    "VBLENDMPS(Z|Z256)rm(b?)",
    "VBROADCASTF32X2Z256rm(b?)",
    "VBROADCASTF32X2Zrm(b?)",
    "VBROADCASTF32X4Z256rm(b?)",
    "VBROADCASTF32X4rm(b?)",
    "VBROADCASTF32X8rm(b?)",
    "VBROADCASTF64X2Z128rm(b?)",
    "VBROADCASTF64X2rm(b?)",
    "VBROADCASTF64X4rm(b?)",
    "VBROADCASTI32X2Z256rm(b?)",
    "VBROADCASTI32X2Zrm(b?)",
    "VBROADCASTI32X4Z256rm(b?)",
    "VBROADCASTI32X4rm(b?)",
    "VBROADCASTI32X8rm(b?)",
    "VBROADCASTI64X2Z128rm(b?)",
    "VBROADCASTI64X2rm(b?)",
    "VBROADCASTI64X4rm(b?)",
    "VBROADCASTSD(Z|Z256)rm(b?)",
    "VBROADCASTSS(Z|Z256)rm(b?)",
    "VINSERTF32x4(Z|Z256)rm(b?)",
    "VINSERTF32x8Zrm(b?)",
    "VINSERTF64x2(Z|Z256)rm(b?)",
    "VINSERTF64x4Zrm(b?)",
    "VINSERTI32x4(Z|Z256)rm(b?)",
    "VINSERTI32x8Zrm(b?)",
    "VINSERTI64x2(Z|Z256)rm(b?)",
    "VINSERTI64x4Zrm(b?)",
    "VMOVAPD(Z|Z256)rm(b?)",
    "VMOVAPS(Z|Z256)rm(b?)",
    "VMOVDDUP(Z|Z256)rm(b?)",
    "VMOVDQA32(Z|Z256)rm(b?)",
    "VMOVDQA64(Z|Z256)rm(b?)",
    "VMOVDQU16(Z|Z256)rm(b?)",
    "VMOVDQU32(Z|Z256)rm(b?)",
    "VMOVDQU64(Z|Z256)rm(b?)",
    "VMOVDQU8(Z|Z256)rm(b?)",
    "VMOVSHDUP(Z|Z256)rm(b?)",
    "VMOVSLDUP(Z|Z256)rm(b?)",
    "VMOVUPD(Z|Z256)rm(b?)",
    "VMOVUPS(Z|Z256)rm(b?)",
    "VPADD(B|D|Q|W)Yrm",
    "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPBROADCASTD(Z|Z256)rm(b?)",
    "VPBROADCASTQ(Z|Z256)rm(b?)",
    "VPSUB(B|D|Q|W)Yrm",
    "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPTERNLOGD(Z|Z256)rm(b?)i",
    "VPTERNLOGQ(Z|Z256)rm(b?)i")>;

def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0, SKXPort5, SKXPort23]> {
  let Latency = 8; let NumMicroOps = 4; let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;

def SKXWriteResGroup127
    : SchedWriteRes<[SKXPort23, SKXPort237, SKXPort06, SKXPort0156]> {
  let Latency = 8; let NumMicroOps = 5; let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup127], (instregex
    "RCL(8|16|32|64)m(1|i)",
    "RCR(8|16|32|64)m(1|i)")>;

def SKXWriteResGroup128
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort06]> {
  let Latency = 8; let NumMicroOps = 6; let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex
    "ROL(8|16|32|64)mCL",
    "ROR(8|16|32|64)mCL",
    "SAR(8|16|32|64)mCL",
    "SHL(8|16|32|64)mCL",
    "SHR(8|16|32|64)mCL")>;

// This group is attached through a SchedAlias for WriteADCRMW rather than
// through an InstRW list.
def SKXWriteResGroup130
    : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort06, SKXPort0156]> {
  let Latency = 8; let NumMicroOps = 6; let ReleaseAtCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;

def SKXWriteResGroup131
    : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5, SKXPort237, SKXPort0156]> {
  let Latency = 8; let NumMicroOps = 8; let ReleaseAtCycles = [1,2,1,2,2];
}
def: InstRW<[SKXWriteResGroup131], (instrs
    VPSCATTERQDZ128mr,
    VPSCATTERQDZ256mr,
    VSCATTERQPSZ128mr,
    VSCATTERQPSZ256mr)>;

// Reading the groups below: each SKXWriteResGroupN / SKXWriteGather* record
// is a SchedWriteRes whose template argument lists the port resources it
// consumes. ReleaseAtCycles carries one entry per listed resource, in the
// same order. A group is attached to opcodes with InstRW, either by exact
// name (instrs) or by regular expression (instregex).

// Scatter groups 132-134 share one port list; the SKXPort4 and SKXPort237
// entries double from group to group (4, 8, 16) while the others stay fixed.
def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 12;
  let ReleaseAtCycles = [1,4,1,4,2];
}
def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
                                           VSCATTERDPSZ128mr)>;

def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 20;
  let ReleaseAtCycles = [1,8,1,8,2];
}
def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
                                           VSCATTERDPSZ256mr)>;

def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 36;
  let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>;

def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;

def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
                                           VPMOVSXDQYrm,
                                           VPMOVSXWDYrm,
                                           VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
                                              "VFPCLASSSDZrm(b?)",
                                              "VFPCLASSSSZrm(b?)",
                                              "(V?)PCMPGTQrm",
                                              "VPERMI2DZ128rm(b?)",
                                              "VPERMI2PDZ128rm(b?)",
                                              "VPERMI2PSZ128rm(b?)",
                                              "VPERMI2QZ128rm(b?)",
                                              "VPERMT2DZ128rm(b?)",
                                              "VPERMT2PDZ128rm(b?)",
                                              "VPERMT2PSZ128rm(b?)",
                                              "VPERMT2QZ128rm(b?)",
                                              "VPMAXSQZ128rm(b?)",
                                              "VPMAXUQZ128rm(b?)",
                                              "VPMINSQZ128rm(b?)",
                                              "VPMINUQZ128rm(b?)")>;

def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
                                                "VCMP(SD|SS)Zrm",
                                                "VFPCLASSPDZ128rm(b?)",
                                                "VFPCLASSPSZ128rm(b?)",
                                                "VPCMPBZ128rmi(b?)",
                                                "VPCMPDZ128rmi(b?)",
                                                "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPGT(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPQZ128rmi(b?)",
                                                "VPCMPU(B|D|Q|W)Z128rmi(b?)",
                                                "VPCMPWZ128rmi(b?)",
                                                "VPTESTMBZ128rm(b?)",
                                                "VPTESTMDZ128rm(b?)",
                                                "VPTESTMQZ128rm(b?)",
                                                "VPTESTMWZ128rm(b?)",
                                                "VPTESTNMBZ128rm(b?)",
                                                "VPTESTNMDZ128rm(b?)",
                                                "VPTESTNMQZ128rm(b?)",
                                                "VPTESTNMWZ128rm(b?)")>;

def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
                                              "(V?)CVTPS2PDrm")>;

def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
                                              "(V?)PHSUBSWrm")>;

def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 9;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
                                              "LSL(16|32|64)rm")>;

def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
                                              "ILD_F(16|32|64)m",
                                              "VALIGND(Z|Z256)rm(b?)i",
                                              "VALIGNQ(Z|Z256)rm(b?)i",
                                              "VPMAXSQ(Z|Z256)rm(b?)",
                                              "VPMAXUQ(Z|Z256)rm(b?)",
                                              "VPMINSQ(Z|Z256)rm(b?)",
                                              "VPMINUQ(Z|Z256)rm(b?)")>;

def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
                                                "VCMPPS(Z|Z256)rm(b?)i",
                                                "VFPCLASSPD(Z|Z256)rm(b?)",
                                                "VFPCLASSPS(Z|Z256)rm(b?)",
                                                "VPCMPB(Z|Z256)rmi(b?)",
                                                "VPCMPD(Z|Z256)rmi(b?)",
                                                "VPCMPEQB(Z|Z256)rm(b?)",
                                                "VPCMPEQD(Z|Z256)rm(b?)",
                                                "VPCMPEQQ(Z|Z256)rm(b?)",
                                                "VPCMPEQW(Z|Z256)rm(b?)",
                                                "VPCMPGTB(Z|Z256)rm(b?)",
                                                "VPCMPGTD(Z|Z256)rm(b?)",
                                                "VPCMPGTQ(Z|Z256)rm(b?)",
                                                "VPCMPGTW(Z|Z256)rm(b?)",
                                                "VPCMPQ(Z|Z256)rmi(b?)",
                                                "VPCMPU(B|D|Q|W)Z256rmi(b?)",
                                                "VPCMPU(B|D|Q|W)Zrmi(b?)",
                                                "VPCMPW(Z|Z256)rmi(b?)",
                                                "VPTESTM(B|D|Q|W)Z256rm(b?)",
                                                "VPTESTM(B|D|Q|W)Zrm(b?)",
                                                "VPTESTNM(B|D|Q|W)Z256rm(b?)",
                                                "VPTESTNM(B|D|Q|W)Zrm(b?)")>;

def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
                                              "VCVTDQ2PSZ128rm(b?)",
                                              "(V?)CVTDQ2PSrm",
                                              "VCVTPD2QQZ128rm(b?)",
                                              "VCVTPD2UQQZ128rm(b?)",
                                              "VCVTPH2PSZ128rm(b?)",
                                              "VCVTPS2DQZ128rm(b?)",
                                              "(V?)CVTPS2DQrm",
                                              "VCVTPS2PDZ128rm(b?)",
                                              "VCVTPS2QQZ128rm(b?)",
                                              "VCVTPS2UDQZ128rm(b?)",
                                              "VCVTPS2UQQZ128rm(b?)",
                                              "VCVTQQ2PDZ128rm(b?)",
                                              "VCVTQQ2PSZ128rm(b?)",
                                              "VCVTSS2SDZrm",
                                              "(V?)CVTSS2SDrm",
                                              "VCVTTPD2QQZ128rm(b?)",
                                              "VCVTTPD2UQQZ128rm(b?)",
                                              "VCVTTPS2DQZ128rm(b?)",
                                              "(V?)CVTTPS2DQrm",
                                              "VCVTTPS2QQZ128rm(b?)",
                                              "VCVTTPS2UDQZ128rm(b?)",
                                              "VCVTTPS2UQQZ128rm(b?)",
                                              "VCVTUDQ2PDZ128rm(b?)",
                                              "VCVTUDQ2PSZ128rm(b?)",
                                              "VCVTUQQ2PDZ128rm(b?)",
                                              "VCVTUQQ2PSZ128rm(b?)")>;

def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
                                              "VEXPANDPSZ128rm(b?)",
                                              "VPEXPANDDZ128rm(b?)",
                                              "VPEXPANDQZ128rm(b?)")>;

def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
                                           VPHSUBSWYrm)>;

def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 10;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;

def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;

def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
                                           VCVTPS2PDYrm)>;
def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
                                              "VCVTPH2PS(Z|Z256)rm(b?)",
                                              "VCVTPS2PD(Z|Z256)rm(b?)",
                                              "VCVTQQ2PD(Z|Z256)rm(b?)",
                                              "VCVTQQ2PSZ256rm(b?)",
                                              "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2DQYrm",
                                              "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2QQZ256rm(b?)",
                                              "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2UQQZ256rm(b?)",
                                              "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
                                              "VCVTUQQ2PD(Z|Z256)rm(b?)",
                                              "VCVTUQQ2PSZ256rm(b?)")>;

def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
                                              "VEXPANDPD(Z|Z256)rm(b?)",
                                              "VEXPANDPS(Z|Z256)rm(b?)",
                                              "VPEXPANDD(Z|Z256)rm(b?)",
                                              "VPEXPANDQ(Z|Z256)rm(b?)")>;

def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;

def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm,
                                           CVTTPD2DQrm,
                                           MMX_CVTPD2PIrm,
                                           MMX_CVTTPD2PIrm)>;

def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;

def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
                                              "RCR(16|32|64)rCL")>;

def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;

def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [2,9];
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;

// NOTE(review): groups 174/174z carry latency 15 but sit between the
// latency-11 and latency-12 groups, i.e. out of the latency order the
// neighbouring groups follow. The folded-load VPMULLQ groups in this file
// use 21/22, so the value looks intentional; only the position is unusual.
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;

def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;

def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;

def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
                                              "VCVT(T?)SS2USI64Zrm(b?)")>;

def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
                                              "VCVT(T?)PS2UQQZrm(b?)")>;

def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
                                              "VPERMWZ256rm(b?)",
                                              "VPERMWZrm(b?)")>;

def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;

def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 13;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
                                              "VPERMT2WZ128rm(b?)")>;

def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;

def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
                                              "VCVTPD2UDQZrm(b?)",
                                              "VCVTQQ2PSZrm(b?)",
                                              "VCVTTPD2DQZrm(b?)",
                                              "VCVTTPD2UDQZrm(b?)",
                                              "VCVTUQQ2PSZrm(b?)")>;

def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 14;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
                                              "VPERMI2WZrm(b?)",
                                              "VPERMT2WZ256rm(b?)",
                                              "VPERMT2WZrm(b?)")>;

def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 14;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;

def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
  let Latency = 15;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;

def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 15;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,2,2,1,2];
}
def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;

def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 15;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;

def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 16;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;

// NOTE(review): NumMicroOps (34) is not the sum of ReleaseAtCycles (10) here,
// unlike the neighbouring groups; presumably measured data - confirm.
def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
  let Latency = 12;
  let NumMicroOps = 34;
  let ReleaseAtCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;

def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
  let Latency = 17;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;

def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 21;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;

def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>;

def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;

def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 22;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;

def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
  let Latency = 22;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;

def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
  let Latency = 20;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;

// EVEX gathers. The four groups share one port list and NumMicroOps = 5;
// only Latency and the SKXPort23 entry (2, 4, 8, 16) change between them.
def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 17;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
                                           VGATHERDPDZ128rm, VPGATHERDQZ128rm,
                                           VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;

def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 19;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
                                           VGATHERQPDZ256rm, VPGATHERQQZ256rm,
                                           VGATHERDPSZ128rm, VPGATHERDDZ128rm,
                                           VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;

def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 21;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
                                           VGATHERDPDZrm,    VPGATHERDQZrm,
                                           VGATHERQPDZrm,    VPGATHERQQZrm,
                                           VGATHERQPSZrm,    VPGATHERQDZrm)>;

def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 25;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,16,1,1];
}
def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;

def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>;

def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>;

def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 22;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;

// VEX gathers. Same pattern as the EVEX gather groups: fixed NumMicroOps = 5,
// with Latency and the SKXPort23 entry (2, 4, 8) changing between groups.
def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let Latency = 18;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
                                            VGATHERQPDrm, VPGATHERQQrm,
                                            VGATHERQPSrm, VPGATHERQDrm)>;

def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let Latency = 20;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
                                            VGATHERDPSrm,  VPGATHERDDrm,
                                            VGATHERQPDYrm, VPGATHERQQYrm,
                                            VGATHERQPSYrm, VPGATHERQDYrm)>;

def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let Latency = 22;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;

def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 22;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [5,5,4];
}
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
                                              "VPCONFLICTQZ256rr")>;

def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 23;
  let NumMicroOps = 19;
  let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>;

def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 25;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;

def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 27;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;

def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 29;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [5,5,1,4];
}
def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;

def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 30;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;

def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
  let Latency = 35;
  let NumMicroOps = 23;
  let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri",
                                              "IN(8|16|32)rr")>;

def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 35;
  let NumMicroOps = 23;
  let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir",
                                              "OUT(8|16|32)rr")>;

def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 37;
  let NumMicroOps = 21;
  let ReleaseAtCycles = [9,7,5];
}
def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
                                              "VPCONFLICTQZrr")>;

def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 37;
  let NumMicroOps = 31;
  let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;

def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
  let Latency = 40;
  let NumMicroOps = 18;
  let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>;

def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 41;
  let NumMicroOps = 39;
  let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>;

def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
  let Latency = 42;
  let NumMicroOps = 22;
  let ReleaseAtCycles = [2,20];
}
def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>;

def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 42;
  let NumMicroOps = 40;
  let ReleaseAtCycles = [1,11,1,1,26];
}
def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>;
def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;

def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 44;
  let NumMicroOps = 22;
  let ReleaseAtCycles = [9,7,1,5];
}
def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
                                              "VPCONFLICTQZrm(b?)")>;

def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> {
  let Latency = 62;
  let NumMicroOps = 64;
  let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>;

def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 63;
  let NumMicroOps = 88;
  let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[SKXWriteResGroup259], (instrs FXRSTOR64)>;

def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 63;
  let NumMicroOps = 90;
  let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>;

def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 67;
  let NumMicroOps = 35;
  let ReleaseAtCycles = [17,11,7];
}
def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;

def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 74;
  let NumMicroOps = 36;
  let ReleaseAtCycles = [17,11,1,7];
}
def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;

def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
  let Latency = 75;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;

def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 106;
  let NumMicroOps = 100;
  let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>;

def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 140;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;

def: InstRW<[WriteZero], (instrs CLC)>;


// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
// These can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

// Write used for recognised zero idioms: no resources listed, latency 0.
def SKXWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Every *ZeroIdiom* variant below has the same two-way shape: when
// ZeroIdiomPredicate holds, the instruction gets SKXWriteZeroLatency;
// otherwise (NoSchedPred) it gets the write named in the second SchedVar.
def SKXWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteALU]>
]>;
def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                          XOR32rr, XOR64rr)>;

def SKXWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogic]>
]>;
def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                           XORPDrr, VXORPDrr,
                                           VXORPSZ128rr,
                                           VXORPDZ128rr)>;

def SKXWriteFZeroIdiomY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicY]>
]>;
def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
                                            VXORPSZ256rr, VXORPDZ256rr)>;

def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicZ]>
]>;
def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;

def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                 VPXORDZ128rr, VPXORQZ128rr)>;

def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
                                                 VPXORDZ256rr, VPXORQZ256rr)>;

def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;

def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
                                               PCMPGTDrr, VPCMPGTDrr,
                                               PCMPGTWrr, VPCMPGTWrr)>;

def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
                                               VPCMPGTDYrr,
                                               VPCMPGTWYrr)>;

// Fallback write for the PSUB zero-idiom variant below: one uop on
// SKXPort015 with latency 1.
def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [SKXWritePSUB]>
]>;

def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
                                               PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
                                               PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
                                               PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
                                               VPSUBBYrr, VPSUBBZ256rr,
                                               VPSUBDYrr, VPSUBDZ256rr,
                                               VPSUBQYrr, VPSUBQZ256rr,
                                               VPSUBWYrr, VPSUBWZ256rr,
                                               VPSUBBZrr,
                                               VPSUBDZrr,
                                               VPSUBQZrr,
                                               VPSUBWZrr)>;

// Fallback write for the PCMPGTQ zero-idiom variant below: one uop on
// SKXPort5 with latency 3.
def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [SKXWritePCMPGTQ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
                                                  VPCMPGTQYrr)>;


// CMOVs that use both the Z and C flags require an extra uop.
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
  let Latency = 7;
  let ReleaseAtCycles = [1,2];
  let NumMicroOps = 3;
}

// The CMOV opcodes bound below get the two-uop write only when the
// IsCMOVA*_Or_CMOVBE* predicate holds; every other condition code falls back
// to the generic WriteCMOV (or its folded-load form).
def SKXCMOVA_CMOVBErr :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred,                             [WriteCMOV]>
]>;

def SKXCMOVA_CMOVBErm :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
]>;

def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

// SETCCs that use both the Z and C flags require an extra uop.
def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
  let Latency = 3;
  let ReleaseAtCycles = [1,1,2];
  let NumMicroOps = 4;
}

// Same selection scheme as the CMOV variants, with WriteSETCC and
// WriteSETCCStore as the fallbacks.
def SKXSETA_SETBErr :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
  SchedVar<NoSchedPred,                         [WriteSETCC]>
]>;

def SKXSETA_SETBErm :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
]>;

def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Lists the opcodes this model treats as zero idioms. Each DepBreakingClass
// pairs an opcode list with the predicate that must hold for it; every class
// here uses ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,

    // int variants.
    PXORrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr,

    // xmm int variants.
    VPXORrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VPXORYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,

    // zmm variants, together with the Z128/Z256 forms of the same opcodes.
    VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
    VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
    VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
    VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
    VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
    VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel