//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Skylake Server to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record; every resource and SchedWrite mapping below
// is attached to it through the enclosing `let SchedModel = ... in` scope.
def SkylakeServerModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and Skylake can
  // decode 6 instructions per cycle.
  let IssueWidth = 6;

  // Based on the reorder buffer.
  let MicroOpBufferSize = 224;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  let LoadLatency = 5;
  let MispredictPenalty = 14;

  // This flag is set to allow the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SkylakeServerModel in {

// Skylake Server can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
def SKXPort0 : ProcResource<1>;
def SKXPort1 : ProcResource<1>;
def SKXPort2 : ProcResource<1>;
def SKXPort3 : ProcResource<1>;
def SKXPort4 : ProcResource<1>;
def SKXPort5 : ProcResource<1>;
def SKXPort6 : ProcResource<1>;
def SKXPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
// Two-port groups.
def SKXPort01   : ProcResGroup<[SKXPort0, SKXPort1]>;
def SKXPort23   : ProcResGroup<[SKXPort2, SKXPort3]>;
// Three-port load/store-address group.
def SKXPort237  : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
def SKXPort04   : ProcResGroup<[SKXPort0, SKXPort4]>;
def SKXPort05   : ProcResGroup<[SKXPort0, SKXPort5]>;
def SKXPort06   : ProcResGroup<[SKXPort0, SKXPort6]>;
def SKXPort15   : ProcResGroup<[SKXPort1, SKXPort5]>;
def SKXPort16   : ProcResGroup<[SKXPort1, SKXPort6]>;
def SKXPort56   : ProcResGroup<[SKXPort5, SKXPort6]>;
// Three- and four-port computation groups.
def SKXPort015  : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
def SKXPort056  : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
def SKXPort0156 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;

// Integer division issued on port 0.
def SKXDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SKXFPDivider : ProcResource<1>;

// 60 Entry Unified Scheduler
def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3,
                               SKXPort4, SKXPort5, SKXPort6, SKXPort7]> {
  let BufferSize = 60;
}

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be
// available until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Emits two WriteRes records for a foldable SchedWrite: one for the register
// form (SchedRW) and one for the folded-load form (SchedRW.Folded).
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - per-resource cycle counts, parallel to ExePorts (default [1]).
//   UOps     - micro-op count of the register form (default 1).
//   LoadLat  - latency added for the folded-load form (default 5).
//   LoadUOps - micro-ops added for the folded-load form (default 1).
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 5, int LoadUOps = 1> {
  // Register variant: occupies ExePorts for the cycle counts given in Res.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let Latency = Lat;
    let ReleaseAtCycles = Res;
  }

  // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
  // the latency (default = 5) and LoadUOps micro-ops (default = 1).
  def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
  }
}

// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [SKXPort237, SKXPort4]>;

// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>;   // Integer ALU + flags op.

// Integer multiplication.
// 8- and 16-bit multiplies.
defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul16, [SKXPort1, SKXPort06, SKXPort0156], 4, [1, 1, 2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SKXPort1, SKXPort0156], 4, [1, 1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1, SKXPort0156, SKXPort23], 8, [1, 1, 1], 3>;
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1, SKXPort0156, SKXPort23], 8, [1, 1, 1], 3>;
// 32-bit multiplies.
defm : SKXWriteResPair<WriteIMul32, [SKXPort1, SKXPort06, SKXPort0156], 4, [1, 1, 1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1, SKXPort06, SKXPort0156], 3, [1, 1, 1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
// 64-bit multiplies.
defm : SKXWriteResPair<WriteIMul64, [SKXPort1, SKXPort5], 4, [1, 1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1, SKXPort5], 3, [1, 1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
// WriteIMulH uses no resources and only carries a latency; the folded-load
// form adds the model's LoadLatency to it.
def SKXWriteIMulH : WriteRes<WriteIMulH, []> {
  let Latency = 4;
}
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}

defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1, 1], 2>;
defm : X86WriteRes<WriteCMPXCHG, [SKXPort06, SKXPort0156], 5, [2, 3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW, [SKXPort23, SKXPort06, SKXPort0156, SKXPort237, SKXPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;

// TODO: Why isn't the SKXDivider used?
// Unsigned integer division.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1, 10], 1, 4>;
defm : X86WriteRes<WriteDiv16, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort05, SKXPort0156], 76, [7, 2, 8, 3, 1, 11], 32>;
defm : X86WriteRes<WriteDiv32, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort05, SKXPort0156], 76, [7, 2, 8, 3, 1, 11], 32>;
defm : X86WriteRes<WriteDiv64, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort05, SKXPort0156], 76, [7, 2, 8, 3, 1, 11], 32>;
defm : X86WriteRes<WriteDiv16Ld, [SKXPort0, SKXPort23, SKXDivider], 29, [1, 1, 10], 2>;
defm : X86WriteRes<WriteDiv32Ld, [SKXPort0, SKXPort23, SKXDivider], 29, [1, 1, 10], 2>;
defm : X86WriteRes<WriteDiv64Ld, [SKXPort0, SKXPort23, SKXDivider], 29, [1, 1, 10], 2>;

// Signed integer division.
defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1, 10], 1>;
defm : X86WriteRes<WriteIDiv16, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort06, SKXPort0156], 102, [4, 2, 4, 8, 14, 34], 66>;
defm : X86WriteRes<WriteIDiv32, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort06, SKXPort0156], 102, [4, 2, 4, 8, 14, 34], 66>;
defm : X86WriteRes<WriteIDiv64, [SKXPort0, SKXPort1, SKXPort5, SKXPort6, SKXPort06, SKXPort0156], 102, [4, 2, 4, 8, 14, 34], 66>;
defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156], 28, [2, 4, 1, 1], 8>;
defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156], 28, [2, 4, 1, 1], 8>;
defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156], 28, [2, 4, 1, 1], 8>;
defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156], 28, [2, 4, 1, 1], 8>;

defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;

// LEA instructions can't fold loads.
def : WriteRes<WriteLEA, [SKXPort15]>;

defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>;     // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>;                   // Setcc.
// Setcc with a store destination.
def : WriteRes<WriteSETCCStore, [SKXPort06, SKXPort4, SKXPort237]> {
  let NumMicroOps = 3;
  let Latency = 2;
}
defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
// Bit test and bit test-and-set.
defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06, SKXPort23], 6, [1, 1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156, SKXPort23], 6, [1, 1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06, SKXPort23], 5, [1, 1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156, SKXPort23], 5, [1, 1], 2>;

// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl, [SKXPort1, SKXPort06, SKXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SKXPort1, SKXPort23, SKXPort237, SKXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl, [SKXPort1, SKXPort23, SKXPort237, SKXPort06, SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;

// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteBSR, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06, SKXPort15], 2, [1, 1], 2>;
defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;

// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1, 1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER
// operation. Does not cost anything by itself, only has latency, matching that
// of the WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;

// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23, SKXPort015], 7, [1, 1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23, SKXPort015], 8, [1, 1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237, SKXPort4], 1, [1, 1], 2>;

defm : X86WriteRes<WriteFMaskedStore32, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteFMaskedStore64, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;

defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05, SKXPort0156], 10, [9, 1], 10>;

// Floating point add/sub.
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
// Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;

// Floating point compare.
defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
// Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>;  // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).

// Floating point multiplication.
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
// Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;

// Floating point division.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0, SKXFPDivider], 11, [1, 3], 1, 5>;              // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivX, [SKXPort0, SKXFPDivider], 11, [1, 3], 1, 6>;             // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0, SKXFPDivider], 11, [1, 5], 1, 7>;             // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0, SKXPort5, SKXFPDivider], 18, [2, 1, 10], 3, 7>; // 10-14 cycles.
// Floating point double division.
defm : SKXWriteResPair<WriteFDiv64, [SKXPort0, SKXFPDivider], 14, [1, 4], 1, 5>;            // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0, SKXFPDivider], 14, [1, 4], 1, 6>;           // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0, SKXFPDivider], 14, [1, 8], 1, 7>;           // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0, SKXPort5, SKXFPDivider], 23, [2, 1, 16], 3, 7>; // 10-14 cycles.

// Floating point square root.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0, SKXFPDivider], 12, [1, 3], 1, 5>;
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0, SKXFPDivider], 12, [1, 3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0, SKXFPDivider], 12, [1, 6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0, SKXPort5, SKXFPDivider], 20, [2, 1, 12], 3, 7>;
// Floating point double square root.
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0, SKXFPDivider], 18, [1, 6], 1, 5>;
// Remaining floating point double square root widths.
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0, SKXFPDivider], 18, [1, 6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0, SKXFPDivider], 18, [1, 12], 1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0, SKXPort5, SKXFPDivider], 32, [2, 1, 24], 3, 7>;
// Floating point long double square root.
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0, SKXFPDivider], 21, [1, 7]>;

// Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0, SKXPort5], 4, [2, 1], 3, 7>;

// Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFRsqrtX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRsqrtZ, [SKXPort0, SKXPort5], 9, [2, 1], 3, 7>;

// Fused Multiply Add.
defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
// Floating point double dot product.
defm : SKXWriteResPair<WriteDPPD, [SKXPort5, SKXPort015], 9, [1, 2], 3, 6>;
// Floating point single dot product, register and folded-load forms.
defm : X86WriteRes<WriteDPPS, [SKXPort5, SKXPort01], 13, [1, 3], 4>;
defm : X86WriteRes<WriteDPPSY, [SKXPort5, SKXPort01], 13, [1, 3], 4>;
defm : X86WriteRes<WriteDPPSLd, [SKXPort5, SKXPort01, SKXPort06, SKXPort23], 19, [1, 3, 1, 1], 6>;
defm : X86WriteRes<WriteDPPSYLd, [SKXPort5, SKXPort01, SKXPort06, SKXPort23], 20, [1, 3, 1, 1], 6>;
// Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>;
// Floating point rounding.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>;
// Floating point rounding, 256/512-bit widths.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
// Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
// Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
// Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
// Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
// Floating point vector blends.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteFBlendY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ, [SKXPort015], 1, [1], 1, 7>;
// Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteFVarBlendY, [SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ, [SKXPort015], 2, [2], 2, 7>;

// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Vector integer operations.
// Vector loads.
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23, SKXPort015], 7, [1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23, SKXPort015], 8, [1, 1], 2>;
// Vector stores.
defm : X86WriteRes<WriteVecStore, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237, SKXPort4], 1, [1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort23, SKXPort0, SKXPort4], 2, [1, 1, 1], 2>;
// Vector register moves.
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;

// Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>;
// Vector integer ALU op, wider widths.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
// Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecLogicX, [SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ, [SKXPort05], 1, [1], 1, 7>;
// Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTest, [SKXPort0, SKXPort5], 3, [1, 1], 2, 6>;
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0, SKXPort5], 3, [1, 1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0, SKXPort5], 3, [1, 1], 2, 7>;
// Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
// Vector PMULLD.
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>;
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
// Vector shuffles.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
// Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
// Vector blends.
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>;
// Vector blends, 256/512-bit widths.
defm : SKXWriteResPair<WriteBlendY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ, [SKXPort5], 1, [1], 1, 7>;
// Vector variable blends.
// NOTE(review): the Y and Z entries keep LoadLat = 6, and the Z entry uses
// [1] / 1 micro-op, whereas the other Y/Z entries in this table use
// LoadLat = 7 -- confirm this is intentional.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendY, [SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ, [SKXPort05], 2, [1], 1, 6>;
// Vector MPSAD.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>;
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
// Vector PSADBW.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>;
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
// TODO: 512-bit ops require ports 0/1 to be joined.
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>;
// Vector PHMINPOS.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>;

// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5, SKXPort01], 2, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5, SKXPort01], 4, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5, SKXPort0], 4, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01, SKXPort23], 7, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01, SKXPort23], 8, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0, SKXPort23], 8, [1, 1], 2>;

// Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
// Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>;
// Variable vector shifts, 256/512-bit widths.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;

// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [SKXPort5, SKXPort23]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
// The MOVH/MOVL packed loads are scheduled with the WriteVecInsertLd numbers.
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;

def : WriteRes<WriteVecExtract, [SKXPort0, SKXPort5]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
def : WriteRes<WriteVecExtractSt, [SKXPort4, SKXPort5, SKXPort237]> {
  let NumMicroOps = 3;
  let Latency = 2;
}

// Conversion between integer and float.
// Float -> integer.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;

// Integer -> single precision.
defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
// Integer -> double precision.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;

// Single <-> double precision.
// NOTE(review): WriteCvtPD2PS passes LoadLat = 4 while its neighbours use
// 5 or 7 -- confirm this is intentional.
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5, SKXPort01], 3, [1, 1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort5, SKXPort01], 5, [1, 1], 2, 5>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort5, SKXPort01], 5, [1, 1], 2, 4>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5, SKXPort01], 7, [1, 1], 2, 7>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort5, SKXPort05], 7, [1, 1], 2, 7>;

// Half -> single precision, register and folded-load forms.
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5, SKXPort01], 5, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5, SKXPort01], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5, SKXPort0], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23, SKXPort01], 9, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23, SKXPort01], 10, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23, SKXPort05], 10, [1, 1], 2>;

// Single -> half precision, register and store forms.
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5, SKXPort01], 5, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5, SKXPort01], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5, SKXPort05], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4, SKXPort5, SKXPort237, SKXPort01], 6, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4, SKXPort5, SKXPort237, SKXPort01], 8, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4, SKXPort5, SKXPort237, SKXPort05], 8, [1, 1, 1, 1], 4>;

// String instructions.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
  let NumMicroOps = 3;
  let Latency = 10;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
  let NumMicroOps = 4;
  let Latency = 16;
  let ReleaseAtCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156]> {
  let NumMicroOps = 9;
  let Latency = 19;
  let ReleaseAtCycles = [4, 3, 1, 1];
}
def : WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156]> {
  let NumMicroOps = 10;
  let Latency = 25;
  let ReleaseAtCycles = [4, 3, 1, 1, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
  let NumMicroOps = 3;
  let Latency = 10;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
  let NumMicroOps = 4;
  let Latency = 16;
  let ReleaseAtCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKXPort0, SKXPort5, SKXPort0156]> {
  let NumMicroOps = 8;
  let Latency = 18;
  let ReleaseAtCycles = [4, 3, 1];
}
def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
  let NumMicroOps = 9;
  let Latency = 24;
  let ReleaseAtCycles = [4, 3, 1, 1];
}

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteVecMOVMSKY, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> {
  let Latency = 2;
}

// AES instructions.
// Decryption, encryption.
def : WriteRes<WriteAESDecEnc, [SKXPort0]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23]> {
  let NumMicroOps = 2;
  let Latency = 10;
  let ReleaseAtCycles = [1, 1];
}

// InvMixColumn.
def : WriteRes<WriteAESIMC, [SKXPort0]> {
  let NumMicroOps = 2;
  let Latency = 8;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23]> {
  let NumMicroOps = 3;
  let Latency = 14;
  let ReleaseAtCycles = [2, 1];
}

// Key Generation.
def : WriteRes<WriteAESKeyGen, [SKXPort0, SKXPort5, SKXPort015]> {
  let NumMicroOps = 11;
  let Latency = 20;
  let ReleaseAtCycles = [3, 6, 2];
}
def : WriteRes<WriteAESKeyGenLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015]> {
  let NumMicroOps = 11;
  let Latency = 25;
  let ReleaseAtCycles = [3, 6, 1, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKXPort5]> {
  let NumMicroOps = 1;
  let Latency = 6;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
  let NumMicroOps = 2;
  let Latency = 12;
  let ReleaseAtCycles = [1, 1];
}

// Catch-all for expensive system instructions.
// def WriteSystem : SchedWrite;
def : WriteRes<WriteSystem, [SKXPort0156]> {
  let Latency = 100;
}

// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>;    // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>;     // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVPMOV256, [SKXPort5], 3, [1], 1, 7>;       // 256-bit width packed vector width-changing move.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>;  // 256-bit width vector variable shuffles.

// Old microcoded instructions that nobody uses.
// def WriteMicrocoded : SchedWrite;
def : WriteRes<WriteMicrocoded, [SKXPort0156]> {
  let Latency = 100;
}

// Fence instructions.
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>;

// Load/store MXCSR.
def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def : WriteRes<WriteSTMXCSR, [SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}

// Nop, not very useful except that it provides a model for nops!
// It is given an empty resource list.
def : WriteRes<WriteNop, []>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : SKXWriteResPair<WriteFHAdd,  [SKXPort5,SKXPort01],  6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort01],  6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd,  [SKXPort5,SKXPort05],  3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;

// Remaining instrs.

// Each SKXWriteResGroupN names the ports it uses and sets Latency,
// NumMicroOps and one ReleaseAtCycles entry per listed resource. The InstRW
// records bind a group to instructions, either by regex (instregex) or by
// exact name (instrs).

// Port 0.
def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex
    "KAND(B|D|Q|W)rr",
    "KANDN(B|D|Q|W)rr",
    "KMOV(B|D|Q|W)kk",
    "KNOT(B|D|Q|W)rr",
    "KOR(B|D|Q|W)rr",
    "KXNOR(B|D|Q|W)rr",
    "KXOR(B|D|Q|W)rr",
    "KSET0(B|D|Q|W)", // Same as KXOR
    "KSET1(B|D|Q|W)", // Same as KXNOR
    "MMX_PADDS(B|W)rr",
    "MMX_PADDUS(B|W)rr",
    "MMX_PAVG(B|W)rr",
    "MMX_PCMPEQ(B|D|W)rr",
    "MMX_PCMPGT(B|D|W)rr",
    "MMX_P(MAX|MIN)SWrr",
    "MMX_P(MAX|MIN)UBrr",
    "MMX_PSUBS(B|W)rr",
    "MMX_PSUBUS(B|W)rr",
    "VPMOVB2M(Z|Z128|Z256)rr",
    "VPMOVD2M(Z|Z128|Z256)rr",
    "VPMOVQ2M(Z|Z128|Z256)rr",
    "VPMOVW2M(Z|Z128|Z256)rr")>;

// Port 5.
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex
    "COM(P?)_FST0r",
    "KMOV(B|D|Q|W)kr",
    "UCOM_F(P?)r")>;

// Port 6.
def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;

// Port group 05.
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;

// Port group 06.
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;

// Port group 15.
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;

// Port group 015.
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup9], (instregex
    "VBLENDMPD(Z128|Z256)rr",
    "VBLENDMPS(Z128|Z256)rr",
    "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
    "(V?)PADD(B|D|Q|W)rr",
    "VPBLENDD(Y?)rri",
    "VPBLENDMB(Z128|Z256)rr",
    "VPBLENDMD(Z128|Z256)rr",
    "VPBLENDMQ(Z128|Z256)rr",
    "VPBLENDMW(Z128|Z256)rr",
    "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
    "VPTERNLOGD(Z|Z128|Z256)rri",
    "VPTERNLOGQ(Z|Z128|Z256)rri")>;

// The groups below follow one pattern: a SchedWriteRes naming the ports used,
// with Latency, NumMicroOps and per-resource ReleaseAtCycles, followed by the
// InstRW records that attach it to instructions.

def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs SGDT64m, SIDT64m, SMSW16m, STRm, SYSCALL)>;

def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
  let Latency = 1; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKXWriteResGroup11], (instregex
    "KMOV(B|D|Q|W)mk",
    "ST_FP(32|64|80)m")>;

def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;

def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP, MMX_MOVDQ2Qrr)>;

def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup17], (instrs LFENCE, WAIT, XGETBV)>;

def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;

def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup21], (instrs SFENCE)>;

def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup23], (instrs
    CWD,
    JCXZ, JECXZ, JRCXZ,
    ADC8i8, SBB8i8,
    ADC16i16, SBB16i16,
    ADC32i32, SBB32i32,
    ADC64i32, SBB64i32)>;

def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>;

def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;

def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup28], (instrs
    PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
    STOSB, STOSL, STOSQ, STOSW)>;
def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;

def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
  let Latency = 2; let NumMicroOps = 5; let ReleaseAtCycles = [2,2,1];
}
def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;

def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex
    "KMOV(B|D|Q|W)rk",
    "KORTEST(B|D|Q|W)rr",
    "KTEST(B|D|Q|W)rr")>;

def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex
    "PDEP(32|64)rr",
    "PEXT(32|64)rr")>;

def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex
    "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
    "VALIGND(Z|Z128|Z256)rri",
    "VALIGNQ(Z|Z128|Z256)rri",
    "VPBROADCAST(B|W)rr",
    "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;

def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup33], (instregex
    "KADD(B|D|Q|W)rr",
    "KSHIFTL(B|D|Q|W)ri",
    "KSHIFTR(B|D|Q|W)ri",
    "KUNPCK(BW|DQ|WD)rr",
    "VCMPPD(Z|Z128|Z256)rri",
    "VCMPPS(Z|Z128|Z256)rri",
    "VCMP(SD|SS)Zrr",
    "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
    "VFPCLASS(SD|SS)Zrr",
    "VPCMPB(Z|Z128|Z256)rri",
    "VPCMPD(Z|Z128|Z256)rri",
    "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPQ(Z|Z128|Z256)rri",
    "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
    "VPCMPW(Z|Z128|Z256)rri",
    "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;

def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;

def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;

def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;

def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup41], (instrs
    MMX_PACKSSDWrr,
    MMX_PACKSSWBrr,
    MMX_PACKUSWBrr)>;

def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>;

def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>;

def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup44], (instrs
    RCL8r1, RCL16r1, RCL32r1, RCL64r1,
    RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 8; let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 8; let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;

def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;

def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>;

def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;

def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex
    "VCVTPD2QQ(Z128|Z256)rr",
    "VCVTPD2UQQ(Z128|Z256)rr",
    "VCVTPS2DQ(Y|Z128|Z256)rr",
    "(V?)CVTPS2DQrr",
    "VCVTPS2UDQ(Z128|Z256)rr",
    "VCVTTPD2QQ(Z128|Z256)rr",
    "VCVTTPD2UQQ(Z128|Z256)rr",
    "VCVTTPS2DQ(Z128|Z256)rr",
    "(V?)CVTTPS2DQrr",
    "VCVTTPS2UDQ(Z128|Z256)rr")>;

// Same latency and micro-op count as group 50, but on port group 05; bound
// by exact name to the *Zrr forms listed here.
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs
    VCVTPD2QQZrr,
    VCVTPD2UQQZrr,
    VCVTPS2DQZrr,
    VCVTPS2UDQZrr,
    VCVTTPD2QQZrr,
    VCVTTPD2UQQZrr,
    VCVTTPS2DQZrr,
    VCVTTPS2UDQZrr)>;

def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup51], (instregex
    "VEXPANDPD(Z|Z128|Z256)rr",
    "VEXPANDPS(Z|Z128|Z256)rr",
    "VPEXPANDD(Z|Z128|Z256)rr",
    "VPEXPANDQ(Z|Z128|Z256)rr",
    "VPMOVDB(Z|Z128|Z256)rr",
    "VPMOVDW(Z|Z128|Z256)rr",
    "VPMOVQB(Z|Z128|Z256)rr",
    "VPMOVQW(Z|Z128|Z256)rr",
    "VPMOVSDB(Z|Z128|Z256)rr",
    "VPMOVSDW(Z|Z128|Z256)rr",
    "VPMOVSQB(Z|Z128|Z256)rr",
    "VPMOVSQD(Z|Z128|Z256)rr",
    "VPMOVSQW(Z|Z128|Z256)rr",
    "VPMOVSWB(Z|Z128|Z256)rr",
    "VPMOVUSDB(Z|Z128|Z256)rr",
    "VPMOVUSDW(Z|Z128|Z256)rr",
    "VPMOVUSQB(Z|Z128|Z256)rr",
    "VPMOVUSQD(Z|Z128|Z256)rr",
    "VPMOVUSWB(Z|Z128|Z256)rr",
    "VPMOVWB(Z|Z128|Z256)rr")>;

def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 4; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup54], (instregex
    "IST(T?)_FP(16|32|64)m",
    "IST_F(16|32)m",
    "VPMOVQD(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [4];
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;

// No resources listed: zero latency, four micro-ops, empty ReleaseAtCycles.
def SKXWriteResGroup56 : SchedWriteRes<[]> {
  let Latency = 0; let NumMicroOps = 4; let ReleaseAtCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;

def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;

def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort01]> {
  let Latency = 5; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup61], (instregex
    "MMX_CVT(T?)PD2PIrr",
    "MMX_CVT(T?)PS2PIrr",
    "VCVTDQ2PDZ128rr",
    "VCVTPD2DQZ128rr",
    "(V?)CVT(T?)PD2DQrr",
    "VCVTPD2UDQZ128rr",
    "VCVTPS2PDZ128rr",
    "(V?)CVTPS2PDrr",
    "VCVTPS2QQZ128rr",
    "VCVTPS2UQQZ128rr",
    "VCVTQQ2PSZ128rr",
    "(V?)CVTSI(64)?2SDrr",
    "VCVTSI2SSZrr",
    "(V?)CVTSI2SSrr",
    "VCVTSI(64)?2SDZrr",
    "VCVTSS2SDZrr",
    "(V?)CVTSS2SDrr",
    "VCVTTPD2DQZ128rr",
    "VCVTTPD2UDQZ128rr",
    "VCVTTPS2QQZ128rr",
    "VCVTTPS2UQQZ128rr",
    "VCVTUDQ2PDZ128rr",
    "VCVTUQQ2PSZ128rr",
    "VCVTUSI2SSZrr",
    "VCVTUSI(64)?2SDZrr")>;

def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;

def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;

def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort01]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup65], (instregex
    "VCVTPS2PHZ128mr(b?)",
    "VCVTPS2PHZ256mr(b?)",
    "VCVTPS2PHZmr(b?)")>;

def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup66], (instregex
    "VPMOVDB(Z|Z128|Z256)mr(b?)",
    "VPMOVDW(Z|Z128|Z256)mr(b?)",
    "VPMOVQB(Z|Z128|Z256)mr(b?)",
    "VPMOVQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVWB(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 5; let ReleaseAtCycles = [1,4];
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;

def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 5; let NumMicroOps = 6; let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>;

def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
  let Latency = 6; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup71], (instrs
    VBROADCASTSSrm,
    VPBROADCASTDrm,
    VPBROADCASTQrm)>;
def: InstRW<[SKXWriteResGroup71], (instregex
    "(V?)MOVSHDUPrm",
    "(V?)MOVSLDUPrm",
    "(V?)MOVDDUPrm")>;

// Groups with latencies 6 and 7. Each SchedWriteRes names the ports used and
// sets Latency, NumMicroOps and one ReleaseAtCycles entry per listed
// resource; the InstRW records attach it to instructions.

def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex
    "VCOMPRESSPD(Z|Z128|Z256)rr",
    "VCOMPRESSPS(Z|Z128|Z256)rr",
    "VPCOMPRESSD(Z|Z128|Z256)rr",
    "VPCOMPRESSQ(Z|Z128|Z256)rr",
    "VPERMW(Z|Z128|Z256)rr")>;

def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup73], (instrs
    MMX_PADDSBrm,
    MMX_PADDSWrm,
    MMX_PADDUSBrm,
    MMX_PADDUSWrm,
    MMX_PAVGBrm,
    MMX_PAVGWrm,
    MMX_PCMPEQBrm,
    MMX_PCMPEQDrm,
    MMX_PCMPEQWrm,
    MMX_PCMPGTBrm,
    MMX_PCMPGTDrm,
    MMX_PCMPGTWrm,
    MMX_PMAXSWrm,
    MMX_PMAXUBrm,
    MMX_PMINSWrm,
    MMX_PMINUBrm,
    MMX_PSUBSBrm,
    MMX_PSUBSWrm,
    MMX_PSUBUSBrm,
    MMX_PSUBUSWrm)>;

def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;

def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex
    "ANDN(32|64)rm",
    "MOVBE(16|32|64)rm")>;

def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;

def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;

def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort01]> {
  let Latency = 6; let NumMicroOps = 3; let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup82], (instregex
    "(V?)CVTSI642SSrr",
    "VCVTSI642SSZrr",
    "VCVTUSI642SSZrr")>;

def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;

def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup86], (instregex
    "SAR(8|16|32|64)m(1|i)",
    "SHL(8|16|32|64)m(1|i)",
    "SHR(8|16|32|64)m(1|i)")>;

def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup87], (instregex
    "POP(16|32|64)rmm",
    "PUSH(16|32|64)rmm")>;

def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 6; let NumMicroOps = 6; let ReleaseAtCycles = [1,5];
}
def: InstRW<[SKXWriteResGroup88], (instrs STD)>;

def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
  let Latency = 7; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
def: InstRW<[SKXWriteResGroup89], (instrs
    VBROADCASTF128rm,
    VBROADCASTI128rm,
    VBROADCASTSDYrm,
    VBROADCASTSSYrm,
    VMOVDDUPYrm,
    VMOVSHDUPYrm,
    VMOVSLDUPYrm,
    VPBROADCASTDYrm,
    VPBROADCASTQYrm)>;

def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;

def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex
    "VMOVSDZrm(b?)",
    "VMOVSSZrm(b?)")>;

// Same ports and micro-op count as group 92, with a latency of 6.
def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92a], (instregex
    "(V?)PMOV(SX|ZX)BDrm",
    "(V?)PMOV(SX|ZX)BQrm",
    "(V?)PMOV(SX|ZX)BWrm",
    "(V?)PMOV(SX|ZX)DQrm",
    "(V?)PMOV(SX|ZX)WDrm",
    "(V?)PMOV(SX|ZX)WQrm")>;

def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort01]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex
    "VCVTDQ2PDZ256rr",
    "VCVTPD2DQ(Y|Z256)rr",
    "VCVTPD2UDQZ256rr",
    "VCVTPS2PD(Y|Z256)rr",
    "VCVTPS2QQZ256rr",
    "VCVTPS2UQQZ256rr",
    "VCVTQQ2PSZ256rr",
    "VCVTTPD2DQ(Y|Z256)rr",
    "VCVTTPD2UDQZ256rr",
    "VCVTTPS2QQZ256rr",
    "VCVTTPS2UQQZ256rr",
    "VCVTUDQ2PDZ256rr",
    "VCVTUQQ2PSZ256rr")>;

// Same latency and micro-op count as group 93, with port group 05 in place
// of 01; bound by exact name to the *Zrr forms listed here.
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs
    VCVTDQ2PDZrr,
    VCVTPD2DQZrr,
    VCVTPD2UDQZrr,
    VCVTPS2PDZrr,
    VCVTPS2QQZrr,
    VCVTPS2UQQZrr,
    VCVTQQ2PSZrr,
    VCVTTPD2DQZrr,
    VCVTTPD2UDQZrr,
    VCVTTPS2QQZrr,
    VCVTTPS2UQQZrr,
    VCVTUDQ2PDZrr,
    VCVTUQQ2PSZrr)>;

def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1,1];
}
1339def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm, 1340 VPBLENDDrmi)>; 1341def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd], 1342 (instregex "VBLENDMPDZ128rm(b?)", 1343 "VBLENDMPSZ128rm(b?)", 1344 "VBROADCASTI32X2Z128rm(b?)", 1345 "VBROADCASTSSZ128rm(b?)", 1346 "VINSERT(F|I)128rm", 1347 "VMOVAPDZ128rm(b?)", 1348 "VMOVAPSZ128rm(b?)", 1349 "VMOVDDUPZ128rm(b?)", 1350 "VMOVDQA32Z128rm(b?)", 1351 "VMOVDQA64Z128rm(b?)", 1352 "VMOVDQU16Z128rm(b?)", 1353 "VMOVDQU32Z128rm(b?)", 1354 "VMOVDQU64Z128rm(b?)", 1355 "VMOVDQU8Z128rm(b?)", 1356 "VMOVSHDUPZ128rm(b?)", 1357 "VMOVSLDUPZ128rm(b?)", 1358 "VMOVUPDZ128rm(b?)", 1359 "VMOVUPSZ128rm(b?)", 1360 "VPADD(B|D|Q|W)Z128rm(b?)", 1361 "(V?)PADD(B|D|Q|W)rm", 1362 "VPBLENDM(B|D|Q|W)Z128rm(b?)", 1363 "VPBROADCASTDZ128rm(b?)", 1364 "VPBROADCASTQZ128rm(b?)", 1365 "VPSUB(B|D|Q|W)Z128rm(b?)", 1366 "(V?)PSUB(B|D|Q|W)rm", 1367 "VPTERNLOGDZ128rm(b?)i", 1368 "VPTERNLOGQZ128rm(b?)i")>; 1369 1370def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1371 let Latency = 7; 1372 let NumMicroOps = 3; 1373 let ReleaseAtCycles = [2,1]; 1374} 1375def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm, 1376 MMX_PACKSSWBrm, 1377 MMX_PACKUSWBrm)>; 1378 1379def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> { 1380 let Latency = 7; 1381 let NumMicroOps = 3; 1382 let ReleaseAtCycles = [2,1]; 1383} 1384def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2WZ128rr", 1385 "VPERMI2WZ256rr", 1386 "VPERMI2WZrr", 1387 "VPERMT2WZ128rr", 1388 "VPERMT2WZ256rr", 1389 "VPERMT2WZrr")>; 1390 1391def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> { 1392 let Latency = 7; 1393 let NumMicroOps = 3; 1394 let ReleaseAtCycles = [1,2]; 1395} 1396def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64, 1397 SCASB, SCASL, SCASQ, SCASW)>; 1398 1399def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort01]> { 1400 let Latency = 7; 1401 let NumMicroOps = 3; 1402 let ReleaseAtCycles = [1,1,1]; 1403} 1404def: 
InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr", 1405 "VCVT(T?)SS2USI64Zrr")>; 1406 1407def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { 1408 let Latency = 7; 1409 let NumMicroOps = 3; 1410 let ReleaseAtCycles = [1,1,1]; 1411} 1412def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>; 1413 1414def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> { 1415 let Latency = 7; 1416 let NumMicroOps = 3; 1417 let ReleaseAtCycles = [1,1,1]; 1418} 1419def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>; 1420 1421def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> { 1422 let Latency = 7; 1423 let NumMicroOps = 3; 1424 let ReleaseAtCycles = [1,1,1]; 1425} 1426def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>; 1427 1428def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { 1429 let Latency = 7; 1430 let NumMicroOps = 4; 1431 let ReleaseAtCycles = [1,2,1]; 1432} 1433def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)", 1434 "VCOMPRESSPS(Z|Z128|Z256)mr(b?)", 1435 "VPCOMPRESSD(Z|Z128|Z256)mr(b?)", 1436 "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>; 1437 1438def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { 1439 let Latency = 7; 1440 let NumMicroOps = 5; 1441 let ReleaseAtCycles = [1,1,1,2]; 1442} 1443def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)", 1444 "ROR(8|16|32|64)m(1|i)")>; 1445 1446def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> { 1447 let Latency = 2; 1448 let NumMicroOps = 2; 1449 let ReleaseAtCycles = [2]; 1450} 1451def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, 1452 ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; 1453 1454def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { 1455 let Latency = 7; 1456 let NumMicroOps = 5; 1457 let ReleaseAtCycles = [1,1,1,2]; 1458} 1459def: InstRW<[SKXWriteResGroup108], 
(instregex "XADD(8|16|32|64)rm")>; 1460 1461def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 1462 let Latency = 7; 1463 let NumMicroOps = 5; 1464 let ReleaseAtCycles = [1,1,1,1,1]; 1465} 1466def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>; 1467def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>; 1468 1469def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1470 let Latency = 7; 1471 let NumMicroOps = 7; 1472 let ReleaseAtCycles = [1,2,2,2]; 1473} 1474def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr, 1475 VPSCATTERQQZ128mr, 1476 VSCATTERDPDZ128mr, 1477 VSCATTERQPDZ128mr)>; 1478 1479def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> { 1480 let Latency = 7; 1481 let NumMicroOps = 7; 1482 let ReleaseAtCycles = [1,3,1,2]; 1483} 1484def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>; 1485 1486def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1487 let Latency = 7; 1488 let NumMicroOps = 11; 1489 let ReleaseAtCycles = [1,4,4,2]; 1490} 1491def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr, 1492 VPSCATTERQQZ256mr, 1493 VSCATTERDPDZ256mr, 1494 VSCATTERQPDZ256mr)>; 1495 1496def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1497 let Latency = 7; 1498 let NumMicroOps = 19; 1499 let ReleaseAtCycles = [1,8,8,2]; 1500} 1501def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr, 1502 VPSCATTERQDZmr, 1503 VPSCATTERQQZmr, 1504 VSCATTERDPDZmr, 1505 VSCATTERQPSZmr, 1506 VSCATTERQPDZmr)>; 1507 1508def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1509 let Latency = 7; 1510 let NumMicroOps = 36; 1511 let ReleaseAtCycles = [1,16,1,16,2]; 1512} 1513def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>; 1514 1515def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { 1516 let Latency = 8; 1517 let 
NumMicroOps = 2; 1518 let ReleaseAtCycles = [1,1]; 1519} 1520def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm", 1521 "PEXT(32|64)rm")>; 1522 1523def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1524 let Latency = 8; 1525 let NumMicroOps = 2; 1526 let ReleaseAtCycles = [1,1]; 1527} 1528def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m", 1529 "VPBROADCASTB(Z|Z256)rm(b?)", 1530 "VPBROADCASTW(Z|Z256)rm(b?)")>; 1531def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm, 1532 VPBROADCASTWYrm, 1533 VPMOVSXBDYrm, 1534 VPMOVSXBQYrm, 1535 VPMOVSXWQYrm)>; 1536 1537def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1538 let Latency = 8; 1539 let NumMicroOps = 2; 1540 let ReleaseAtCycles = [1,1]; 1541} 1542def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm, 1543 VPBLENDDYrmi)>; 1544def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd], 1545 (instregex "VBLENDMPD(Z|Z256)rm(b?)", 1546 "VBLENDMPS(Z|Z256)rm(b?)", 1547 "VBROADCASTF32X2Z256rm(b?)", 1548 "VBROADCASTF32X2Zrm(b?)", 1549 "VBROADCASTF32X4Z256rm(b?)", 1550 "VBROADCASTF32X4rm(b?)", 1551 "VBROADCASTF32X8rm(b?)", 1552 "VBROADCASTF64X2Z128rm(b?)", 1553 "VBROADCASTF64X2rm(b?)", 1554 "VBROADCASTF64X4rm(b?)", 1555 "VBROADCASTI32X2Z256rm(b?)", 1556 "VBROADCASTI32X2Zrm(b?)", 1557 "VBROADCASTI32X4Z256rm(b?)", 1558 "VBROADCASTI32X4rm(b?)", 1559 "VBROADCASTI32X8rm(b?)", 1560 "VBROADCASTI64X2Z128rm(b?)", 1561 "VBROADCASTI64X2rm(b?)", 1562 "VBROADCASTI64X4rm(b?)", 1563 "VBROADCASTSD(Z|Z256)rm(b?)", 1564 "VBROADCASTSS(Z|Z256)rm(b?)", 1565 "VINSERTF32x4(Z|Z256)rm(b?)", 1566 "VINSERTF32x8Zrm(b?)", 1567 "VINSERTF64x2(Z|Z256)rm(b?)", 1568 "VINSERTF64x4Zrm(b?)", 1569 "VINSERTI32x4(Z|Z256)rm(b?)", 1570 "VINSERTI32x8Zrm(b?)", 1571 "VINSERTI64x2(Z|Z256)rm(b?)", 1572 "VINSERTI64x4Zrm(b?)", 1573 "VMOVAPD(Z|Z256)rm(b?)", 1574 "VMOVAPS(Z|Z256)rm(b?)", 1575 "VMOVDDUP(Z|Z256)rm(b?)", 1576 "VMOVDQA32(Z|Z256)rm(b?)", 1577 "VMOVDQA64(Z|Z256)rm(b?)", 1578 "VMOVDQU16(Z|Z256)rm(b?)", 1579 
"VMOVDQU32(Z|Z256)rm(b?)", 1580 "VMOVDQU64(Z|Z256)rm(b?)", 1581 "VMOVDQU8(Z|Z256)rm(b?)", 1582 "VMOVSHDUP(Z|Z256)rm(b?)", 1583 "VMOVSLDUP(Z|Z256)rm(b?)", 1584 "VMOVUPD(Z|Z256)rm(b?)", 1585 "VMOVUPS(Z|Z256)rm(b?)", 1586 "VPADD(B|D|Q|W)Yrm", 1587 "VPADD(B|D|Q|W)(Z|Z256)rm(b?)", 1588 "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)", 1589 "VPBROADCASTD(Z|Z256)rm(b?)", 1590 "VPBROADCASTQ(Z|Z256)rm(b?)", 1591 "VPSUB(B|D|Q|W)Yrm", 1592 "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)", 1593 "VPTERNLOGD(Z|Z256)rm(b?)i", 1594 "VPTERNLOGQ(Z|Z256)rm(b?)i")>; 1595 1596def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1597 let Latency = 8; 1598 let NumMicroOps = 4; 1599 let ReleaseAtCycles = [1,2,1]; 1600} 1601def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>; 1602 1603def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1604 let Latency = 8; 1605 let NumMicroOps = 5; 1606 let ReleaseAtCycles = [1,1,1,2]; 1607} 1608def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)", 1609 "RCR(8|16|32|64)m(1|i)")>; 1610 1611def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { 1612 let Latency = 8; 1613 let NumMicroOps = 6; 1614 let ReleaseAtCycles = [1,1,1,3]; 1615} 1616def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL", 1617 "ROR(8|16|32|64)mCL", 1618 "SAR(8|16|32|64)mCL", 1619 "SHL(8|16|32|64)mCL", 1620 "SHR(8|16|32|64)mCL")>; 1621 1622def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1623 let Latency = 8; 1624 let NumMicroOps = 6; 1625 let ReleaseAtCycles = [1,1,1,2,1]; 1626} 1627def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>; 1628 1629def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1630 let Latency = 8; 1631 let NumMicroOps = 8; 1632 let ReleaseAtCycles = [1,2,1,2,2]; 1633} 1634def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr, 1635 VPSCATTERQDZ256mr, 1636 VSCATTERQPSZ128mr, 
1637 VSCATTERQPSZ256mr)>; 1638 1639def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1640 let Latency = 8; 1641 let NumMicroOps = 12; 1642 let ReleaseAtCycles = [1,4,1,4,2]; 1643} 1644def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr, 1645 VSCATTERDPSZ128mr)>; 1646 1647def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1648 let Latency = 8; 1649 let NumMicroOps = 20; 1650 let ReleaseAtCycles = [1,8,1,8,2]; 1651} 1652def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr, 1653 VSCATTERDPSZ256mr)>; 1654 1655def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1656 let Latency = 8; 1657 let NumMicroOps = 36; 1658 let ReleaseAtCycles = [1,16,1,16,2]; 1659} 1660def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>; 1661 1662def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> { 1663 let Latency = 9; 1664 let NumMicroOps = 2; 1665 let ReleaseAtCycles = [1,1]; 1666} 1667def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>; 1668 1669def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1670 let Latency = 9; 1671 let NumMicroOps = 2; 1672 let ReleaseAtCycles = [1,1]; 1673} 1674def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm, 1675 VPMOVSXDQYrm, 1676 VPMOVSXWDYrm, 1677 VPMOVZXWDYrm)>; 1678def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i", 1679 "VFPCLASSSDZrm(b?)", 1680 "VFPCLASSSSZrm(b?)", 1681 "(V?)PCMPGTQrm", 1682 "VPERMI2DZ128rm(b?)", 1683 "VPERMI2PDZ128rm(b?)", 1684 "VPERMI2PSZ128rm(b?)", 1685 "VPERMI2QZ128rm(b?)", 1686 "VPERMT2DZ128rm(b?)", 1687 "VPERMT2PDZ128rm(b?)", 1688 "VPERMT2PSZ128rm(b?)", 1689 "VPERMT2QZ128rm(b?)", 1690 "VPMAXSQZ128rm(b?)", 1691 "VPMAXUQZ128rm(b?)", 1692 "VPMINSQZ128rm(b?)", 1693 "VPMINUQZ128rm(b?)")>; 1694 1695def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1696 let Latency = 10; 1697 let NumMicroOps = 2; 1698 let 
ReleaseAtCycles = [1,1]; 1699} 1700def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i", 1701 "VCMP(SD|SS)Zrm", 1702 "VFPCLASSPDZ128rm(b?)", 1703 "VFPCLASSPSZ128rm(b?)", 1704 "VPCMPBZ128rmi(b?)", 1705 "VPCMPDZ128rmi(b?)", 1706 "VPCMPEQ(B|D|Q|W)Z128rm(b?)", 1707 "VPCMPGT(B|D|Q|W)Z128rm(b?)", 1708 "VPCMPQZ128rmi(b?)", 1709 "VPCMPU(B|D|Q|W)Z128rmi(b?)", 1710 "VPCMPWZ128rmi(b?)", 1711 "VPTESTMBZ128rm(b?)", 1712 "VPTESTMDZ128rm(b?)", 1713 "VPTESTMQZ128rm(b?)", 1714 "VPTESTMWZ128rm(b?)", 1715 "VPTESTNMBZ128rm(b?)", 1716 "VPTESTNMDZ128rm(b?)", 1717 "VPTESTNMQZ128rm(b?)", 1718 "VPTESTNMWZ128rm(b?)")>; 1719 1720def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> { 1721 let Latency = 9; 1722 let NumMicroOps = 2; 1723 let ReleaseAtCycles = [1,1]; 1724} 1725def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm", 1726 "(V?)CVTPS2PDrm")>; 1727 1728def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { 1729 let Latency = 9; 1730 let NumMicroOps = 4; 1731 let ReleaseAtCycles = [2,1,1]; 1732} 1733def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm", 1734 "(V?)PHSUBSWrm")>; 1735 1736def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { 1737 let Latency = 9; 1738 let NumMicroOps = 5; 1739 let ReleaseAtCycles = [1,2,1,1]; 1740} 1741def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm", 1742 "LSL(16|32|64)rm")>; 1743 1744def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1745 let Latency = 10; 1746 let NumMicroOps = 2; 1747 let ReleaseAtCycles = [1,1]; 1748} 1749def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>; 1750def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", 1751 "ILD_F(16|32|64)m", 1752 "VALIGND(Z|Z256)rm(b?)i", 1753 "VALIGNQ(Z|Z256)rm(b?)i", 1754 "VPMAXSQ(Z|Z256)rm(b?)", 1755 "VPMAXUQ(Z|Z256)rm(b?)", 1756 "VPMINSQ(Z|Z256)rm(b?)", 1757 "VPMINUQ(Z|Z256)rm(b?)")>; 1758 1759def SKXWriteResGroup148_2 : 
SchedWriteRes<[SKXPort5,SKXPort23]> { 1760 let Latency = 11; 1761 let NumMicroOps = 2; 1762 let ReleaseAtCycles = [1,1]; 1763} 1764def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i", 1765 "VCMPPS(Z|Z256)rm(b?)i", 1766 "VFPCLASSPD(Z|Z256)rm(b?)", 1767 "VFPCLASSPS(Z|Z256)rm(b?)", 1768 "VPCMPB(Z|Z256)rmi(b?)", 1769 "VPCMPD(Z|Z256)rmi(b?)", 1770 "VPCMPEQB(Z|Z256)rm(b?)", 1771 "VPCMPEQD(Z|Z256)rm(b?)", 1772 "VPCMPEQQ(Z|Z256)rm(b?)", 1773 "VPCMPEQW(Z|Z256)rm(b?)", 1774 "VPCMPGTB(Z|Z256)rm(b?)", 1775 "VPCMPGTD(Z|Z256)rm(b?)", 1776 "VPCMPGTQ(Z|Z256)rm(b?)", 1777 "VPCMPGTW(Z|Z256)rm(b?)", 1778 "VPCMPQ(Z|Z256)rmi(b?)", 1779 "VPCMPU(B|D|Q|W)Z256rmi(b?)", 1780 "VPCMPU(B|D|Q|W)Zrmi(b?)", 1781 "VPCMPW(Z|Z256)rmi(b?)", 1782 "VPTESTM(B|D|Q|W)Z256rm(b?)", 1783 "VPTESTM(B|D|Q|W)Zrm(b?)", 1784 "VPTESTNM(B|D|Q|W)Z256rm(b?)", 1785 "VPTESTNM(B|D|Q|W)Zrm(b?)")>; 1786 1787def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> { 1788 let Latency = 10; 1789 let NumMicroOps = 2; 1790 let ReleaseAtCycles = [1,1]; 1791} 1792def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)", 1793 "VCVTDQ2PSZ128rm(b?)", 1794 "(V?)CVTDQ2PSrm", 1795 "VCVTPD2QQZ128rm(b?)", 1796 "VCVTPD2UQQZ128rm(b?)", 1797 "VCVTPH2PSZ128rm(b?)", 1798 "VCVTPS2DQZ128rm(b?)", 1799 "(V?)CVTPS2DQrm", 1800 "VCVTPS2PDZ128rm(b?)", 1801 "VCVTPS2QQZ128rm(b?)", 1802 "VCVTPS2UDQZ128rm(b?)", 1803 "VCVTPS2UQQZ128rm(b?)", 1804 "VCVTQQ2PDZ128rm(b?)", 1805 "VCVTQQ2PSZ128rm(b?)", 1806 "VCVTSS2SDZrm", 1807 "(V?)CVTSS2SDrm", 1808 "VCVTTPD2QQZ128rm(b?)", 1809 "VCVTTPD2UQQZ128rm(b?)", 1810 "VCVTTPS2DQZ128rm(b?)", 1811 "(V?)CVTTPS2DQrm", 1812 "VCVTTPS2QQZ128rm(b?)", 1813 "VCVTTPS2UDQZ128rm(b?)", 1814 "VCVTTPS2UQQZ128rm(b?)", 1815 "VCVTUDQ2PDZ128rm(b?)", 1816 "VCVTUDQ2PSZ128rm(b?)", 1817 "VCVTUQQ2PDZ128rm(b?)", 1818 "VCVTUQQ2PSZ128rm(b?)")>; 1819 1820def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1821 let Latency = 10; 1822 let NumMicroOps = 3; 1823 let ReleaseAtCycles = [2,1]; 1824} 1825def: 
InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", 1826 "VEXPANDPSZ128rm(b?)", 1827 "VPEXPANDDZ128rm(b?)", 1828 "VPEXPANDQZ128rm(b?)")>; 1829 1830def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { 1831 let Latency = 10; 1832 let NumMicroOps = 4; 1833 let ReleaseAtCycles = [2,1,1]; 1834} 1835def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm, 1836 VPHSUBSWYrm)>; 1837 1838def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1839 let Latency = 10; 1840 let NumMicroOps = 8; 1841 let ReleaseAtCycles = [1,1,1,1,1,3]; 1842} 1843def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>; 1844 1845def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { 1846 let Latency = 11; 1847 let NumMicroOps = 2; 1848 let ReleaseAtCycles = [1,1]; 1849} 1850def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>; 1851 1852def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> { 1853 let Latency = 11; 1854 let NumMicroOps = 2; 1855 let ReleaseAtCycles = [1,1]; 1856} 1857def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm, 1858 VCVTPS2PDYrm)>; 1859def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)", 1860 "VCVTPH2PS(Z|Z256)rm(b?)", 1861 "VCVTPS2PD(Z|Z256)rm(b?)", 1862 "VCVTQQ2PD(Z|Z256)rm(b?)", 1863 "VCVTQQ2PSZ256rm(b?)", 1864 "VCVT(T?)PD2QQ(Z|Z256)rm(b?)", 1865 "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)", 1866 "VCVT(T?)PS2DQYrm", 1867 "VCVT(T?)PS2DQ(Z|Z256)rm(b?)", 1868 "VCVT(T?)PS2QQZ256rm(b?)", 1869 "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)", 1870 "VCVT(T?)PS2UQQZ256rm(b?)", 1871 "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)", 1872 "VCVTUQQ2PD(Z|Z256)rm(b?)", 1873 "VCVTUQQ2PSZ256rm(b?)")>; 1874 1875def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1876 let Latency = 11; 1877 let NumMicroOps = 3; 1878 let ReleaseAtCycles = [2,1]; 1879} 1880def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", 1881 "VEXPANDPD(Z|Z256)rm(b?)", 1882 
"VEXPANDPS(Z|Z256)rm(b?)", 1883 "VPEXPANDD(Z|Z256)rm(b?)", 1884 "VPEXPANDQ(Z|Z256)rm(b?)")>; 1885 1886def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1887 let Latency = 11; 1888 let NumMicroOps = 3; 1889 let ReleaseAtCycles = [1,1,1]; 1890} 1891def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>; 1892 1893def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> { 1894 let Latency = 11; 1895 let NumMicroOps = 3; 1896 let ReleaseAtCycles = [1,1,1]; 1897} 1898def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm, 1899 CVTTPD2DQrm, 1900 MMX_CVTPD2PIrm, 1901 MMX_CVTTPD2PIrm)>; 1902 1903def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1904 let Latency = 11; 1905 let NumMicroOps = 4; 1906 let ReleaseAtCycles = [2,1,1]; 1907} 1908def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; 1909 1910def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { 1911 let Latency = 11; 1912 let NumMicroOps = 7; 1913 let ReleaseAtCycles = [2,3,2]; 1914} 1915def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL", 1916 "RCR(16|32|64)rCL")>; 1917 1918def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { 1919 let Latency = 11; 1920 let NumMicroOps = 9; 1921 let ReleaseAtCycles = [1,5,1,2]; 1922} 1923def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>; 1924 1925def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { 1926 let Latency = 11; 1927 let NumMicroOps = 11; 1928 let ReleaseAtCycles = [2,9]; 1929} 1930def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; 1931 1932def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> { 1933 let Latency = 15; 1934 let NumMicroOps = 3; 1935 let ReleaseAtCycles = [3]; 1936} 1937def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; 1938 1939def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> { 1940 let Latency = 15; 1941 let NumMicroOps = 3; 1942 let ReleaseAtCycles 
= [3]; 1943} 1944def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>; 1945 1946def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1947 let Latency = 12; 1948 let NumMicroOps = 3; 1949 let ReleaseAtCycles = [2,1]; 1950} 1951def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>; 1952 1953def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> { 1954 let Latency = 12; 1955 let NumMicroOps = 3; 1956 let ReleaseAtCycles = [1,1,1]; 1957} 1958def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)", 1959 "VCVT(T?)SS2USI64Zrm(b?)")>; 1960 1961def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> { 1962 let Latency = 12; 1963 let NumMicroOps = 3; 1964 let ReleaseAtCycles = [1,1,1]; 1965} 1966def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)", 1967 "VCVT(T?)PS2UQQZrm(b?)")>; 1968 1969def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1970 let Latency = 13; 1971 let NumMicroOps = 3; 1972 let ReleaseAtCycles = [2,1]; 1973} 1974def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m", 1975 "VPERMWZ256rm(b?)", 1976 "VPERMWZrm(b?)")>; 1977 1978def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1979 let Latency = 13; 1980 let NumMicroOps = 3; 1981 let ReleaseAtCycles = [1,1,1]; 1982} 1983def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>; 1984 1985def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1986 let Latency = 13; 1987 let NumMicroOps = 4; 1988 let ReleaseAtCycles = [2,1,1]; 1989} 1990def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)", 1991 "VPERMT2WZ128rm(b?)")>; 1992 1993def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1994 let Latency = 14; 1995 let NumMicroOps = 3; 1996 let ReleaseAtCycles = [1,1,1]; 1997} 1998def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>; 1999 2000def SKXWriteResGroup188 : 
SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> { 2001 let Latency = 14; 2002 let NumMicroOps = 3; 2003 let ReleaseAtCycles = [1,1,1]; 2004} 2005def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", 2006 "VCVTPD2UDQZrm(b?)", 2007 "VCVTQQ2PSZrm(b?)", 2008 "VCVTTPD2DQZrm(b?)", 2009 "VCVTTPD2UDQZrm(b?)", 2010 "VCVTUQQ2PSZrm(b?)")>; 2011 2012def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 2013 let Latency = 14; 2014 let NumMicroOps = 4; 2015 let ReleaseAtCycles = [2,1,1]; 2016} 2017def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)", 2018 "VPERMI2WZrm(b?)", 2019 "VPERMT2WZ256rm(b?)", 2020 "VPERMT2WZrm(b?)")>; 2021 2022def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { 2023 let Latency = 14; 2024 let NumMicroOps = 10; 2025 let ReleaseAtCycles = [2,4,1,3]; 2026} 2027def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>; 2028 2029def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> { 2030 let Latency = 15; 2031 let NumMicroOps = 1; 2032 let ReleaseAtCycles = [1]; 2033} 2034def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>; 2035 2036def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2037 let Latency = 15; 2038 let NumMicroOps = 8; 2039 let ReleaseAtCycles = [1,2,2,1,2]; 2040} 2041def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>; 2042 2043def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2044 let Latency = 15; 2045 let NumMicroOps = 10; 2046 let ReleaseAtCycles = [1,1,1,5,1,1]; 2047} 2048def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; 2049 2050def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2051 let Latency = 16; 2052 let NumMicroOps = 14; 2053 let ReleaseAtCycles = [1,1,1,4,2,5]; 2054} 2055def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>; 2056 2057def 
SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> { 2058 let Latency = 12; 2059 let NumMicroOps = 34; 2060 let ReleaseAtCycles = [1, 4, 5]; 2061} 2062def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>; 2063 2064def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { 2065 let Latency = 17; 2066 let NumMicroOps = 15; 2067 let ReleaseAtCycles = [2,1,2,4,2,4]; 2068} 2069def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>; 2070 2071def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> { 2072 let Latency = 21; 2073 let NumMicroOps = 4; 2074 let ReleaseAtCycles = [1,3]; 2075} 2076def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>; 2077 2078def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> { 2079 let Latency = 18; 2080 let NumMicroOps = 8; 2081 let ReleaseAtCycles = [1,1,1,5]; 2082} 2083def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>; 2084 2085def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2086 let Latency = 18; 2087 let NumMicroOps = 11; 2088 let ReleaseAtCycles = [2,1,1,4,1,2]; 2089} 2090def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>; 2091 2092def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> { 2093 let Latency = 22; 2094 let NumMicroOps = 4; 2095 let ReleaseAtCycles = [1,3]; 2096} 2097def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>; 2098 2099def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> { 2100 let Latency = 22; 2101 let NumMicroOps = 4; 2102 let ReleaseAtCycles = [1,3]; 2103} 2104def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>; 2105 2106def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> { 2107 let Latency = 20; 2108 let NumMicroOps = 1; 2109 let ReleaseAtCycles = [1]; 2110} 2111def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>; 2112 2113def SKXWriteGatherEVEX2 
: SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2114 let Latency = 17; 2115 let NumMicroOps = 5; // 2 uops perform multiple loads 2116 let ReleaseAtCycles = [1,2,1,1]; 2117} 2118def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm, 2119 VGATHERDPDZ128rm, VPGATHERDQZ128rm, 2120 VGATHERQPDZ128rm, VPGATHERQQZ128rm)>; 2121 2122def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2123 let Latency = 19; 2124 let NumMicroOps = 5; // 2 uops perform multiple loads 2125 let ReleaseAtCycles = [1,4,1,1]; 2126} 2127def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm, 2128 VGATHERQPDZ256rm, VPGATHERQQZ256rm, 2129 VGATHERDPSZ128rm, VPGATHERDDZ128rm, 2130 VGATHERDPDZ256rm, VPGATHERDQZ256rm)>; 2131 2132def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2133 let Latency = 21; 2134 let NumMicroOps = 5; // 2 uops perform multiple loads 2135 let ReleaseAtCycles = [1,8,1,1]; 2136} 2137def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm, 2138 VGATHERDPDZrm, VPGATHERDQZrm, 2139 VGATHERQPDZrm, VPGATHERQQZrm, 2140 VGATHERQPSZrm, VPGATHERQDZrm)>; 2141 2142def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2143 let Latency = 25; 2144 let NumMicroOps = 5; // 2 uops perform multiple loads 2145 let ReleaseAtCycles = [1,16,1,1]; 2146} 2147def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; 2148 2149def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2150 let Latency = 20; 2151 let NumMicroOps = 8; 2152 let ReleaseAtCycles = [1,1,1,1,1,1,2]; 2153} 2154def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>; 2155 2156def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> { 2157 let Latency = 20; 2158 let NumMicroOps = 10; 2159 let ReleaseAtCycles = [1,2,7]; 2160} 2161def: 
InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>; 2162 2163def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> { 2164 let Latency = 22; 2165 let NumMicroOps = 2; 2166 let ReleaseAtCycles = [1,1]; 2167} 2168def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>; 2169 2170def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2171 let Latency = 18; 2172 let NumMicroOps = 5; // 2 uops perform multiple loads 2173 let ReleaseAtCycles = [1,2,1,1]; 2174} 2175def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm, 2176 VGATHERQPDrm, VPGATHERQQrm, 2177 VGATHERQPSrm, VPGATHERQDrm)>; 2178 2179def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2180 let Latency = 20; 2181 let NumMicroOps = 5; // 2 uops perform multiple loads 2182 let ReleaseAtCycles = [1,4,1,1]; 2183} 2184def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm, 2185 VGATHERDPSrm, VPGATHERDDrm, 2186 VGATHERQPDYrm, VPGATHERQQYrm, 2187 VGATHERQPSYrm, VPGATHERQDYrm)>; 2188 2189def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2190 let Latency = 22; 2191 let NumMicroOps = 5; // 2 uops perform multiple loads 2192 let ReleaseAtCycles = [1,8,1,1]; 2193} 2194def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; 2195 2196def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { 2197 let Latency = 22; 2198 let NumMicroOps = 14; 2199 let ReleaseAtCycles = [5,5,4]; 2200} 2201def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", 2202 "VPCONFLICTQZ256rr")>; 2203 2204def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2205 let Latency = 23; 2206 let NumMicroOps = 19; 2207 let ReleaseAtCycles = [2,1,4,1,1,4,6]; 2208} 2209def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>; 2210 2211def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 
2212 let Latency = 25; 2213 let NumMicroOps = 3; 2214 let ReleaseAtCycles = [1,1,1]; 2215} 2216def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>; 2217 2218def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> { 2219 let Latency = 27; 2220 let NumMicroOps = 2; 2221 let ReleaseAtCycles = [1,1]; 2222} 2223def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>; 2224 2225def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2226 let Latency = 29; 2227 let NumMicroOps = 15; 2228 let ReleaseAtCycles = [5,5,1,4]; 2229} 2230def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>; 2231 2232def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 2233 let Latency = 30; 2234 let NumMicroOps = 3; 2235 let ReleaseAtCycles = [1,1,1]; 2236} 2237def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; 2238 2239def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> { 2240 let Latency = 35; 2241 let NumMicroOps = 23; 2242 let ReleaseAtCycles = [1,5,3,4,10]; 2243} 2244def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri", 2245 "IN(8|16|32)rr")>; 2246 2247def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2248 let Latency = 35; 2249 let NumMicroOps = 23; 2250 let ReleaseAtCycles = [1,5,2,1,4,10]; 2251} 2252def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir", 2253 "OUT(8|16|32)rr")>; 2254 2255def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { 2256 let Latency = 37; 2257 let NumMicroOps = 21; 2258 let ReleaseAtCycles = [9,7,5]; 2259} 2260def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr", 2261 "VPCONFLICTQZrr")>; 2262 2263def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { 2264 let Latency = 37; 2265 let NumMicroOps = 31; 2266 let ReleaseAtCycles = [1,8,1,21]; 2267} 2268def: 
InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; 2269 2270def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> { 2271 let Latency = 40; 2272 let NumMicroOps = 18; 2273 let ReleaseAtCycles = [1,1,2,3,1,1,1,8]; 2274} 2275def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>; 2276 2277def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 2278 let Latency = 41; 2279 let NumMicroOps = 39; 2280 let ReleaseAtCycles = [1,10,1,1,26]; 2281} 2282def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>; 2283 2284def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> { 2285 let Latency = 42; 2286 let NumMicroOps = 22; 2287 let ReleaseAtCycles = [2,20]; 2288} 2289def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>; 2290 2291def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 2292 let Latency = 42; 2293 let NumMicroOps = 40; 2294 let ReleaseAtCycles = [1,11,1,1,26]; 2295} 2296def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>; 2297def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>; 2298 2299def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2300 let Latency = 44; 2301 let NumMicroOps = 22; 2302 let ReleaseAtCycles = [9,7,1,5]; 2303} 2304def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)", 2305 "VPCONFLICTQZrm(b?)")>; 2306 2307def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> { 2308 let Latency = 62; 2309 let NumMicroOps = 64; 2310 let ReleaseAtCycles = [2,8,5,10,39]; 2311} 2312def: InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>; 2313 2314def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { 2315 let Latency = 63; 2316 let NumMicroOps = 88; 2317 let ReleaseAtCycles = [4,4,31,1,2,1,45]; 2318} 2319def: InstRW<[SKXWriteResGroup259], 
(instrs FXRSTOR64)>; 2320 2321def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { 2322 let Latency = 63; 2323 let NumMicroOps = 90; 2324 let ReleaseAtCycles = [4,2,33,1,2,1,47]; 2325} 2326def: InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>; 2327 2328def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { 2329 let Latency = 67; 2330 let NumMicroOps = 35; 2331 let ReleaseAtCycles = [17,11,7]; 2332} 2333def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>; 2334 2335def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2336 let Latency = 74; 2337 let NumMicroOps = 36; 2338 let ReleaseAtCycles = [17,11,1,7]; 2339} 2340def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>; 2341 2342def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> { 2343 let Latency = 75; 2344 let NumMicroOps = 15; 2345 let ReleaseAtCycles = [6,3,6]; 2346} 2347def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>; 2348 2349def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> { 2350 let Latency = 106; 2351 let NumMicroOps = 100; 2352 let ReleaseAtCycles = [9,1,11,16,1,11,21,30]; 2353} 2354def: InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>; 2355 2356def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> { 2357 let Latency = 140; 2358 let NumMicroOps = 4; 2359 let ReleaseAtCycles = [1,3]; 2360} 2361def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>; 2362 2363def: InstRW<[WriteZero], (instrs CLC)>; 2364 2365 2366// Instruction variants handled by the renamer. These might not need execution 2367// ports in certain conditions. 2368// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", 2369// section "Skylake Pipeline" > "Register allocation and renaming". 2370// These can be investigated with llvm-exegesis, e.g. 
2371// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- 2372// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- 2373 2374def SKXWriteZeroLatency : SchedWriteRes<[]> { 2375 let Latency = 0; 2376} 2377 2378def SKXWriteZeroIdiom : SchedWriteVariant<[ 2379 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2380 SchedVar<NoSchedPred, [WriteALU]> 2381]>; 2382def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr, 2383 XOR32rr, XOR64rr)>; 2384 2385def SKXWriteFZeroIdiom : SchedWriteVariant<[ 2386 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2387 SchedVar<NoSchedPred, [WriteFLogic]> 2388]>; 2389def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, 2390 XORPDrr, VXORPDrr, 2391 VXORPSZ128rr, 2392 VXORPDZ128rr)>; 2393 2394def SKXWriteFZeroIdiomY : SchedWriteVariant<[ 2395 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2396 SchedVar<NoSchedPred, [WriteFLogicY]> 2397]>; 2398def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr, 2399 VXORPSZ256rr, VXORPDZ256rr)>; 2400 2401def SKXWriteFZeroIdiomZ : SchedWriteVariant<[ 2402 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2403 SchedVar<NoSchedPred, [WriteFLogicZ]> 2404]>; 2405def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>; 2406 2407def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[ 2408 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2409 SchedVar<NoSchedPred, [WriteVecLogicX]> 2410]>; 2411def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, 2412 VPXORDZ128rr, VPXORQZ128rr)>; 2413 2414def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[ 2415 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2416 SchedVar<NoSchedPred, [WriteVecLogicY]> 2417]>; 2418def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr, 2419 VPXORDZ256rr, VPXORQZ256rr)>; 2420 2421def SKXWriteVZeroIdiomLogicZ : 
SchedWriteVariant<[ 2422 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2423 SchedVar<NoSchedPred, [WriteVecLogicZ]> 2424]>; 2425def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>; 2426 2427def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[ 2428 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2429 SchedVar<NoSchedPred, [WriteVecALUX]> 2430]>; 2431def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, 2432 PCMPGTDrr, VPCMPGTDrr, 2433 PCMPGTWrr, VPCMPGTWrr)>; 2434 2435def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[ 2436 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2437 SchedVar<NoSchedPred, [WriteVecALUY]> 2438]>; 2439def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, 2440 VPCMPGTDYrr, 2441 VPCMPGTWYrr)>; 2442 2443def SKXWritePSUB : SchedWriteRes<[SKXPort015]> { 2444 let Latency = 1; 2445 let NumMicroOps = 1; 2446 let ReleaseAtCycles = [1]; 2447} 2448 2449def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[ 2450 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2451 SchedVar<NoSchedPred, [SKXWritePSUB]> 2452]>; 2453 2454def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr, 2455 PSUBDrr, VPSUBDrr, VPSUBDZ128rr, 2456 PSUBQrr, VPSUBQrr, VPSUBQZ128rr, 2457 PSUBWrr, VPSUBWrr, VPSUBWZ128rr, 2458 VPSUBBYrr, VPSUBBZ256rr, 2459 VPSUBDYrr, VPSUBDZ256rr, 2460 VPSUBQYrr, VPSUBQZ256rr, 2461 VPSUBWYrr, VPSUBWZ256rr, 2462 VPSUBBZrr, 2463 VPSUBDZrr, 2464 VPSUBQZrr, 2465 VPSUBWZrr)>; 2466def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> { 2467 let Latency = 3; 2468 let NumMicroOps = 1; 2469 let ReleaseAtCycles = [1]; 2470} 2471 2472def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ 2473 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, 2474 SchedVar<NoSchedPred, [SKXWritePCMPGTQ]> 2475]>; 2476def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, 2477 VPCMPGTQYrr)>; 2478 2479 2480// CMOVs 
that use both Z and C flag require an extra uop. 2481def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { 2482 let Latency = 2; 2483 let ReleaseAtCycles = [2]; 2484 let NumMicroOps = 2; 2485} 2486 2487def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> { 2488 let Latency = 7; 2489 let ReleaseAtCycles = [1,2]; 2490 let NumMicroOps = 3; 2491} 2492 2493def SKXCMOVA_CMOVBErr : SchedWriteVariant<[ 2494 SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>, 2495 SchedVar<NoSchedPred, [WriteCMOV]> 2496]>; 2497 2498def SKXCMOVA_CMOVBErm : SchedWriteVariant<[ 2499 SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>, 2500 SchedVar<NoSchedPred, [WriteCMOV.Folded]> 2501]>; 2502 2503def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; 2504def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; 2505 2506// SETCCs that use both Z and C flag require an extra uop. 2507def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> { 2508 let Latency = 2; 2509 let ReleaseAtCycles = [2]; 2510 let NumMicroOps = 2; 2511} 2512 2513def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { 2514 let Latency = 3; 2515 let ReleaseAtCycles = [1,1,2]; 2516 let NumMicroOps = 4; 2517} 2518 2519def SKXSETA_SETBErr : SchedWriteVariant<[ 2520 SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>, 2521 SchedVar<NoSchedPred, [WriteSETCC]> 2522]>; 2523 2524def SKXSETA_SETBErm : SchedWriteVariant<[ 2525 SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>, 2526 SchedVar<NoSchedPred, [WriteSETCCStore]> 2527]>; 2528 2529def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>; 2530def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>; 2531 2532/////////////////////////////////////////////////////////////////////////////// 2533// Dependency breaking instructions. 
2534/////////////////////////////////////////////////////////////////////////////// 2535 2536def : IsZeroIdiomFunction<[ 2537 // GPR Zero-idioms. 2538 DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>, 2539 2540 // SSE Zero-idioms. 2541 DepBreakingClass<[ 2542 // fp variants. 2543 XORPSrr, XORPDrr, 2544 2545 // int variants. 2546 PXORrr, 2547 PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr, 2548 PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr 2549 ], ZeroIdiomPredicate>, 2550 2551 // AVX Zero-idioms. 2552 DepBreakingClass<[ 2553 // xmm fp variants. 2554 VXORPSrr, VXORPDrr, 2555 2556 // xmm int variants. 2557 VPXORrr, 2558 VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr, 2559 VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr, 2560 2561 // ymm variants. 2562 VXORPSYrr, VXORPDYrr, VPXORYrr, 2563 VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr, 2564 VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr, 2565 2566 // zmm (Z) variants, plus the Z128/Z256 forms. 2567 VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr, 2568 VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr, 2569 VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr, 2570 VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr, 2571 VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr, 2572 VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr, 2573 ], ZeroIdiomPredicate>, 2574]>; 2575 2576} // SchedModel 2577