//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Skylake Server to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine description. The per-write resource tables that follow are
// bound to this model through the `let SchedModel = ... in {` scope below.
def SkylakeServerModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and Skylake can
  // decode 6 instructions per cycle.
  let IssueWidth = 6;
  let MicroOpBufferSize = 224; // Based on the reorder buffer.
  // Also read back as SkylakeServerModel.LoadLatency when the WriteIMulHLd
  // latency is computed further down in this file.
  let LoadLatency = 5;
  let MispredictPenalty = 14;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  // This flag is set to allow the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}

// Every record defined inside this scope gets SchedModel = SkylakeServerModel.
let SchedModel = SkylakeServerModel in {

// Skylake Server can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
// One single-unit resource per issue port.
def SKXPort0 : ProcResource<1>;
def SKXPort1 : ProcResource<1>;
def SKXPort2 : ProcResource<1>;
def SKXPort3 : ProcResource<1>;
def SKXPort4 : ProcResource<1>;
def SKXPort5 : ProcResource<1>;
def SKXPort6 : ProcResource<1>;
def SKXPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
// Naming convention: the digits after "SKXPort" list the member ports of the
// group (e.g. SKXPort237 groups SKXPort2, SKXPort3 and SKXPort7).
def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>;
def SKXPort23 : ProcResGroup<[SKXPort2, SKXPort3]>;
def SKXPort237 : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
def SKXPort04 : ProcResGroup<[SKXPort0, SKXPort4]>;
def SKXPort05 : ProcResGroup<[SKXPort0, SKXPort5]>;
def SKXPort06 : ProcResGroup<[SKXPort0, SKXPort6]>;
def SKXPort15 : ProcResGroup<[SKXPort1, SKXPort5]>;
def SKXPort16 : ProcResGroup<[SKXPort1, SKXPort6]>;
def SKXPort56 : ProcResGroup<[SKXPort5, SKXPort6]>;
def SKXPort015 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;

// The dividers are separate single-unit resources, listed next to SKXPort0 by
// the division and square-root writes that use them.
def SKXDivider : ProcResource<1>; // Integer division issued on port 0.
// FP division and sqrt on port 0.
def SKXFPDivider : ProcResource<1>;

// 60 Entry Unified Scheduler
// Group over all eight ports; it carries the scheduler buffer size.
def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
                               SKXPort5, SKXPort6, SKXPort7]> {
  let BufferSize=60;
}

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

// Zero cycles of advance for ReadInt2Fpu.
def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Parameters:
//   SchedRW  - the foldable write; SchedRW.Folded names its load-folded twin.
//   ExePorts - resources consumed by the register form.
//   Lat      - latency of the register form.
//   Res      - cycles spent on each entry of ExePorts (defaults to [1]).
//   UOps     - micro-op count of the register form (defaults to 1).
//   LoadLat  - latency added for the load-folded form (defaults to 5).
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 5> {
  // Register form: exactly the ports, cycles, latency and micro-ops passed in.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let ResourceCycles = Res;
    let Latency = Lat;
  }

  // Load-folded form: SKXPort23 is prepended with one cycle of its own, the
  // micro-op count grows by one, and LoadLat is added to the register latency.
  def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
    let NumMicroOps = !add(1, UOps);
    let ResourceCycles = !listconcat([1], Res);
    let Latency = !add(LoadLat, Lat);
  }
}

// Read-modify-write: port 4 takes the store data and one of ports 2/3/7
// recomputes the address for the folded store.
def : WriteRes<WriteRMW, [SKXPort237, SKXPort4]>;

// Arithmetic (multiclass defaults written out explicitly).
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1, [1], 1, 5>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1, [1], 1, 5>; // Integer ALU + flags op.

// Integer multiplication.
// SKXWriteResPair arguments: write, ports, latency, resource cycles per port
// (default [1]), micro-ops (default 1), added load latency (default 5).
// X86WriteRes is defined outside this file; its arguments here appear to be
// (write, ports, latency, resource cycles, micro-ops) - confirm against its
// definition.
defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
// WriteIMulH consumes no resources and only carries a latency. The record is
// named so that the load form below can reuse its Latency field.
def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// Load form: SKXWriteIMulH.Latency (4) plus the model's LoadLatency (5).
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}

defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;

// TODO: Why isn't the SKXDivider used?
// NOTE(review): this presumably refers to the 16/32/64-bit register forms of
// DIV/IDIV that follow, which list execution ports only; the 8-bit forms and
// the WriteDiv*Ld forms do list SKXDivider.
// Unsigned division. WriteDiv8 passes LoadLat = 4, so its folded form gets a
// latency of 25 + 4 = 29, the same value given explicitly to WriteDiv16Ld,
// WriteDiv32Ld and WriteDiv64Ld.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;

// Signed division. The register and load forms are given separately here
// rather than through SKXWriteResPair.
// NOTE(review): the WriteIDiv*Ld entries carry a smaller latency and micro-op
// count (28 / 8) than the 16/32/64-bit register forms (102 / 66) - confirm
// that this is intended.
defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;

defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;

// No explicit fields are set here or for WriteSETCC below, so the WriteRes
// defaults apply to both.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.

defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
// Setcc with a memory destination: SKXPort06 plus the store-data port
// (SKXPort4) and a store-address port (SKXPort237). ResourceCycles is left at
// its default.
def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
}
defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
// NOTE(review): WriteBitTestSetImmLd is given 3 micro-ops while
// WriteBitTestSetRegLd, with the same number of resource entries, is given 2 -
// confirm that the asymmetry is intended.
defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;

// Integer shifts and rotates.
// The CL-count forms use three cycles on SKXPort06 and three micro-ops.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;

// SHLD/SHRD.
// Register and memory forms are listed individually (rri/rrcl/mri/mrcl).
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;

// Bit counts.
// All five writes share the same shape: SKXPort1, latency 3.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteBSR, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;

// Loads, stores, and moves, not folded with other operations.
// Loads use SKXPort23; stores use a store-address port (SKXPort237) plus the
// store-data port (SKXPort4).
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;

// Floating point. This covers both scalar and vector operations.
// Plain FP loads have latency 5/6/7 for the base/X/Y writes, matching the
// ReadAdvance values given for ReadAfterVecLd/VecXLd/VecYLd.
defm : X86WriteRes<WriteFLD0, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;

// Masked stores list SKXPort0 instead of the store-data port SKXPort4.
defm : X86WriteRes<WriteFMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;

defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;

// In the FP pairs below the last argument is the added load latency:
// 5 for the base write, 6 for X, 7 for Y and Z. The Z writes use SKXPort05
// where the narrower ones use SKXPort01.
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).

// SKXWriteResPair arguments: write, ports, latency, resource cycles per port,
// micro-ops, added load latency for the folded form.
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;

// Division occupies SKXFPDivider for several cycles in addition to SKXPort0.
// NOTE(review): WriteFDivX, WriteFDiv64, WriteFDiv64X and WriteFDiv64Y are
// commented out here; presumably they are given resources elsewhere in the
// file - confirm. The "10-14 cycles" remark is also repeated on the Z entries
// whose latencies are 18 and 23.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.

// Square root uses the same SKXPort0 + SKXFPDivider resources as division.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
// Remaining double-precision square-root writes. The divider occupancy doubles
// from the X form (6 cycles) to the Y form (12) and again to the Z form (24).
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
// Uses the multiclass defaults for micro-ops (1) and added load latency (5).
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.

defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
// NOTE(review): WriteFRcpZ keeps latency 4 while the otherwise identically
// shaped WriteFRsqrtZ below uses 9 - confirm that both values are intended.
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteDPPSZ,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
// Continuation of the floating-point tables. In each family the base write
// passes an added load latency of 6 and the Y/Z writes pass 7.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
// All shuffle writes are restricted to SKXPort5.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
// Variable blends take two cycles on SKXPort015 and two micro-ops.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;

// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Vector integer operations.
// Unfolded vector loads. Latency is 5/6/7 for the base/X/Y writes, matching
// the ReadAdvance values given for ReadAfterVecLd/VecXLd/VecYLd.
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
// Stores: a store-address port (SKXPort237) plus the store-data port
// (SKXPort4). The masked variants list SKXPort0 instead of SKXPort4.
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
// Register moves. The base and Z writes use SKXPort05; X and Y use SKXPort015.
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;

// In the pairs below the last argument is the added load latency: 5 for the
// base write, 6 for X, 7 for Y and Z.
defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
// Continuation of the vector-integer tables. SKXWriteResPair arguments are:
// write, ports, latency, resource cycles per port, micro-ops, added load
// latency for the folded form.
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
// PMULLD takes two cycles and two micro-ops, with latency 10 (twice that of
// the WriteVecIMul* entries above).
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
// All shuffle writes are restricted to SKXPort5.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
// NOTE(review): WriteVarBlendY and WriteVarBlendZ pass an added load latency
// of 6, whereas the other Y/Z writes in this file pass 7; WriteVarBlendZ also
// drops to one cycle / one micro-op. Confirm that these values are intended.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.

// Vector integer shifts.
// The X/Y/Z forms are listed explicitly rather than as pairs: the register
// forms use SKXPort5 plus a shift port, while the load forms list only the
// shift port and SKXPort23.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;

// Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;

// Vector insert/extract operations.
// Register insert: two micro-ops, both on SKXPort5.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
  let Latency = 2;
}
// Insert from memory; ResourceCycles is left at its default.
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
// The load forms of MOVH/MOVL PD/PS (with or without the V prefix) are
// scheduled as WriteVecInsertLd.
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;

// Extract to register.
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
// Extract to memory: adds the store-data and store-address ports.
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
  let NumMicroOps = 3;
  let Latency = 2;
}

// Conversion between integer and float.
// The SKXWriteResPair defaults (Res = [1], UOps = 1, LoadLat = 5) are written
// out explicitly below.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2, 5>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2, 5>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3, [1], 1, 5>;

defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4, [1], 1, 5>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4, [1], 1, 5>; // Needs more work: DD vs DQ.
// Integer to double conversions. These use the SKXWriteResPair defaults: one
// cycle per port, one micro-op, added load latency 5.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;

// Float <-> double conversions. The Y and Z forms take two micro-ops.
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort05], 3, [2], 2>;

// Half to single precision. Register and load forms are listed separately;
// the load forms list SKXPort23 and no longer list SKXPort5.
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;

// Single to half precision. The store forms add the store-data and
// store-address ports and one cycle of latency over the register forms.
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;

// String instructions.

// Records that set Latency, NumMicroOps and ResourceCycles together are
// written through X86WriteRes, whose arguments as used in this file are
// (write, ports, latency, resource cycles, micro-ops). Records that set only
// Latency stay as plain WriteRes so the remaining fields keep their defaults.

// Packed Compare Implicit Length Strings, Return Mask
defm : X86WriteRes<WritePCmpIStrM, [SKXPort0], 10, [3], 3>;
defm : X86WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23], 16, [3,1], 4>;

// Packed Compare Explicit Length Strings, Return Mask
defm : X86WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156], 19, [4,3,1,1], 9>;
defm : X86WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156], 25, [4,3,1,1,1], 10>;

// Packed Compare Implicit Length Strings, Return Index
defm : X86WriteRes<WritePCmpIStrI, [SKXPort0], 10, [3], 3>;
defm : X86WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23], 16, [3,1], 4>;

// Packed Compare Explicit Length Strings, Return Index
defm : X86WriteRes<WritePCmpEStrI, [SKXPort0,SKXPort5,SKXPort0156], 18, [4,3,1], 8>;
defm : X86WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156], 24, [4,3,1,1], 9>;

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteVecMOVMSKY, [SKXPort0]> {
  let Latency = 2;
}
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> {
  let Latency = 2;
}

// AES instructions.
defm : X86WriteRes<WriteAESDecEnc, [SKXPort0], 4, [1], 1>; // Decryption, encryption.
defm : X86WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23], 10, [1,1], 2>;

defm : X86WriteRes<WriteAESIMC, [SKXPort0], 8, [2], 2>; // InvMixColumn.
defm : X86WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23], 14, [2,1], 3>;

defm : X86WriteRes<WriteAESKeyGen, [SKXPort0,SKXPort5,SKXPort015], 20, [3,6,2], 11>; // Key Generation.
defm : X86WriteRes<WriteAESKeyGenLd, [SKXPort0,SKXPort5,SKXPort23,SKXPort015], 25, [3,6,1,1], 11>;

// Carry-less multiplication instructions.
defm : X86WriteRes<WriteCLMul, [SKXPort5], 6, [1], 1>;
defm : X86WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23], 12, [1,1], 2>;

// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [SKXPort0156]> {
  let Latency = 100;
} // def WriteSystem : SchedWrite;

// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVPMOV256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.

// Old microcoded instructions that nobody uses.
def : WriteRes<WriteMicrocoded, [SKXPort0156]> { // def WriteMicrocoded : SchedWrite;
  let Latency = 100;
}

// Fences: placed on the load / store-address ports (2/3) and the store-data
// port (4).
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>;

// MXCSR load and store: three micro-ops each, one cycle per listed port.
def : WriteRes<WriteLDMXCSR, [SKXPort0, SKXPort23, SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}
def : WriteRes<WriteSTMXCSR, [SKXPort4, SKXPort5, SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

// Nop: uses no ports. Not very useful, except that it provides a model for
// nops.
def : WriteRes<WriteNop, []>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): arguments after the port list look like latency, resource
// cycles, micro-op count and folded-load latency -- confirm against the
// SKXWriteResPair multiclass.
defm : SKXWriteResPair<WriteFHAdd,  [SKXPort5, SKXPort015], 6, [2, 1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5, SKXPort015], 6, [2, 1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd,  [SKXPort5, SKXPort05],  3, [2, 1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5, SKXPort015], 3, [2, 1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5, SKXPort015], 3, [2, 1], 3, 7>;

// Remaining instructions.

// Per-instruction scheduling overrides.
//
// Each SKXWriteResGroupN below is a SchedWriteRes record giving the ports an
// instruction uses, its Latency, its NumMicroOps, and a ResourceCycles list
// with one entry per listed port (same order). The InstRW records after a
// group bind it to opcodes, either by name (instrs) or by a regular
// expression over opcode names (instregex).
// NOTE(review): check TableGen's instregex anchoring rules before editing a
// pattern; a shorter pattern may match more opcodes than intended.

def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
                                            "KANDN(B|D|Q|W)rr",
                                            "KMOV(B|D|Q|W)kk",
                                            "KNOT(B|D|Q|W)rr",
                                            "KOR(B|D|Q|W)rr",
                                            "KXNOR(B|D|Q|W)rr",
                                            "KXOR(B|D|Q|W)rr",
                                            "KSET0(B|D|Q|W)", // Same as KXOR
                                            "KSET1(B|D|Q|W)", // Same as KXNOR
                                            "MMX_PADDS(B|W)rr",
                                            "MMX_PADDUS(B|W)rr",
                                            "MMX_PAVG(B|W)rr",
                                            "MMX_PCMPEQ(B|D|W)rr",
                                            "MMX_PCMPGT(B|D|W)rr",
                                            "MMX_P(MAX|MIN)SWrr",
                                            "MMX_P(MAX|MIN)UBrr",
                                            "MMX_PSUBS(B|W)rr",
                                            "MMX_PSUBUS(B|W)rr",
                                            "VPMOVB2M(Z|Z128|Z256)rr",
                                            "VPMOVD2M(Z|Z128|Z256)rr",
                                            "VPMOVQ2M(Z|Z128|Z256)rr",
                                            "VPMOVW2M(Z|Z128|Z256)rr")>;

def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
                                            "KMOV(B|D|Q|W)kr",
                                            "UCOM_F(P?)r")>;

def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;

def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;

def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;

def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;

def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
// NOTE(review): the VPSUB pattern below ends in "rrk" while the matching
// VPADD pattern ends in "rr" -- confirm this asymmetry is intended.
def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
                                            "VBLENDMPS(Z128|Z256)rr",
                                            "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
                                            "(V?)PADD(B|D|Q|W)rr",
                                            "VPBLENDD(Y?)rri",
                                            "VPBLENDMB(Z128|Z256)rr",
                                            "VPBLENDMD(Z128|Z256)rr",
                                            "VPBLENDMQ(Z128|Z256)rr",
                                            "VPBLENDMW(Z128|Z256)rr",
                                            "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
                                            "VPTERNLOGD(Z|Z128|Z256)rri",
                                            "VPTERNLOGQ(Z|Z128|Z256)rri")>;

def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
                                          CMC, STC,
                                          SGDT64m,
                                          SIDT64m,
                                          SMSW16m,
                                          STRm,
                                          SYSCALL)>;

def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
                                             "ST_FP(32|64|80)m")>;

def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;

def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
                                          MMX_MOVDQ2Qrr)>;

def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
                                          WAIT,
                                          XGETBV)>;

def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;

def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup21], (instrs SFENCE)>;

def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup23], (instrs CWD,
                                          JCXZ, JECXZ, JRCXZ,
                                          ADC8i8, SBB8i8,
                                          ADC16i16, SBB16i16,
                                          ADC32i32, SBB32i32,
                                          ADC64i32, SBB64i32)>;

def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>;

def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;

def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
                                          STOSB, STOSL, STOSQ, STOSW)>;
def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;

// Same ports as SKXWriteResGroup27, but 5 micro-ops with two cycles each on
// port 4 and port 2/3/7.
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
  let Latency = 2;
  let NumMicroOps = 5;
  let ResourceCycles = [2,2,1];
}
def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;

def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
                                             "KORTEST(B|D|Q|W)rr",
                                             "KTEST(B|D|Q|W)rr")>;

def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
                                             "PEXT(32|64)rr")>;

def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
                                             "VALIGND(Z|Z128|Z256)rri",
                                             "VALIGNQ(Z|Z128|Z256)rri",
                                             "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
                                             "VPBROADCAST(B|W)rr",
                                             "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;

// Port 5 like SKXWriteResGroup32, but with a 4-cycle latency.
def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
                                             "KSHIFTL(B|D|Q|W)ri",
                                             "KSHIFTR(B|D|Q|W)ri",
                                             "KUNPCK(BW|DQ|WD)rr",
                                             "VCMPPD(Z|Z128|Z256)rri",
                                             "VCMPPS(Z|Z128|Z256)rri",
                                             "VCMP(SD|SS)Zrr",
                                             "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
                                             "VFPCLASS(SD|SS)Zrr",
                                             "VPCMPB(Z|Z128|Z256)rri",
                                             "VPCMPD(Z|Z128|Z256)rri",
                                             "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
                                             "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
                                             "VPCMPQ(Z|Z128|Z256)rri",
                                             "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
                                             "VPCMPW(Z|Z128|Z256)rri",
                                             "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;

def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;

def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;

def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;

def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr,
                                          MMX_PACKSSWBrr,
                                          MMX_PACKUSWBrr)>;

def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>;

def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>;

def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
                                             "RCR(8|16|32|64)r(1|i)")>;

def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;

def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;

def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 3;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>;

def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;

def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr",
                                             "(V?)CVTDQ2PSrr",
                                             "VCVTPD2QQ(Z128|Z256)rr",
                                             "VCVTPD2UQQ(Z128|Z256)rr",
                                             "VCVTPS2DQ(Y|Z128|Z256)rr",
                                             "(V?)CVTPS2DQrr",
                                             "VCVTPS2UDQ(Z128|Z256)rr",
                                             "VCVTQQ2PD(Z128|Z256)rr",
                                             "VCVTTPD2QQ(Z128|Z256)rr",
                                             "VCVTTPD2UQQ(Z128|Z256)rr",
                                             "VCVTTPS2DQ(Z128|Z256)rr",
                                             "(V?)CVTTPS2DQrr",
                                             "VCVTTPS2UDQ(Z128|Z256)rr",
                                             "VCVTUDQ2PS(Z128|Z256)rr",
                                             "VCVTUQQ2PD(Z128|Z256)rr")>;

// Z-suffixed counterparts of the SKXWriteResGroup50 conversions: same latency
// and micro-op count, but on ports 0/5 instead of ports 0/1.
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs VCVTDQ2PSZrr,
                                           VCVTPD2QQZrr,
                                           VCVTPD2UQQZrr,
                                           VCVTPS2DQZrr,
                                           VCVTPS2UDQZrr,
                                           VCVTQQ2PDZrr,
                                           VCVTTPD2QQZrr,
                                           VCVTTPD2UQQZrr,
                                           VCVTTPS2DQZrr,
                                           VCVTTPS2UDQZrr,
                                           VCVTUDQ2PSZrr,
                                           VCVTUQQ2PDZrr)>;

def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
// NOTE(review): there is no VPMOVUSQW...rr pattern in this list, although the
// store-form list for SKXWriteResGroup66 includes VPMOVUSQW...mr -- confirm
// whether the omission is intended.
def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
                                             "VEXPANDPS(Z|Z128|Z256)rr",
                                             "VPEXPANDD(Z|Z128|Z256)rr",
                                             "VPEXPANDQ(Z|Z128|Z256)rr",
                                             "VPMOVDB(Z|Z128|Z256)rr",
                                             "VPMOVDW(Z|Z128|Z256)rr",
                                             "VPMOVQB(Z|Z128|Z256)rr",
                                             "VPMOVQW(Z|Z128|Z256)rr",
                                             "VPMOVSDB(Z|Z128|Z256)rr",
                                             "VPMOVSDW(Z|Z128|Z256)rr",
                                             "VPMOVSQB(Z|Z128|Z256)rr",
                                             "VPMOVSQD(Z|Z128|Z256)rr",
                                             "VPMOVSQW(Z|Z128|Z256)rr",
                                             "VPMOVSWB(Z|Z128|Z256)rr",
                                             "VPMOVUSDB(Z|Z128|Z256)rr",
                                             "VPMOVUSDW(Z|Z128|Z256)rr",
                                             "VPMOVUSQB(Z|Z128|Z256)rr",
                                             "VPMOVUSQD(Z|Z128|Z256)rr",
                                             "VPMOVUSWB(Z|Z128|Z256)rr",
                                             "VPMOVWB(Z|Z128|Z256)rr")>;

def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
                                             "IST_F(16|32)m",
                                             "VPMOVQD(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;

// VZEROUPPER is modeled with no ports and zero latency, but still counts as
// 4 micro-ops.
def SKXWriteResGroup56 : SchedWriteRes<[]> {
  let Latency = 0;
  let NumMicroOps = 4;
  let ResourceCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;

def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;

// Plain loads on ports 2/3 with a 5-cycle latency.
def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
  let Latency = 5;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
                                             "MOVZX(16|32|64)rm(8|16)",
                                             "(V?)MOVDDUPrm")>;  // TODO: Should this be SKXWriteResGroup71?

def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
                                             "MMX_CVT(T?)PS2PIrr",
                                             "VCVTDQ2PDZ128rr",
                                             "VCVTPD2DQZ128rr",
                                             "(V?)CVT(T?)PD2DQrr",
                                             "VCVTPD2PSZ128rr",
                                             "(V?)CVTPD2PSrr",
                                             "VCVTPD2UDQZ128rr",
                                             "VCVTPS2PDZ128rr",
                                             "(V?)CVTPS2PDrr",
                                             "VCVTPS2QQZ128rr",
                                             "VCVTPS2UQQZ128rr",
                                             "VCVTQQ2PSZ128rr",
                                             "(V?)CVTSD2SS(Z?)rr",
                                             "(V?)CVTSI(64)?2SDrr",
                                             "VCVTSI2SSZrr",
                                             "(V?)CVTSI2SSrr",
                                             "VCVTSI(64)?2SDZrr",
                                             "VCVTSS2SDZrr",
                                             "(V?)CVTSS2SDrr",
                                             "VCVTTPD2DQZ128rr",
                                             "VCVTTPD2UDQZ128rr",
                                             "VCVTTPS2QQZ128rr",
                                             "VCVTTPS2UQQZ128rr",
                                             "VCVTUDQ2PDZ128rr",
                                             "VCVTUQQ2PSZ128rr",
                                             "VCVTUSI2SSZrr",
                                             "VCVTUSI(64)?2SDZrr")>;

def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
// Per-instruction scheduling overrides, continued. Each SKXWriteResGroupN is
// a SchedWriteRes record giving the ports used, Latency, NumMicroOps, and a
// ResourceCycles list with one entry per listed port (same order). InstRW
// records bind a group to opcodes by name (instrs) or by regular expression
// (instregex).
def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;

def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;

def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
                                             "VCVTPS2PHZ256mr(b?)",
                                             "VCVTPS2PHZmr(b?)")>;

def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 5;
  let NumMicroOps = 4;
  let ResourceCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVDW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVQB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVQW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSDB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSDW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSQB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSQD(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSQW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVSWB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
                                             "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
                                             "VPMOVWB(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 5;
  let NumMicroOps = 5;
  let ResourceCycles = [1,4];
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;

def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 5;
  let NumMicroOps = 6;
  let ResourceCycles = [1,1,4];
}
def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>;

// Plain loads on ports 2/3 with a 6-cycle latency.
def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
                                          VPBROADCASTDrm,
                                          VPBROADCASTQrm,
                                          VMOVSHDUPrm,
                                          VMOVSLDUPrm,
                                          MOVSHDUPrm,
                                          MOVSLDUPrm)>;

def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
                                             "VCOMPRESSPS(Z|Z128|Z256)rr",
                                             "VPCOMPRESSD(Z|Z128|Z256)rr",
                                             "VPCOMPRESSQ(Z|Z128|Z256)rr",
                                             "VPERMW(Z|Z128|Z256)rr")>;

def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm,
                                          MMX_PADDSWrm,
                                          MMX_PADDUSBrm,
                                          MMX_PADDUSWrm,
                                          MMX_PAVGBrm,
                                          MMX_PAVGWrm,
                                          MMX_PCMPEQBrm,
                                          MMX_PCMPEQDrm,
                                          MMX_PCMPEQWrm,
                                          MMX_PCMPGTBrm,
                                          MMX_PCMPGTDrm,
                                          MMX_PCMPGTWrm,
                                          MMX_PMAXSWrm,
                                          MMX_PMAXUBrm,
                                          MMX_PMINSWrm,
                                          MMX_PMINUBrm,
                                          MMX_PSUBSBrm,
                                          MMX_PSUBSWrm,
                                          MMX_PSUBUSBrm,
                                          MMX_PSUBUSWrm)>;

def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;

def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
                                             "MOVBE(16|32|64)rm")>;

def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;

def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;

def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
                                             "VCVTSI642SSZrr",
                                             "VCVTUSI642SSZrr")>;

def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;

def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
                                             "SHL(8|16|32|64)m(1|i)",
                                             "SHR(8|16|32|64)m(1|i)")>;

def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm",
                                             "PUSH(16|32|64)rmm")>;

def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 6;
  let NumMicroOps = 6;
  let ResourceCycles = [1,5];
}
def: InstRW<[SKXWriteResGroup88], (instrs STD)>;

// Plain loads on ports 2/3 with a 7-cycle latency.
def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
  let Latency = 7;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
                                          VBROADCASTI128,
                                          VBROADCASTSDYrm,
                                          VBROADCASTSSYrm,
                                          VMOVDDUPYrm,
                                          VMOVSHDUPYrm,
                                          VMOVSLDUPYrm,
                                          VPBROADCASTDYrm,
                                          VPBROADCASTQYrm)>;

def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;

def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
                                             "VMOVSSZrm(b?)")>;

// Same ports and micro-op count as SKXWriteResGroup92, with a 6-cycle latency.
def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
                                              "(V?)PMOV(SX|ZX)BQrm",
                                              "(V?)PMOV(SX|ZX)BWrm",
                                              "(V?)PMOV(SX|ZX)DQrm",
                                              "(V?)PMOV(SX|ZX)WDrm",
                                              "(V?)PMOV(SX|ZX)WQrm")>;

def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
                                             "VCVTPD2DQ(Y|Z256)rr",
                                             "VCVTPD2PS(Y|Z256)rr",
                                             "VCVTPD2UDQZ256rr",
                                             "VCVTPS2PD(Y|Z256)rr",
                                             "VCVTPS2QQZ256rr",
                                             "VCVTPS2UQQZ256rr",
                                             "VCVTQQ2PSZ256rr",
                                             "VCVTTPD2DQ(Y|Z256)rr",
                                             "VCVTTPD2UDQZ256rr",
                                             "VCVTTPS2QQZ256rr",
                                             "VCVTTPS2UQQZ256rr",
                                             "VCVTUDQ2PDZ256rr",
                                             "VCVTUQQ2PSZ256rr")>;

// Z-suffixed counterparts of the SKXWriteResGroup93 conversions: the second
// resource is ports 0/5 instead of ports 0/1/5.
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
                                           VCVTPD2DQZrr,
                                           VCVTPD2PSZrr,
                                           VCVTPD2UDQZrr,
                                           VCVTPS2PDZrr,
                                           VCVTPS2QQZrr,
                                           VCVTPS2UQQZrr,
                                           VCVTQQ2PSZrr,
                                           VCVTTPD2DQZrr,
                                           VCVTTPD2UDQZrr,
                                           VCVTTPS2QQZrr,
                                           VCVTTPS2UQQZrr,
                                           VCVTUDQ2PDZrr,
                                           VCVTUQQ2PSZrr)>;

def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
                                          VPBLENDDrmi)>;
// This InstRW additionally lists ReadAfterVecXLd, the read type given a
// ReadAdvance of 6 at the top of this file.
def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
            (instregex "VBLENDMPDZ128rm(b?)",
                       "VBLENDMPSZ128rm(b?)",
                       "VBROADCASTI32X2Z128rm(b?)",
                       "VBROADCASTSSZ128rm(b?)",
                       "VINSERT(F|I)128rm",
                       "VMOVAPDZ128rm(b?)",
                       "VMOVAPSZ128rm(b?)",
                       "VMOVDDUPZ128rm(b?)",
                       "VMOVDQA32Z128rm(b?)",
                       "VMOVDQA64Z128rm(b?)",
                       "VMOVDQU16Z128rm(b?)",
                       "VMOVDQU32Z128rm(b?)",
                       "VMOVDQU64Z128rm(b?)",
                       "VMOVDQU8Z128rm(b?)",
                       "VMOVSHDUPZ128rm(b?)",
                       "VMOVSLDUPZ128rm(b?)",
                       "VMOVUPDZ128rm(b?)",
                       "VMOVUPSZ128rm(b?)",
                       "VPADD(B|D|Q|W)Z128rm(b?)",
                       "(V?)PADD(B|D|Q|W)rm",
                       "VPBLENDM(B|D|Q|W)Z128rm(b?)",
                       "VPBROADCASTDZ128rm(b?)",
                       "VPBROADCASTQZ128rm(b?)",
                       "VPSUB(B|D|Q|W)Z128rm(b?)",
                       "(V?)PSUB(B|D|Q|W)rm",
                       "VPTERNLOGDZ128rm(b?)i",
                       "VPTERNLOGQZ128rm(b?)i")>;

def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm,
                                          MMX_PACKSSWBrm,
                                          MMX_PACKUSWBrm)>;

def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2W128rr",
                                             "VPERMI2W256rr",
                                             "VPERMI2Wrr",
                                             "VPERMT2W128rr",
                                             "VPERMT2W256rr",
                                             "VPERMT2Wrr")>;

def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64,
                                          SCASB, SCASL, SCASQ, SCASW)>;

def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
                                              "(V?)CVTSS2SI64(Z?)rr",
                                              "(V?)CVTTSS2SI64(Z?)rr",
                                              "VCVTTSS2USI64Zrr")>;

def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>;

def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;

def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;

def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 7;
  let NumMicroOps = 4;
  let ResourceCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
                                              "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
                                              "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
                                              "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;

def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
                                              "ROR(8|16|32|64)m(1|i)")>;

// Register rotate-by-one forms: 2 micro-ops on ports 0/6, 2-cycle latency.
def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
                                             ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;

def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;

def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;

// Scatter stores with 64-bit elements (groups 110, 112, 113): the port 4 and
// port 2/3/7 cycle counts double from Z128 to Z256 to Z, and NumMicroOps
// grows with them.
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 7;
  let ResourceCycles = [1,2,2,2];
}
def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
                                           VPSCATTERQQZ128mr,
                                           VSCATTERDPDZ128mr,
                                           VSCATTERQPDZ128mr)>;

def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 7;
  let ResourceCycles = [1,3,1,2];
}
def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>;

def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 11;
  let ResourceCycles = [1,4,4,2];
}
def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
                                           VPSCATTERQQZ256mr,
                                           VSCATTERDPDZ256mr,
                                           VSCATTERQPDZ256mr)>;

def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 19;
  let ResourceCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
                                           VPSCATTERQQZmr,
                                           VSCATTERDPDZmr,
                                           VSCATTERQPDZmr)>;

def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 36;
  let ResourceCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;

def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
                                              "PEXT(32|64)rm")>;

def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
                                              "VPBROADCASTB(Z|Z256)rm(b?)",
                                              "VPBROADCASTW(Z|Z256)rm(b?)")>;
def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
                                           VPBROADCASTWYrm,
                                           VPMOVSXBDYrm,
                                           VPMOVSXBQYrm,
                                           VPMOVSXWQYrm)>;

def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
                                           VPBLENDDYrmi)>;
// This InstRW additionally lists ReadAfterVecYLd, the read type given a
// ReadAdvance of 7 at the top of this file.
// NOTE(review): the list contains two Z128 patterns (VBROADCASTF64X2Z128rm and
// VBROADCASTI64X2Z128rm) among otherwise Y/Z256/Z forms -- confirm.
def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
            (instregex "VBLENDMPD(Z|Z256)rm(b?)",
                       "VBLENDMPS(Z|Z256)rm(b?)",
                       "VBROADCASTF32X2Z256rm(b?)",
                       "VBROADCASTF32X2Zrm(b?)",
                       "VBROADCASTF32X4Z256rm(b?)",
                       "VBROADCASTF32X4rm(b?)",
                       "VBROADCASTF32X8rm(b?)",
                       "VBROADCASTF64X2Z128rm(b?)",
                       "VBROADCASTF64X2rm(b?)",
                       "VBROADCASTF64X4rm(b?)",
                       "VBROADCASTI32X2Z256rm(b?)",
                       "VBROADCASTI32X2Zrm(b?)",
                       "VBROADCASTI32X4Z256rm(b?)",
                       "VBROADCASTI32X4rm(b?)",
                       "VBROADCASTI32X8rm(b?)",
                       "VBROADCASTI64X2Z128rm(b?)",
                       "VBROADCASTI64X2rm(b?)",
                       "VBROADCASTI64X4rm(b?)",
                       "VBROADCASTSD(Z|Z256)rm(b?)",
                       "VBROADCASTSS(Z|Z256)rm(b?)",
                       "VINSERTF32x4(Z|Z256)rm(b?)",
                       "VINSERTF32x8Zrm(b?)",
                       "VINSERTF64x2(Z|Z256)rm(b?)",
                       "VINSERTF64x4Zrm(b?)",
                       "VINSERTI32x4(Z|Z256)rm(b?)",
                       "VINSERTI32x8Zrm(b?)",
                       "VINSERTI64x2(Z|Z256)rm(b?)",
                       "VINSERTI64x4Zrm(b?)",
                       "VMOVAPD(Z|Z256)rm(b?)",
                       "VMOVAPS(Z|Z256)rm(b?)",
                       "VMOVDDUP(Z|Z256)rm(b?)",
                       "VMOVDQA32(Z|Z256)rm(b?)",
                       "VMOVDQA64(Z|Z256)rm(b?)",
                       "VMOVDQU16(Z|Z256)rm(b?)",
                       "VMOVDQU32(Z|Z256)rm(b?)",
                       "VMOVDQU64(Z|Z256)rm(b?)",
                       "VMOVDQU8(Z|Z256)rm(b?)",
                       "VMOVSHDUP(Z|Z256)rm(b?)",
                       "VMOVSLDUP(Z|Z256)rm(b?)",
                       "VMOVUPD(Z|Z256)rm(b?)",
                       "VMOVUPS(Z|Z256)rm(b?)",
                       "VPADD(B|D|Q|W)Yrm",
                       "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
                       "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
                       "VPBROADCASTD(Z|Z256)rm(b?)",
                       "VPBROADCASTQ(Z|Z256)rm(b?)",
                       "VPSUB(B|D|Q|W)Yrm",
                       "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
                       "VPTERNLOGD(Z|Z256)rm(b?)i",
                       "VPTERNLOGQ(Z|Z256)rm(b?)i")>;

def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 8;
  let NumMicroOps = 4;
  let ResourceCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;

def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 5;
  let ResourceCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
                                              "RCR(8|16|32|64)m(1|i)")>;

def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
  let Latency = 8;
  let NumMicroOps = 6;
  let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
                                              "ROR(8|16|32|64)mCL",
                                              "SAR(8|16|32|64)mCL",
                                              "SHL(8|16|32|64)mCL",
                                              "SHR(8|16|32|64)mCL")>;

def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 6;
  let ResourceCycles = [1,1,1,2,1];
}
// Bound through a SchedAlias on the WriteADCRMW write type rather than through
// an opcode list.
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;

// Scatter stores (groups 131-134): the port 4 and port 2/3/7 cycle counts
// double from one group to the next, and NumMicroOps grows with them.
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 8;
  let ResourceCycles = [1,2,1,2,2];
}
def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
                                           VPSCATTERQDZ256mr,
                                           VSCATTERQPSZ128mr,
                                           VSCATTERQPSZ256mr)>;

def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 12;
  let ResourceCycles = [1,4,1,4,2];
}
def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
                                           VSCATTERDPSZ128mr)>;

def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 20;
  let ResourceCycles = [1,8,1,8,2];
}
def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
                                           VSCATTERDPSZ256mr)>;

def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 36;
  let ResourceCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>;

def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;

def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
                                           VPMOVSXDQYrm,
                                           VPMOVSXWDYrm,
                                           VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
                                              "VFPCLASSSDZrm(b?)",
                                              "VFPCLASSSSZrm(b?)",
                                              "(V?)PCMPGTQrm",
                                              "VPERMI2D128rm(b?)",
                                              "VPERMI2PD128rm(b?)",
                                              "VPERMI2PS128rm(b?)",
                                              "VPERMI2Q128rm(b?)",
                                              "VPERMT2D128rm(b?)",
                                              "VPERMT2PD128rm(b?)",
                                              "VPERMT2PS128rm(b?)",
                                              "VPERMT2Q128rm(b?)",
                                              "VPMAXSQZ128rm(b?)",
                                              "VPMAXUQZ128rm(b?)",
                                              "VPMINSQZ128rm(b?)",
                                              "VPMINUQZ128rm(b?)")>;

// Same ports and micro-op count as SKXWriteResGroup136, with a 10-cycle
// latency.
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
                                                "VCMP(SD|SS)Zrm",
                                                "VFPCLASSPDZ128rm(b?)",
                                                "VFPCLASSPSZ128rm(b?)",
                                                "VPCMPBZ128rmi(b?)",
                                                "VPCMPDZ128rmi(b?)",
                                                "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPGT(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPQZ128rmi(b?)",
                                                "VPCMPU(B|D|Q|W)Z128rmi(b?)",
                                                "VPCMPWZ128rmi(b?)",
                                                "VPTESTMBZ128rm(b?)",
                                                "VPTESTMDZ128rm(b?)",
                                                "VPTESTMQZ128rm(b?)",
                                                "VPTESTMWZ128rm(b?)",
                                                "VPTESTNMBZ128rm(b?)",
                                                "VPTESTNMDZ128rm(b?)",
                                                "VPTESTNMQZ128rm(b?)",
                                                "VPTESTNMWZ128rm(b?)")>;

def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
                                              "(V?)CVTPS2PDrm")>;

def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 4;
  let ResourceCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
                                              "(V?)PHSUBSWrm")>;

def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 9;
  let NumMicroOps = 5;
  let ResourceCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
                                              "LSL(16|32|64)rm")>;

def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m", 1765 "VALIGND(Z|Z256)rm(b?)i", 1766 "VALIGNQ(Z|Z256)rm(b?)i", 1767 "VPMAXSQ(Z|Z256)rm(b?)", 1768 "VPMAXUQ(Z|Z256)rm(b?)", 1769 "VPMINSQ(Z|Z256)rm(b?)", 1770 "VPMINUQ(Z|Z256)rm(b?)")>; 1771 1772def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1773 let Latency = 11; 1774 let NumMicroOps = 2; 1775 let ResourceCycles = [1,1]; 1776} 1777def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i", 1778 "VCMPPS(Z|Z256)rm(b?)i", 1779 "VFPCLASSPD(Z|Z256)rm(b?)", 1780 "VFPCLASSPS(Z|Z256)rm(b?)", 1781 "VPCMPB(Z|Z256)rmi(b?)", 1782 "VPCMPD(Z|Z256)rmi(b?)", 1783 "VPCMPEQB(Z|Z256)rm(b?)", 1784 "VPCMPEQD(Z|Z256)rm(b?)", 1785 "VPCMPEQQ(Z|Z256)rm(b?)", 1786 "VPCMPEQW(Z|Z256)rm(b?)", 1787 "VPCMPGTB(Z|Z256)rm(b?)", 1788 "VPCMPGTD(Z|Z256)rm(b?)", 1789 "VPCMPGTQ(Z|Z256)rm(b?)", 1790 "VPCMPGTW(Z|Z256)rm(b?)", 1791 "VPCMPQ(Z|Z256)rmi(b?)", 1792 "VPCMPU(B|D|Q|W)Z256rmi(b?)", 1793 "VPCMPU(B|D|Q|W)Zrmi(b?)", 1794 "VPCMPW(Z|Z256)rmi(b?)", 1795 "VPTESTM(B|D|Q|W)Z256rm(b?)", 1796 "VPTESTM(B|D|Q|W)Zrm(b?)", 1797 "VPTESTNM(B|D|Q|W)Z256rm(b?)", 1798 "VPTESTNM(B|D|Q|W)Zrm(b?)")>; 1799 1800def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1801 let Latency = 10; 1802 let NumMicroOps = 2; 1803 let ResourceCycles = [1,1]; 1804} 1805def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)", 1806 "VCVTDQ2PSZ128rm(b?)", 1807 "(V?)CVTDQ2PSrm", 1808 "VCVTPD2QQZ128rm(b?)", 1809 "VCVTPD2UQQZ128rm(b?)", 1810 "VCVTPH2PSZ128rm(b?)", 1811 "VCVTPS2DQZ128rm(b?)", 1812 "(V?)CVTPS2DQrm", 1813 "VCVTPS2PDZ128rm(b?)", 1814 "VCVTPS2QQZ128rm(b?)", 1815 "VCVTPS2UDQZ128rm(b?)", 1816 "VCVTPS2UQQZ128rm(b?)", 1817 "VCVTQQ2PDZ128rm(b?)", 1818 "VCVTQQ2PSZ128rm(b?)", 1819 "VCVTSS2SDZrm", 1820 "(V?)CVTSS2SDrm", 1821 "VCVTTPD2QQZ128rm(b?)", 1822 "VCVTTPD2UQQZ128rm(b?)", 1823 "VCVTTPS2DQZ128rm(b?)", 1824 "(V?)CVTTPS2DQrm", 1825 "VCVTTPS2QQZ128rm(b?)", 1826 "VCVTTPS2UDQZ128rm(b?)", 1827 "VCVTTPS2UQQZ128rm(b?)", 1828 "VCVTUDQ2PDZ128rm(b?)", 1829 
"VCVTUDQ2PSZ128rm(b?)", 1830 "VCVTUQQ2PDZ128rm(b?)", 1831 "VCVTUQQ2PSZ128rm(b?)")>; 1832 1833def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1834 let Latency = 10; 1835 let NumMicroOps = 3; 1836 let ResourceCycles = [2,1]; 1837} 1838def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", 1839 "VEXPANDPSZ128rm(b?)", 1840 "VPEXPANDDZ128rm(b?)", 1841 "VPEXPANDQZ128rm(b?)")>; 1842 1843def SKXWriteResGroup153 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1844 let Latency = 10; 1845 let NumMicroOps = 3; 1846 let ResourceCycles = [1,1,1]; 1847} 1848def: InstRW<[SKXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>; 1849 1850def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { 1851 let Latency = 10; 1852 let NumMicroOps = 4; 1853 let ResourceCycles = [2,1,1]; 1854} 1855def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm, 1856 VPHSUBSWYrm)>; 1857 1858def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1859 let Latency = 10; 1860 let NumMicroOps = 8; 1861 let ResourceCycles = [1,1,1,1,1,3]; 1862} 1863def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>; 1864 1865def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { 1866 let Latency = 11; 1867 let NumMicroOps = 1; 1868 let ResourceCycles = [1,3]; 1869} 1870def : SchedAlias<WriteFDivX, SKXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair 1871 1872def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { 1873 let Latency = 11; 1874 let NumMicroOps = 2; 1875 let ResourceCycles = [1,1]; 1876} 1877def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>; 1878 1879def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1880 let Latency = 11; 1881 let NumMicroOps = 2; 1882 let ResourceCycles = [1,1]; 1883} 1884def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm, 1885 VCVTPS2PDYrm)>; 1886def: InstRW<[SKXWriteResGroup161], (instregex 
"VCVTDQ2(PD|PS)(Z|Z256)rm(b?)", 1887 "VCVTPH2PS(Z|Z256)rm(b?)", 1888 "VCVTPS2PD(Z|Z256)rm(b?)", 1889 "VCVTQQ2PD(Z|Z256)rm(b?)", 1890 "VCVTQQ2PSZ256rm(b?)", 1891 "VCVT(T?)PD2QQ(Z|Z256)rm(b?)", 1892 "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)", 1893 "VCVT(T?)PS2DQYrm", 1894 "VCVT(T?)PS2DQ(Z|Z256)rm(b?)", 1895 "VCVT(T?)PS2QQZ256rm(b?)", 1896 "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)", 1897 "VCVT(T?)PS2UQQZ256rm(b?)", 1898 "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)", 1899 "VCVTUQQ2PD(Z|Z256)rm(b?)", 1900 "VCVTUQQ2PSZ256rm(b?)")>; 1901 1902def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1903 let Latency = 11; 1904 let NumMicroOps = 3; 1905 let ResourceCycles = [2,1]; 1906} 1907def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", 1908 "VEXPANDPD(Z|Z256)rm(b?)", 1909 "VEXPANDPS(Z|Z256)rm(b?)", 1910 "VPEXPANDD(Z|Z256)rm(b?)", 1911 "VPEXPANDQ(Z|Z256)rm(b?)")>; 1912 1913def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1914 let Latency = 11; 1915 let NumMicroOps = 3; 1916 let ResourceCycles = [1,2]; 1917} 1918def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm")>; 1919 1920def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1921 let Latency = 11; 1922 let NumMicroOps = 3; 1923 let ResourceCycles = [1,1,1]; 1924} 1925def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>; 1926 1927def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1928 let Latency = 11; 1929 let NumMicroOps = 3; 1930 let ResourceCycles = [1,1,1]; 1931} 1932def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm, 1933 CVTPD2DQrm, 1934 CVTTPD2DQrm, 1935 MMX_CVTPD2PIrm, 1936 MMX_CVTTPD2PIrm)>; 1937 1938def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1939 let Latency = 11; 1940 let NumMicroOps = 4; 1941 let ResourceCycles = [2,1,1]; 1942} 1943def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; 1944 1945def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { 1946 let Latency = 
11; 1947 let NumMicroOps = 7; 1948 let ResourceCycles = [2,3,2]; 1949} 1950def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL", 1951 "RCR(16|32|64)rCL")>; 1952 1953def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { 1954 let Latency = 11; 1955 let NumMicroOps = 9; 1956 let ResourceCycles = [1,5,1,2]; 1957} 1958def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>; 1959 1960def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { 1961 let Latency = 11; 1962 let NumMicroOps = 11; 1963 let ResourceCycles = [2,9]; 1964} 1965def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; 1966 1967def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> { 1968 let Latency = 15; 1969 let NumMicroOps = 3; 1970 let ResourceCycles = [3]; 1971} 1972def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; 1973 1974def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> { 1975 let Latency = 15; 1976 let NumMicroOps = 3; 1977 let ResourceCycles = [3]; 1978} 1979def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>; 1980 1981def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1982 let Latency = 12; 1983 let NumMicroOps = 3; 1984 let ResourceCycles = [2,1]; 1985} 1986def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>; 1987 1988def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { 1989 let Latency = 12; 1990 let NumMicroOps = 3; 1991 let ResourceCycles = [1,1,1]; 1992} 1993def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)", 1994 "VCVT(T?)SS2USI64Zrm(b?)")>; 1995 1996def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 1997 let Latency = 12; 1998 let NumMicroOps = 3; 1999 let ResourceCycles = [1,1,1]; 2000} 2001def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)", 2002 "VCVT(T?)PS2UQQZrm(b?)")>; 2003 2004def SKXWriteResGroup179 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> { 2005 let Latency = 12; 
2006 let NumMicroOps = 4; 2007 let ResourceCycles = [1,1,1,1]; 2008} 2009def: InstRW<[SKXWriteResGroup179], (instregex "CVTTSS2SI64rm")>; 2010 2011def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> { 2012 let Latency = 13; 2013 let NumMicroOps = 3; 2014 let ResourceCycles = [2,1]; 2015} 2016def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m", 2017 "VPERMWZ256rm(b?)", 2018 "VPERMWZrm(b?)")>; 2019 2020def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 2021 let Latency = 13; 2022 let NumMicroOps = 3; 2023 let ResourceCycles = [1,1,1]; 2024} 2025def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>; 2026 2027def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 2028 let Latency = 13; 2029 let NumMicroOps = 4; 2030 let ResourceCycles = [2,1,1]; 2031} 2032def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)", 2033 "VPERMT2W128rm(b?)")>; 2034 2035def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { 2036 let Latency = 14; 2037 let NumMicroOps = 1; 2038 let ResourceCycles = [1,3]; 2039} 2040def : SchedAlias<WriteFDiv64, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair 2041def : SchedAlias<WriteFDiv64X, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair 2042 2043def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { 2044 let Latency = 14; 2045 let NumMicroOps = 1; 2046 let ResourceCycles = [1,5]; 2047} 2048def : SchedAlias<WriteFDiv64Y, SKXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair 2049 2050def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 2051 let Latency = 14; 2052 let NumMicroOps = 3; 2053 let ResourceCycles = [1,1,1]; 2054} 2055def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>; 2056 2057def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 2058 let Latency = 14; 2059 let NumMicroOps = 3; 2060 let ResourceCycles = [1,1,1]; 2061} 2062def: 
InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", 2063 "VCVTPD2PSZrm(b?)", 2064 "VCVTPD2UDQZrm(b?)", 2065 "VCVTQQ2PSZrm(b?)", 2066 "VCVTTPD2DQZrm(b?)", 2067 "VCVTTPD2UDQZrm(b?)", 2068 "VCVTUQQ2PSZrm(b?)")>; 2069 2070def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { 2071 let Latency = 14; 2072 let NumMicroOps = 4; 2073 let ResourceCycles = [2,1,1]; 2074} 2075def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2W256rm(b?)", 2076 "VPERMI2Wrm(b?)", 2077 "VPERMT2W256rm(b?)", 2078 "VPERMT2Wrm(b?)")>; 2079 2080def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { 2081 let Latency = 14; 2082 let NumMicroOps = 10; 2083 let ResourceCycles = [2,4,1,3]; 2084} 2085def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>; 2086 2087def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> { 2088 let Latency = 15; 2089 let NumMicroOps = 1; 2090 let ResourceCycles = [1]; 2091} 2092def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>; 2093 2094def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2095 let Latency = 15; 2096 let NumMicroOps = 8; 2097 let ResourceCycles = [1,2,2,1,2]; 2098} 2099def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>; 2100 2101def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2102 let Latency = 15; 2103 let NumMicroOps = 10; 2104 let ResourceCycles = [1,1,1,5,1,1]; 2105} 2106def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; 2107 2108def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2109 let Latency = 16; 2110 let NumMicroOps = 14; 2111 let ResourceCycles = [1,1,1,4,2,5]; 2112} 2113def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>; 2114 2115def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> { 2116 let Latency = 12; 2117 let NumMicroOps = 34; 2118 let 
ResourceCycles = [1, 4, 5]; 2119} 2120def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>; 2121 2122def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { 2123 let Latency = 17; 2124 let NumMicroOps = 2; 2125 let ResourceCycles = [1,1,5]; 2126} 2127def : SchedAlias<WriteFDivXLd, SKXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair 2128 2129def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { 2130 let Latency = 17; 2131 let NumMicroOps = 15; 2132 let ResourceCycles = [2,1,2,4,2,4]; 2133} 2134def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>; 2135 2136def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> { 2137 let Latency = 21; 2138 let NumMicroOps = 4; 2139 let ResourceCycles = [1,3]; 2140} 2141def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>; 2142 2143def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> { 2144 let Latency = 18; 2145 let NumMicroOps = 8; 2146 let ResourceCycles = [1,1,1,5]; 2147} 2148def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>; 2149 2150def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { 2151 let Latency = 18; 2152 let NumMicroOps = 11; 2153 let ResourceCycles = [2,1,1,4,1,2]; 2154} 2155def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>; 2156 2157def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { 2158 let Latency = 19; 2159 let NumMicroOps = 2; 2160 let ResourceCycles = [1,1,4]; 2161} 2162def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair 2163 2164def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> { 2165 let Latency = 22; 2166 let NumMicroOps = 4; 2167 let ResourceCycles = [1,3]; 2168} 2169def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>; 2170 2171def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> { 2172 let 
Latency = 22; 2173 let NumMicroOps = 4; 2174 let ResourceCycles = [1,3]; 2175} 2176def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>; 2177 2178def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> { 2179 let Latency = 20; 2180 let NumMicroOps = 1; 2181 let ResourceCycles = [1]; 2182} 2183def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>; 2184 2185def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { 2186 let Latency = 20; 2187 let NumMicroOps = 2; 2188 let ResourceCycles = [1,1,4]; 2189} 2190def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair 2191 2192def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2193 let Latency = 17; 2194 let NumMicroOps = 5; // 2 uops perform multiple loads 2195 let ResourceCycles = [1,2,1,1]; 2196} 2197def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm, 2198 VGATHERDPDZ128rm, VPGATHERDQZ128rm, 2199 VGATHERQPDZ128rm, VPGATHERQQZ128rm)>; 2200 2201def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2202 let Latency = 19; 2203 let NumMicroOps = 5; // 2 uops perform multiple loads 2204 let ResourceCycles = [1,4,1,1]; 2205} 2206def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm, 2207 VGATHERQPDZ256rm, VPGATHERQQZ256rm, 2208 VGATHERDPSZ128rm, VPGATHERDDZ128rm, 2209 VGATHERDPDZ256rm, VPGATHERDQZ256rm)>; 2210 2211def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2212 let Latency = 21; 2213 let NumMicroOps = 5; // 2 uops perform multiple loads 2214 let ResourceCycles = [1,8,1,1]; 2215} 2216def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm, 2217 VGATHERDPDZrm, VPGATHERDQZrm, 2218 VGATHERQPDZrm, VPGATHERQQZrm, 2219 VGATHERQPSZrm, VPGATHERQDZrm)>; 2220 2221def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { 2222 let Latency = 
25; 2223 let NumMicroOps = 5; // 2 uops perform multiple loads 2224 let ResourceCycles = [1,16,1,1]; 2225} 2226def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; 2227 2228def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2229 let Latency = 20; 2230 let NumMicroOps = 8; 2231 let ResourceCycles = [1,1,1,1,1,1,2]; 2232} 2233def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>; 2234 2235def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> { 2236 let Latency = 20; 2237 let NumMicroOps = 10; 2238 let ResourceCycles = [1,2,7]; 2239} 2240def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>; 2241 2242def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { 2243 let Latency = 21; 2244 let NumMicroOps = 2; 2245 let ResourceCycles = [1,1,8]; 2246} 2247def : SchedAlias<WriteFDiv64YLd, SKXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair 2248 2249def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> { 2250 let Latency = 22; 2251 let NumMicroOps = 2; 2252 let ResourceCycles = [1,1]; 2253} 2254def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>; 2255 2256def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2257 let Latency = 18; 2258 let NumMicroOps = 5; // 2 uops perform multiple loads 2259 let ResourceCycles = [1,2,1,1]; 2260} 2261def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm, 2262 VGATHERQPDrm, VPGATHERQQrm, 2263 VGATHERQPSrm, VPGATHERQDrm)>; 2264 2265def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2266 let Latency = 20; 2267 let NumMicroOps = 5; // 2 uops perform multiple loads 2268 let ResourceCycles = [1,4,1,1]; 2269} 2270def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm, 2271 VGATHERDPSrm, VPGATHERDDrm, 2272 VGATHERQPDYrm, VPGATHERQQYrm, 2273 VGATHERQPSYrm, VPGATHERQDYrm)>; 2274 2275def
SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { 2276 let Latency = 22; 2277 let NumMicroOps = 5; // 2 uops perform multiple loads 2278 let ResourceCycles = [1,8,1,1]; 2279} 2280def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; 2281 2282def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { 2283 let Latency = 22; 2284 let NumMicroOps = 14; 2285 let ResourceCycles = [5,5,4]; 2286} 2287def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", 2288 "VPCONFLICTQZ256rr")>; 2289 2290def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2291 let Latency = 23; 2292 let NumMicroOps = 19; 2293 let ResourceCycles = [2,1,4,1,1,4,6]; 2294} 2295def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>; 2296 2297def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 2298 let Latency = 25; 2299 let NumMicroOps = 3; 2300 let ResourceCycles = [1,1,1]; 2301} 2302def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>; 2303 2304def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> { 2305 let Latency = 27; 2306 let NumMicroOps = 2; 2307 let ResourceCycles = [1,1]; 2308} 2309def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>; 2310 2311def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { 2312 let Latency = 29; 2313 let NumMicroOps = 15; 2314 let ResourceCycles = [5,5,1,4]; 2315} 2316def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>; 2317 2318def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 2319 let Latency = 30; 2320 let NumMicroOps = 3; 2321 let ResourceCycles = [1,1,1]; 2322} 2323def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; 2324 2325def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> { 2326 let Latency = 35; 2327 let NumMicroOps = 23; 2328 let 
ResourceCycles = [1,5,3,4,10]; 2329} 2330def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri", 2331 "IN(8|16|32)rr")>; 2332 2333def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 2334 let Latency = 35; 2335 let NumMicroOps = 23; 2336 let ResourceCycles = [1,5,2,1,4,10]; 2337} 2338def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir", 2339 "OUT(8|16|32)rr")>; 2340 2341def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { 2342 let Latency = 37; 2343 let NumMicroOps = 21; 2344 let ResourceCycles = [9,7,5]; 2345} 2346def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr", 2347 "VPCONFLICTQZrr")>; 2348 2349def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { 2350 let Latency = 37; 2351 let NumMicroOps = 31; 2352 let ResourceCycles = [1,8,1,21]; 2353} 2354def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; 2355 2356def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> { 2357 let Latency = 40; 2358 let NumMicroOps = 18; 2359 let ResourceCycles = [1,1,2,3,1,1,1,8]; 2360} 2361def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>; 2362 2363def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 2364 let Latency = 41; 2365 let NumMicroOps = 39; 2366 let ResourceCycles = [1,10,1,1,26]; 2367} 2368def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>; 2369 2370def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> { 2371 let Latency = 42; 2372 let NumMicroOps = 22; 2373 let ResourceCycles = [2,20]; 2374} 2375def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>; 2376 2377def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 2378 let Latency = 42; 2379 let NumMicroOps = 40; 2380 let ResourceCycles = [1,11,1,1,26]; 2381} 2382def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>; 
// XSAVEC, XSAVES and XSAVEOPT reuse the SKXWriteResGroup255 write.
def : InstRW<[SKXWriteResGroup255],
             (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;

// In every SchedWriteRes below, ResourceCycles[i] is the number of cycles
// charged to the i-th resource of the bracketed resource list, so the two
// lists always have the same length.

// VPCONFLICTDZ256 / VPCONFLICTQZ with a memory operand.
def SKXWriteResGroup256
    : SchedWriteRes<[SKXPort5, SKXPort01, SKXPort23, SKXPort015]> {
  let ResourceCycles = [9, 7, 1, 5];
  let NumMicroOps = 22;
  let Latency = 44;
}
def : InstRW<[SKXWriteResGroup256],
             (instregex "VPCONFLICTDZ256rm(b?)",
                        "VPCONFLICTQZrm(b?)")>;

// FLDENVm.
def SKXWriteResGroup258
    : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort05, SKXPort06,
                     SKXPort0156]> {
  let ResourceCycles = [2, 8, 5, 10, 39];
  let NumMicroOps = 64;
  let Latency = 62;
}
def : InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>;

// FXRSTOR64.
def SKXWriteResGroup259
    : SchedWriteRes<[SKXPort0, SKXPort6, SKXPort23, SKXPort05, SKXPort06,
                     SKXPort15, SKXPort0156]> {
  let ResourceCycles = [4, 4, 31, 1, 2, 1, 45];
  let NumMicroOps = 88;
  let Latency = 63;
}
def : InstRW<[SKXWriteResGroup259], (instrs FXRSTOR64)>;

// FXRSTOR: same resources and latency as FXRSTOR64, different cycle counts.
def SKXWriteResGroup260
    : SchedWriteRes<[SKXPort0, SKXPort6, SKXPort23, SKXPort05, SKXPort06,
                     SKXPort15, SKXPort0156]> {
  let ResourceCycles = [4, 2, 33, 1, 2, 1, 47];
  let NumMicroOps = 90;
  let Latency = 63;
}
def : InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>;

// VPCONFLICTDZ, register form.
def SKXWriteResGroup261
    : SchedWriteRes<[SKXPort5, SKXPort01, SKXPort015]> {
  let ResourceCycles = [17, 11, 7];
  let NumMicroOps = 35;
  let Latency = 67;
}
def : InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;

// VPCONFLICTDZ, memory form: the register form plus one SKXPort23 cycle.
def SKXWriteResGroup262
    : SchedWriteRes<[SKXPort5, SKXPort01, SKXPort23, SKXPort015]> {
  let ResourceCycles = [17, 11, 1, 7];
  let NumMicroOps = 36;
  let Latency = 74;
}
def : InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;

// FNINIT.
def SKXWriteResGroup263
    : SchedWriteRes<[SKXPort5, SKXPort05, SKXPort0156]> {
  let ResourceCycles = [6, 3, 6];
  let NumMicroOps = 15;
  let Latency = 75;
}
def : InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;

// FSTENVm.
def SKXWriteResGroup266
    : SchedWriteRes<[SKXPort0, SKXPort1, SKXPort4, SKXPort5, SKXPort6,
                     SKXPort237, SKXPort06, SKXPort0156]> {
  let ResourceCycles = [9, 1, 11, 16, 1, 11, 21, 30];
  let NumMicroOps = 100;
  let Latency = 106;
}
def : InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>;

// PAUSE: few uops, but modelled with a 140-cycle latency.
def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6, SKXPort0156]> {
  let ResourceCycles = [1, 3];
  let NumMicroOps = 4;
  let Latency = 140;
}
def : InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;

def : InstRW<[WriteZero], (instrs CLC)>;


// Instruction variants handled by the renamer. Under certain conditions these
// might not need an execution port at all.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
// They can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

// Write used when an instruction is recognised as a zero idiom: it consumes
// no processor resource and has zero latency.
def SKXWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Every variant below has the same shape: the ZeroIdiomPredicate case selects
// SKXWriteZeroLatency, and the NoSchedPred case falls back to the write the
// instruction would otherwise use.

// GPR SUB/XOR.
def SKXWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[SKXWriteZeroIdiom],
             (instrs SUB32rr, SUB64rr,
                     XOR32rr, XOR64rr)>;

// FP XOR falling back to WriteFLogic.
def SKXWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[SKXWriteFZeroIdiom],
             (instrs XORPSrr, VXORPSrr,
                     XORPDrr, VXORPDrr,
                     VXORPSZ128rr,
                     VXORPDZ128rr)>;

// FP XOR falling back to WriteFLogicY.
def SKXWriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[SKXWriteFZeroIdiomY],
             (instrs VXORPSYrr, VXORPDYrr,
                     VXORPSZ256rr, VXORPDZ256rr)>;

// FP XOR falling back to WriteFLogicZ.
def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicZ]>
]>;
def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;

// Integer vector XOR falling back to WriteVecLogicX.
def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicX],
             (instrs PXORrr, VPXORrr,
                     VPXORDZ128rr, VPXORQZ128rr)>;

// Integer vector XOR falling back to WriteVecLogicY.
def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicY],
             (instrs VPXORYrr,
                     VPXORDZ256rr, VPXORQZ256rr)>;

// Integer vector XOR falling back to WriteVecLogicZ.
def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicZ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;

// PCMPGT{B,D,W} falling back to WriteVecALUX.
def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUX],
             (instrs PCMPGTBrr, VPCMPGTBrr,
                     PCMPGTDrr, VPCMPGTDrr,
                     PCMPGTWrr, VPCMPGTWrr)>;

// VPCMPGT{B,D,W}Y falling back to WriteVecALUY.
def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUY],
             (instrs VPCMPGTBYrr,
                     VPCMPGTDYrr,
                     VPCMPGTWYrr)>;

// Fallback write for the PSUB family: one uop on SKXPort015, latency 1.
def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
  let ResourceCycles = [1];
  let NumMicroOps = 1;
  let Latency = 1;
}

def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [SKXWritePSUB]>
]>;

def : InstRW<[SKXWriteVZeroIdiomPSUB],
             (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
                     PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
                     PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
                     PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
                     VPSUBBYrr, VPSUBBZ256rr,
                     VPSUBDYrr, VPSUBDZ256rr,
                     VPSUBQYrr, VPSUBQZ256rr,
                     VPSUBWYrr, VPSUBWZ256rr,
                     VPSUBBZrr,
                     VPSUBDZrr,
                     VPSUBQZrr,
                     VPSUBWZrr)>;

// Fallback write for PCMPGTQ: one uop on SKXPort5, latency 3.
def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
  let ResourceCycles = [1];
  let NumMicroOps = 1;
  let Latency = 3;
}

def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [SKXWritePCMPGTQ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ],
             (instrs PCMPGTQrr, VPCMPGTQrr,
                     VPCMPGTQYrr)>;


// CMOVs that use both Z and C flag require an extra uop.

// Register form: two uops on SKXPort06.
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
  let Latency = 2;
}

// Memory form: the register form plus one SKXPort23 uop.
def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23, SKXPort06]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 3;
  let Latency = 7;
}

// Use the two-uop write when the IsCMOVA*_Or_CMOVBE* predicate holds;
// otherwise use the generic WriteCMOV (or its folded-load form).
def SKXCMOVA_CMOVBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred, [WriteCMOV]>
]>;

def SKXCMOVA_CMOVBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

// SETCCs that use both Z and C flag require an extra uop.
// Register-destination SETA/SETBE: two uops, both issued on ports 0/6.
def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
  let NumMicroOps = 2;
  let ResourceCycles = [2];
  let Latency = 2;
}

// Memory-destination SETA/SETBE: one cycle on port 4, one on ports 2/3/7 and
// two on ports 0/6, four uops in total.
def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort06]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 2];
  let Latency = 3;
}

// Use the writes above only when the SETA/SETBE predicate matches; every other
// SETCC falls back to the generic WriteSETCC / WriteSETCCStore.
def SKXSETA_SETBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
  SchedVar<NoSchedPred, [WriteSETCC]>
]>;

def SKXSETA_SETBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;

def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Opcodes checked against ZeroIdiomPredicate, grouped by instruction family.
def : IsZeroIdiomFunction<[
  // GPR zero idioms.
  DepBreakingClass<[
    SUB32rr, SUB64rr,
    XOR32rr, XOR64rr
  ], ZeroIdiomPredicate>,

  // SSE zero idioms.
  DepBreakingClass<[
    // Floating-point variants.
    XORPSrr, XORPDrr,

    // Integer variants.
    PXORrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX zero idioms.
  DepBreakingClass<[
    // xmm floating-point variants.
    VXORPSrr, VXORPDrr,

    // xmm integer variants.
    VPXORrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VPXORYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,

    // AVX-512 variants (opcodes with Z, Z128 and Z256 suffixes).
    VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
    VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
    VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
    VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
    VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
    VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr
  ], ZeroIdiomPredicate>
]>;

} // let SchedModel = SkylakeServerModel