1//=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the machine model for Ice Lake to support 10// instruction scheduling and other instruction cost heuristics. 11// 12// TODO: This is mainly a copy X86SchedSkylakeServer.td, but allows us to 13// iteratively improve scheduling handling toward better modelling the 14// Ice Lake (Sunny/Cypress Cove) microarchitecture. 15// 16//===----------------------------------------------------------------------===// 17 18def IceLakeModel : SchedMachineModel { 19 // All x86 instructions are modeled as a single micro-op, and Ice Lake can 20 // decode 6 instructions per cycle. 21 let IssueWidth = 6; 22 let MicroOpBufferSize = 352; // Based on the reorder buffer. 23 let LoadLatency = 5; 24 let MispredictPenalty = 14; 25 26 // Based on the LSD (loop-stream detector) queue size and benchmarking data. 27 let LoopMicroOpBufferSize = 50; 28 29 // This flag is set to allow the scheduler to assign a default model to 30 // unrecognized opcodes. 31 let CompleteModel = 0; 32} 33 34let SchedModel = IceLakeModel in { 35 36// Ice Lake can issue micro-ops to 8 different ports in one cycle. 37 38// Ports 0, 1, 5, and 6 handle all computation. 39// Ports 4 and 9 gets the data half of stores. Store data can be available later 40// than the store address, but since we don't model the latency of stores, we 41// can ignore that. 42// Ports 2 and 3 are identical. They handle loads and address calculations. 43// Ports 7 and 8 are identical. They handle stores address calculations. 44def ICXPort0 : ProcResource<1>; 45def ICXPort1 : ProcResource<1>; 46def ICXPort2 : ProcResource<1>; 47def ICXPort3 : ProcResource<1>; 48def ICXPort4 : ProcResource<1>; 49def ICXPort5 : ProcResource<1>; 50def ICXPort6 : ProcResource<1>; 51def ICXPort7 : ProcResource<1>; 52def ICXPort8 : ProcResource<1>; 53def ICXPort9 : ProcResource<1>; 54 55// Many micro-ops are capable of issuing on multiple ports. 56def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>; 57def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>; 58def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>; 59def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>; 60def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>; 61def ICXPort15 : ProcResGroup<[ICXPort1, ICXPort5]>; 62def ICXPort16 : ProcResGroup<[ICXPort1, ICXPort6]>; 63def ICXPort49 : ProcResGroup<[ICXPort4, ICXPort9]>; 64def ICXPort56 : ProcResGroup<[ICXPort5, ICXPort6]>; 65def ICXPort78 : ProcResGroup<[ICXPort7, ICXPort8]>; 66def ICXPort015 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>; 67def ICXPort056 : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>; 68def ICXPort0156: ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>; 69 70def ICXDivider : ProcResource<1>; // Integer division issued on port 0. 71// FP division and sqrt on port 0. 72def ICXFPDivider : ProcResource<1>; 73 74// 60 Entry Unified Scheduler 75def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3, ICXPort4, 76 ICXPort5, ICXPort6, ICXPort7, ICXPort8, ICXPort9]> { 77 let BufferSize=60; 78} 79 80// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 81// cycles after the memory operand. 82def : ReadAdvance<ReadAfterLd, 5>; 83 84// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available 85// until 5/6/7 cycles after the memory operand. 86def : ReadAdvance<ReadAfterVecLd, 5>; 87def : ReadAdvance<ReadAfterVecXLd, 6>; 88def : ReadAdvance<ReadAfterVecYLd, 7>; 89 90def : ReadAdvance<ReadInt2Fpu, 0>; 91 92// Many SchedWrites are defined in pairs with and without a folded load. 93// Instructions with folded loads are usually micro-fused, so they only appear 94// as two micro-ops when queued in the reservation station. 95// This multiclass defines the resource usage for variants with and without 96// folded loads. 97multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW, 98 list<ProcResourceKind> ExePorts, 99 int Lat, list<int> Res = [1], int UOps = 1, 100 int LoadLat = 5, int LoadUOps = 1> { 101 // Register variant is using a single cycle on ExePort. 102 def : WriteRes<SchedRW, ExePorts> { 103 let Latency = Lat; 104 let ReleaseAtCycles = Res; 105 let NumMicroOps = UOps; 106 } 107 108 // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to 109 // the latency (default = 5). 110 def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> { 111 let Latency = !add(Lat, LoadLat); 112 let ReleaseAtCycles = !listconcat([1], Res); 113 let NumMicroOps = !add(UOps, LoadUOps); 114 } 115} 116 117// A folded store needs a cycle on port 4 for the store data, and an extra port 118// 2/3/7 cycle to recompute the address. 119def : WriteRes<WriteRMW, [ICXPort78,ICXPort49]>; 120 121// Arithmetic. 122defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op. 123defm : ICXWriteResPair<WriteADC, [ICXPort06], 1>; // Integer ALU + flags op. 124 125// Integer multiplication. 126defm : ICXWriteResPair<WriteIMul8, [ICXPort1], 3>; 127defm : ICXWriteResPair<WriteIMul16, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,2], 4>; 128defm : X86WriteRes<WriteIMul16Imm, [ICXPort1,ICXPort0156], 4, [1,1], 2>; 129defm : X86WriteRes<WriteIMul16ImmLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>; 130defm : X86WriteRes<WriteIMul16Reg, [ICXPort1], 3, [1], 1>; 131defm : X86WriteRes<WriteIMul16RegLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>; 132defm : ICXWriteResPair<WriteIMul32, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,1], 3>; 133defm : ICXWriteResPair<WriteMULX32, [ICXPort1,ICXPort06,ICXPort0156], 3, [1,1,1], 3>; 134defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>; 135defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>; 136defm : ICXWriteResPair<WriteIMul64, [ICXPort1,ICXPort5], 4, [1,1], 2>; 137defm : ICXWriteResPair<WriteMULX64, [ICXPort1,ICXPort5], 3, [1,1], 2>; 138defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>; 139defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>; 140def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; } 141def : WriteRes<WriteIMulHLd, []> { 142 let Latency = !add(ICXWriteIMulH.Latency, SkylakeServerModel.LoadLatency); 143} 144 145defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>; 146defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>; 147defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>; 148defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort78,ICXPort49], 8, [1,2,1,1,1], 6>; 149defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>; 150 151// TODO: Why isn't the ICXDivider used? 152defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1, 4>; 153defm : X86WriteRes<WriteDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>; 154defm : X86WriteRes<WriteDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>; 155defm : X86WriteRes<WriteDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>; 156defm : X86WriteRes<WriteDiv16Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>; 157defm : X86WriteRes<WriteDiv32Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>; 158defm : X86WriteRes<WriteDiv64Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>; 159 160defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1>; 161defm : X86WriteRes<WriteIDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>; 162defm : X86WriteRes<WriteIDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>; 163defm : X86WriteRes<WriteIDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>; 164defm : X86WriteRes<WriteIDiv8Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>; 165defm : X86WriteRes<WriteIDiv16Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>; 166defm : X86WriteRes<WriteIDiv32Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>; 167defm : X86WriteRes<WriteIDiv64Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>; 168 169defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>; 170 171def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads. 172 173defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move. 174defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move. 175def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc. 176def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort49,ICXPort78]> { 177 let Latency = 2; 178 let NumMicroOps = 3; 179} 180defm : X86WriteRes<WriteLAHFSAHF, [ICXPort06], 1, [1], 1>; 181defm : X86WriteRes<WriteBitTest, [ICXPort06], 1, [1], 1>; 182defm : X86WriteRes<WriteBitTestImmLd, [ICXPort06,ICXPort23], 6, [1,1], 2>; 183defm : X86WriteRes<WriteBitTestRegLd, [ICXPort0156,ICXPort23], 6, [1,1], 2>; 184defm : X86WriteRes<WriteBitTestSet, [ICXPort06], 1, [1], 1>; 185defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06,ICXPort23], 5, [1,1], 3>; 186defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156,ICXPort23], 5, [1,1], 2>; 187 188// Integer shifts and rotates. 189defm : ICXWriteResPair<WriteShift, [ICXPort06], 1>; 190defm : ICXWriteResPair<WriteShiftCL, [ICXPort06], 3, [3], 3>; 191defm : ICXWriteResPair<WriteRotate, [ICXPort06], 1, [1], 1>; 192defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>; 193 194// SHLD/SHRD. 195defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>; 196defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>; 197defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort78,ICXPort0156], 9, [1, 1, 1, 1], 4>; 198defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>; 199 200// Bit counts. 201defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>; 202defm : ICXWriteResPair<WriteBSR, [ICXPort1], 3>; 203defm : ICXWriteResPair<WriteLZCNT, [ICXPort1], 3>; 204defm : ICXWriteResPair<WriteTZCNT, [ICXPort1], 3>; 205defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>; 206 207// BMI1 BEXTR/BLS, BMI2 BZHI 208defm : ICXWriteResPair<WriteBEXTR, [ICXPort06,ICXPort15], 2, [1,1], 2>; 209defm : ICXWriteResPair<WriteBLS, [ICXPort15], 1>; 210defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>; 211 212// Loads, stores, and moves, not folded with other operations. 213defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>; 214defm : X86WriteRes<WriteStore, [ICXPort78, ICXPort49], 1, [1,1], 1>; 215defm : X86WriteRes<WriteStoreNT, [ICXPort78, ICXPort49], 1, [1,1], 2>; 216defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>; 217 218// Model the effect of clobbering the read-write mask operand of the GATHER operation. 219// Does not cost anything by itself, only has latency, matching that of the WriteLoad, 220defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>; 221 222// Idioms that clear a register, like xorps %xmm0, %xmm0. 223// These can often bypass execution ports completely. 224def : WriteRes<WriteZero, []>; 225 226// Branches don't produce values, so they have no latency, but they still 227// consume resources. Indirect branches can fold loads. 228defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>; 229 230// Floating point. This covers both scalar and vector operations. 231defm : X86WriteRes<WriteFLD0, [ICXPort05], 1, [1], 1>; 232defm : X86WriteRes<WriteFLD1, [ICXPort05], 1, [2], 2>; 233defm : X86WriteRes<WriteFLDC, [ICXPort05], 1, [2], 2>; 234defm : X86WriteRes<WriteFLoad, [ICXPort23], 5, [1], 1>; 235defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>; 236defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>; 237defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>; 238defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>; 239defm : X86WriteRes<WriteFStore, [ICXPort78,ICXPort49], 1, [1,1], 2>; 240defm : X86WriteRes<WriteFStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>; 241defm : X86WriteRes<WriteFStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>; 242defm : X86WriteRes<WriteFStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>; 243defm : X86WriteRes<WriteFStoreNTX, [ICXPort78,ICXPort49], 1, [1,1], 2>; 244defm : X86WriteRes<WriteFStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>; 245 246defm : X86WriteRes<WriteFMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 247defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 248defm : X86WriteRes<WriteFMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 249defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 250 251defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>; 252defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>; 253defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>; 254defm : X86WriteRes<WriteFMoveZ, [ICXPort05], 1, [1], 1>; 255defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>; 256 257defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub. 258defm : ICXWriteResPair<WriteFAddX, [ICXPort01], 4, [1], 1, 6>; 259defm : ICXWriteResPair<WriteFAddY, [ICXPort01], 4, [1], 1, 7>; 260defm : ICXWriteResPair<WriteFAddZ, [ICXPort05], 4, [1], 1, 7>; 261defm : ICXWriteResPair<WriteFAdd64, [ICXPort01], 4, [1], 1, 5>; // Floating point double add/sub. 262defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>; 263defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>; 264defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort05], 4, [1], 1, 7>; 265 266defm : ICXWriteResPair<WriteFCmp, [ICXPort01], 4, [1], 1, 5>; // Floating point compare. 267defm : ICXWriteResPair<WriteFCmpX, [ICXPort01], 4, [1], 1, 6>; 268defm : ICXWriteResPair<WriteFCmpY, [ICXPort01], 4, [1], 1, 7>; 269defm : ICXWriteResPair<WriteFCmpZ, [ICXPort05], 4, [1], 1, 7>; 270defm : ICXWriteResPair<WriteFCmp64, [ICXPort01], 4, [1], 1, 5>; // Floating point double compare. 271defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>; 272defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>; 273defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>; 274 275defm : ICXWriteResPair<WriteFCom, [ICXPort0], 2>; // Floating point compare to flags (X87). 276defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // Floating point compare to flags (SSE). 277 278defm : ICXWriteResPair<WriteFMul, [ICXPort01], 4, [1], 1, 5>; // Floating point multiplication. 279defm : ICXWriteResPair<WriteFMulX, [ICXPort01], 4, [1], 1, 6>; 280defm : ICXWriteResPair<WriteFMulY, [ICXPort01], 4, [1], 1, 7>; 281defm : ICXWriteResPair<WriteFMulZ, [ICXPort05], 4, [1], 1, 7>; 282defm : ICXWriteResPair<WriteFMul64, [ICXPort01], 4, [1], 1, 5>; // Floating point double multiplication. 283defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>; 284defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>; 285defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>; 286 287defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division. 288defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. 289defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. 290defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. 291defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division. 292defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles. 293defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles. 294defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. 295 296defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root. 297defm : ICXWriteResPair<WriteFSqrtX, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 6>; 298defm : ICXWriteResPair<WriteFSqrtY, [ICXPort0,ICXFPDivider], 12, [1,6], 1, 7>; 299defm : ICXWriteResPair<WriteFSqrtZ, [ICXPort0,ICXPort5,ICXFPDivider], 20, [2,1,12], 3, 7>; 300defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root. 301defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 6>; 302defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0,ICXFPDivider], 18, [1,12],1, 7>; 303defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0,ICXPort5,ICXFPDivider], 32, [2,1,24], 3, 7>; 304defm : ICXWriteResPair<WriteFSqrt80, [ICXPort0,ICXFPDivider], 21, [1,7]>; // Floating point long double square root. 305 306defm : ICXWriteResPair<WriteFRcp, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate. 307defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>; 308defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>; 309defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0,ICXPort5], 4, [2,1], 3, 7>; 310 311defm : ICXWriteResPair<WriteFRsqrt, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate. 312defm : ICXWriteResPair<WriteFRsqrtX,[ICXPort0], 4, [1], 1, 6>; 313defm : ICXWriteResPair<WriteFRsqrtY,[ICXPort0], 4, [1], 1, 7>; 314defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>; 315 316defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add. 317defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>; 318defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>; 319defm : ICXWriteResPair<WriteFMAZ, [ICXPort0], 4, [1], 1, 7>; 320defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product. 321defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>; 322defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>; 323defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>; // Floating point fabs/fchs. 324defm : ICXWriteResPair<WriteFRnd, [ICXPort01], 8, [2], 2, 6>; // Floating point rounding. 325defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>; 326defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>; 327defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. 328defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>; 329defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>; 330defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions. 331defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>; 332defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>; 333defm : ICXWriteResPair<WriteFShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector shuffles. 334defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>; 335defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>; 336defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector variable shuffles. 337defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>; 338defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>; 339defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends. 340defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>; 341defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>; 342defm : ICXWriteResPair<WriteFVarBlend, [ICXPort015], 2, [2], 2, 6>; // Fp vector variable blends. 343defm : ICXWriteResPair<WriteFVarBlendY,[ICXPort015], 2, [2], 2, 7>; 344defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>; 345 346// FMA Scheduling helper class. 347// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } 348 349// Vector integer operations. 350defm : X86WriteRes<WriteVecLoad, [ICXPort23], 5, [1], 1>; 351defm : X86WriteRes<WriteVecLoadX, [ICXPort23], 6, [1], 1>; 352defm : X86WriteRes<WriteVecLoadY, [ICXPort23], 7, [1], 1>; 353defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>; 354defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>; 355defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>; 356defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>; 357defm : X86WriteRes<WriteVecStore, [ICXPort78,ICXPort49], 1, [1,1], 2>; 358defm : X86WriteRes<WriteVecStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>; 359defm : X86WriteRes<WriteVecStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>; 360defm : X86WriteRes<WriteVecStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>; 361defm : X86WriteRes<WriteVecStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>; 362defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 363defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 364defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 365defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>; 366defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>; 367defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>; 368defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>; 369defm : X86WriteRes<WriteVecMoveZ, [ICXPort05], 1, [1], 1>; 370defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>; 371defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>; 372 373defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. 374defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>; 375defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>; 376defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0], 1, [1], 1, 7>; 377defm : ICXWriteResPair<WriteVecLogic, [ICXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor. 378defm : ICXWriteResPair<WriteVecLogicX,[ICXPort015], 1, [1], 1, 6>; 379defm : ICXWriteResPair<WriteVecLogicY,[ICXPort015], 1, [1], 1, 7>; 380defm : ICXWriteResPair<WriteVecLogicZ,[ICXPort05], 1, [1], 1, 7>; 381defm : ICXWriteResPair<WriteVecTest, [ICXPort0,ICXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions. 382defm : ICXWriteResPair<WriteVecTestY, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>; 383defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>; 384defm : ICXWriteResPair<WriteVecIMul, [ICXPort0], 5, [1], 1, 5>; // Vector integer multiply. 385defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>; 386defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>; 387defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>; 388defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD. 389defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>; 390defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>; 391defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles. 392defm : ICXWriteResPair<WriteShuffleX, [ICXPort15], 1, [1], 1, 6>; 393defm : ICXWriteResPair<WriteShuffleY, [ICXPort15], 1, [1], 1, 7>; 394defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>; 395defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles. 396defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort15], 1, [1], 1, 6>; 397defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort15], 1, [1], 1, 7>; 398defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>; 399defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends. 400defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>; 401defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>; 402defm : ICXWriteResPair<WriteVarBlend, [ICXPort015], 2, [2], 2, 6>; // Vector variable blends. 403defm : ICXWriteResPair<WriteVarBlendY,[ICXPort015], 2, [2], 2, 6>; 404defm : ICXWriteResPair<WriteVarBlendZ,[ICXPort05], 2, [1], 1, 6>; 405defm : ICXWriteResPair<WriteMPSAD, [ICXPort5], 4, [2], 2, 6>; // Vector MPSAD. 406defm : ICXWriteResPair<WriteMPSADY, [ICXPort5], 4, [2], 2, 7>; 407defm : ICXWriteResPair<WriteMPSADZ, [ICXPort5], 4, [2], 2, 7>; 408defm : ICXWriteResPair<WritePSADBW, [ICXPort5], 3, [1], 1, 5>; // Vector PSADBW. 409defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>; 410defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>; 411defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined. 412defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>; // Vector PHMINPOS. 413 414// Vector integer shifts. 415defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>; 416defm : X86WriteRes<WriteVecShiftX, [ICXPort5,ICXPort01], 2, [1,1], 2>; 417defm : X86WriteRes<WriteVecShiftY, [ICXPort5,ICXPort01], 4, [1,1], 2>; 418defm : X86WriteRes<WriteVecShiftZ, [ICXPort5,ICXPort0], 4, [1,1], 2>; 419defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01,ICXPort23], 7, [1,1], 2>; 420defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01,ICXPort23], 8, [1,1], 2>; 421defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0,ICXPort23], 8, [1,1], 2>; 422 423defm : ICXWriteResPair<WriteVecShiftImm, [ICXPort0], 1, [1], 1, 5>; 424defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts. 425defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>; 426defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0], 1, [1], 1, 7>; 427defm : ICXWriteResPair<WriteVarVecShift, [ICXPort01], 1, [1], 1, 6>; // Variable vector shifts. 428defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>; 429defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>; 430 431// Vector insert/extract operations. 432def : WriteRes<WriteVecInsert, [ICXPort5]> { 433 let Latency = 2; 434 let NumMicroOps = 2; 435 let ReleaseAtCycles = [2]; 436} 437def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> { 438 let Latency = 6; 439 let NumMicroOps = 2; 440} 441def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>; 442 443def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> { 444 let Latency = 3; 445 let NumMicroOps = 2; 446} 447def : WriteRes<WriteVecExtractSt, [ICXPort49,ICXPort5,ICXPort78]> { 448 let Latency = 2; 449 let NumMicroOps = 3; 450} 451 452// Conversion between integer and float. 453defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ. 454defm : ICXWriteResPair<WriteCvtPS2I, [ICXPort01], 3>; 455defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>; 456defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>; 457defm : ICXWriteResPair<WriteCvtSD2I, [ICXPort01], 6, [2], 2>; 458defm : ICXWriteResPair<WriteCvtPD2I, [ICXPort01], 3>; 459defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>; 460defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>; 461 462defm : ICXWriteResPair<WriteCvtI2SS, [ICXPort1], 4>; 463defm : ICXWriteResPair<WriteCvtI2PS, [ICXPort01], 4>; 464defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>; 465defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ. 466defm : ICXWriteResPair<WriteCvtI2SD, [ICXPort1], 4>; 467defm : ICXWriteResPair<WriteCvtI2PD, [ICXPort01], 4>; 468defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>; 469defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>; 470 471defm : ICXWriteResPair<WriteCvtSS2SD, [ICXPort1], 3>; 472defm : ICXWriteResPair<WriteCvtPS2PD, [ICXPort1], 3>; 473defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5,ICXPort01], 3, [1,1], 2>; 474defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>; 475defm : ICXWriteResPair<WriteCvtSD2SS, [ICXPort5,ICXPort01], 5, [1,1], 2, 5>; 476defm : ICXWriteResPair<WriteCvtPD2PS, [ICXPort5,ICXPort01], 5, [1,1], 2, 6>; 477defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2, 7>; 478defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2, 7>; 479 480defm : X86WriteRes<WriteCvtPH2PS, [ICXPort5,ICXPort01], 5, [1,1], 2>; 481defm : X86WriteRes<WriteCvtPH2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2>; 482defm : X86WriteRes<WriteCvtPH2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2>; 483defm : X86WriteRes<WriteCvtPH2PSLd, [ICXPort23,ICXPort01], 9, [1,1], 2>; 484defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23,ICXPort01], 10, [1,1], 2>; 485defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>; 486 487defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>; 488defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>; 489defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>; 490defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 6, [1,1,1,1], 4>; 491defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 8, [1,1,1,1], 4>; 492defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort05], 8, [1,1,1,1], 4>; 493 494// Strings instructions. 495 496// Packed Compare Implicit Length Strings, Return Mask 497def : WriteRes<WritePCmpIStrM, [ICXPort0]> { 498 let Latency = 10; 499 let NumMicroOps = 3; 500 let ReleaseAtCycles = [3]; 501} 502def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> { 503 let Latency = 16; 504 let NumMicroOps = 4; 505 let ReleaseAtCycles = [3,1]; 506} 507 508// Packed Compare Explicit Length Strings, Return Mask 509def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> { 510 let Latency = 19; 511 let NumMicroOps = 9; 512 let ReleaseAtCycles = [4,3,1,1]; 513} 514def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> { 515 let Latency = 25; 516 let NumMicroOps = 10; 517 let ReleaseAtCycles = [4,3,1,1,1]; 518} 519 520// Packed Compare Implicit Length Strings, Return Index 521def : WriteRes<WritePCmpIStrI, [ICXPort0]> { 522 let Latency = 10; 523 let NumMicroOps = 3; 524 let ReleaseAtCycles = [3]; 525} 526def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> { 527 let Latency = 16; 528 let NumMicroOps = 4; 529 let ReleaseAtCycles = [3,1]; 530} 531 532// Packed Compare Explicit Length Strings, Return Index 533def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> { 534 let Latency = 18; 535 let NumMicroOps = 8; 536 let ReleaseAtCycles = [4,3,1]; 537} 538def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> { 539 let Latency = 24; 540 let NumMicroOps = 9; 541 let ReleaseAtCycles = [4,3,1,1]; 542} 543 544// MOVMSK Instructions. 545def : WriteRes<WriteFMOVMSK, [ICXPort0]> { let Latency = 2; } 546def : WriteRes<WriteVecMOVMSK, [ICXPort0]> { let Latency = 2; } 547def : WriteRes<WriteVecMOVMSKY, [ICXPort0]> { let Latency = 2; } 548def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; } 549 550// AES instructions. 551def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption. 552 let Latency = 4; 553 let NumMicroOps = 1; 554 let ReleaseAtCycles = [1]; 555} 556def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> { 557 let Latency = 10; 558 let NumMicroOps = 2; 559 let ReleaseAtCycles = [1,1]; 560} 561 562def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn. 563 let Latency = 8; 564 let NumMicroOps = 2; 565 let ReleaseAtCycles = [2]; 566} 567def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> { 568 let Latency = 14; 569 let NumMicroOps = 3; 570 let ReleaseAtCycles = [2,1]; 571} 572 573def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation. 574 let Latency = 20; 575 let NumMicroOps = 11; 576 let ReleaseAtCycles = [3,6,2]; 577} 578def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> { 579 let Latency = 25; 580 let NumMicroOps = 11; 581 let ReleaseAtCycles = [3,6,1,1]; 582} 583 584// Carry-less multiplication instructions. 585def : WriteRes<WriteCLMul, [ICXPort5]> { 586 let Latency = 6; 587 let NumMicroOps = 1; 588 let ReleaseAtCycles = [1]; 589} 590def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> { 591 let Latency = 12; 592 let NumMicroOps = 2; 593 let ReleaseAtCycles = [1,1]; 594} 595 596// Catch-all for expensive system instructions. 597def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite; 598 599// AVX2. 600defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles. 601defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles. 602defm : ICXWriteResPair<WriteShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles. 603defm : ICXWriteResPair<WriteVPMOV256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move. 604defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles. 605 606// Old microcoded instructions that nobody use. 607def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; 608 609// Fence instructions. 610def : WriteRes<WriteFence, [ICXPort78, ICXPort49]>; 611 612// Load/store MXCSR. 613def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; } 614def : WriteRes<WriteSTMXCSR, [ICXPort49,ICXPort5,ICXPort78]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; } 615 616// Nop, not very useful expect it provides a model for nops! 617def : WriteRes<WriteNop, []>; 618 619//////////////////////////////////////////////////////////////////////////////// 620// Horizontal add/sub instructions. 621//////////////////////////////////////////////////////////////////////////////// 622 623defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>; 624defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>; 625defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>; 626defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>; 627defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>; 628 629// Remaining instrs. 630 631def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> { 632 let Latency = 1; 633 let NumMicroOps = 1; 634 let ReleaseAtCycles = [1]; 635} 636def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr", 637 "KANDN(B|D|Q|W)rr", 638 "KMOV(B|D|Q|W)kk", 639 "KNOT(B|D|Q|W)rr", 640 "KOR(B|D|Q|W)rr", 641 "KXNOR(B|D|Q|W)rr", 642 "KXOR(B|D|Q|W)rr", 643 "KSET0(B|D|Q|W)", // Same as KXOR 644 "KSET1(B|D|Q|W)", // Same as KXNOR 645 "MMX_PADDS(B|W)rr", 646 "MMX_PADDUS(B|W)rr", 647 "MMX_PAVG(B|W)rr", 648 "MMX_PCMPEQ(B|D|W)rr", 649 "MMX_PCMPGT(B|D|W)rr", 650 "MMX_P(MAX|MIN)SWrr", 651 "MMX_P(MAX|MIN)UBrr", 652 "MMX_PSUBS(B|W)rr", 653 "MMX_PSUBUS(B|W)rr", 654 "VPMOVB2M(Z|Z128|Z256)rr", 655 "VPMOVD2M(Z|Z128|Z256)rr", 656 "VPMOVQ2M(Z|Z128|Z256)rr", 657 "VPMOVW2M(Z|Z128|Z256)rr")>; 658 659def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> { 660 let Latency = 1; 661 let NumMicroOps = 1; 662 let ReleaseAtCycles = [1]; 663} 664def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r", 665 "KMOV(B|D|Q|W)kr", 666 "UCOM_F(P?)r", 667 "VPBROADCAST(D|Q)rr", 668 "(V?)INSERTPS(Z?)rr", 669 "(V?)MOV(HL|LH)PS(Z?)rr", 670 "(V?)MOVDDUP(Y|Z128|Z256)?rr", 671 "(V?)PALIGNR(Y|Z128|Z256)?rri", 672 "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri", 673 "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr", 674 "(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>; 675 676def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> { 677 let Latency = 1; 678 let NumMicroOps = 1; 679 let ReleaseAtCycles = [1]; 680} 681def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>; 682 683def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> { 684 let Latency = 1; 685 let NumMicroOps = 1; 686 let ReleaseAtCycles = [1]; 687} 688def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>; 689 690def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> { 691 let Latency = 1; 692 let NumMicroOps = 1; 693 let ReleaseAtCycles = [1]; 694} 695def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; 696 697def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> { 698 let Latency = 1; 699 let NumMicroOps = 1; 700 let ReleaseAtCycles = [1]; 701} 702def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>; 703 704def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> { 705 let Latency = 1; 706 let NumMicroOps = 1; 707 let ReleaseAtCycles = [1]; 708} 709def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr", 710 "VBLENDMPS(Z128|Z256)rr", 711 "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr", 712 "(V?)PADD(B|D|Q|W)rr", 713 "(V?)MOV(SD|SS)(Z?)rr", 714 "VPBLENDD(Y?)rri", 715 "VPBLENDMB(Z128|Z256)rr", 716 "VPBLENDMD(Z128|Z256)rr", 717 "VPBLENDMQ(Z128|Z256)rr", 718 "VPBLENDMW(Z128|Z256)rr", 719 "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk", 720 "VPTERNLOGD(Z|Z128|Z256)rri", 721 "VPTERNLOGQ(Z|Z128|Z256)rri")>; 722 723def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> { 724 let Latency = 1; 725 let NumMicroOps = 1; 726 let ReleaseAtCycles = [1]; 727} 728def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m, 729 SIDT64m, 730 SMSW16m, 731 STRm, 732 SYSCALL)>; 733 734def ICXWriteResGroup11 : SchedWriteRes<[ICXPort49,ICXPort78]> { 735 let Latency = 1; 736 let NumMicroOps = 2; 737 let ReleaseAtCycles = [1,1]; 738} 739def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>; 740def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk", 741 "ST_FP(32|64|80)m")>; 742 743def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> { 744 let Latency = 2; 745 let NumMicroOps = 2; 746 let ReleaseAtCycles = [2]; 747} 748def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>; 749 750def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> { 751 let Latency = 2; 752 let NumMicroOps = 2; 753 let ReleaseAtCycles = [2]; 754} 755def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP, 756 MMX_MOVDQ2Qrr)>; 757 758def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> { 759 let Latency = 2; 760 let NumMicroOps = 2; 761 let ReleaseAtCycles = [2]; 762} 763def: InstRW<[ICXWriteResGroup17], (instrs LFENCE, 764 WAIT, 765 XGETBV)>; 766 767def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> { 768 let Latency = 2; 769 let NumMicroOps = 2; 770 let ReleaseAtCycles = [1,1]; 771} 772def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>; 773 774def ICXWriteResGroup21 : SchedWriteRes<[ICXPort49,ICXPort78]> { 775 let Latency = 2; 776 let NumMicroOps = 2; 777 let ReleaseAtCycles = [1,1]; 778} 779def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>; 780 781def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> { 782 let Latency = 2; 783 let NumMicroOps = 2; 784 let ReleaseAtCycles = [1,1]; 785} 786def: InstRW<[ICXWriteResGroup23], (instrs CWD, 787 JCXZ, JECXZ, JRCXZ, 788 ADC8i8, SBB8i8, 789 ADC16i16, SBB16i16, 790 ADC32i32, SBB32i32, 791 ADC64i32, SBB64i32)>; 792 793def ICXWriteResGroup25 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78]> { 794 let Latency = 2; 795 let NumMicroOps = 3; 796 let ReleaseAtCycles = [1,1,1]; 797} 798def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>; 799 800def ICXWriteResGroup27 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> { 801 let Latency = 2; 802 let NumMicroOps = 3; 803 let ReleaseAtCycles = [1,1,1]; 804} 805def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>; 806 807def ICXWriteResGroup28 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> { 808 let Latency = 2; 809 let NumMicroOps = 3; 810 let ReleaseAtCycles = [1,1,1]; 811} 812def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8, 813 STOSB, STOSL, STOSQ, STOSW)>; 814def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; 815 816def ICXWriteResGroup29 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> { 817 let Latency = 2; 818 let NumMicroOps = 5; 819 let ReleaseAtCycles = [2,2,1]; 820} 821def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>; 822 823def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> { 824 let Latency = 3; 825 let NumMicroOps = 1; 826 let ReleaseAtCycles = [1]; 827} 828def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk", 829 "KORTEST(B|D|Q|W)rr", 830 "KTEST(B|D|Q|W)rr")>; 831 832def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> { 833 let Latency = 3; 834 let NumMicroOps = 1; 835 let ReleaseAtCycles = [1]; 836} 837def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr", 838 "PEXT(32|64)rr")>; 839 840def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> { 841 let Latency = 3; 842 let NumMicroOps = 1; 843 let ReleaseAtCycles = [1]; 844} 845def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", 846 "VALIGND(Z|Z128|Z256)rri", 847 "VALIGNQ(Z|Z128|Z256)rri", 848 "VPBROADCAST(B|W)rr", 849 "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr", 850 "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>; 851 852def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> { 853 let Latency = 4; 854 let NumMicroOps = 1; 855 let ReleaseAtCycles = [1]; 856} 857def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr", 858 "KSHIFTL(B|D|Q|W)ri", 859 "KSHIFTR(B|D|Q|W)ri", 860 "KUNPCK(BW|DQ|WD)rr", 861 "VCMPPD(Z|Z128|Z256)rri", 862 "VCMPPS(Z|Z128|Z256)rri", 863 "VCMP(SD|SS)Zrr", 864 "VFPCLASS(PD|PS)(Z|Z128|Z256)rr", 865 "VFPCLASS(SD|SS)Zrr", 866 "VPCMPB(Z|Z128|Z256)rri", 867 "VPCMPD(Z|Z128|Z256)rri", 868 "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr", 869 "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr", 870 "VPCMPQ(Z|Z128|Z256)rri", 871 "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri", 872 "VPCMPW(Z|Z128|Z256)rri", 873 "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>; 874 875def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> { 876 let Latency = 3; 877 let NumMicroOps = 2; 878 let ReleaseAtCycles = [1,1]; 879} 880def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>; 881 882def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> { 883 let Latency = 3; 884 let NumMicroOps = 3; 885 let ReleaseAtCycles = [1,2]; 886} 887def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>; 888 889def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> { 890 let Latency = 3; 891 let NumMicroOps = 3; 892 let ReleaseAtCycles = [2,1]; 893} 894def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>; 895 896def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> { 897 let Latency = 3; 898 let NumMicroOps = 3; 899 let ReleaseAtCycles = [2,1]; 900} 901def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr, 902 MMX_PACKSSWBrr, 903 MMX_PACKUSWBrr)>; 904 905def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> { 906 let Latency = 3; 907 let NumMicroOps = 3; 908 let ReleaseAtCycles = [1,2]; 909} 910def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>; 911 912def ICXWriteResGroup43 : SchedWriteRes<[ICXPort49,ICXPort78]> { 913 let Latency = 3; 914 let NumMicroOps = 3; 915 let ReleaseAtCycles = [1,2]; 916} 917def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>; 918 919def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> { 920 let Latency = 2; 921 let NumMicroOps = 3; 922 let ReleaseAtCycles = [1,2]; 923} 924def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, 925 RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; 926 927def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { 928 let Latency = 5; 929 let NumMicroOps = 7; 930 let ReleaseAtCycles = [2,3,2]; 931} 932def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; 933 934def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { 935 let Latency = 6; 936 let NumMicroOps = 7; 937 let ReleaseAtCycles = [2,3,2]; 938} 939def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; 940 941def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78]> { 942 let Latency = 3; 943 let NumMicroOps = 3; 944 let ReleaseAtCycles = [1,1,1]; 945} 946def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>; 947 948def ICXWriteResGroup47 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78,ICXPort0156]> { 949 let Latency = 3; 950 let NumMicroOps = 4; 951 let ReleaseAtCycles = [1,1,1,1]; 952} 953def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>; 954 955def ICXWriteResGroup48 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06,ICXPort0156]> { 956 let Latency = 3; 957 let NumMicroOps = 4; 958 let ReleaseAtCycles = [1,1,1,1]; 959} 960def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>; 961 962def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> { 963 let Latency = 4; 964 let NumMicroOps = 1; 965 let ReleaseAtCycles = [1]; 966} 967def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; 968 969def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> { 970 let Latency = 4; 971 let NumMicroOps = 1; 972 let ReleaseAtCycles = [1]; 973} 974def: InstRW<[ICXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr", 975 "VCVTPD2UQQ(Z128|Z256)rr", 976 "VCVTPS2DQ(Y|Z128|Z256)rr", 977 "(V?)CVTPS2DQrr", 978 "VCVTPS2UDQ(Z128|Z256)rr", 979 "VCVTTPD2QQ(Z128|Z256)rr", 980 "VCVTTPD2UQQ(Z128|Z256)rr", 981 "VCVTTPS2DQ(Z128|Z256)rr", 982 "(V?)CVTTPS2DQrr", 983 "VCVTTPS2UDQ(Z128|Z256)rr")>; 984 985def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> { 986 let Latency = 4; 987 let NumMicroOps = 1; 988 let ReleaseAtCycles = [1]; 989} 990def: InstRW<[ICXWriteResGroup50z], (instrs VCVTPD2QQZrr, 991 VCVTPD2UQQZrr, 992 VCVTPS2DQZrr, 993 VCVTPS2UDQZrr, 994 VCVTTPD2QQZrr, 995 VCVTTPD2UQQZrr, 996 VCVTTPS2DQZrr, 997 VCVTTPS2UDQZrr)>; 998 999def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> { 1000 let Latency = 4; 1001 let NumMicroOps = 2; 1002 let ReleaseAtCycles = [2]; 1003} 1004def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr", 1005 "VEXPANDPS(Z|Z128|Z256)rr", 1006 "VPEXPANDD(Z|Z128|Z256)rr", 1007 "VPEXPANDQ(Z|Z128|Z256)rr", 1008 "VPMOVDB(Z|Z128|Z256)rr", 1009 "VPMOVDW(Z|Z128|Z256)rr", 1010 "VPMOVQB(Z|Z128|Z256)rr", 1011 "VPMOVQW(Z|Z128|Z256)rr", 1012 "VPMOVSDB(Z|Z128|Z256)rr", 1013 "VPMOVSDW(Z|Z128|Z256)rr", 1014 "VPMOVSQB(Z|Z128|Z256)rr", 1015 "VPMOVSQD(Z|Z128|Z256)rr", 1016 "VPMOVSQW(Z|Z128|Z256)rr", 1017 "VPMOVSWB(Z|Z128|Z256)rr", 1018 "VPMOVUSDB(Z|Z128|Z256)rr", 1019 "VPMOVUSDW(Z|Z128|Z256)rr", 1020 "VPMOVUSQB(Z|Z128|Z256)rr", 1021 "VPMOVUSQD(Z|Z128|Z256)rr", 1022 "VPMOVUSWB(Z|Z128|Z256)rr", 1023 "VPMOVWB(Z|Z128|Z256)rr")>; 1024 1025def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> { 1026 let Latency = 4; 1027 let NumMicroOps = 3; 1028 let ReleaseAtCycles = [1,1,1]; 1029} 1030def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m", 1031 "IST_F(16|32)m", 1032 "VPMOVQD(Z|Z128|Z256)mr(b?)")>; 1033 1034def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> { 1035 let Latency = 4; 1036 let NumMicroOps = 4; 1037 let ReleaseAtCycles = [4]; 1038} 1039def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>; 1040 1041def ICXWriteResGroup56 : SchedWriteRes<[]> { 1042 let Latency = 0; 1043 let NumMicroOps = 4; 1044 let ReleaseAtCycles = []; 1045} 1046def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>; 1047 1048def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> { 1049 let Latency = 4; 1050 let NumMicroOps = 4; 1051 let ReleaseAtCycles = [1,1,2]; 1052} 1053def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; 1054 1055def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort01]> { 1056 let Latency = 5; 1057 let NumMicroOps = 2; 1058 let ReleaseAtCycles = [1,1]; 1059} 1060def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", 1061 "MMX_CVT(T?)PS2PIrr", 1062 "VCVTDQ2PDZ128rr", 1063 "VCVTPD2DQZ128rr", 1064 "(V?)CVT(T?)PD2DQrr", 1065 "VCVTPD2UDQZ128rr", 1066 "VCVTPS2PDZ128rr", 1067 "(V?)CVTPS2PDrr", 1068 "VCVTPS2QQZ128rr", 1069 "VCVTPS2UQQZ128rr", 1070 "VCVTQQ2PSZ128rr", 1071 "(V?)CVTSI(64)?2SDrr", 1072 "VCVTSI2SSZrr", 1073 "(V?)CVTSI2SSrr", 1074 "VCVTSI(64)?2SDZrr", 1075 "VCVTSS2SDZrr", 1076 "(V?)CVTSS2SDrr", 1077 "VCVTTPD2DQZ128rr", 1078 "VCVTTPD2UDQZ128rr", 1079 "VCVTTPS2QQZ128rr", 1080 "VCVTTPS2UQQZ128rr", 1081 "VCVTUDQ2PDZ128rr", 1082 "VCVTUQQ2PSZ128rr", 1083 "VCVTUSI2SSZrr", 1084 "VCVTUSI(64)?2SDZrr")>; 1085 1086def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> { 1087 let Latency = 5; 1088 let NumMicroOps = 3; 1089 let ReleaseAtCycles = [2,1]; 1090} 1091def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>; 1092 1093def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> { 1094 let Latency = 5; 1095 let NumMicroOps = 3; 1096 let ReleaseAtCycles = [1,1,1]; 1097} 1098def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>; 1099 1100def ICXWriteResGroup65 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort01]> { 1101 let Latency = 5; 1102 let NumMicroOps = 3; 1103 let ReleaseAtCycles = [1,1,1]; 1104} 1105def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)", 1106 "VCVTPS2PHZ256mr(b?)", 1107 "VCVTPS2PHZmr(b?)")>; 1108 1109def ICXWriteResGroup66 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> { 1110 let Latency = 5; 1111 let NumMicroOps = 4; 1112 let ReleaseAtCycles = [1,2,1]; 1113} 1114def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)", 1115 "VPMOVDW(Z|Z128|Z256)mr(b?)", 1116 "VPMOVQB(Z|Z128|Z256)mr(b?)", 1117 "VPMOVQW(Z|Z128|Z256)mr(b?)", 1118 "VPMOVSDB(Z|Z128|Z256)mr(b?)", 1119 "VPMOVSDW(Z|Z128|Z256)mr(b?)", 1120 "VPMOVSQB(Z|Z128|Z256)mr(b?)", 1121 "VPMOVSQD(Z|Z128|Z256)mr(b?)", 1122 "VPMOVSQW(Z|Z128|Z256)mr(b?)", 1123 "VPMOVSWB(Z|Z128|Z256)mr(b?)", 1124 "VPMOVUSDB(Z|Z128|Z256)mr(b?)", 1125 "VPMOVUSDW(Z|Z128|Z256)mr(b?)", 1126 "VPMOVUSQB(Z|Z128|Z256)mr(b?)", 1127 "VPMOVUSQD(Z|Z128|Z256)mr(b?)", 1128 "VPMOVUSQW(Z|Z128|Z256)mr(b?)", 1129 "VPMOVUSWB(Z|Z128|Z256)mr(b?)", 1130 "VPMOVWB(Z|Z128|Z256)mr(b?)")>; 1131 1132def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> { 1133 let Latency = 5; 1134 let NumMicroOps = 5; 1135 let ReleaseAtCycles = [1,4]; 1136} 1137def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>; 1138 1139def ICXWriteResGroup69 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> { 1140 let Latency = 5; 1141 let NumMicroOps = 6; 1142 let ReleaseAtCycles = [1,1,4]; 1143} 1144def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>; 1145 1146def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> { 1147 let Latency = 6; 1148 let NumMicroOps = 1; 1149 let ReleaseAtCycles = [1]; 1150} 1151def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm, 1152 VPBROADCASTDrm, 1153 VPBROADCASTQrm, 1154 VMOVSHDUPrm, 1155 VMOVSLDUPrm, 1156 VMOVDDUPrm, 1157 MOVSHDUPrm, 1158 MOVSLDUPrm, 1159 MOVDDUPrm)>; 1160 1161def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> { 1162 let Latency = 6; 1163 let NumMicroOps = 2; 1164 let ReleaseAtCycles = [2]; 1165} 1166def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>; 1167def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr", 1168 "VCOMPRESSPS(Z|Z128|Z256)rr", 1169 "VPCOMPRESSD(Z|Z128|Z256)rr", 1170 "VPCOMPRESSQ(Z|Z128|Z256)rr", 1171 "VPERMW(Z|Z128|Z256)rr")>; 1172 1173def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> { 1174 let Latency = 6; 1175 let NumMicroOps = 2; 1176 let ReleaseAtCycles = [1,1]; 1177} 1178def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm, 1179 MMX_PADDSWrm, 1180 MMX_PADDUSBrm, 1181 MMX_PADDUSWrm, 1182 MMX_PAVGBrm, 1183 MMX_PAVGWrm, 1184 MMX_PCMPEQBrm, 1185 MMX_PCMPEQDrm, 1186 MMX_PCMPEQWrm, 1187 MMX_PCMPGTBrm, 1188 MMX_PCMPGTDrm, 1189 MMX_PCMPGTWrm, 1190 MMX_PMAXSWrm, 1191 MMX_PMAXUBrm, 1192 MMX_PMINSWrm, 1193 MMX_PMINUBrm, 1194 MMX_PSUBSBrm, 1195 MMX_PSUBSWrm, 1196 MMX_PSUBUSBrm, 1197 MMX_PSUBUSWrm)>; 1198 1199def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> { 1200 let Latency = 6; 1201 let NumMicroOps = 2; 1202 let ReleaseAtCycles = [1,1]; 1203} 1204def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>; 1205def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>; 1206 1207def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> { 1208 let Latency = 6; 1209 let NumMicroOps = 2; 1210 let ReleaseAtCycles = [1,1]; 1211} 1212def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm", 1213 "MOVBE(16|32|64)rm")>; 1214 1215def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> { 1216 let Latency = 6; 1217 let NumMicroOps = 2; 1218 let ReleaseAtCycles = [1,1]; 1219} 1220def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>; 1221def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>; 1222 1223def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> { 1224 let Latency = 6; 1225 let NumMicroOps = 2; 1226 let ReleaseAtCycles = [1,1]; 1227} 1228def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>; 1229def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>; 1230 1231def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort01]> { 1232 let Latency = 6; 1233 let NumMicroOps = 3; 1234 let ReleaseAtCycles = [2,1]; 1235} 1236def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr", 1237 "VCVTSI642SSZrr", 1238 "VCVTUSI642SSZrr")>; 1239 1240def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> { 1241 let Latency = 6; 1242 let NumMicroOps = 4; 1243 let ReleaseAtCycles = [1,1,1,1]; 1244} 1245def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>; 1246 1247def ICXWriteResGroup86 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> { 1248 let Latency = 6; 1249 let NumMicroOps = 4; 1250 let ReleaseAtCycles = [1,1,1,1]; 1251} 1252def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)", 1253 "SHL(8|16|32|64)m(1|i)", 1254 "SHR(8|16|32|64)m(1|i)")>; 1255 1256def ICXWriteResGroup87 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> { 1257 let Latency = 6; 1258 let NumMicroOps = 4; 1259 let ReleaseAtCycles = [1,1,1,1]; 1260} 1261def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm", 1262 "PUSH(16|32|64)rmm")>; 1263 1264def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> { 1265 let Latency = 6; 1266 let NumMicroOps = 6; 1267 let ReleaseAtCycles = [1,5]; 1268} 1269def: InstRW<[ICXWriteResGroup88], (instrs STD)>; 1270 1271def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> { 1272 let Latency = 7; 1273 let NumMicroOps = 1; 1274 let ReleaseAtCycles = [1]; 1275} 1276def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>; 1277def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128rm, 1278 VBROADCASTI128rm, 1279 VBROADCASTSDYrm, 1280 VBROADCASTSSYrm, 1281 VMOVDDUPYrm, 1282 VMOVSHDUPYrm, 1283 VMOVSLDUPYrm, 1284 VPBROADCASTDYrm, 1285 VPBROADCASTQYrm)>; 1286 1287def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> { 1288 let Latency = 7; 1289 let NumMicroOps = 2; 1290 let ReleaseAtCycles = [1,1]; 1291} 1292def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>; 1293 1294def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1295 let Latency = 7; 1296 let NumMicroOps = 2; 1297 let ReleaseAtCycles = [1,1]; 1298} 1299def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)", 1300 "VPBROADCAST(B|W)(Z128)?rm", 1301 "(V?)INSERTPS(Z?)rm", 1302 "(V?)PALIGNR(Z128)?rmi", 1303 "(V?)PERMIL(PD|PS)(Z128)?m(b?)i", 1304 "(V?)PERMIL(PD|PS)(Z128)?rm", 1305 "(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>; 1306 1307def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> { 1308 let Latency = 7; 1309 let NumMicroOps = 2; 1310 let ReleaseAtCycles = [1,1]; 1311} 1312def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", 1313 "VCVTPD2DQ(Y|Z256)rr", 1314 "VCVTPD2UDQZ256rr", 1315 "VCVTPS2PD(Y|Z256)rr", 1316 "VCVTPS2QQZ256rr", 1317 "VCVTPS2UQQZ256rr", 1318 "VCVTQQ2PSZ256rr", 1319 "VCVTTPD2DQ(Y|Z256)rr", 1320 "VCVTTPD2UDQZ256rr", 1321 "VCVTTPS2QQZ256rr", 1322 "VCVTTPS2UQQZ256rr", 1323 "VCVTUDQ2PDZ256rr", 1324 "VCVTUQQ2PSZ256rr")>; 1325 1326def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> { 1327 let Latency = 7; 1328 let NumMicroOps = 2; 1329 let ReleaseAtCycles = [1,1]; 1330} 1331def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr, 1332 VCVTPD2DQZrr, 1333 VCVTPD2UDQZrr, 1334 VCVTPS2PDZrr, 1335 VCVTPS2QQZrr, 1336 VCVTPS2UQQZrr, 1337 VCVTQQ2PSZrr, 1338 VCVTTPD2DQZrr, 1339 VCVTTPD2UDQZrr, 1340 VCVTTPS2QQZrr, 1341 VCVTTPS2UQQZrr, 1342 VCVTUDQ2PDZrr, 1343 VCVTUQQ2PSZrr)>; 1344 1345def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> { 1346 let Latency = 7; 1347 let NumMicroOps = 2; 1348 let ReleaseAtCycles = [1,1]; 1349} 1350def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm, 1351 VPBLENDDrmi)>; 1352def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd], 1353 (instregex "VBLENDMPDZ128rm(b?)", 1354 "VBLENDMPSZ128rm(b?)", 1355 "VBROADCASTI32X2Z128rm(b?)", 1356 "VBROADCASTSSZ128rm(b?)", 1357 "VINSERT(F|I)128rm", 1358 "VMOVAPDZ128rm(b?)", 1359 "VMOVAPSZ128rm(b?)", 1360 "VMOVDDUPZ128rm(b?)", 1361 "VMOVDQA32Z128rm(b?)", 1362 "VMOVDQA64Z128rm(b?)", 1363 "VMOVDQU16Z128rm(b?)", 1364 "VMOVDQU32Z128rm(b?)", 1365 "VMOVDQU64Z128rm(b?)", 1366 "VMOVDQU8Z128rm(b?)", 1367 "VMOVSHDUPZ128rm(b?)", 1368 "VMOVSLDUPZ128rm(b?)", 1369 "VMOVUPDZ128rm(b?)", 1370 "VMOVUPSZ128rm(b?)", 1371 "VPADD(B|D|Q|W)Z128rm(b?)", 1372 "(V?)PADD(B|D|Q|W)rm", 1373 "VPBLENDM(B|D|Q|W)Z128rm(b?)", 1374 "VPBROADCASTDZ128rm(b?)", 1375 "VPBROADCASTQZ128rm(b?)", 1376 "VPSUB(B|D|Q|W)Z128rm(b?)", 1377 "(V?)PSUB(B|D|Q|W)rm", 1378 "VPTERNLOGDZ128rm(b?)i", 1379 "VPTERNLOGQZ128rm(b?)i")>; 1380 1381def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1382 let Latency = 7; 1383 let NumMicroOps = 3; 1384 let ReleaseAtCycles = [2,1]; 1385} 1386def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm, 1387 MMX_PACKSSWBrm, 1388 MMX_PACKUSWBrm)>; 1389 1390def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> { 1391 let Latency = 7; 1392 let NumMicroOps = 3; 1393 let ReleaseAtCycles = [2,1]; 1394} 1395def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2WZ128rr", 1396 "VPERMI2WZ256rr", 1397 "VPERMI2WZrr", 1398 "VPERMT2WZ128rr", 1399 "VPERMT2WZ256rr", 1400 "VPERMT2WZrr")>; 1401 1402def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> { 1403 let Latency = 7; 1404 let NumMicroOps = 3; 1405 let ReleaseAtCycles = [1,2]; 1406} 1407def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64, 1408 SCASB, SCASL, SCASQ, SCASW)>; 1409 1410def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort01]> { 1411 let Latency = 7; 1412 let NumMicroOps = 3; 1413 let ReleaseAtCycles = [1,1,1]; 1414} 1415def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr", 1416 "VCVT(T?)SS2USI64Zrr")>; 1417 1418def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> { 1419 let Latency = 7; 1420 let NumMicroOps = 3; 1421 let ReleaseAtCycles = [1,1,1]; 1422} 1423def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>; 1424 1425def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> { 1426 let Latency = 7; 1427 let NumMicroOps = 3; 1428 let ReleaseAtCycles = [1,1,1]; 1429} 1430def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>; 1431 1432def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> { 1433 let Latency = 7; 1434 let NumMicroOps = 3; 1435 let ReleaseAtCycles = [1,1,1]; 1436} 1437def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>; 1438 1439def ICXWriteResGroup106 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> { 1440 let Latency = 7; 1441 let NumMicroOps = 4; 1442 let ReleaseAtCycles = [1,2,1]; 1443} 1444def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)", 1445 "VCOMPRESSPS(Z|Z128|Z256)mr(b?)", 1446 "VPCOMPRESSD(Z|Z128|Z256)mr(b?)", 1447 "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>; 1448 1449def ICXWriteResGroup107 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> { 1450 let Latency = 7; 1451 let NumMicroOps = 5; 1452 let ReleaseAtCycles = [1,1,1,2]; 1453} 1454def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)", 1455 "ROR(8|16|32|64)m(1|i)")>; 1456 1457def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> { 1458 let Latency = 2; 1459 let NumMicroOps = 2; 1460 let ReleaseAtCycles = [2]; 1461} 1462def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, 1463 ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; 1464 1465def ICXWriteResGroup108 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> { 1466 let Latency = 7; 1467 let NumMicroOps = 5; 1468 let ReleaseAtCycles = [1,1,1,2]; 1469} 1470def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>; 1471 1472def ICXWriteResGroup109 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> { 1473 let Latency = 7; 1474 let NumMicroOps = 5; 1475 let ReleaseAtCycles = [1,1,1,1,1]; 1476} 1477def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>; 1478def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>; 1479 1480def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> { 1481 let Latency = 7; 1482 let NumMicroOps = 7; 1483 let ReleaseAtCycles = [1,2,2,2]; 1484} 1485def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr, 1486 VPSCATTERQQZ128mr, 1487 VSCATTERDPDZ128mr, 1488 VSCATTERQPDZ128mr)>; 1489 1490def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> { 1491 let Latency = 7; 1492 let NumMicroOps = 7; 1493 let ReleaseAtCycles = [1,3,1,2]; 1494} 1495def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>; 1496 1497def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> { 1498 let Latency = 7; 1499 let NumMicroOps = 11; 1500 let ReleaseAtCycles = [1,4,4,2]; 1501} 1502def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr, 1503 VPSCATTERQQZ256mr, 1504 VSCATTERDPDZ256mr, 1505 VSCATTERQPDZ256mr)>; 1506 1507def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> { 1508 let Latency = 7; 1509 let NumMicroOps = 19; 1510 let ReleaseAtCycles = [1,8,8,2]; 1511} 1512def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr, 1513 VPSCATTERQQZmr, 1514 VSCATTERDPDZmr, 1515 VSCATTERQPDZmr)>; 1516 1517def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { 1518 let Latency = 7; 1519 let NumMicroOps = 36; 1520 let ReleaseAtCycles = [1,16,1,16,2]; 1521} 1522def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>; 1523 1524def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> { 1525 let Latency = 8; 1526 let NumMicroOps = 2; 1527 let ReleaseAtCycles = [1,1]; 1528} 1529def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm", 1530 "PEXT(32|64)rm")>; 1531 1532def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1533 let Latency = 8; 1534 let NumMicroOps = 2; 1535 let ReleaseAtCycles = [1,1]; 1536} 1537def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m", 1538 "VPBROADCASTB(Z|Z256)rm(b?)", 1539 "VPBROADCASTW(Z|Z256)rm(b?)", 1540 "(V?)PALIGNR(Y|Z256)rmi", 1541 "(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i", 1542 "(V?)PERMIL(PD|PS)(Y|Z256)rm", 1543 "(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>; 1544def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm, 1545 VPBROADCASTWYrm, 1546 VPMOVSXBDYrm, 1547 VPMOVSXBQYrm, 1548 VPMOVSXWQYrm)>; 1549 1550def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> { 1551 let Latency = 8; 1552 let NumMicroOps = 2; 1553 let ReleaseAtCycles = [1,1]; 1554} 1555def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm, 1556 VPBLENDDYrmi)>; 1557def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd], 1558 (instregex "VBLENDMPD(Z|Z256)rm(b?)", 1559 "VBLENDMPS(Z|Z256)rm(b?)", 1560 "VBROADCASTF32X2Z256rm(b?)", 1561 "VBROADCASTF32X2Zrm(b?)", 1562 "VBROADCASTF32X4Z256rm(b?)", 1563 "VBROADCASTF32X4rm(b?)", 1564 "VBROADCASTF32X8rm(b?)", 1565 "VBROADCASTF64X2Z128rm(b?)", 1566 "VBROADCASTF64X2rm(b?)", 1567 "VBROADCASTF64X4rm(b?)", 1568 "VBROADCASTI32X2Z256rm(b?)", 1569 "VBROADCASTI32X2Zrm(b?)", 1570 "VBROADCASTI32X4Z256rm(b?)", 1571 "VBROADCASTI32X4rm(b?)", 1572 "VBROADCASTI32X8rm(b?)", 1573 "VBROADCASTI64X2Z128rm(b?)", 1574 "VBROADCASTI64X2rm(b?)", 1575 "VBROADCASTI64X4rm(b?)", 1576 "VBROADCASTSD(Z|Z256)rm(b?)", 1577 "VBROADCASTSS(Z|Z256)rm(b?)", 1578 "VINSERTF32x4(Z|Z256)rm(b?)", 1579 "VINSERTF32x8Zrm(b?)", 1580 "VINSERTF64x2(Z|Z256)rm(b?)", 1581 "VINSERTF64x4Zrm(b?)", 1582 "VINSERTI32x4(Z|Z256)rm(b?)", 1583 "VINSERTI32x8Zrm(b?)", 1584 "VINSERTI64x2(Z|Z256)rm(b?)", 1585 "VINSERTI64x4Zrm(b?)", 1586 "VMOVAPD(Z|Z256)rm(b?)", 1587 "VMOVAPS(Z|Z256)rm(b?)", 1588 "VMOVDDUP(Z|Z256)rm(b?)", 1589 "VMOVDQA32(Z|Z256)rm(b?)", 1590 "VMOVDQA64(Z|Z256)rm(b?)", 1591 "VMOVDQU16(Z|Z256)rm(b?)", 1592 "VMOVDQU32(Z|Z256)rm(b?)", 1593 "VMOVDQU64(Z|Z256)rm(b?)", 1594 "VMOVDQU8(Z|Z256)rm(b?)", 1595 "VMOVSHDUP(Z|Z256)rm(b?)", 1596 "VMOVSLDUP(Z|Z256)rm(b?)", 1597 "VMOVUPD(Z|Z256)rm(b?)", 1598 "VMOVUPS(Z|Z256)rm(b?)", 1599 "VPADD(B|D|Q|W)Yrm", 1600 "VPADD(B|D|Q|W)(Z|Z256)rm(b?)", 1601 "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)", 1602 "VPBROADCASTD(Z|Z256)rm(b?)", 1603 "VPBROADCASTQ(Z|Z256)rm(b?)", 1604 "VPSUB(B|D|Q|W)Yrm", 1605 "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)", 1606 "VPTERNLOGD(Z|Z256)rm(b?)i", 1607 "VPTERNLOGQ(Z|Z256)rm(b?)i")>; 1608 1609def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 1610 let Latency = 8; 1611 let NumMicroOps = 4; 1612 let ReleaseAtCycles = [1,2,1]; 1613} 1614def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>; 1615 1616def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 1617 let Latency = 8; 1618 let NumMicroOps = 5; 1619 let ReleaseAtCycles = [1,1,1,2]; 1620} 1621def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)", 1622 "RCR(8|16|32|64)m(1|i)")>; 1623 1624def ICXWriteResGroup128 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> { 1625 let Latency = 8; 1626 let NumMicroOps = 6; 1627 let ReleaseAtCycles = [1,1,1,3]; 1628} 1629def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL", 1630 "ROR(8|16|32|64)mCL", 1631 "SAR(8|16|32|64)mCL", 1632 "SHL(8|16|32|64)mCL", 1633 "SHR(8|16|32|64)mCL")>; 1634 1635def ICXWriteResGroup130 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 1636 let Latency = 8; 1637 let NumMicroOps = 6; 1638 let ReleaseAtCycles = [1,1,1,2,1]; 1639} 1640def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>; 1641 1642def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { 1643 let Latency = 8; 1644 let NumMicroOps = 8; 1645 let ReleaseAtCycles = [1,2,1,2,2]; 1646} 1647def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr, 1648 VPSCATTERQDZ256mr, 1649 VSCATTERQPSZ128mr, 1650 VSCATTERQPSZ256mr)>; 1651 1652def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { 1653 let Latency = 8; 1654 let NumMicroOps = 12; 1655 let ReleaseAtCycles = [1,4,1,4,2]; 1656} 1657def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr, 1658 VSCATTERDPSZ128mr)>; 1659 1660def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { 1661 let Latency = 8; 1662 let NumMicroOps = 20; 1663 let ReleaseAtCycles = [1,8,1,8,2]; 1664} 1665def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr, 1666 VSCATTERDPSZ256mr)>; 1667 1668def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { 1669 let Latency = 8; 1670 let NumMicroOps = 36; 1671 let ReleaseAtCycles = [1,16,1,16,2]; 1672} 1673def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>; 1674 1675def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> { 1676 let Latency = 9; 1677 let NumMicroOps = 2; 1678 let ReleaseAtCycles = [1,1]; 1679} 1680def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>; 1681 1682def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1683 let Latency = 9; 1684 let NumMicroOps = 2; 1685 let ReleaseAtCycles = [1,1]; 1686} 1687def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm, 1688 VPMOVSXDQYrm, 1689 VPMOVSXWDYrm, 1690 VPMOVZXWDYrm)>; 1691def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i", 1692 "VFPCLASSSDZrm(b?)", 1693 "VFPCLASSSSZrm(b?)", 1694 "(V?)PCMPGTQrm", 1695 "VPERMI2DZ128rm(b?)", 1696 "VPERMI2PDZ128rm(b?)", 1697 "VPERMI2PSZ128rm(b?)", 1698 "VPERMI2QZ128rm(b?)", 1699 "VPERMT2DZ128rm(b?)", 1700 "VPERMT2PDZ128rm(b?)", 1701 "VPERMT2PSZ128rm(b?)", 1702 "VPERMT2QZ128rm(b?)", 1703 "VPMAXSQZ128rm(b?)", 1704 "VPMAXUQZ128rm(b?)", 1705 "VPMINSQZ128rm(b?)", 1706 "VPMINUQZ128rm(b?)")>; 1707 1708def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1709 let Latency = 10; 1710 let NumMicroOps = 2; 1711 let ReleaseAtCycles = [1,1]; 1712} 1713def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i", 1714 "VCMP(SD|SS)Zrm", 1715 "VFPCLASSPDZ128rm(b?)", 1716 "VFPCLASSPSZ128rm(b?)", 1717 "VPCMPBZ128rmi(b?)", 1718 "VPCMPDZ128rmi(b?)", 1719 "VPCMPEQ(B|D|Q|W)Z128rm(b?)", 1720 "VPCMPGT(B|D|Q|W)Z128rm(b?)", 1721 "VPCMPQZ128rmi(b?)", 1722 "VPCMPU(B|D|Q|W)Z128rmi(b?)", 1723 "VPCMPWZ128rmi(b?)", 1724 "(V?)PACK(U|S)S(DW|WB)(Z128)?rm", 1725 "VPTESTMBZ128rm(b?)", 1726 "VPTESTMDZ128rm(b?)", 1727 "VPTESTMQZ128rm(b?)", 1728 "VPTESTMWZ128rm(b?)", 1729 "VPTESTNMBZ128rm(b?)", 1730 "VPTESTNMDZ128rm(b?)", 1731 "VPTESTNMQZ128rm(b?)", 1732 "VPTESTNMWZ128rm(b?)")>; 1733 1734def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> { 1735 let Latency = 9; 1736 let NumMicroOps = 2; 1737 let ReleaseAtCycles = [1,1]; 1738} 1739def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm", 1740 "(V?)CVTPS2PDrm")>; 1741 1742def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> { 1743 let Latency = 9; 1744 let NumMicroOps = 4; 1745 let ReleaseAtCycles = [2,1,1]; 1746} 1747def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm", 1748 "(V?)PHSUBSWrm")>; 1749 1750def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> { 1751 let Latency = 9; 1752 let NumMicroOps = 5; 1753 let ReleaseAtCycles = [1,2,1,1]; 1754} 1755def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm", 1756 "LSL(16|32|64)rm")>; 1757 1758def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1759 let Latency = 10; 1760 let NumMicroOps = 2; 1761 let ReleaseAtCycles = [1,1]; 1762} 1763def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>; 1764def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", 1765 "ILD_F(16|32|64)m", 1766 "VALIGND(Z|Z256)rm(b?)i", 1767 "VALIGNQ(Z|Z256)rm(b?)i", 1768 "VPMAXSQ(Z|Z256)rm(b?)", 1769 "VPMAXUQ(Z|Z256)rm(b?)", 1770 "VPMINSQ(Z|Z256)rm(b?)", 1771 "VPMINUQ(Z|Z256)rm(b?)")>; 1772 1773def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1774 let Latency = 11; 1775 let NumMicroOps = 2; 1776 let ReleaseAtCycles = [1,1]; 1777} 1778def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i", 1779 "VCMPPS(Z|Z256)rm(b?)i", 1780 "VFPCLASSPD(Z|Z256)rm(b?)", 1781 "VFPCLASSPS(Z|Z256)rm(b?)", 1782 "VPCMPB(Z|Z256)rmi(b?)", 1783 "VPCMPD(Z|Z256)rmi(b?)", 1784 "VPCMPEQB(Z|Z256)rm(b?)", 1785 "VPCMPEQD(Z|Z256)rm(b?)", 1786 "VPCMPEQQ(Z|Z256)rm(b?)", 1787 "VPCMPEQW(Z|Z256)rm(b?)", 1788 "VPCMPGTB(Z|Z256)rm(b?)", 1789 "VPCMPGTD(Z|Z256)rm(b?)", 1790 "VPCMPGTQ(Z|Z256)rm(b?)", 1791 "VPCMPGTW(Z|Z256)rm(b?)", 1792 "VPCMPQ(Z|Z256)rmi(b?)", 1793 "VPCMPU(B|D|Q|W)Z256rmi(b?)", 1794 "VPCMPU(B|D|Q|W)Zrmi(b?)", 1795 "VPCMPW(Z|Z256)rmi(b?)", 1796 "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm", 1797 "VPTESTM(B|D|Q|W)Z256rm(b?)", 1798 "VPTESTM(B|D|Q|W)Zrm(b?)", 1799 "VPTESTNM(B|D|Q|W)Z256rm(b?)", 1800 "VPTESTNM(B|D|Q|W)Zrm(b?)")>; 1801 1802def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort01]> { 1803 let Latency = 10; 1804 let NumMicroOps = 2; 1805 let ReleaseAtCycles = [1,1]; 1806} 1807def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)", 1808 "VCVTDQ2PSZ128rm(b?)", 1809 "(V?)CVTDQ2PSrm", 1810 "VCVTPD2QQZ128rm(b?)", 1811 "VCVTPD2UQQZ128rm(b?)", 1812 "VCVTPH2PSZ128rm(b?)", 1813 "VCVTPS2DQZ128rm(b?)", 1814 "(V?)CVTPS2DQrm", 1815 "VCVTPS2PDZ128rm(b?)", 1816 "VCVTPS2QQZ128rm(b?)", 1817 "VCVTPS2UDQZ128rm(b?)", 1818 "VCVTPS2UQQZ128rm(b?)", 1819 "VCVTQQ2PDZ128rm(b?)", 1820 "VCVTQQ2PSZ128rm(b?)", 1821 "VCVTSS2SDZrm", 1822 "(V?)CVTSS2SDrm", 1823 "VCVTTPD2QQZ128rm(b?)", 1824 "VCVTTPD2UQQZ128rm(b?)", 1825 "VCVTTPS2DQZ128rm(b?)", 1826 "(V?)CVTTPS2DQrm", 1827 "VCVTTPS2QQZ128rm(b?)", 1828 "VCVTTPS2UDQZ128rm(b?)", 1829 "VCVTTPS2UQQZ128rm(b?)", 1830 "VCVTUDQ2PDZ128rm(b?)", 1831 "VCVTUDQ2PSZ128rm(b?)", 1832 "VCVTUQQ2PDZ128rm(b?)", 1833 "VCVTUQQ2PSZ128rm(b?)")>; 1834 1835def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1836 let Latency = 10; 1837 let NumMicroOps = 3; 1838 let ReleaseAtCycles = [2,1]; 1839} 1840def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", 1841 "VEXPANDPSZ128rm(b?)", 1842 "VPEXPANDDZ128rm(b?)", 1843 "VPEXPANDQZ128rm(b?)")>; 1844 1845def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> { 1846 let Latency = 10; 1847 let NumMicroOps = 4; 1848 let ReleaseAtCycles = [2,1,1]; 1849} 1850def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm, 1851 VPHSUBSWYrm)>; 1852 1853def ICXWriteResGroup157 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 1854 let Latency = 10; 1855 let NumMicroOps = 8; 1856 let ReleaseAtCycles = [1,1,1,1,1,3]; 1857} 1858def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>; 1859 1860def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> { 1861 let Latency = 11; 1862 let NumMicroOps = 2; 1863 let ReleaseAtCycles = [1,1]; 1864} 1865def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>; 1866 1867def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort01]> { 1868 let Latency = 11; 1869 let NumMicroOps = 2; 1870 let ReleaseAtCycles = [1,1]; 1871} 1872def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm, 1873 VCVTPS2PDYrm)>; 1874def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)", 1875 "VCVTPH2PS(Z|Z256)rm(b?)", 1876 "VCVTPS2PD(Z|Z256)rm(b?)", 1877 "VCVTQQ2PD(Z|Z256)rm(b?)", 1878 "VCVTQQ2PSZ256rm(b?)", 1879 "VCVT(T?)PD2QQ(Z|Z256)rm(b?)", 1880 "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)", 1881 "VCVT(T?)PS2DQYrm", 1882 "VCVT(T?)PS2DQ(Z|Z256)rm(b?)", 1883 "VCVT(T?)PS2QQZ256rm(b?)", 1884 "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)", 1885 "VCVT(T?)PS2UQQZ256rm(b?)", 1886 "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)", 1887 "VCVTUQQ2PD(Z|Z256)rm(b?)", 1888 "VCVTUQQ2PSZ256rm(b?)")>; 1889 1890def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1891 let Latency = 11; 1892 let NumMicroOps = 3; 1893 let ReleaseAtCycles = [2,1]; 1894} 1895def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", 1896 "VEXPANDPD(Z|Z256)rm(b?)", 1897 "VEXPANDPS(Z|Z256)rm(b?)", 1898 "VPEXPANDD(Z|Z256)rm(b?)", 1899 "VPEXPANDQ(Z|Z256)rm(b?)")>; 1900 1901def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 1902 let Latency = 11; 1903 let NumMicroOps = 3; 1904 let ReleaseAtCycles = [1,1,1]; 1905} 1906def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>; 1907 1908def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 1909 let Latency = 11; 1910 let NumMicroOps = 3; 1911 let ReleaseAtCycles = [1,1,1]; 1912} 1913def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm, 1914 CVTTPD2DQrm, 1915 MMX_CVTPD2PIrm, 1916 MMX_CVTTPD2PIrm)>; 1917 1918def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 1919 let Latency = 11; 1920 let NumMicroOps = 4; 1921 let ReleaseAtCycles = [2,1,1]; 1922} 1923def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; 1924 1925def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { 1926 let Latency = 11; 1927 let NumMicroOps = 7; 1928 let ReleaseAtCycles = [2,3,2]; 1929} 1930def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL", 1931 "RCR(16|32|64)rCL")>; 1932 1933def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> { 1934 let Latency = 11; 1935 let NumMicroOps = 9; 1936 let ReleaseAtCycles = [1,5,1,2]; 1937} 1938def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>; 1939 1940def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> { 1941 let Latency = 11; 1942 let NumMicroOps = 11; 1943 let ReleaseAtCycles = [2,9]; 1944} 1945def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>; 1946 1947def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> { 1948 let Latency = 15; 1949 let NumMicroOps = 3; 1950 let ReleaseAtCycles = [3]; 1951} 1952def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; 1953 1954def ICXWriteResGroup174z : SchedWriteRes<[ICXPort0]> { 1955 let Latency = 15; 1956 let NumMicroOps = 3; 1957 let ReleaseAtCycles = [3]; 1958} 1959def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>; 1960 1961def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1962 let Latency = 12; 1963 let NumMicroOps = 3; 1964 let ReleaseAtCycles = [2,1]; 1965} 1966def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>; 1967 1968def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort01]> { 1969 let Latency = 12; 1970 let NumMicroOps = 3; 1971 let ReleaseAtCycles = [1,1,1]; 1972} 1973def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)", 1974 "VCVT(T?)SS2USI64Zrm(b?)")>; 1975 1976def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 1977 let Latency = 12; 1978 let NumMicroOps = 3; 1979 let ReleaseAtCycles = [1,1,1]; 1980} 1981def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)", 1982 "VCVT(T?)PS2UQQZrm(b?)")>; 1983 1984def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1985 let Latency = 13; 1986 let NumMicroOps = 3; 1987 let ReleaseAtCycles = [2,1]; 1988} 1989def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m", 1990 "VPERMWZ256rm(b?)", 1991 "VPERMWZrm(b?)")>; 1992 1993def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 1994 let Latency = 13; 1995 let NumMicroOps = 3; 1996 let ReleaseAtCycles = [1,1,1]; 1997} 1998def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>; 1999 2000def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 2001 let Latency = 13; 2002 let NumMicroOps = 4; 2003 let ReleaseAtCycles = [2,1,1]; 2004} 2005def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)", 2006 "VPERMT2WZ128rm(b?)")>; 2007 2008def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2009 let Latency = 14; 2010 let NumMicroOps = 3; 2011 let ReleaseAtCycles = [1,1,1]; 2012} 2013def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>; 2014 2015def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 2016 let Latency = 14; 2017 let NumMicroOps = 3; 2018 let ReleaseAtCycles = [1,1,1]; 2019} 2020def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", 2021 "VCVTPD2UDQZrm(b?)", 2022 "VCVTQQ2PSZrm(b?)", 2023 "VCVTTPD2DQZrm(b?)", 2024 "VCVTTPD2UDQZrm(b?)", 2025 "VCVTUQQ2PSZrm(b?)")>; 2026 2027def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 2028 let Latency = 14; 2029 let NumMicroOps = 4; 2030 let ReleaseAtCycles = [2,1,1]; 2031} 2032def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)", 2033 "VPERMI2WZrm(b?)", 2034 "VPERMT2WZ256rm(b?)", 2035 "VPERMT2WZrm(b?)")>; 2036 2037def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> { 2038 let Latency = 14; 2039 let NumMicroOps = 10; 2040 let ReleaseAtCycles = [2,4,1,3]; 2041} 2042def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>; 2043 2044def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> { 2045 let Latency = 15; 2046 let NumMicroOps = 1; 2047 let ReleaseAtCycles = [1]; 2048} 2049def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>; 2050 2051def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2052 let Latency = 15; 2053 let NumMicroOps = 8; 2054 let ReleaseAtCycles = [1,2,2,1,2]; 2055} 2056def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>; 2057 2058def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2059 let Latency = 15; 2060 let NumMicroOps = 10; 2061 let ReleaseAtCycles = [1,1,1,5,1,1]; 2062} 2063def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; 2064 2065def ICXWriteResGroup199 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2066 let Latency = 16; 2067 let NumMicroOps = 14; 2068 let ReleaseAtCycles = [1,1,1,4,2,5]; 2069} 2070def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>; 2071 2072def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> { 2073 let Latency = 12; 2074 let NumMicroOps = 34; 2075 let ReleaseAtCycles = [1, 4, 5]; 2076} 2077def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>; 2078 2079def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> { 2080 let Latency = 17; 2081 let NumMicroOps = 15; 2082 let ReleaseAtCycles = [2,1,2,4,2,4]; 2083} 2084def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>; 2085 2086def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> { 2087 let Latency = 21; 2088 let NumMicroOps = 4; 2089 let ReleaseAtCycles = [1,3]; 2090} 2091def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>; 2092 2093def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> { 2094 let Latency = 18; 2095 let NumMicroOps = 8; 2096 let ReleaseAtCycles = [1,1,1,5]; 2097} 2098def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>; 2099 2100def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2101 let Latency = 18; 2102 let NumMicroOps = 11; 2103 let ReleaseAtCycles = [2,1,1,4,1,2]; 2104} 2105def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>; 2106 2107def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> { 2108 let Latency = 22; 2109 let NumMicroOps = 4; 2110 let ReleaseAtCycles = [1,3]; 2111} 2112def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>; 2113 2114def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort0]> { 2115 let Latency = 22; 2116 let NumMicroOps = 4; 2117 let ReleaseAtCycles = [1,3]; 2118} 2119def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>; 2120 2121def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> { 2122 let Latency = 20; 2123 let NumMicroOps = 1; 2124 let ReleaseAtCycles = [1]; 2125} 2126def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>; 2127 2128def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2129 let Latency = 17; 2130 let NumMicroOps = 5; // 2 uops perform multiple loads 2131 let ReleaseAtCycles = [1,2,1,1]; 2132} 2133def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm, 2134 VGATHERDPDZ128rm, VPGATHERDQZ128rm, 2135 VGATHERQPDZ128rm, VPGATHERQQZ128rm)>; 2136 2137def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2138 let Latency = 19; 2139 let NumMicroOps = 5; // 2 uops perform multiple loads 2140 let ReleaseAtCycles = [1,4,1,1]; 2141} 2142def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm, 2143 VGATHERQPDZ256rm, VPGATHERQQZ256rm, 2144 VGATHERDPSZ128rm, VPGATHERDDZ128rm, 2145 VGATHERDPDZ256rm, VPGATHERDQZ256rm)>; 2146 2147def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2148 let Latency = 21; 2149 let NumMicroOps = 5; // 2 uops perform multiple loads 2150 let ReleaseAtCycles = [1,8,1,1]; 2151} 2152def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm, 2153 VGATHERDPDZrm, VPGATHERDQZrm, 2154 VGATHERQPDZrm, VPGATHERQQZrm, 2155 VGATHERQPSZrm, VPGATHERQDZrm)>; 2156 2157def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2158 let Latency = 25; 2159 let NumMicroOps = 5; // 2 uops perform multiple loads 2160 let ReleaseAtCycles = [1,16,1,1]; 2161} 2162def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; 2163 2164def ICXWriteResGroup219 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 2165 let Latency = 20; 2166 let NumMicroOps = 8; 2167 let ReleaseAtCycles = [1,1,1,1,1,1,2]; 2168} 2169def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>; 2170 2171def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> { 2172 let Latency = 20; 2173 let NumMicroOps = 10; 2174 let ReleaseAtCycles = [1,2,7]; 2175} 2176def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>; 2177 2178def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> { 2179 let Latency = 22; 2180 let NumMicroOps = 2; 2181 let ReleaseAtCycles = [1,1]; 2182} 2183def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>; 2184 2185def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { 2186 let Latency = 18; 2187 let NumMicroOps = 5; // 2 uops perform multiple loads 2188 let ReleaseAtCycles = [1,2,1,1]; 2189} 2190def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm, 2191 VGATHERQPDrm, VPGATHERQQrm, 2192 VGATHERQPSrm, VPGATHERQDrm)>; 2193 2194def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { 2195 let Latency = 20; 2196 let NumMicroOps = 5; // 2 uops peform multiple loads 2197 let ReleaseAtCycles = [1,4,1,1]; 2198} 2199def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm, 2200 VGATHERDPSrm, VPGATHERDDrm, 2201 VGATHERQPDYrm, VPGATHERQQYrm, 2202 VGATHERQPSYrm, VPGATHERQDYrm)>; 2203 2204def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { 2205 let Latency = 22; 2206 let NumMicroOps = 5; // 2 uops perform multiple loads 2207 let ReleaseAtCycles = [1,8,1,1]; 2208} 2209def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; 2210 2211def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { 2212 let Latency = 22; 2213 let NumMicroOps = 14; 2214 let ReleaseAtCycles = [5,5,4]; 2215} 2216def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", 2217 "VPCONFLICTQZ256rr")>; 2218 2219def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 2220 let Latency = 23; 2221 let NumMicroOps = 19; 2222 let ReleaseAtCycles = [2,1,4,1,1,4,6]; 2223} 2224def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>; 2225 2226def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2227 let Latency = 25; 2228 let NumMicroOps = 3; 2229 let ReleaseAtCycles = [1,1,1]; 2230} 2231def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>; 2232 2233def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> { 2234 let Latency = 27; 2235 let NumMicroOps = 2; 2236 let ReleaseAtCycles = [1,1]; 2237} 2238def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>; 2239 2240def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2241 let Latency = 29; 2242 let NumMicroOps = 15; 2243 let ReleaseAtCycles = [5,5,1,4]; 2244} 2245def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>; 2246 2247def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2248 let Latency = 30; 2249 let NumMicroOps = 3; 2250 let ReleaseAtCycles = [1,1,1]; 2251} 2252def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; 2253 2254def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> { 2255 let Latency = 35; 2256 let NumMicroOps = 23; 2257 let ReleaseAtCycles = [1,5,3,4,10]; 2258} 2259def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri", 2260 "IN(8|16|32)rr")>; 2261 2262def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 2263 let Latency = 35; 2264 let NumMicroOps = 23; 2265 let ReleaseAtCycles = [1,5,2,1,4,10]; 2266} 2267def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir", 2268 "OUT(8|16|32)rr")>; 2269 2270def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { 2271 let Latency = 37; 2272 let NumMicroOps = 21; 2273 let ReleaseAtCycles = [9,7,5]; 2274} 2275def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr", 2276 "VPCONFLICTQZrr")>; 2277 2278def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> { 2279 let Latency = 37; 2280 let NumMicroOps = 31; 2281 let ReleaseAtCycles = [1,8,1,21]; 2282} 2283def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>; 2284 2285def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort15,ICXPort0156]> { 2286 let Latency = 40; 2287 let NumMicroOps = 18; 2288 let ReleaseAtCycles = [1,1,2,3,1,1,1,8]; 2289} 2290def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>; 2291 2292def ICXWriteResGroup253 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> { 2293 let Latency = 41; 2294 let NumMicroOps = 39; 2295 let ReleaseAtCycles = [1,10,1,1,26]; 2296} 2297def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>; 2298 2299def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> { 2300 let Latency = 42; 2301 let NumMicroOps = 22; 2302 let ReleaseAtCycles = [2,20]; 2303} 2304def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>; 2305 2306def ICXWriteResGroup255 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> { 2307 let Latency = 42; 2308 let NumMicroOps = 40; 2309 let ReleaseAtCycles = [1,11,1,1,26]; 2310} 2311def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>; 2312def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>; 2313 2314def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2315 let Latency = 44; 2316 let NumMicroOps = 22; 2317 let ReleaseAtCycles = [9,7,1,5]; 2318} 2319def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)", 2320 "VPCONFLICTQZrm(b?)")>; 2321 2322def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> { 2323 let Latency = 62; 2324 let NumMicroOps = 64; 2325 let ReleaseAtCycles = [2,8,5,10,39]; 2326} 2327def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>; 2328 2329def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { 2330 let Latency = 63; 2331 let NumMicroOps = 88; 2332 let ReleaseAtCycles = [4,4,31,1,2,1,45]; 2333} 2334def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>; 2335 2336def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { 2337 let Latency = 63; 2338 let NumMicroOps = 90; 2339 let ReleaseAtCycles = [4,2,33,1,2,1,47]; 2340} 2341def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>; 2342 2343def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { 2344 let Latency = 67; 2345 let NumMicroOps = 35; 2346 let ReleaseAtCycles = [17,11,7]; 2347} 2348def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>; 2349 2350def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2351 let Latency = 74; 2352 let NumMicroOps = 36; 2353 let ReleaseAtCycles = [17,11,1,7]; 2354} 2355def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>; 2356 2357def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> { 2358 let Latency = 75; 2359 let NumMicroOps = 15; 2360 let ReleaseAtCycles = [6,3,6]; 2361} 2362def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>; 2363 2364def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort78,ICXPort06,ICXPort0156]> { 2365 let Latency = 106; 2366 let NumMicroOps = 100; 2367 let ReleaseAtCycles = [9,1,11,16,1,11,21,30]; 2368} 2369def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>; 2370 2371def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> { 2372 let Latency = 140; 2373 let NumMicroOps = 4; 2374 let ReleaseAtCycles = [1,3]; 2375} 2376def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>; 2377 2378def: InstRW<[WriteZero], (instrs CLC)>; 2379 2380 2381// Instruction variants handled by the renamer. These might not need execution 2382// ports in certain conditions. 2383// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs", 2384// section "Skylake Pipeline" > "Register allocation and renaming". 2385// These can be investigated with llvm-exegesis, e.g. 2386// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- 2387// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- 2388 2389def ICXWriteZeroLatency : SchedWriteRes<[]> { 2390 let Latency = 0; 2391} 2392 2393def ICXWriteZeroIdiom : SchedWriteVariant<[ 2394 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2395 SchedVar<NoSchedPred, [WriteALU]> 2396]>; 2397def : InstRW<[ICXWriteZeroIdiom], (instrs SUB32rr, SUB64rr, 2398 XOR32rr, XOR64rr)>; 2399 2400def ICXWriteFZeroIdiom : SchedWriteVariant<[ 2401 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2402 SchedVar<NoSchedPred, [WriteFLogic]> 2403]>; 2404def : InstRW<[ICXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, 2405 XORPDrr, VXORPDrr, 2406 VXORPSZ128rr, 2407 VXORPDZ128rr)>; 2408 2409def ICXWriteFZeroIdiomY : SchedWriteVariant<[ 2410 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2411 SchedVar<NoSchedPred, [WriteFLogicY]> 2412]>; 2413def : InstRW<[ICXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr, 2414 VXORPSZ256rr, VXORPDZ256rr)>; 2415 2416def ICXWriteFZeroIdiomZ : SchedWriteVariant<[ 2417 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2418 SchedVar<NoSchedPred, [WriteFLogicZ]> 2419]>; 2420def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>; 2421 2422def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[ 2423 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2424 SchedVar<NoSchedPred, [WriteVecLogicX]> 2425]>; 2426def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, 2427 VPXORDZ128rr, VPXORQZ128rr)>; 2428 2429def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[ 2430 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2431 SchedVar<NoSchedPred, [WriteVecLogicY]> 2432]>; 2433def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs VPXORYrr, 2434 VPXORDZ256rr, VPXORQZ256rr)>; 2435 2436def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[ 2437 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2438 SchedVar<NoSchedPred, [WriteVecLogicZ]> 2439]>; 2440def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>; 2441 2442def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[ 2443 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2444 SchedVar<NoSchedPred, [WriteVecALUX]> 2445]>; 2446def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, 2447 PCMPGTDrr, VPCMPGTDrr, 2448 PCMPGTWrr, VPCMPGTWrr)>; 2449 2450def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[ 2451 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2452 SchedVar<NoSchedPred, [WriteVecALUY]> 2453]>; 2454def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, 2455 VPCMPGTDYrr, 2456 VPCMPGTWYrr)>; 2457 2458def ICXWritePSUB : SchedWriteRes<[ICXPort015]> { 2459 let Latency = 1; 2460 let NumMicroOps = 1; 2461 let ReleaseAtCycles = [1]; 2462} 2463 2464def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[ 2465 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2466 SchedVar<NoSchedPred, [ICXWritePSUB]> 2467]>; 2468 2469def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr, 2470 PSUBDrr, VPSUBDrr, VPSUBDZ128rr, 2471 PSUBQrr, VPSUBQrr, VPSUBQZ128rr, 2472 PSUBWrr, VPSUBWrr, VPSUBWZ128rr, 2473 VPSUBBYrr, VPSUBBZ256rr, 2474 VPSUBDYrr, VPSUBDZ256rr, 2475 VPSUBQYrr, VPSUBQZ256rr, 2476 VPSUBWYrr, VPSUBWZ256rr, 2477 VPSUBBZrr, 2478 VPSUBDZrr, 2479 VPSUBQZrr, 2480 VPSUBWZrr)>; 2481def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> { 2482 let Latency = 3; 2483 let NumMicroOps = 1; 2484 let ReleaseAtCycles = [1]; 2485} 2486 2487def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ 2488 SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, 2489 SchedVar<NoSchedPred, [ICXWritePCMPGTQ]> 2490]>; 2491def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, 2492 VPCMPGTQYrr)>; 2493 2494 2495// CMOVs that use both Z and C flag require an extra uop. 2496def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> { 2497 let Latency = 2; 2498 let ReleaseAtCycles = [2]; 2499 let NumMicroOps = 2; 2500} 2501 2502def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> { 2503 let Latency = 7; 2504 let ReleaseAtCycles = [1,2]; 2505 let NumMicroOps = 3; 2506} 2507 2508def ICXCMOVA_CMOVBErr : SchedWriteVariant<[ 2509 SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>, 2510 SchedVar<NoSchedPred, [WriteCMOV]> 2511]>; 2512 2513def ICXCMOVA_CMOVBErm : SchedWriteVariant<[ 2514 SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>, 2515 SchedVar<NoSchedPred, [WriteCMOV.Folded]> 2516]>; 2517 2518def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; 2519def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; 2520 2521// SETCCs that use both Z and C flag require an extra uop. 2522def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> { 2523 let Latency = 2; 2524 let ReleaseAtCycles = [2]; 2525 let NumMicroOps = 2; 2526} 2527 2528def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06]> { 2529 let Latency = 3; 2530 let ReleaseAtCycles = [1,1,2]; 2531 let NumMicroOps = 4; 2532} 2533 2534def ICXSETA_SETBErr : SchedWriteVariant<[ 2535 SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>, 2536 SchedVar<NoSchedPred, [WriteSETCC]> 2537]>; 2538 2539def ICXSETA_SETBErm : SchedWriteVariant<[ 2540 SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>, 2541 SchedVar<NoSchedPred, [WriteSETCCStore]> 2542]>; 2543 2544def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>; 2545def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>; 2546 2547/////////////////////////////////////////////////////////////////////////////// 2548// Dependency breaking instructions. 2549/////////////////////////////////////////////////////////////////////////////// 2550 2551def : IsZeroIdiomFunction<[ 2552 // GPR Zero-idioms. 2553 DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>, 2554 2555 // SSE Zero-idioms. 2556 DepBreakingClass<[ 2557 // fp variants. 2558 XORPSrr, XORPDrr, 2559 2560 // int variants. 2561 PXORrr, 2562 PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr, 2563 PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr 2564 ], ZeroIdiomPredicate>, 2565 2566 // AVX Zero-idioms. 2567 DepBreakingClass<[ 2568 // xmm fp variants. 2569 VXORPSrr, VXORPDrr, 2570 2571 // xmm int variants. 2572 VPXORrr, 2573 VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr, 2574 VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr, 2575 2576 // ymm variants. 2577 VXORPSYrr, VXORPDYrr, VPXORYrr, 2578 VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr, 2579 VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr, 2580 2581 // zmm variants. 2582 VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr, 2583 VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr, 2584 VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr, 2585 VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr, 2586 VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr, 2587 VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr, 2588 ], ZeroIdiomPredicate>, 2589]>; 2590 2591} // SchedModel 2592