//=- X86ScheduleZnver3.td - X86 Znver3 Scheduling ------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver3 to support instruction
// scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 19h Processors.
//    https://www.amd.com/system/files/TechDocs/56665.zip
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * AMD Zen 3 Ryzen Deep Dive Review
//    https://www.anandtech.com/show/16214/
//===----------------------------------------------------------------------===//

// Top-level machine model record for Znver3. Besides overriding the standard
// SchedMachineModel fields with `let`, it declares two extra `int` fields
// (VecLoadLatency, StoreLatency) that the rest of this file reads back as
// Znver3Model.VecLoadLatency / Znver3Model.StoreLatency.
def Znver3Model : SchedMachineModel {
  // AMD SOG 19h, 2.9.6 Dispatch
  // The processor may dispatch up to 6 macro ops per cycle
  // into the execution engine.
  let IssueWidth = 6;
  // AMD SOG 19h, 2.10.3
  // The retire control unit (RCU) tracks the completion status of all
  // outstanding operations (integer, load/store, and floating-point) and is
  // the final arbiter for exception processing and recovery.
  // The unit can receive up to 6 macro ops dispatched per cycle and track up
  // to 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode.
  let MicroOpBufferSize = 256;
  // AMD SOG 19h, 2.9.1 Op Cache
  // The op cache is organized as an associative cache with 64 sets and 8 ways.
  // At each set-way intersection is an entry containing up to 8 macro ops.
  // The maximum capacity of the op cache is 4K ops.
  // Assuming a maximum dispatch of 8 ops/cy and a mispredict cost of 12cy from
  // the op-cache, we limit the loop buffer to 8*12 = 96 to avoid loop unrolling
  // leading to excessive filling of the op-cache from frontend.
  // NOTE(review): the 8 ops/cy and 12cy figures used here differ from
  // IssueWidth (6) and MispredictPenalty (13) in this same record; presumably
  // they are op-cache specific numbers -- confirm against the SOG.
  let LoopMicroOpBufferSize = 96;
  // AMD SOG 19h, 2.6.2 L1 Data Cache
  // The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
  // AMD SOG 19h, 2.12 L1 Data Cache
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve 4-cycle load-to-use integer load latency.
  let LoadLatency = 4;
  // AMD SOG 19h, 2.12 L1 Data Cache
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
  int VecLoadLatency = 7;
  // Latency of a simple store operation.
  int StoreLatency = 1;
  let HighLatency = 25; // FIXME: any better choice?
  // AMD SOG 19h, 2.8 Optimizing Branching
  // The branch misprediction penalty is in the range from 11 to 18 cycles,
  // <...>. The common case penalty is 13 cycles.
  let MispredictPenalty = 13;

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  let CompleteModel = 1;
}

// Everything below is attached to Znver3Model. The matching closing brace is
// further down the file.
let SchedModel = Znver3Model in {


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The unit can receive up to 6 macro ops dispatched per cycle and track up to
// 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...>
// The retire unit handles in-order commit of up to eight macro ops per cycle.
// Retire control unit: reorder-buffer size taken from the model's
// MicroOpBufferSize, retiring up to 8 macro ops per cycle.
def Zn3RCU : RetireControlUnit<Znver3Model.MicroOpBufferSize, 8>;

//===----------------------------------------------------------------------===//
// Units
//===----------------------------------------------------------------------===//

// There are a total of three Units, each one with its own schedulers.

//===----------------------------------------------------------------------===//
// Integer Execution Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses four decoupled independent integer scheduler queues,
// each one servicing one ALU pipeline and one or two other pipelines

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.2 Execution Units
// The processor contains 4 general purpose integer execution pipes.
// Each pipe has an ALU capable of general purpose integer operations.
def Zn3ALU0 : ProcResource<1>;
def Zn3ALU1 : ProcResource<1>;
def Zn3ALU2 : ProcResource<1>;
def Zn3ALU3 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There is also a separate branch execution unit.
def Zn3BRU1 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There are three Address Generation Units (AGUs) for all load and store
// address generation. There are also 3 store data movement units
// associated with the same schedulers as the AGUs.
def Zn3AGU0 : ProcResource<1>;
def Zn3AGU1 : ProcResource<1>;
def Zn3AGU2 : ProcResource<1>;

//
// Execution Units
//===----------------------------------------------------------------------===//

// The `defvar`s below do not create new resources; each one is just another
// name for an existing pipe.

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has divide <...> execution capability.
defvar Zn3Divider = Zn3ALU0;

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has <...> branch execution capability.
defvar Zn3BRU0 = Zn3ALU0;

// Integer Multiplication issued on ALU1.
defvar Zn3Multiplier = Zn3ALU1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// General ALU operations
def Zn3ALU0123 : ProcResGroup<[Zn3ALU0, Zn3ALU1, Zn3ALU2, Zn3ALU3]>;

// General AGU operations
def Zn3AGU012 : ProcResGroup<[Zn3AGU0, Zn3AGU1, Zn3AGU2]>;

// Control flow: jumps, calls
def Zn3BRU01 : ProcResGroup<[Zn3BRU0, Zn3BRU1]>;

// Everything that isn't control flow, but still needs to access CC register,
// namely: conditional moves, SETcc.
def Zn3ALU03 : ProcResGroup<[Zn3ALU0, Zn3ALU3]>;

// Zn3ALU1 handles complex bit twiddling: CRC/PDEP/PEXT

// Simple bit twiddling: bit test, shift/rotate, bit extraction
def Zn3ALU12 : ProcResGroup<[Zn3ALU1, Zn3ALU2]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The integer physical register file (PRF) consists of 192 registers.
// The three lists are parallel, one entry per register class (GR64, CCR):
// classes, per-class cost, and per-class move-elimination flag (enabled for
// GR64, disabled for CCR).
def Zn3IntegerPRF : RegisterFile<192, [GR64, CCR], [1, 1], [1, 0],
                                 6,  // Max moves that can be eliminated per cycle.
                                 0>; // Restrict move elimination to zero regs.

// anandtech, The integer scheduler has a 4*24 entry macro op capacity.
// AMD SOG 19h, 2.10.1 Schedulers
// The schedulers can receive up to six macro ops per cycle, with a limit of
// two per scheduler. Each scheduler can issue one micro op per cycle into
// each of its associated pipelines
// FIXME: these are 4 separate schedulers, not a single big one.
// NOTE(review): Zn3BRU0 is a defvar alias of Zn3ALU0, so Zn3ALU0 is listed
// twice in this group -- confirm that is intended.
def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0
                           Zn3ALU1, Zn3AGU1,          // scheduler 1
                           Zn3ALU2, Zn3AGU2,          // scheduler 2
                           Zn3ALU3,          Zn3BRU1  // scheduler 3
                          ]> {
  // Four schedulers of 24 entries each, modeled as one shared buffer.
  let BufferSize = !mul(4, 24);
}


//===----------------------------------------------------------------------===//
// Floating-Point Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses <...> two decoupled independent floating point schedulers
// each servicing two FP pipelines and one store or FP-to-integer pipeline.

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.1 Schedulers
// <...>, and six FPU pipes.
// Agner, 22.10 Floating point execution pipes
// There are six floating point/vector execution pipes,
// Four of them are declared individually; the remaining two are modeled as a
// single resource with two units (Zn3FP45).
def Zn3FP0  : ProcResource<1>;
def Zn3FP1  : ProcResource<1>;
def Zn3FP2  : ProcResource<1>;
def Zn3FP3  : ProcResource<1>;
def Zn3FP45 : ProcResource<2>;

//
// Execution Units
//===----------------------------------------------------------------------===//
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources

// The `defvar`s below are aliases that give each FP pipe a name per
// capability; they do not create new resources.

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
defvar Zn3FPFMul0 = Zn3FP0;
defvar Zn3FPFMul1 = Zn3FP1;

// (v)FADD*
defvar Zn3FPFAdd0 = Zn3FP2;
defvar Zn3FPFAdd1 = Zn3FP3;

// All convert operations except pack/unpack
defvar Zn3FPFCvt0 = Zn3FP2;
defvar Zn3FPFCvt1 = Zn3FP3;

// All Divide and Square Root except Reciprocal Approximation
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
// FDIV unit can support 2 simultaneous operations in flight
// even though it occupies a single pipe.
// FIXME: BufferSize=2 ?
defvar Zn3FPFDiv = Zn3FP1;

// Moves and Logical operations on Floating Point Data Types
defvar Zn3FPFMisc0 = Zn3FP0;
defvar Zn3FPFMisc1 = Zn3FP1;
defvar Zn3FPFMisc2 = Zn3FP2;
defvar Zn3FPFMisc3 = Zn3FP3;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
defvar Zn3FPVAdd0 = Zn3FP0;
defvar Zn3FPVAdd1 = Zn3FP1;
defvar Zn3FPVAdd2 = Zn3FP2;
defvar Zn3FPVAdd3 = Zn3FP3;

// Integer Multiplies, SAD, Blendvb
// Note the second multiply pipe is FP3, not FP1.
defvar Zn3FPVMul0 = Zn3FP0;
defvar Zn3FPVMul1 = Zn3FP3;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
defvar Zn3FPVShuf = Zn3FP1;
defvar Zn3FPVShufAux = Zn3FP2;

// Bit Shift Left/Right operations
defvar Zn3FPVShift0 = Zn3FP1;
defvar Zn3FPVShift1 = Zn3FP2;

// Moves and Logical operations on Packed Integer Data Types
defvar Zn3FPVMisc0 = Zn3FP0;
defvar Zn3FPVMisc1 = Zn3FP1;
defvar Zn3FPVMisc2 = Zn3FP2;
defvar Zn3FPVMisc3 = Zn3FP3;

// *AES*
defvar Zn3FPAES0 = Zn3FP0;
defvar Zn3FPAES1 = Zn3FP1;

// *CLM*
defvar Zn3FPCLM0 = Zn3FP0;
defvar Zn3FPCLM1 = Zn3FP1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
// NOTE(review): the group below contains only Zn3FP0..Zn3FP3; the two
// dedicated store/transfer pipes described above are not part of it.
def Zn3FPU0123 : ProcResGroup<[Zn3FP0, Zn3FP1, Zn3FP2, Zn3FP3]>;

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>;

// (v)FADD*
// Some complex VADD operations are not available in all pipes.
def Zn3FPFAdd01 : ProcResGroup<[Zn3FPFAdd0, Zn3FPFAdd1]>;

// All convert operations except pack/unpack
def Zn3FPFCvt01 : ProcResGroup<[Zn3FPFCvt0, Zn3FPFCvt1]>;

// All Divide and Square Root except Reciprocal Approximation
// def Zn3FPFDiv : ProcResGroup<[Zn3FPFDiv]>;

// Moves and Logical operations on Floating Point Data Types
def Zn3FPFMisc0123 : ProcResGroup<[Zn3FPFMisc0, Zn3FPFMisc1, Zn3FPFMisc2, Zn3FPFMisc3]>;

def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>;

// Loads, Stores and Move to General Register (EX) Operations
// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
defvar Zn3FPLd01 = Zn3FP45;

// AMD SOG 19h, 2.11 Floating-Point Unit
// Note that FP stores are supported on two pipelines,
// but throughput is limited to one per cycle.
// Modeled as a single-unit sub-resource of the two-unit Zn3FP45.
let Super = Zn3FP45 in
def Zn3FPSt : ProcResource<1>;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
def Zn3FPVAdd0123 : ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1, Zn3FPVAdd2, Zn3FPVAdd3]>;

def Zn3FPVAdd01 : ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1]>;
def Zn3FPVAdd12 : ProcResGroup<[Zn3FPVAdd1, Zn3FPVAdd2]>;

// Integer Multiplies, SAD, Blendvb
def Zn3FPVMul01 : ProcResGroup<[Zn3FPVMul0, Zn3FPVMul1]>;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
def Zn3FPVShuf01 : ProcResGroup<[Zn3FPVShuf, Zn3FPVShufAux]>;

// Bit Shift Left/Right operations
def Zn3FPVShift01 : ProcResGroup<[Zn3FPVShift0, Zn3FPVShift1]>;

// Moves and Logical operations on Packed Integer Data Types
def Zn3FPVMisc0123 : ProcResGroup<[Zn3FPVMisc0, Zn3FPVMisc1, Zn3FPVMisc2, Zn3FPVMisc3]>;

// *AES*
def Zn3FPAES01 : ProcResGroup<[Zn3FPAES0, Zn3FPAES1]>;

// *CLM*
def Zn3FPCLM01 : ProcResGroup<[Zn3FPCLM0, Zn3FPCLM1]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// Agner, 21.8 Register renaming and out-of-order schedulers
// The floating point register file has 160 vector registers
// of 128 bits each in Zen 1 and 256 bits each in Zen 2.
// anandtech also confirms this.
// The three lists are parallel, one entry per register class
// (VR64, VR128, VR256): classes, per-class cost, and per-class
// move-elimination flag (disabled for VR64, enabled for VR128/VR256).
def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1],
                            6,  // Max moves that can be eliminated per cycle.
                            0>; // Restrict move elimination to zero regs.

// AMD SOG 19h, 2.11 Floating-Point Unit
// The floating-point scheduler has a 2*32 entry macro op capacity.
// AMD SOG 19h, 2.11 Floating-Point Unit
// <...> the scheduler can issue 1 micro op per cycle for each pipe.
// FIXME: those are two separate schedulers, not a single big one.
def Zn3FP : ProcResGroup<[Zn3FP0, Zn3FP2, /*Zn3FP4,*/        // scheduler 0
                          Zn3FP1, Zn3FP3, Zn3FP45 /*Zn3FP5*/ // scheduler 1
                         ]> {
  // Two schedulers of 32 entries each, modeled as one shared buffer.
  let BufferSize = !mul(2, 32);
}

// AMD SOG 19h, 2.11 Floating-Point Unit
// Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ)
// even if floating-point scheduler is full.
// FIXME: how to model this properly?


//===----------------------------------------------------------------------===//
// Load-Store Unit
//

// AMD SOG 19h, 2.12 Load-Store Unit
// The LS unit contains three largely independent pipe-lines
// enabling the execution of three 256-bit memory operations per cycle.
def Zn3LSU : ProcResource<3>;

// AMD SOG 19h, 2.12 Load-Store Unit
// All three memory operations can be loads.
// Declared as a sub-resource of Zn3LSU (via Super).
let Super = Zn3LSU in
def Zn3Load : ProcResource<3> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit can process up to 72 out-of-order loads.
  let BufferSize = 72;
}

def Zn3LoadQueue : LoadQueue<Zn3Load>;

// AMD SOG 19h, 2.12 Load-Store Unit
// A maximum of two of the memory operations can be stores.
// Declared as a sub-resource of Zn3LSU (via Super).
let Super = Zn3LSU in
def Zn3Store : ProcResource<2> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit utilizes a 64-entry store queue (STQ).
  let BufferSize = 64;
}

def Zn3StoreQueue : StoreQueue<Zn3Store>;

//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.

// Emits one anonymous WriteRes that binds SchedRW to ExePorts.
//   Lat  - value assigned to Latency (default 1).
//   Res  - value assigned to ReleaseAtCycles (default: empty list).
//   UOps - value assigned to NumMicroOps (default 1).
multiclass __zn3WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
                         int Lat = 1, list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Emits two WriteRes records: one for SchedRW itself (register form) and one
// for SchedRW.Folded (folded-load form). The folded form is derived as:
//   resources       = [AGU, Zn3Load] followed by ExePorts
//   Latency         = Lat + LoadLat
//   NumMicroOps     = UOps + LoadUOps
//   ReleaseAtCycles = []                      if Res is empty and LoadRes == 1
//                   = [1, LoadRes] ++ Res     if Res is non-empty
//                   = [1, LoadRes] ++ [1]*N   otherwise, N = size of ExePorts
multiclass __zn3WriteResPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res, int UOps, int LoadLat, int LoadUOps,
                             ProcResourceKind AGU, int LoadRes> {
  // Register form.
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  // Folded-load form.
  defm : __zn3WriteRes<SchedRW.Folded,
                       !listconcat([AGU, Zn3Load], ExePorts),
                       !add(Lat, LoadLat),
                       !if(!and(!empty(Res), !eq(LoadRes, 1)),
                           [],
                           !listconcat([1, LoadRes],
                                       !if(!empty(Res),
                                           !listsplat(1, !size(ExePorts)),
                                           Res))),
                       !add(UOps, LoadUOps)>;
}

// For classes without folded loads.
// The three multiclasses below forward their arguments unchanged to
// __zn3WriteRes; in this file they differ only by name (Int / XMM / YMM).
multiclass Zn3WriteResInt<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResXMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResYMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// For classes with folded loads.
// Integer pair: the folded-load form adds Znver3Model.LoadLatency and uses
// the Zn3AGU012 group for address generation.
multiclass Zn3WriteResIntPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.LoadLatency,
                           LoadUOps, Zn3AGU012, LoadRes>;
}

// XMM pair: the folded-load form adds Znver3Model.VecLoadLatency and uses
// Zn3FPLd01 in the AGU slot.
multiclass Zn3WriteResXMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}

// YMM pair: forwards exactly the same arguments as the XMM pair.
multiclass Zn3WriteResYMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}


//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// Read advances for operands consumed after a folded load: the integer case
// uses the model's LoadLatency, the vector cases use VecLoadLatency.
def : ReadAdvance<ReadAfterLd, Znver3Model.LoadLatency>;

def : ReadAdvance<ReadAfterVecLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecXLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecYLd, Znver3Model.VecLoadLatency>;

// AMD SOG 19h, 2.11 Floating-Point Unit
// There is 1 cycle of added latency for a result to cross
// from F to I or I to F domain.
def : ReadAdvance<ReadInt2Fpu, -1>;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
defm : Zn3WriteResInt<WriteRMW, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 1], 0>;

// Loads, stores, and moves, not folded with other operations.
defm : Zn3WriteResInt<WriteLoad, [Zn3AGU012, Zn3Load], !add(Znver3Model.LoadLatency, 1), [1, 1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
defm : Zn3WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver3Model.LoadLatency, 1), [], 0>;

// Override for the 8-/16-bit destination loads listed in the InstRW below:
// same latency as WriteLoad, but the AGU group is held for 3 cycles.
def Zn3WriteMOVSlow : SchedWriteRes<[Zn3AGU012, Zn3Load]> {
  let Latency = !add(Znver3Model.LoadLatency, 1);
  let ReleaseAtCycles = [3, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm,
                                        MOVSX16rm16, MOVSX16rm32, MOVZX16rm16,
                                        MOVSX16rm8, MOVZX16rm8)>;

defm : Zn3WriteResInt<WriteStore, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
defm : Zn3WriteResInt<WriteStoreNT, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
defm : Zn3WriteResInt<WriteMove, [Zn3ALU0123], 1, [4], 1>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

// MOVBE16rm: load through AGU + load unit, plus the ALU group held for
// 4 cycles; one micro-op.
def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = Znver3Model.LoadLatency;
  let ReleaseAtCycles = [1, 1, 4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVBE16rm], (instrs MOVBE16rm)>;

// MOVBE stores: the ALU group held for 4 cycles, then AGU + store unit;
// two micro-ops.
def Zn3WriteMOVBEmr : SchedWriteRes<[Zn3ALU0123, Zn3AGU012, Zn3Store]> {
  let Latency = Znver3Model.StoreLatency;
  let ReleaseAtCycles = [4, 1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;

// Arithmetic.
defm : Zn3WriteResIntPair<WriteALU, [Zn3ALU0123], 1, [1], 1>; // Simple integer ALU op.
// Accumulator/immediate forms of ADD/AND/OR/SUB/XOR: same latency and
// micro-op count as WriteALU, but ReleaseAtCycles is 4 instead of 1.
def Zn3WriteALUSlow : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
                                        AND8i8, AND16i16, AND32i32, AND64i32,
                                        OR8i8, OR16i16, OR32i32, OR64i32,
                                        SUB8i8, SUB16i16, SUB32i32, SUB64i32,
                                        XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;

// Register-register sign/zero extending moves listed in the InstRW below.
def Zn3WriteMoveExtend : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16,
                                           MOVSX16rr8, MOVZX16rr8)>;

// Materializing a 32-bit immediate into a register.
def Zn3WriteMaterialize32bitImm: SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;

// Register forms of PDEP/PEXT: restricted to Zn3ALU1, 3-cycle latency.
def Zn3WritePDEP_PEXT : SchedWriteRes<[Zn3ALU1]> {
  let Latency = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
                                          PEXT32rr, PEXT64rr)>;

defm : Zn3WriteResIntPair<WriteADC, [Zn3ALU0123], 1, [4], 1>; // Integer ALU + flags op.

// 8-bit read-modify-write ADC/SBB.
def Zn3WriteADC8mr_SBB8mr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123, Zn3Store]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 1, 7, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;

// This is for simple LEAs with one or two input operands.
defm : Zn3WriteResInt<WriteLEA, [Zn3AGU012], 1, [1], 1>; // LEA instructions can't fold loads.

// This write is used for slow LEA instructions.
def Zn3Write3OpsLEA : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 2;
}

// On Znver3, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def Zn3SlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Picks Zn3Write3OpsLEA when Zn3SlowLEAPredicate matches and falls back to
// the plain WriteLEA otherwise.
def Zn3WriteLEA : SchedWriteVariant<[
  SchedVar<Zn3SlowLEAPredicate, [Zn3Write3OpsLEA]>,
  SchedVar<NoSchedPred, [WriteLEA]>
]>;

def : InstRW<[Zn3WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

// LEA16r always gets its own two micro-op description; it does not go through
// the Zn3WriteLEA variant.
def Zn3SlowLEA16r : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}

def : InstRW<[Zn3SlowLEA16r], (instrs LEA16r)>;

// Integer multiplication
defm : Zn3WriteResIntPair<WriteIMul8, [Zn3Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
// Integer multiplication: 32-bit immediate/register forms, the 64-bit forms,
// and the high-part writes.
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
// The high-part writes consume no resources and no micro-ops; they only carry
// latency. The Ld variant adds the model's LoadLatency on top of the 4 cycles.
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part (with load).
defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.

defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.

defm : Zn3WriteResIntPair<WriteCMPXCHG, [Zn3ALU0123], 3, [12], 5>; // Compare and set, compare and swap.
// 8-bit register CMPXCHG: three micro-ops rather than the five of the generic
// WriteCMPXCHG entry.
def Zn3WriteCMPXCHG8rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3;
  let ReleaseAtCycles = [12];
  let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

defm : Zn3WriteResInt<WriteCMPXCHGRMW, [Zn3ALU0123], 3, [12], 6>; // Compare and set, compare and swap.

// 8-bit memory CMPXCHG (plain and LOCK-prefixed): derived from the register
// form by adding LoadLatency to the latency and two micro-ops.
def Zn3WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteCMPXCHG8rr.Latency);
  let ReleaseAtCycles = [1, 1, 12];
  let NumMicroOps = !add(Zn3WriteCMPXCHG8rr.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;

def Zn3WriteCMPXCHG8B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
  let NumMicroOps = 19;
}
def : InstRW<[Zn3WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def Zn3WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 4; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [59];
  let NumMicroOps = 28;
}
def : InstRW<[Zn3WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;

// 8/16-bit register XCHG forms: two micro-ops.
// NOTE(review): the name suggests these are the forms that cannot be handled
// by register renaming - confirm against the optimization guide.
def Zn3WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;

// Memory XCHG, 8/16-bit: five micro-ops, LoadLatency + 3.
def Zn3WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 5;
}
def : InstRW<[Zn3WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;

// Memory XCHG, 32/64-bit: two micro-ops, LoadLatency + 2.
def Zn3WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;

// Integer division.
// FIXME: uops for 8-bit division measures as 2. for others it's a guess.
// FIXME: latency for 8-bit division measures as 10. for others it's a guess.
// Unsigned and signed division use identical numbers per operand width; all
// of them run on Zn3Divider with two micro-ops.
defm : Zn3WriteResIntPair<WriteDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteDiv64, [Zn3Divider], 17, [17], 2>;
defm : Zn3WriteResIntPair<WriteIDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteIDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteIDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteIDiv64, [Zn3Divider], 17, [17], 2>;

defm : Zn3WriteResIntPair<WriteBSF, [Zn3ALU1], 3, [3], 6, /*LoadUOps=*/2>; // Bit scan forward.
defm : Zn3WriteResIntPair<WriteBSR, [Zn3ALU1], 4, [4], 6, /*LoadUOps=*/2>; // Bit scan reverse.

defm : Zn3WriteResIntPair<WritePOPCNT, [Zn3ALU0123], 1, [1], 1>; // Bit population count.

// POPCNT16rr differs from WritePOPCNT only in ReleaseAtCycles (4 instead of 1).
def Zn3WritePOPCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePOPCNT16rr], (instrs POPCNT16rr)>;

defm : Zn3WriteResIntPair<WriteLZCNT, [Zn3ALU0123], 1, [1], 1>; // Leading zero count.
// LZCNT16rr: one micro-op, 1-cycle latency, ReleaseAtCycles of 4.
def Zn3WriteLZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLZCNT16rr], (instrs LZCNT16rr)>;

defm : Zn3WriteResIntPair<WriteTZCNT, [Zn3ALU12], 2, [1], 2>; // Trailing zero count.

// TZCNT16rr: unlike WriteTZCNT it may use any of Zn3ALU0123, with
// ReleaseAtCycles of 4.
def Zn3WriteTZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteTZCNT16rr], (instrs TZCNT16rr)>;

defm : Zn3WriteResIntPair<WriteCMOV, [Zn3ALU03], 1, [1], 1>; // Conditional move.
defm : Zn3WriteResInt<WriteFCMOV, [Zn3ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X87 conditional move.
defm : Zn3WriteResInt<WriteSETCC, [Zn3ALU03], 1, [2], 1>; // Set register based on condition code.
defm : Zn3WriteResInt<WriteSETCCStore, [Zn3ALU03, Zn3AGU012, Zn3Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteLAHFSAHF, [Zn3ALU3], 1, [1], 1>; // Load/Store flags in AH.
// Bit test. The *Ld variants add the AGU and load resources and the model's
// LoadLatency.
defm : Zn3WriteResInt<WriteBitTest, [Zn3ALU12], 1, [1], 1>; // Bit Test
defm : Zn3WriteResInt<WriteBitTestImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 2>;
defm : Zn3WriteResInt<WriteBitTestRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 7>;

defm : Zn3WriteResInt<WriteBitTestSet, [Zn3ALU12], 2, [2], 2>; // Bit Test + Set
defm : Zn3WriteResInt<WriteBitTestSetImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 4>;
defm : Zn3WriteResInt<WriteBitTestSetRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 9>;

// Integer shifts and rotates.
defm : Zn3WriteResIntPair<WriteShift, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteShiftCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteRotate, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCL/RCR by one, register operand.
def Zn3WriteRotateR1 : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// RCL/RCR by one, memory operand: register form plus LoadLatency and one
// extra micro-op.
def Zn3WriteRotateM1 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateR1.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteRotateR1.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
                                         RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;

// RCR by immediate, register operand.
def Zn3WriteRotateRightRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

// RCR by immediate, memory operand: register form plus LoadLatency and three
// extra micro-ops.
def Zn3WriteRotateRightMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3);
}
def : InstRW<[Zn3WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;

// RCL by immediate, register operand.
def Zn3WriteRotateLeftRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

// RCL by immediate, memory operand: register form plus LoadLatency and two
// extra micro-ops.
def Zn3WriteRotateLeftMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateLeftRI.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;

defm : Zn3WriteResIntPair<WriteRotateCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCR by CL, register operand.
def Zn3WriteRotateRightRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;

// RCR by CL, memory operand: register form plus LoadLatency and two extra
// micro-ops.
def Zn3WriteRotateRightMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateRightRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;

// RCL by CL, register operand.
def Zn3WriteRotateLeftRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;

// RCL by CL, memory operand: register form plus LoadLatency and two extra
// micro-ops.
def Zn3WriteRotateLeftMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateLeftRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;

// Double shift instructions.
defm : Zn3WriteResInt<WriteSHDrri, [Zn3ALU12], 2, [3], 4>;
defm : Zn3WriteResInt<WriteSHDrrcl, [Zn3ALU12], 2, [3], 5>;
defm : Zn3WriteResInt<WriteSHDmri, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;
defm : Zn3WriteResInt<WriteSHDmrcl, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : Zn3WriteResIntPair<WriteBEXTR, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBLS, [Zn3ALU0123], 2, [2], 2, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBZHI, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
defm : Zn3WriteResInt<WriteZero, [Zn3ALU0123], 0, [0], 1>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : Zn3WriteResIntPair<WriteJump, [Zn3BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis

// Floating point. This covers both scalar and vector operations.
// FLD0 / FLD1 / FLDC writes: latency is the model's LoadLatency plus a fixed
// number of cycles.
defm : Zn3WriteResInt<WriteFLD0, [Zn3FPLd01, Zn3Load, Zn3FP1], !add(Znver3Model.LoadLatency, 4), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLD1, [Zn3FPLd01, Zn3Load, Zn3FP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLDC, [Zn3FPLd01, Zn3Load, Zn3FP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
// FP/vector loads: latency is the model's VecLoadLatency + 1.
defm : Zn3WriteResXMM<WriteFLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// MOVHPD/MOVHPS stores (SSE and VEX encodings): two micro-ops.
def Zn3WriteWriteFStoreMMX : SchedWriteRes<[Zn3FPSt, Zn3Store]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
                                               VMOVHPDmr, VMOVHPSmr)>;

// Remaining FP stores, including the non-temporal ones, all use the model's
// StoreLatency.
defm : Zn3WriteResXMM<WriteFStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNTX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Masked stores: many micro-ops and a long hold on Zn3FPSt.
defm : Zn3WriteResXMM<WriteFMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
defm : Zn3WriteResXMM<WriteFMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
defm : Zn3WriteResYMM<WriteFMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
defm : Zn3WriteResYMM<WriteFMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;

defm : Zn3WriteResXMMPair<WriteFAdd, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub.
// x87 add/sub/mul with an integer memory operand (the *_FI16m / *_FI32m
// forms): two micro-ops, ReleaseAtCycles of 24 on Zn3FPU0123.
def Zn3WriteX87Arith : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 24];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
                                         SUB_FI16m, SUB_FI32m,
                                         SUBR_FI16m, SUBR_FI32m,
                                         MUL_FI16m, MUL_FI32m)>;

// x87 division with an integer memory operand: same shape as
// Zn3WriteX87Arith, but ReleaseAtCycles of 62 on Zn3FPU0123.
def Zn3WriteX87Div : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 62];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                       DIVR_FI16m, DIVR_FI32m)>;

defm : Zn3WriteResXMMPair<WriteFAddX, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
defm : Zn3WriteResYMMPair<WriteFAddY, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAddZ>; // Floating point add/sub (ZMM).
defm : Zn3WriteResXMMPair<WriteFAdd64, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub.
defm : Zn3WriteResXMMPair<WriteFAdd64X, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (XMM).
defm : Zn3WriteResYMMPair<WriteFAdd64Y, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (YMM).
// FP compare, compare-to-flags and multiply. The ZMM variants are declared
// through X86WriteResPairUnsupported.
defm : X86WriteResPairUnsupported<WriteFAdd64Z>; // Floating point double add/sub (ZMM).
defm : Zn3WriteResXMMPair<WriteFCmp, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare.
defm : Zn3WriteResXMMPair<WriteFCmpX, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmpY, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmpZ>; // Floating point compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCmp64, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare.
defm : Zn3WriteResXMMPair<WriteFCmp64X, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmp64Y, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>; // Floating point double compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCom, [Zn3FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (X87).
defm : Zn3WriteResXMMPair<WriteFComX, [Zn3FPFMul01], 4, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE).
defm : Zn3WriteResXMMPair<WriteFMul, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication.
defm : Zn3WriteResXMMPair<WriteFMulX, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMulY, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMulZ>; // Floating point multiplication (ZMM).
// Double-precision multiply, then division and square root on Zn3FPFDiv.
defm : Zn3WriteResXMMPair<WriteFMul64, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication.
defm : Zn3WriteResXMMPair<WriteFMul64X, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMul64Y, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMul64Z>; // Floating point double multiplication (ZMM).
defm : Zn3WriteResXMMPair<WriteFDiv, [Zn3FPFDiv], 11, [3], 1>; // Floating point division.
defm : Zn3WriteResXMMPair<WriteFDivX, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (XMM).
defm : Zn3WriteResYMMPair<WriteFDivY, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (YMM).
defm : X86WriteResPairUnsupported<WriteFDivZ>; // Floating point division (ZMM).
defm : Zn3WriteResXMMPair<WriteFDiv64, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division.
defm : Zn3WriteResXMMPair<WriteFDiv64X, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (XMM).
defm : Zn3WriteResYMMPair<WriteFDiv64Y, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (YMM).
defm : X86WriteResPairUnsupported<WriteFDiv64Z>; // Floating point double division (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root.
defm : Zn3WriteResXMMPair<WriteFSqrtX, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrtY, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (YMM).
// Double and long-double square root, then the reciprocal and reciprocal
// square root estimates.
defm : X86WriteResPairUnsupported<WriteFSqrtZ>; // Floating point square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt64, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root.
defm : Zn3WriteResXMMPair<WriteFSqrt64X, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrt64Y, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (YMM).
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; // Floating point double square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt80, [Zn3FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis // Floating point long double square root.
defm : Zn3WriteResXMMPair<WriteFRcp, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate.
defm : Zn3WriteResXMMPair<WriteFRcpX, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRcpY, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRcpZ>; // Floating point reciprocal estimate (ZMM).
defm : Zn3WriteResXMMPair<WriteFRsqrt, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate.
defm : Zn3WriteResXMMPair<WriteFRsqrtX, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRsqrtY, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; // Floating point reciprocal square root estimate (ZMM).
// FMA, dot products, sign manipulation, rounding, logicals and the XMM form of
// the floating point TEST write. ZMM variants are marked unsupported.
defm : Zn3WriteResXMMPair<WriteFMA, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add.
defm : Zn3WriteResXMMPair<WriteFMAX, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (XMM).
defm : Zn3WriteResYMMPair<WriteFMAY, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (YMM).
defm : X86WriteResPairUnsupported<WriteFMAZ>; // Fused Multiply Add (ZMM).
defm : Zn3WriteResXMMPair<WriteDPPD, [Zn3FPFMul01], 9, [6], 3, /*LoadUOps=*/2>; // Floating point double dot product.
defm : Zn3WriteResXMMPair<WriteDPPS, [Zn3FPFMul01], 15, [8], 8, /*LoadUOps=*/2>; // Floating point single dot product.
defm : Zn3WriteResYMMPair<WriteDPPSY, [Zn3FPFMul01], 15, [8], 7, /*LoadUOps=*/1>; // Floating point single dot product (YMM).
defm : Zn3WriteResXMMPair<WriteFSign, [Zn3FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point fabs/fchs.
defm : Zn3WriteResXMMPair<WriteFRnd, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding.
defm : Zn3WriteResYMMPair<WriteFRndY, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
defm : X86WriteResPairUnsupported<WriteFRndZ>; // Floating point rounding (ZMM).
defm : Zn3WriteResXMMPair<WriteFLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals.
defm : Zn3WriteResYMMPair<WriteFLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteFLogicZ>; // Floating point and/or/xor logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteFTest, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions.
// Remaining floating point TEST writes, then fixed/variable shuffles and
// fixed/variable blends. ZMM variants are marked unsupported.
defm : Zn3WriteResYMMPair<WriteFTestY, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteFTestZ>; // Floating point TEST instructions (ZMM).
defm : Zn3WriteResXMMPair<WriteFShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles.
defm : Zn3WriteResYMMPair<WriteFShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFShuffleZ>; // Floating point vector shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarShuffle, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles.
defm : Zn3WriteResYMMPair<WriteFVarShuffleY, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; // Floating point vector variable shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteFBlend, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends.
defm : Zn3WriteResYMMPair<WriteFBlendY, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteFBlendZ>; // Floating point vector blends (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarBlend, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends.
defm : Zn3WriteResYMMPair<WriteFVarBlendY, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; // Fp vector variable blends (ZMM).
// Horizontal Add/Sub (float and integer)
defm : Zn3WriteResXMMPair<WriteFHAdd, [Zn3FPFAdd0], 6, [2], 4>;
defm : Zn3WriteResYMMPair<WriteFHAddY, [Zn3FPFAdd0], 6, [2], 3, /*LoadUOps=*/1>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : Zn3WriteResXMMPair<WritePHAdd, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
defm : Zn3WriteResXMMPair<WritePHAddX, [Zn3FPVAdd0], 2, [2], 4>;
defm : Zn3WriteResYMMPair<WritePHAddY, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Vector integer operations.
// Vector loads: latency is the model's VecLoadLatency plus one cycle.
defm : Zn3WriteResXMM<WriteVecLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecLoadNT, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecLoadNTY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
// Vector stores: latency is the model's StoreLatency.
defm : Zn3WriteResXMM<WriteVecStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Per-instruction override for the register forms of the 128-bit extracts.
def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
  let Latency = 4;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;

// Override for the `mr` forms: register-form latency/uops plus one extra uop.
// NOTE(review): this write uses the store resources (Zn3FPSt, Zn3Store) yet
// adds Znver3Model.LoadLatency rather than StoreLatency -- confirm intended.
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;

// Override for VINSERTF128rm: reuses the 128-bit extract's register-form
// latency and uop count, with the load latency added on top.
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;

defm : Zn3WriteResYMM<WriteVecStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
defm : Zn3WriteResXMM<WriteVecMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
defm : Zn3WriteResYMM<WriteVecMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
defm : Zn3WriteResYMM<WriteVecMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;

defm : Zn3WriteResXMM<WriteVecMoveToGpr, [Zn3FPLd01], 1, [2], 1>;
defm : Zn3WriteResXMM<WriteVecMoveFromGpr, [Zn3FPLd01], 1, [2], 1>;

// Per-instruction overrides for the listed MMX register moves.
def Zn3WriteMOVMMX : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;

// Same resources as Zn3WriteMOVMMX, but holds Zn3FPFMisc0123 for 4 cycles.
def Zn3WriteMOVMMXSlow : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;

defm : Zn3WriteResXMMPair<WriteVecALU, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals.
// Per-instruction overrides for EXTRQ/INSERTQ (1 uop).
def Zn3WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;

// Per-instruction overrides for EXTRQI/INSERTQI (2 uops).
def Zn3WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;

defm : Zn3WriteResXMMPair<WriteVecALUX, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (XMM).

// XMM ALU instructions bound to Zn3FPVAdd01 instead of the Zn3FPVAdd0123
// resource used by WriteVecALUX.
def Zn3WriteVecALUXSlow : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
                                            PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
                                            PAVGBrr, PAVGWrr,
                                            PSIGNBrr, PSIGNDrr, PSIGNWrr,
                                            VPABSBrr, VPABSDrr, VPABSWrr,
                                            VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
                                            VPAVGBrr, VPAVGWrr,
                                            VPCMPEQQrr,
                                            VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
                                            PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;

// MMX counterparts, likewise bound to Zn3FPVAdd01.
def Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr,
                                           MMX_PSIGNBrr, MMX_PSIGNDrr, MMX_PSIGNWrr,
                                           MMX_PADDSBrr, MMX_PADDSWrr, MMX_PADDUSBrr, MMX_PADDUSWrr,
                                           MMX_PAVGBrr, MMX_PAVGWrr,
                                           MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr)>;

defm : Zn3WriteResYMMPair<WriteVecALUY, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).

// YMM ALU instructions bound to Zn3FPVAdd01 instead of the Zn3FPVAdd0123
// resource used by WriteVecALUY.
def Zn3WriteVecALUYSlow : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
                                            VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
                                            VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
                                            VPAVGBYrr, VPAVGWYrr,
                                            VPCMPEQQYrr,
                                            VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;

defm : X86WriteResPairUnsupported<WriteVecALUZ>; // Vector integer ALU op, no logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteVecLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals.
defm : Zn3WriteResXMMPair<WriteVecLogicX, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (XMM).
// Vector integer logicals (YMM/ZMM), TEST, shifts, immediate shifts and the
// default integer multiply. ZMM variants are marked unsupported.
defm : Zn3WriteResYMMPair<WriteVecLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteVecLogicZ>; // Vector integer and/or/xor logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteVecTest, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions.
defm : Zn3WriteResYMMPair<WriteVecTestY, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteVecTestZ>; // Vector integer TEST instructions (ZMM).
defm : Zn3WriteResXMMPair<WriteVecShift, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (default).
defm : Zn3WriteResXMMPair<WriteVecShiftX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (XMM).
defm : Zn3WriteResYMMPair<WriteVecShiftY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVecShiftZ>; // Vector integer shifts (ZMM).
defm : Zn3WriteResXMMPair<WriteVecShiftImm, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (default).
defm : Zn3WriteResXMMPair<WriteVecShiftImmX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (XMM).
defm : Zn3WriteResYMMPair<WriteVecShiftImmY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; // Vector integer immediate shifts (ZMM).
defm : Zn3WriteResXMMPair<WriteVecIMul, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (default).
// Vector integer multiply (XMM/YMM/ZMM), PMULLD, fixed and variable shuffles,
// and the XMM/YMM blends. ZMM variants are marked unsupported.
defm : Zn3WriteResXMMPair<WriteVecIMulX, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM).
defm : Zn3WriteResYMMPair<WriteVecIMulY, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM).
defm : X86WriteResPairUnsupported<WriteVecIMulZ>; // Vector integer multiply (ZMM).
defm : Zn3WriteResXMMPair<WritePMULLD, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD.
defm : Zn3WriteResYMMPair<WritePMULLDY, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
defm : X86WriteResPairUnsupported<WritePMULLDZ>; // Vector PMULLD (ZMM).
defm : Zn3WriteResXMMPair<WriteShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles.
defm : Zn3WriteResXMMPair<WriteShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (XMM).
defm : Zn3WriteResYMMPair<WriteShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteShuffleZ>; // Vector shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteVarShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles.
defm : Zn3WriteResXMMPair<WriteVarShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (XMM).
defm : Zn3WriteResYMMPair<WriteVarShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; // Vector variable shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteBlend, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends.
defm : Zn3WriteResYMMPair<WriteBlendY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends (YMM).
// Remaining blends, PSADBW, MPSAD and PHMINPOS. ZMM variants are marked
// unsupported.
defm : X86WriteResPairUnsupported<WriteBlendZ>; // Vector blends (ZMM).
defm : Zn3WriteResXMMPair<WriteVarBlend, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends.
defm : Zn3WriteResYMMPair<WriteVarBlendY, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteVarBlendZ>; // Vector variable blends (ZMM).
defm : Zn3WriteResXMMPair<WritePSADBW, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW.
defm : Zn3WriteResXMMPair<WritePSADBWX, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
defm : Zn3WriteResYMMPair<WritePSADBWY, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
defm : X86WriteResPairUnsupported<WritePSADBWZ>; // Vector PSADBW (ZMM).
defm : Zn3WriteResXMMPair<WriteMPSAD, [Zn3FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>; // Vector MPSAD.
defm : Zn3WriteResYMMPair<WriteMPSADY, [Zn3FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD (YMM).
defm : X86WriteResPairUnsupported<WriteMPSADZ>; // Vector MPSAD (ZMM).
defm : Zn3WriteResXMMPair<WritePHMINPOS, [Zn3FPVAdd01], 3, [1], 1>; // Vector PHMINPOS.

// Vector insert/extract operations.
defm : Zn3WriteResXMMPair<WriteVecInsert, [Zn3FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr to vector element.
defm : Zn3WriteResXMM<WriteVecExtract, [Zn3FPLd01], 1, [2], 2>; // Extract vector element to gpr.
defm : Zn3WriteResXMM<WriteVecExtractSt, [Zn3FPSt, Zn3Store], !add(1, Znver3Model.StoreLatency), [1, 1], 2>; // Extract vector element and store.

// MOVMSK operations.
// Mask-extraction (MOVMSK) writes.
defm : Zn3WriteResXMM<WriteFMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteVecMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResYMM<WriteVecMOVMSKY, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteMMXMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;

// Conversion between integer and float.
defm : Zn3WriteResXMMPair<WriteCvtSD2I, [Zn3FPFCvt01], 2, [2], 2>; // Double -> Integer.
defm : Zn3WriteResXMMPair<WriteCvtPD2I, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Integer (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPD2IY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Integer (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // Double -> Integer (ZMM).

// Per-instruction override for the MMX packed double -> integer conversions;
// the same write is applied to both the rr and rm forms listed below.
def Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm, MMX_CVTPD2PIrr, MMX_CVTTPD2PIrr)>;

defm : Zn3WriteResXMMPair<WriteCvtSS2I, [Zn3FPFCvt01], 2, [2], 2>; // Float -> Integer.

defm : Zn3WriteResXMMPair<WriteCvtPS2I, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPS2IY, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; // Float -> Integer (ZMM).
// Integer -> floating point conversions, plus the scalar float -> double
// size conversion. ZMM variants are marked unsupported.
defm : Zn3WriteResXMMPair<WriteCvtI2SD, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double.
defm : Zn3WriteResXMMPair<WriteCvtI2PD, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
defm : Zn3WriteResYMMPair<WriteCvtI2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; // Integer -> Double (ZMM).

// Per-instruction override for the MMX integer -> packed double conversions;
// the same write is applied to both the rr and rm forms listed below.
def Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 2;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDrm, MMX_CVTPI2PDrr)>;

defm : Zn3WriteResXMMPair<WriteCvtI2SS, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Float.
defm : Zn3WriteResXMMPair<WriteCvtI2PS, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
defm : Zn3WriteResYMMPair<WriteCvtI2PSY, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; // Integer -> Float (ZMM).

// Per-instruction override for the MMX integer -> packed float conversion
// (register form only).
def Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSrr)>;

defm : Zn3WriteResXMMPair<WriteCvtSS2SD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion.
// Floating point size conversions (float/double/half). ZMM variants are
// marked unsupported.
defm : Zn3WriteResXMMPair<WriteCvtPS2PD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPS2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> Double size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; // Float -> Double size conversion (ZMM).

defm : Zn3WriteResXMMPair<WriteCvtSD2SS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conversion.
defm : Zn3WriteResXMMPair<WriteCvtPD2PS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conversion (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPD2PSY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Float size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; // Double -> Float size conversion (ZMM).

defm : Zn3WriteResXMMPair<WriteCvtPH2PS, [Zn3FPFCvt01], 3, [1], 1>; // Half -> Float size conversion.
defm : Zn3WriteResYMMPair<WriteCvtPH2PSY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Float size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; // Half -> Float size conversion (ZMM).

// The Float -> Half writes use the non-pair multiclasses; the "+ store"
// variants are defined as separate writes.
defm : Zn3WriteResXMM<WriteCvtPS2PH, [Zn3FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
defm : Zn3WriteResYMM<WriteCvtPS2PHY, [Zn3FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (YMM).
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // Float -> Half size conversion (ZMM).
defm : Zn3WriteResXMM<WriteCvtPS2PHSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(3, Znver3Model.StoreLatency), [1, 1, 1], 2>; // Float -> Half + store size conversion.
defm : Zn3WriteResYMM<WriteCvtPS2PHYSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(6, Znver3Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (YMM).
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Float -> Half + store size conversion (ZMM).

// CRC32 instruction.
defm : Zn3WriteResIntPair<WriteCRC32, [Zn3ALU1], 3, [1], 1>;

// SHA instructions are modelled with per-instruction overrides. Each `rm`
// write derives its latency and uop count from the matching `rr` write and
// adds the AGU and load resources.
def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;

def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;

def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;

def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;

def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;

def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
  let ReleaseAtCycles = [1, 1, 3];
  let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;

def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 3;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 4;
}
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;

// Unlike the other SHA `rm` writes in this group, this one adds one uop on
// top of the `rr` count.
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;

def Zn3WriteSHA1RNDS4rri : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 6;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;

def Zn3WriteSHA256RNDS2rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;

// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm : Zn3WriteResXMMPair<WritePCmpIStrM, [Zn3FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
// Packed Compare Explicit Length Strings, Return Mask
defm : Zn3WriteResXMMPair<WritePCmpEStrM, [Zn3FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
// Packed Compare Implicit Length Strings, Return Index
defm : Zn3WriteResXMMPair<WritePCmpIStrI, [Zn3FPVAdd0123], 2, [8], 4>;
// Packed Compare Explicit Length Strings, Return Index
defm : Zn3WriteResXMMPair<WritePCmpEStrI, [Zn3FPVAdd0123], 6, [12], 8, /*LoadUOps=*/4>;

// AES instructions.
defm : Zn3WriteResXMMPair<WriteAESDecEnc, [Zn3FPAES01], 4, [1], 1>; // Decryption, encryption.
// Remaining AES writes.
defm : Zn3WriteResXMMPair<WriteAESIMC, [Zn3FPAES01], 4, [1], 1>; // InvMixColumn.
defm : Zn3WriteResXMMPair<WriteAESKeyGen, [Zn3FPAES01], 4, [1], 1>; // Key Generation.

// Carry-less multiplication instructions.
defm : Zn3WriteResXMMPair<WriteCLMul, [Zn3FPCLM01], 4, [4], 4>;

// EMMS/FEMMS
defm : Zn3WriteResInt<WriteEMMS, [Zn3ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis

// Load/store MXCSR
defm : Zn3WriteResInt<WriteLDMXCSR, [Zn3AGU012, Zn3Load, Zn3ALU0123], !add(Znver3Model.LoadLatency, 1), [1, 1, 6], 1>; // FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteSTMXCSR, [Zn3ALU0123, Zn3AGU012, Zn3Store], !add(1, Znver3Model.StoreLatency), [60, 1, 1], 2>; // FIXME: latency not from llvm-exegesis

// Catch-all for expensive system instructions.
defm : Zn3WriteResInt<WriteSystem, [Zn3ALU0123],
                      100, [100], 100>;

// Scheduling class bound to VZEROUPPER by the InstRW that follows.
def Zn3WriteVZEROUPPER : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 0; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVZEROUPPER],
             (instrs VZEROUPPER)>;

// Scheduling class bound to VZEROALL by the InstRW that follows.
def Zn3WriteVZEROALL : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 10; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
  let NumMicroOps = 18;
}
def : InstRW<[Zn3WriteVZEROALL],
             (instrs VZEROALL)>;

// AVX2.
// Fp 256-bit width vector shuffles.
defm : Zn3WriteResYMMPair<WriteFShuffle256, [Zn3FPVShuf],
                          2, [1], 1, /*LoadUOps=*/2>;
// Fp 256-bit width variable shuffles.
defm : Zn3WriteResYMMPair<WriteFVarShuffle256, [Zn3FPVShuf],
                          7, [1], 2, /*LoadUOps=*/1>;
// 256-bit width vector shuffles.
defm : Zn3WriteResYMMPair<WriteShuffle256, [Zn3FPVShuf],
                          2, [1], 1>;
// Dedicated scheduling classes for the VPERM* instructions named in each
// InstRW below. The memory forms add Znver3Model.LoadLatency to a base
// latency and additionally list the Zn3AGU012 and Zn3Load resources.

def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
  let Latency = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr],
             (instrs VPERM2I128rr, VPERM2F128rr)>;

// Load form: latency and micro-op count are derived from the rr class above.
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency,
                     Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVPERM2F128rm],
             (instrs VPERM2F128rm)>;

def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, 7);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteVPERMPSYrm],
             (instrs VPERMPSYrm)>;

def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
  let Latency = 6;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMYri],
             (instrs VPERMPDYri, VPERMQYri)>;

// Load form: the Zn3WriteVPERMYri latency plus the load latency, and one
// micro-op more than the register form.
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVPERMPDYmi],
             (instrs VPERMPDYmi)>;

def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, 5);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMDYm],
             (instrs VPERMQYmi, VPERMDYrm)>;

// 256-bit width packed vector width-changing move.
defm : Zn3WriteResYMMPair<WriteVPMOV256, [Zn3FPVShuf01],
                          4, [3], 2, /*LoadUOps=*/-1>;
// 256-bit width vector variable shuffles.
defm : Zn3WriteResYMMPair<WriteVarShuffle256, [Zn3FPVShuf],
                          5, [1], 2, /*LoadUOps=*/1>;
// Variable vector shifts.
defm : Zn3WriteResXMMPair<WriteVarVecShift, [Zn3FPVShift01], 1, [1], 1>;
// Variable vector shifts (YMM).
defm : Zn3WriteResYMMPair<WriteVarVecShiftY, [Zn3FPVShift01], 1, [1], 1>;
// Variable vector shifts (ZMM).
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

// Old microcoded instructions that nobody uses.
defm : Zn3WriteResInt<WriteMicrocoded, [Zn3ALU0123],
                      100, [100], 100>;

// Fence instructions.
defm : Zn3WriteResInt<WriteFence, [Zn3ALU0123],
                      1, [100], 1>;

// Scheduling class bound to LFENCE by the InstRW that follows.
def Zn3WriteLFENCE : SchedWriteRes<[Zn3LSU]> {
  let Latency = 1;
  let ReleaseAtCycles = [30];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLFENCE],
             (instrs LFENCE)>;

// Scheduling class bound to SFENCE by the InstRW that follows.
def Zn3WriteSFENCE : SchedWriteRes<[Zn3LSU]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSFENCE],
             (instrs SFENCE)>;

// Nop, not very useful except it provides a model for nops!
// FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>;


///////////////////////////////////////////////////////////////////////////////
// Zero Cycle Move
///////////////////////////////////////////////////////////////////////////////

// Write with zero latency that lists no processor resources; one micro-op.
def Zn3WriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
  let ReleaseAtCycles = [];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteZeroLatency],
             (instrs MOV32rr, MOV32rr_REV,
                     MOV64rr, MOV64rr_REV,
                     MOVSX32rr32)>;

// Same as Zn3WriteZeroLatency, but counted as two micro-ops.
def Zn3WriteSwapRenameable : SchedWriteRes<[]> {
  let Latency = 0;
  let ReleaseAtCycles = [];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSwapRenameable],
             (instrs XCHG32rr, XCHG32ar,
                     XCHG64rr, XCHG64ar)>;

// Compare+Exchange - TODO RMW support.
defm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>;

// Empty sched class
defm : Zn3WriteResXMM<WriteFMove, [Zn3FPVMisc0123], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteFMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

// MMX
defm : Zn3WriteResXMM<WriteVecMove, [Zn3FPFMisc0123], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// Register-to-register moves registered as optimizable; the whole class is
// guarded by TruePred.
def : IsOptimizableRegisterMove<[
  InstructionEquivalenceClass<[
    // GPR variants.
    MOV32rr, MOV32rr_REV, MOV64rr, MOV64rr_REV,
    MOVSX32rr32,
    XCHG32rr, XCHG32ar, XCHG64rr, XCHG64ar,

    // MMX variants.
    // MMX moves are *NOT* eliminated.

    // SSE variants.
    MOVAPSrr, MOVAPSrr_REV, MOVUPSrr, MOVUPSrr_REV,
    MOVAPDrr, MOVAPDrr_REV, MOVUPDrr, MOVUPDrr_REV,
    MOVDQArr, MOVDQArr_REV, MOVDQUrr, MOVDQUrr_REV,

    // AVX variants.
    VMOVAPSrr, VMOVAPSrr_REV, VMOVUPSrr, VMOVUPSrr_REV,
    VMOVAPDrr, VMOVAPDrr_REV, VMOVUPDrr, VMOVUPDrr_REV,
    VMOVDQArr, VMOVDQArr_REV, VMOVDQUrr, VMOVDQUrr_REV,

    // AVX YMM variants.
    VMOVAPSYrr, VMOVAPSYrr_REV, VMOVUPSYrr, VMOVUPSYrr_REV,
    VMOVAPDYrr, VMOVAPDYrr_REV, VMOVUPDYrr, VMOVUPDYrr_REV,
    VMOVDQAYrr, VMOVDQAYrr_REV, VMOVDQUYrr, VMOVDQUYrr_REV,
  ], TruePred >
]>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Each variant below selects Zn3WriteZeroLatency when its predicate matches
// and otherwise falls back to the listed generic write. Variant order is
// significant and is preserved.

def Zn3WriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn3WriteZeroIdiom],
             (instrs XOR32rr, XOR32rr_REV, XOR64rr, XOR64rr_REV,
                     SUB32rr, SUB32rr_REV, SUB64rr, SUB64rr_REV)>;

// Same shape, but keyed on operands 0 and 1 being the same register.
def Zn3WriteZeroIdiomEFLAGS : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn3WriteZeroIdiomEFLAGS],
             (instrs CMP8rr,  CMP8rr_REV,  CMP16rr, CMP16rr_REV,
                     CMP32rr, CMP32rr_REV, CMP64rr, CMP64rr_REV)>;

def Zn3WriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogic]>
]>;
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
def : InstRW<[Zn3WriteFZeroIdiom],
             (instrs VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr)>;

// Variant for the YMM forms; falls back to WriteFLogicY.
def Zn3WriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[Zn3WriteFZeroIdiomY],
             (instrs VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr)>;

// Variant for the XMM vector logic forms; falls back to WriteVecLogicX.
def Zn3WriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
// NOTE: PXORrr, PANDNrr are not zero-cycle!
def : InstRW<[Zn3WriteVZeroIdiomLogicX],
             (instrs VPXORrr, VPANDNrr)>;

// Variant for the YMM vector logic forms; falls back to WriteVecLogicY.
def Zn3WriteVZeroIdiomLogicY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
def : InstRW<[Zn3WriteVZeroIdiomLogicY],
             (instrs VPXORYrr, VPANDNYrr)>;

// Variant for the XMM vector ALU forms; falls back to WriteVecALUX.
def Zn3WriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
//       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
def : InstRW<[Zn3WriteVZeroIdiomALUX],
             (instrs VPSUBBrr,   VPSUBWrr,   VPSUBDrr,   VPSUBQrr,
                     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;

// Variant for the YMM vector ALU forms; falls back to WriteVecALUY.
def Zn3WriteVZeroIdiomALUY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUY]>
]>;
def : InstRW<[Zn3WriteVZeroIdiomALUY],
             (instrs VPSUBBYrr,   VPSUBWYrr,   VPSUBDYrr,   VPSUBQYrr,
                     VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;

// Instructions registered as zero idioms; every class is guarded by
// ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[
    XOR32rr, XOR32rr_REV, XOR64rr, XOR64rr_REV,
    SUB32rr, SUB32rr_REV, SUB64rr, SUB64rr_REV
  ], ZeroIdiomPredicate>,

  // SSE XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,
    ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr,
    PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr,
    PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSrr, VXORPDrr,
    VANDNPSrr, VANDNPDrr,

    // int variants.
    VPXORrr,
    VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr,
    VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
  ], ZeroIdiomPredicate>,

  // AVX YMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSYrr, VXORPDYrr,
    VANDNPSYrr, VANDNPDYrr,

    // int variants.
    VPXORYrr,
    VPANDNYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPSUBSBYrr, VPSUBSWYrr,
    VPSUBUSBYrr, VPSUBUSWYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  ], ZeroIdiomPredicate>,
]>;

// Instructions registered as dependency-breaking. The CMP class is guarded
// by CheckSameRegOperand<0, 1>; all other classes by ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[
    SBB32rr, SBB32rr_REV,
    SBB64rr, SBB64rr_REV
  ], ZeroIdiomPredicate>,
  DepBreakingClass<[
    CMP8rr,  CMP8rr_REV,
    CMP16rr, CMP16rr_REV,
    CMP32rr, CMP32rr_REV,
    CMP64rr, CMP64rr_REV
  ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX YMM
  DepBreakingClass<[
    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel