1 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of the classes providing information
10 // about existing X86 FMA3 opcodes, classifying and grouping them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "X86InstrFMA3Info.h"
15 #include "X86InstrInfo.h"
16 #include "llvm/Support/Threading.h"
17 #include <atomic>
18 #include <cassert>
19 #include <cstdint>
20
21 using namespace llvm;
22
/// Emits a single X86InstrFMA3Group initializer: the three opcodes
/// X86::<Op>132<Form>, X86::<Op>213<Form> and X86::<Op>231<Form>, followed by
/// the attribute expression \p Flags. The trailing comma lets consecutive
/// invocations be placed back to back inside an array initializer.
#define FMA3GROUP(Op, Form, Flags) \
  { { X86::Op##132##Form, X86::Op##213##Form, X86::Op##231##Form }, Flags },
25
/// Emits three consecutive groups for \p Op / \p Form: the plain group, the
/// `k`-suffixed group tagged KMergeMasked, and the `kz`-suffixed group tagged
/// KZeroMasked. The emission order is part of the table layout and must not
/// be changed.
#define FMA3GROUP_MASKED(Op, Form, Flags) \
  FMA3GROUP(Op, Form, Flags) \
  FMA3GROUP(Op, Form##k, Flags | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Op, Form##kz, Flags | X86InstrFMA3Group::KZeroMasked)
30
/// Emits the masked group triples for the Z-suffixed packed forms of element
/// type \p Ty: Z128, Z256 and Z, each in its `m` and `r` variant, in the order
/// listed below.
#define FMA3GROUP_PACKED_WIDTHS_Z(Op, Ty, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z128m, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z128r, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z256m, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z256r, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Zm, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Zr, Flags)
38
/// Emits every packed group for element type \p Ty: the unmasked `Ym`/`Yr`
/// forms, then all Z-suffixed forms (via FMA3GROUP_PACKED_WIDTHS_Z), then the
/// unmasked `m`/`r` forms.
#define FMA3GROUP_PACKED_WIDTHS_ALL(Op, Ty, Flags) \
  FMA3GROUP(Op, Ty##Ym, Flags) \
  FMA3GROUP(Op, Ty##Yr, Flags) \
  FMA3GROUP_PACKED_WIDTHS_Z(Op, Ty, Flags) \
  FMA3GROUP(Op, Ty##m, Flags) \
  FMA3GROUP(Op, Ty##r, Flags)
45
/// Emits all packed groups for \p Op: every width for PD and PS, but only the
/// Z-suffixed widths for PH.
#define FMA3GROUP_PACKED(Op, Flags) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Op, PD, Flags) \
  FMA3GROUP_PACKED_WIDTHS_Z(Op, PH, Flags) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Op, PS, Flags)
50
/// Emits the Z-suffixed scalar groups for element type \p Ty. The plain
/// `Zm`/`Zr` forms are emitted unmasked; the `Zm_Int`/`Zr_Int` forms are
/// emitted as masked triples and additionally carry the Intrinsic attribute.
#define FMA3GROUP_SCALAR_WIDTHS_Z(Op, Ty, Flags) \
  FMA3GROUP(Op, Ty##Zm, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Zm_Int, Flags | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Op, Ty##Zr, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Zr_Int, Flags | X86InstrFMA3Group::Intrinsic)
56
/// Emits every scalar group for element type \p Ty: the Z-suffixed forms (via
/// FMA3GROUP_SCALAR_WIDTHS_Z) followed by the `m`, `m_Int`, `r` and `r_Int`
/// forms. The `_Int` forms carry the Intrinsic attribute.
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Op, Ty, Flags) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Op, Ty, Flags) \
  FMA3GROUP(Op, Ty##m, Flags) \
  FMA3GROUP(Op, Ty##m_Int, Flags | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Op, Ty##r, Flags) \
  FMA3GROUP(Op, Ty##r_Int, Flags | X86InstrFMA3Group::Intrinsic)
63
/// Emits all scalar groups for \p Op: every form for SD and SS, but only the
/// Z-suffixed forms for SH.
#define FMA3GROUP_SCALAR(Op, Flags) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Op, SD, Flags) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Op, SH, Flags) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Op, SS, Flags)
68
/// Emits the packed groups for \p Op followed by its scalar groups.
#define FMA3GROUP_FULL(Op, Flags) \
  FMA3GROUP_PACKED(Op, Flags) \
  FMA3GROUP_SCALAR(Op, Flags)
72
73 static const X86InstrFMA3Group Groups[] = {
74 FMA3GROUP_FULL(VFMADD, 0)
75 FMA3GROUP_PACKED(VFMADDSUB, 0)
76 FMA3GROUP_FULL(VFMSUB, 0)
77 FMA3GROUP_PACKED(VFMSUBADD, 0)
78 FMA3GROUP_FULL(VFNMADD, 0)
79 FMA3GROUP_FULL(VFNMSUB, 0)
80 };
81
/// Emits the masked group triples for element type \p Ty with form suffix
/// \p Form at each of the Z128, Z256 and Z widths, in that order.
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Op, Ty, Form, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z128##Form, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z256##Form, Flags) \
  FMA3GROUP_MASKED(Op, Ty##Z##Form, Flags)
86
/// Emits the Z128/Z256/Z masked groups with form suffix \p Form for the PD,
/// PH and PS element types, in that order.
#define FMA3GROUP_PACKED_AVX512(Op, Form, Flags) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Op, PD, Form, Flags) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Op, PH, Form, Flags) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Op, PS, Form, Flags)
91
/// Emits the masked group triples for the PDZ, PHZ and PSZ forms of \p Op
/// with form suffix \p Form. Only the Z width is emitted here (no Z128/Z256).
#define FMA3GROUP_PACKED_AVX512_ROUND(Op, Form, Flags) \
  FMA3GROUP_MASKED(Op, PDZ##Form, Flags) \
  FMA3GROUP_MASKED(Op, PHZ##Form, Flags) \
  FMA3GROUP_MASKED(Op, PSZ##Form, Flags)
96
/// Emits the scalar SDZ, SHZ and SSZ groups of \p Op with form suffix
/// \p Form. For each element type the plain form is emitted unmasked and is
/// followed by the masked triple of its `_Int` form. \p Flags is passed
/// through unchanged to every group.
#define FMA3GROUP_SCALAR_AVX512_ROUND(Op, Form, Flags) \
  FMA3GROUP(Op, SDZ##Form, Flags) \
  FMA3GROUP_MASKED(Op, SDZ##Form##_Int, Flags) \
  FMA3GROUP(Op, SHZ##Form, Flags) \
  FMA3GROUP_MASKED(Op, SHZ##Form##_Int, Flags) \
  FMA3GROUP(Op, SSZ##Form, Flags) \
  FMA3GROUP_MASKED(Op, SSZ##Form##_Int, Flags)
104
105 static const X86InstrFMA3Group BroadcastGroups[] = {
106 FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
107 FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
108 FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
109 FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
110 FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
111 FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
112 };
113
114 static const X86InstrFMA3Group RoundGroups[] = {
115 FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
116 FMA3GROUP_SCALAR_AVX512_ROUND(VFMADD, rb, X86InstrFMA3Group::Intrinsic)
117 FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
118 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
119 FMA3GROUP_SCALAR_AVX512_ROUND(VFMSUB, rb, X86InstrFMA3Group::Intrinsic)
120 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
121 FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
122 FMA3GROUP_SCALAR_AVX512_ROUND(VFNMADD, rb, X86InstrFMA3Group::Intrinsic)
123 FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
124 FMA3GROUP_SCALAR_AVX512_ROUND(VFNMSUB, rb, X86InstrFMA3Group::Intrinsic)
125 };
126
verifyTables()127 static void verifyTables() {
128 #ifndef NDEBUG
129 static std::atomic<bool> TableChecked(false);
130 if (!TableChecked.load(std::memory_order_relaxed)) {
131 assert(llvm::is_sorted(Groups) && llvm::is_sorted(RoundGroups) &&
132 llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
133 TableChecked.store(true, std::memory_order_relaxed);
134 }
135 #endif
136 }
137
138 /// Returns a reference to a group of FMA3 opcodes to where the given
139 /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
140 /// and not included into any FMA3 group, then nullptr is returned.
getFMA3Group(unsigned Opcode,uint64_t TSFlags)141 const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
142
143 // FMA3 instructions have a well defined encoding pattern we can exploit.
144 uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
145 bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
146 (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
147 (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
148 bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
149 (TSFlags & X86II::OpMapMask) == X86II::T8) ||
150 ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
151 ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
152 (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
153 bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
154 if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
155 return nullptr;
156
157 verifyTables();
158
159 ArrayRef<X86InstrFMA3Group> Table;
160 if (TSFlags & X86II::EVEX_RC)
161 Table = ArrayRef(RoundGroups);
162 else if (TSFlags & X86II::EVEX_B)
163 Table = ArrayRef(BroadcastGroups);
164 else
165 Table = ArrayRef(Groups);
166
167 // FMA 132 instructions have an opcode of 0x96-0x9F
168 // FMA 213 instructions have an opcode of 0xA6-0xAF
169 // FMA 231 instructions have an opcode of 0xB6-0xBF
170 unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
171
172 auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
173 return Group.Opcodes[FormIndex] < Opcode;
174 });
175 assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
176 "Couldn't find FMA3 opcode!");
177 return I;
178 }
179