xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the X86 memory folding tables.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "X86InstrFoldTables.h"
14 #include "X86InstrInfo.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include <atomic>
17 #include <vector>
18 
19 using namespace llvm;
20 
21 // These tables are sorted by their RegOp value allowing them to be binary
22 // searched at runtime without the need for additional storage. The enum values
23 // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
24 // makes sorting these tables a simple matter of alphabetizing the table.
25 #include "X86GenFoldTables.inc"
26 
27 // Table to map instructions safe to broadcast using a different width from the
28 // element width.
29 static const X86FoldTableEntry BroadcastSizeTable2[] = {
30   { X86::VANDNPDZ128rr,        X86::VANDNPSZ128rmb,       TB_BCAST_SS },
31   { X86::VANDNPDZ256rr,        X86::VANDNPSZ256rmb,       TB_BCAST_SS },
32   { X86::VANDNPDZrr,           X86::VANDNPSZrmb,          TB_BCAST_SS },
33   { X86::VANDNPSZ128rr,        X86::VANDNPDZ128rmb,       TB_BCAST_SD },
34   { X86::VANDNPSZ256rr,        X86::VANDNPDZ256rmb,       TB_BCAST_SD },
35   { X86::VANDNPSZrr,           X86::VANDNPDZrmb,          TB_BCAST_SD },
36   { X86::VANDPDZ128rr,         X86::VANDPSZ128rmb,        TB_BCAST_SS },
37   { X86::VANDPDZ256rr,         X86::VANDPSZ256rmb,        TB_BCAST_SS },
38   { X86::VANDPDZrr,            X86::VANDPSZrmb,           TB_BCAST_SS },
39   { X86::VANDPSZ128rr,         X86::VANDPDZ128rmb,        TB_BCAST_SD },
40   { X86::VANDPSZ256rr,         X86::VANDPDZ256rmb,        TB_BCAST_SD },
41   { X86::VANDPSZrr,            X86::VANDPDZrmb,           TB_BCAST_SD },
42   { X86::VORPDZ128rr,          X86::VORPSZ128rmb,         TB_BCAST_SS },
43   { X86::VORPDZ256rr,          X86::VORPSZ256rmb,         TB_BCAST_SS },
44   { X86::VORPDZrr,             X86::VORPSZrmb,            TB_BCAST_SS },
45   { X86::VORPSZ128rr,          X86::VORPDZ128rmb,         TB_BCAST_SD },
46   { X86::VORPSZ256rr,          X86::VORPDZ256rmb,         TB_BCAST_SD },
47   { X86::VORPSZrr,             X86::VORPDZrmb,            TB_BCAST_SD },
48   { X86::VPANDDZ128rr,         X86::VPANDQZ128rmb,        TB_BCAST_Q },
49   { X86::VPANDDZ256rr,         X86::VPANDQZ256rmb,        TB_BCAST_Q },
50   { X86::VPANDDZrr,            X86::VPANDQZrmb,           TB_BCAST_Q },
51   { X86::VPANDNDZ128rr,        X86::VPANDNQZ128rmb,       TB_BCAST_Q },
52   { X86::VPANDNDZ256rr,        X86::VPANDNQZ256rmb,       TB_BCAST_Q },
53   { X86::VPANDNDZrr,           X86::VPANDNQZrmb,          TB_BCAST_Q },
54   { X86::VPANDNQZ128rr,        X86::VPANDNDZ128rmb,       TB_BCAST_D },
55   { X86::VPANDNQZ256rr,        X86::VPANDNDZ256rmb,       TB_BCAST_D },
56   { X86::VPANDNQZrr,           X86::VPANDNDZrmb,          TB_BCAST_D },
57   { X86::VPANDQZ128rr,         X86::VPANDDZ128rmb,        TB_BCAST_D },
58   { X86::VPANDQZ256rr,         X86::VPANDDZ256rmb,        TB_BCAST_D },
59   { X86::VPANDQZrr,            X86::VPANDDZrmb,           TB_BCAST_D },
60   { X86::VPORDZ128rr,          X86::VPORQZ128rmb,         TB_BCAST_Q },
61   { X86::VPORDZ256rr,          X86::VPORQZ256rmb,         TB_BCAST_Q },
62   { X86::VPORDZrr,             X86::VPORQZrmb,            TB_BCAST_Q },
63   { X86::VPORQZ128rr,          X86::VPORDZ128rmb,         TB_BCAST_D },
64   { X86::VPORQZ256rr,          X86::VPORDZ256rmb,         TB_BCAST_D },
65   { X86::VPORQZrr,             X86::VPORDZrmb,            TB_BCAST_D },
66   { X86::VPXORDZ128rr,         X86::VPXORQZ128rmb,        TB_BCAST_Q },
67   { X86::VPXORDZ256rr,         X86::VPXORQZ256rmb,        TB_BCAST_Q },
68   { X86::VPXORDZrr,            X86::VPXORQZrmb,           TB_BCAST_Q },
69   { X86::VPXORQZ128rr,         X86::VPXORDZ128rmb,        TB_BCAST_D },
70   { X86::VPXORQZ256rr,         X86::VPXORDZ256rmb,        TB_BCAST_D },
71   { X86::VPXORQZrr,            X86::VPXORDZrmb,           TB_BCAST_D },
72   { X86::VXORPDZ128rr,         X86::VXORPSZ128rmb,        TB_BCAST_SS },
73   { X86::VXORPDZ256rr,         X86::VXORPSZ256rmb,        TB_BCAST_SS },
74   { X86::VXORPDZrr,            X86::VXORPSZrmb,           TB_BCAST_SS },
75   { X86::VXORPSZ128rr,         X86::VXORPDZ128rmb,        TB_BCAST_SD },
76   { X86::VXORPSZ256rr,         X86::VXORPDZ256rmb,        TB_BCAST_SD },
77   { X86::VXORPSZrr,            X86::VXORPDZrmb,           TB_BCAST_SD },
78 };
79 
80 static const X86FoldTableEntry BroadcastSizeTable3[] = {
81   { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
82   { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
83   { X86::VPTERNLOGDZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
84   { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
85   { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
86   { X86::VPTERNLOGQZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
87 };
88 
89 static const X86FoldTableEntry *
90 lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
91 #ifndef NDEBUG
92 #define CHECK_SORTED_UNIQUE(TABLE)                                             \
93   assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
94   assert(std::adjacent_find(std::begin(Table), std::end(Table)) ==             \
95              std::end(Table) &&                                                \
96          #TABLE " is not unique");
97 
98   // Make sure the tables are sorted.
99   static std::atomic<bool> FoldTablesChecked(false);
100   if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
101     CHECK_SORTED_UNIQUE(Table2Addr)
102     CHECK_SORTED_UNIQUE(Table0)
103     CHECK_SORTED_UNIQUE(Table1)
104     CHECK_SORTED_UNIQUE(Table2)
105     CHECK_SORTED_UNIQUE(Table3)
106     CHECK_SORTED_UNIQUE(Table4)
107     CHECK_SORTED_UNIQUE(BroadcastTable1)
108     CHECK_SORTED_UNIQUE(BroadcastTable2)
109     CHECK_SORTED_UNIQUE(BroadcastTable3)
110     CHECK_SORTED_UNIQUE(BroadcastTable4)
111     CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
112     CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
113     FoldTablesChecked.store(true, std::memory_order_relaxed);
114   }
115 #endif
116 
117   const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
118   if (Data != Table.end() && Data->KeyOp == RegOp &&
119       !(Data->Flags & TB_NO_FORWARD))
120     return Data;
121   return nullptr;
122 }
123 
124 const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
125   return lookupFoldTableImpl(Table2Addr, RegOp);
126 }
127 
128 const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
129   ArrayRef<X86FoldTableEntry> FoldTable;
130   if (OpNum == 0)
131     FoldTable = ArrayRef(Table0);
132   else if (OpNum == 1)
133     FoldTable = ArrayRef(Table1);
134   else if (OpNum == 2)
135     FoldTable = ArrayRef(Table2);
136   else if (OpNum == 3)
137     FoldTable = ArrayRef(Table3);
138   else if (OpNum == 4)
139     FoldTable = ArrayRef(Table4);
140   else
141     return nullptr;
142 
143   return lookupFoldTableImpl(FoldTable, RegOp);
144 }
145 
146 const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
147                                                         unsigned OpNum) {
148   ArrayRef<X86FoldTableEntry> FoldTable;
149   if (OpNum == 1)
150     FoldTable = ArrayRef(BroadcastTable1);
151   else if (OpNum == 2)
152     FoldTable = ArrayRef(BroadcastTable2);
153   else if (OpNum == 3)
154     FoldTable = ArrayRef(BroadcastTable3);
155   else if (OpNum == 4)
156     FoldTable = ArrayRef(BroadcastTable4);
157   else
158     return nullptr;
159 
160   return lookupFoldTableImpl(FoldTable, RegOp);
161 }
162 
163 namespace {
164 
165 // This class stores the memory unfolding tables. It is instantiated as a
166 // function scope static variable to lazily init the unfolding table.
167 struct X86MemUnfoldTable {
168   // Stores memory unfolding tables entries sorted by opcode.
169   std::vector<X86FoldTableEntry> Table;
170 
171   X86MemUnfoldTable() {
172     for (const X86FoldTableEntry &Entry : Table2Addr)
173       // Index 0, folded load and store, no alignment requirement.
174       addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
175 
176     for (const X86FoldTableEntry &Entry : Table0)
177       // Index 0, mix of loads and stores.
178       addTableEntry(Entry, TB_INDEX_0);
179 
180     for (const X86FoldTableEntry &Entry : Table1)
181       // Index 1, folded load
182       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
183 
184     for (const X86FoldTableEntry &Entry : Table2)
185       // Index 2, folded load
186       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
187 
188     for (const X86FoldTableEntry &Entry : Table3)
189       // Index 3, folded load
190       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
191 
192     for (const X86FoldTableEntry &Entry : Table4)
193       // Index 4, folded load
194       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
195 
196     // Broadcast tables.
197     for (const X86FoldTableEntry &Entry : BroadcastTable1)
198       // Index 1, folded broadcast
199       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
200 
201     for (const X86FoldTableEntry &Entry : BroadcastTable2)
202       // Index 2, folded broadcast
203       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
204 
205     for (const X86FoldTableEntry &Entry : BroadcastTable3)
206       // Index 3, folded broadcast
207       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
208 
209     for (const X86FoldTableEntry &Entry : BroadcastTable4)
210       // Index 4, folded broadcast
211       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
212 
213     // Sort the memory->reg unfold table.
214     array_pod_sort(Table.begin(), Table.end());
215 
216     // Now that it's sorted, ensure its unique.
217     assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
218            "Memory unfolding table is not unique!");
219   }
220 
221   void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
222     // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
223     if ((Entry.Flags & TB_NO_REVERSE) == 0)
224       Table.push_back({Entry.DstOp, Entry.KeyOp,
225                        static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
226   }
227 };
228 } // namespace
229 
230 const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
231   static X86MemUnfoldTable MemUnfoldTable;
232   auto &Table = MemUnfoldTable.Table;
233   auto I = llvm::lower_bound(Table, MemOp);
234   if (I != Table.end() && I->KeyOp == MemOp)
235     return &*I;
236   return nullptr;
237 }
238 
239 namespace {
240 
241 // This class stores the memory -> broadcast folding tables. It is instantiated
242 // as a function scope static variable to lazily init the folding table.
243 struct X86BroadcastFoldTable {
244   // Stores memory broadcast folding tables entries sorted by opcode.
245   std::vector<X86FoldTableEntry> Table;
246 
247   X86BroadcastFoldTable() {
248     // Broadcast tables.
249     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
250       unsigned RegOp = Reg2Bcst.KeyOp;
251       unsigned BcstOp = Reg2Bcst.DstOp;
252       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
253         unsigned MemOp = Reg2Mem->DstOp;
254         uint16_t Flags =
255             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
256         Table.push_back({MemOp, BcstOp, Flags});
257       }
258     }
259     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
260       unsigned RegOp = Reg2Bcst.KeyOp;
261       unsigned BcstOp = Reg2Bcst.DstOp;
262       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
263         unsigned MemOp = Reg2Mem->DstOp;
264         uint16_t Flags =
265             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
266         Table.push_back({MemOp, BcstOp, Flags});
267       }
268     }
269 
270     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
271       unsigned RegOp = Reg2Bcst.KeyOp;
272       unsigned BcstOp = Reg2Bcst.DstOp;
273       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
274         unsigned MemOp = Reg2Mem->DstOp;
275         uint16_t Flags =
276             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
277         Table.push_back({MemOp, BcstOp, Flags});
278       }
279     }
280     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
281       unsigned RegOp = Reg2Bcst.KeyOp;
282       unsigned BcstOp = Reg2Bcst.DstOp;
283       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
284         unsigned MemOp = Reg2Mem->DstOp;
285         uint16_t Flags =
286             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
287         Table.push_back({MemOp, BcstOp, Flags});
288       }
289     }
290 
291     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
292       unsigned RegOp = Reg2Bcst.KeyOp;
293       unsigned BcstOp = Reg2Bcst.DstOp;
294       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
295         unsigned MemOp = Reg2Mem->DstOp;
296         uint16_t Flags =
297             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
298         Table.push_back({MemOp, BcstOp, Flags});
299       }
300     }
301 
302     // Sort the memory->broadcast fold table.
303     array_pod_sort(Table.begin(), Table.end());
304   }
305 };
306 } // namespace
307 
308 bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry,
309                               unsigned BroadcastBits) {
310   switch (Entry.Flags & TB_BCAST_MASK) {
311   case TB_BCAST_W:
312   case TB_BCAST_SH:
313     return BroadcastBits == 16;
314   case TB_BCAST_D:
315   case TB_BCAST_SS:
316     return BroadcastBits == 32;
317   case TB_BCAST_Q:
318   case TB_BCAST_SD:
319     return BroadcastBits == 64;
320   }
321   return false;
322 }
323 
324 const X86FoldTableEntry *
325 llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
326   static X86BroadcastFoldTable BroadcastFoldTable;
327   auto &Table = BroadcastFoldTable.Table;
328   for (auto I = llvm::lower_bound(Table, MemOp);
329        I != Table.end() && I->KeyOp == MemOp; ++I) {
330     if (matchBroadcastSize(*I, BroadcastBits))
331       return &*I;
332   }
333   return nullptr;
334 }
335