1 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the X86 memory folding tables. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86InstrFoldTables.h" 14 #include "X86InstrInfo.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include <atomic> 17 #include <vector> 18 19 using namespace llvm; 20 21 // These tables are sorted by their RegOp value allowing them to be binary 22 // searched at runtime without the need for additional storage. The enum values 23 // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which 24 // makes sorting these tables a simple matter of alphabetizing the table. 25 #include "X86GenFoldTables.inc" 26 27 // Table to map instructions safe to broadcast using a different width from the 28 // element width. 29 static const X86FoldTableEntry BroadcastSizeTable2[] = { 30 { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS }, 31 { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS }, 32 { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS }, 33 { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD }, 34 { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD }, 35 { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD }, 36 { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS }, 37 { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS }, 38 { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS }, 39 { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD }, 40 { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD }, 41 { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD }, 42 { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS }, 43 { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS }, 44 { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS }, 45 { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD }, 46 { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD }, 47 { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD }, 48 { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, 49 { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, 50 { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q }, 51 { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, 52 { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, 53 { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, 54 { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, 55 { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, 56 { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D }, 57 { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, 58 { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, 59 { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D }, 60 { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, 61 { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, 62 { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q }, 63 { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, 64 { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, 65 { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D }, 66 { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, 67 { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, 68 { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q }, 69 { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, 70 { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, 71 { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D }, 72 { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS }, 73 { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS }, 74 { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS }, 75 { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD }, 76 { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD }, 77 { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD }, 78 }; 79 80 static const X86FoldTableEntry BroadcastSizeTable3[] = { 81 { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, 82 { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, 83 { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, 84 { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, 85 { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, 86 { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, 87 }; 88 89 static const X86FoldTableEntry * 90 lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) { 91 #ifndef NDEBUG 92 #define CHECK_SORTED_UNIQUE(TABLE) \ 93 assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \ 94 assert(std::adjacent_find(std::begin(Table), std::end(Table)) == \ 95 std::end(Table) && \ 96 #TABLE " is not unique"); 97 98 // Make sure the tables are sorted. 99 static std::atomic<bool> FoldTablesChecked(false); 100 if (!FoldTablesChecked.load(std::memory_order_relaxed)) { 101 CHECK_SORTED_UNIQUE(Table2Addr) 102 CHECK_SORTED_UNIQUE(Table0) 103 CHECK_SORTED_UNIQUE(Table1) 104 CHECK_SORTED_UNIQUE(Table2) 105 CHECK_SORTED_UNIQUE(Table3) 106 CHECK_SORTED_UNIQUE(Table4) 107 CHECK_SORTED_UNIQUE(BroadcastTable1) 108 CHECK_SORTED_UNIQUE(BroadcastTable2) 109 CHECK_SORTED_UNIQUE(BroadcastTable3) 110 CHECK_SORTED_UNIQUE(BroadcastTable4) 111 CHECK_SORTED_UNIQUE(BroadcastSizeTable2) 112 CHECK_SORTED_UNIQUE(BroadcastSizeTable3) 113 FoldTablesChecked.store(true, std::memory_order_relaxed); 114 } 115 #endif 116 117 const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp); 118 if (Data != Table.end() && Data->KeyOp == RegOp && 119 !(Data->Flags & TB_NO_FORWARD)) 120 return Data; 121 return nullptr; 122 } 123 124 const X86FoldTableEntry * 125 llvm::lookupTwoAddrFoldTable(unsigned RegOp) { 126 return lookupFoldTableImpl(Table2Addr, RegOp); 127 } 128 129 const X86FoldTableEntry * 130 llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { 131 ArrayRef<X86FoldTableEntry> FoldTable; 132 if (OpNum == 0) 133 FoldTable = ArrayRef(Table0); 134 else if (OpNum == 1) 135 FoldTable = ArrayRef(Table1); 136 else if (OpNum == 2) 137 FoldTable = ArrayRef(Table2); 138 else if (OpNum == 3) 139 FoldTable = ArrayRef(Table3); 140 else if (OpNum == 4) 141 FoldTable = ArrayRef(Table4); 142 else 143 return nullptr; 144 145 return lookupFoldTableImpl(FoldTable, RegOp); 146 } 147 148 namespace { 149 150 // This class stores the memory unfolding tables. It is instantiated as a 151 // function scope static variable to lazily init the unfolding table. 152 struct X86MemUnfoldTable { 153 // Stores memory unfolding tables entries sorted by opcode. 154 std::vector<X86FoldTableEntry> Table; 155 156 X86MemUnfoldTable() { 157 for (const X86FoldTableEntry &Entry : Table2Addr) 158 // Index 0, folded load and store, no alignment requirement. 159 addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); 160 161 for (const X86FoldTableEntry &Entry : Table0) 162 // Index 0, mix of loads and stores. 163 addTableEntry(Entry, TB_INDEX_0); 164 165 for (const X86FoldTableEntry &Entry : Table1) 166 // Index 1, folded load 167 addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); 168 169 for (const X86FoldTableEntry &Entry : Table2) 170 // Index 2, folded load 171 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); 172 173 for (const X86FoldTableEntry &Entry : Table3) 174 // Index 3, folded load 175 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); 176 177 for (const X86FoldTableEntry &Entry : Table4) 178 // Index 4, folded load 179 addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); 180 181 // Broadcast tables. 182 for (const X86FoldTableEntry &Entry : BroadcastTable2) 183 // Index 2, folded broadcast 184 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); 185 186 for (const X86FoldTableEntry &Entry : BroadcastTable3) 187 // Index 3, folded broadcast 188 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); 189 190 for (const X86FoldTableEntry &Entry : BroadcastTable4) 191 // Index 4, folded broadcast 192 addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); 193 194 // Sort the memory->reg unfold table. 195 array_pod_sort(Table.begin(), Table.end()); 196 197 // Now that it's sorted, ensure its unique. 198 assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && 199 "Memory unfolding table is not unique!"); 200 } 201 202 void addTableEntry(const X86FoldTableEntry &Entry, 203 uint16_t ExtraFlags) { 204 // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it. 205 if ((Entry.Flags & TB_NO_REVERSE) == 0) 206 Table.push_back({Entry.DstOp, Entry.KeyOp, 207 static_cast<uint16_t>(Entry.Flags | ExtraFlags) }); 208 } 209 }; 210 } 211 212 const X86FoldTableEntry * 213 llvm::lookupUnfoldTable(unsigned MemOp) { 214 static X86MemUnfoldTable MemUnfoldTable; 215 auto &Table = MemUnfoldTable.Table; 216 auto I = llvm::lower_bound(Table, MemOp); 217 if (I != Table.end() && I->KeyOp == MemOp) 218 return &*I; 219 return nullptr; 220 } 221 222 namespace { 223 224 // This class stores the memory -> broadcast folding tables. It is instantiated 225 // as a function scope static variable to lazily init the folding table. 226 struct X86BroadcastFoldTable { 227 // Stores memory broadcast folding tables entries sorted by opcode. 228 std::vector<X86FoldTableEntry> Table; 229 230 X86BroadcastFoldTable() { 231 // Broadcast tables. 232 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) { 233 unsigned RegOp = Reg2Bcst.KeyOp; 234 unsigned BcstOp = Reg2Bcst.DstOp; 235 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 236 unsigned MemOp = Reg2Mem->DstOp; 237 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | 238 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 239 Table.push_back({MemOp, BcstOp, Flags}); 240 } 241 } 242 for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) { 243 unsigned RegOp = Reg2Bcst.KeyOp; 244 unsigned BcstOp = Reg2Bcst.DstOp; 245 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 246 unsigned MemOp = Reg2Mem->DstOp; 247 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | 248 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 249 Table.push_back({MemOp, BcstOp, Flags}); 250 } 251 } 252 253 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) { 254 unsigned RegOp = Reg2Bcst.KeyOp; 255 unsigned BcstOp = Reg2Bcst.DstOp; 256 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 257 unsigned MemOp = Reg2Mem->DstOp; 258 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | 259 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 260 Table.push_back({MemOp, BcstOp, Flags}); 261 } 262 } 263 for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) { 264 unsigned RegOp = Reg2Bcst.KeyOp; 265 unsigned BcstOp = Reg2Bcst.DstOp; 266 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 267 unsigned MemOp = Reg2Mem->DstOp; 268 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | 269 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 270 Table.push_back({MemOp, BcstOp, Flags}); 271 } 272 } 273 274 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) { 275 unsigned RegOp = Reg2Bcst.KeyOp; 276 unsigned BcstOp = Reg2Bcst.DstOp; 277 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) { 278 unsigned MemOp = Reg2Mem->DstOp; 279 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | 280 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 281 Table.push_back({MemOp, BcstOp, Flags}); 282 } 283 } 284 285 // Sort the memory->broadcast fold table. 286 array_pod_sort(Table.begin(), Table.end()); 287 } 288 }; 289 } // namespace 290 291 static bool matchBroadcastSize(const X86FoldTableEntry &Entry, 292 unsigned BroadcastBits) { 293 switch (Entry.Flags & TB_BCAST_MASK) { 294 case TB_BCAST_W: 295 case TB_BCAST_SH: 296 return BroadcastBits == 16; 297 case TB_BCAST_D: 298 case TB_BCAST_SS: 299 return BroadcastBits == 32; 300 case TB_BCAST_Q: 301 case TB_BCAST_SD: 302 return BroadcastBits == 64; 303 } 304 return false; 305 } 306 307 const X86FoldTableEntry * 308 llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) { 309 static X86BroadcastFoldTable BroadcastFoldTable; 310 auto &Table = BroadcastFoldTable.Table; 311 for (auto I = llvm::lower_bound(Table, MemOp); 312 I != Table.end() && I->KeyOp == MemOp; ++I) { 313 if (matchBroadcastSize(*I, BroadcastBits)) 314 return &*I; 315 } 316 return nullptr; 317 } 318