//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 memory folding tables.
//
//===----------------------------------------------------------------------===//

#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include <atomic>
#include <vector>

using namespace llvm;

// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing them.
#include "X86GenFoldTables.inc"
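// Each entry pairs a register-form opcode (the sort key) with its memory-form
// counterpart plus TB_* flags. An illustrative entry of the shape the
// generated tables contain (the literal contents live in the TableGen-emitted
// X86GenFoldTables.inc) would be:
//   { X86::ADD32rr, X86::ADD32rm, 0 },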
// Table mapping instructions that are safe to broadcast using a different
// width from the element width. All of the entries are bitwise operations,
// for which the element width does not affect the result.
static const X86FoldTableEntry BroadcastSizeTable2[] = {
  { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
  { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
  { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
  { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
  { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
  { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
  { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
  { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
  { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS },
  { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
  { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
  { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD },
  { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
  { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
  { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS },
  { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
  { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
  { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD },
  { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
  { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
  { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q },
  { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
  { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
  { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
  { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
  { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
  { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D },
  { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
  { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
  { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D },
  { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
  { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
  { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q },
  { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
  { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
  { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D },
  { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
  { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
  { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q },
  { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
  { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
  { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D },
  { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
  { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
  { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS },
  { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
  { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
  { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD },
};

static const X86FoldTableEntry BroadcastSizeTable3[] = {
  { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
  { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
  { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
  { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
  { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
  { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
};

static const X86FoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
#define CHECK_SORTED_UNIQUE(TABLE)                                             \
  assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
  assert(std::adjacent_find(std::begin(TABLE), std::end(TABLE)) ==             \
             std::end(TABLE) &&                                                \
         #TABLE " is not unique");

  // Make sure the tables are sorted.
  static std::atomic<bool> FoldTablesChecked(false);
  if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
    CHECK_SORTED_UNIQUE(Table2Addr)
    CHECK_SORTED_UNIQUE(Table0)
    CHECK_SORTED_UNIQUE(Table1)
    CHECK_SORTED_UNIQUE(Table2)
    CHECK_SORTED_UNIQUE(Table3)
    CHECK_SORTED_UNIQUE(Table4)
    CHECK_SORTED_UNIQUE(BroadcastTable1)
    CHECK_SORTED_UNIQUE(BroadcastTable2)
    CHECK_SORTED_UNIQUE(BroadcastTable3)
    CHECK_SORTED_UNIQUE(BroadcastTable4)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
    FoldTablesChecked.store(true, std::memory_order_relaxed);
  }
#endif

  const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
  if (Data != Table.end() && Data->KeyOp == RegOp &&
      !(Data->Flags & TB_NO_FORWARD))
    return Data;
  return nullptr;
}

const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
  return lookupFoldTableImpl(Table2Addr, RegOp);
}

const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 0)
    FoldTable = ArrayRef(Table0);
  else if (OpNum == 1)
    FoldTable = ArrayRef(Table1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(Table2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(Table3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(Table4);
  else
    return nullptr;

  return lookupFoldTableImpl(FoldTable, RegOp);
}

const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
                                                        unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 1)
    FoldTable = ArrayRef(BroadcastTable1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(BroadcastTable2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(BroadcastTable3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(BroadcastTable4);
  else
    return nullptr;

  return lookupFoldTableImpl(FoldTable, RegOp);
}
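// Usage sketch (illustrative): to fold a load into a source operand, look up
// the table keyed by that operand's index and switch to the returned
// memory-form opcode. Assuming the classic ADD32rr -> ADD32rm fold is present
// in Table2:
//   if (const X86FoldTableEntry *E =
//           lookupFoldTable(X86::ADD32rr, /*OpNum=*/2))
//     unsigned MemOpc = E->DstOp; // X86::ADD32rm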
namespace {

// This class stores the memory unfolding tables. It is instantiated as a
// function-scope static variable to lazily initialize the unfolding table.
struct X86MemUnfoldTable {
  // Stores memory unfolding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;

  X86MemUnfoldTable() {
    for (const X86FoldTableEntry &Entry : Table2Addr)
      // Index 0, folded load and store, no alignment requirement.
      addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);

    for (const X86FoldTableEntry &Entry : Table0)
      // Index 0, mix of loads and stores.
      addTableEntry(Entry, TB_INDEX_0);

    for (const X86FoldTableEntry &Entry : Table1)
      // Index 1, folded load.
      addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table2)
      // Index 2, folded load.
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table3)
      // Index 3, folded load.
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table4)
      // Index 4, folded load.
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);

    // Broadcast tables.
    for (const X86FoldTableEntry &Entry : BroadcastTable1)
      // Index 1, folded broadcast.
      addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable2)
      // Index 2, folded broadcast.
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable3)
      // Index 3, folded broadcast.
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable4)
      // Index 4, folded broadcast.
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);

    // Sort the memory->reg unfold table.
    array_pod_sort(Table.begin(), Table.end());

    // Now that it's sorted, ensure it's unique.
    assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
           "Memory unfolding table is not unique!");
  }

  void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
    // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
    if ((Entry.Flags & TB_NO_REVERSE) == 0)
      Table.push_back({Entry.DstOp, Entry.KeyOp,
                       static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
  }
};
} // namespace

const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
  static X86MemUnfoldTable MemUnfoldTable;
  auto &Table = MemUnfoldTable.Table;
  auto I = llvm::lower_bound(Table, MemOp);
  if (I != Table.end() && I->KeyOp == MemOp)
    return &*I;
  return nullptr;
}
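// Usage sketch (illustrative): unfolding maps a memory-form opcode back to
// its register form; the TB_INDEX_* and TB_FOLDED_* bits in Flags describe
// which operand was folded and whether it carried a load, a store, or both.
// Assuming ADD32rm was produced by folding operand 2 of ADD32rr:
//   if (const X86FoldTableEntry *E = lookupUnfoldTable(X86::ADD32rm)) {
//     unsigned RegOpc = E->DstOp;                    // X86::ADD32rr
//     bool FoldedLoad = (E->Flags & TB_FOLDED_LOAD); // true for this entry
//   }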
namespace {

// This class stores the memory -> broadcast folding tables. It is instantiated
// as a function-scope static variable to lazily initialize the folding table.
struct X86BroadcastFoldTable {
  // Stores memory broadcast folding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;

  X86BroadcastFoldTable() {
    // Broadcast tables.
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    // Sort the memory->broadcast fold table.
    array_pod_sort(Table.begin(), Table.end());
  }
};
} // namespace

bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry,
                              unsigned BroadcastBits) {
  switch (Entry.Flags & TB_BCAST_MASK) {
  case TB_BCAST_W:
  case TB_BCAST_SH:
    return BroadcastBits == 16;
  case TB_BCAST_D:
  case TB_BCAST_SS:
    return BroadcastBits == 32;
  case TB_BCAST_Q:
  case TB_BCAST_SD:
    return BroadcastBits == 64;
  }
  return false;
}

const X86FoldTableEntry *
llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
  static X86BroadcastFoldTable BroadcastFoldTable;
  auto &Table = BroadcastFoldTable.Table;
  for (auto I = llvm::lower_bound(Table, MemOp);
       I != Table.end() && I->KeyOp == MemOp; ++I) {
    if (matchBroadcastSize(*I, BroadcastBits))
      return &*I;
  }
  return nullptr;
}
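// Usage sketch (illustrative): given the memory form of a 64-bit-element
// bitwise op and a source that broadcasts only 32 bits, the size tables above
// let the fold switch to the matching 32-bit-element opcode:
//   if (const X86FoldTableEntry *E = lookupBroadcastFoldTableBySize(
//           X86::VPANDQZ128rm, /*BroadcastBits=*/32))
//     unsigned BcstOpc = E->DstOp; // X86::VPANDDZ128rmb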