1 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the X86 memory folding tables. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86InstrFoldTables.h" 14 #include "X86InstrInfo.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include <atomic> 17 #include <vector> 18 19 using namespace llvm; 20 21 // These tables are sorted by their RegOp value allowing them to be binary 22 // searched at runtime without the need for additional storage. The enum values 23 // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which 24 // makes sorting these tables a simple matter of alphabetizing the table. 25 #include "X86GenFoldTables.inc" 26 static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = { 27 { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD }, 28 { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD }, 29 { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD }, 30 { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS }, 31 { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS }, 32 { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS }, 33 { X86::VANDNPDZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD }, 34 { X86::VANDNPDZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD }, 35 { X86::VANDNPDZrr, X86::VANDNPDZrmb, TB_BCAST_SD }, 36 { X86::VANDNPSZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS }, 37 { X86::VANDNPSZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS }, 38 { X86::VANDNPSZrr, X86::VANDNPSZrmb, TB_BCAST_SS }, 39 { X86::VANDPDZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD }, 40 { X86::VANDPDZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD }, 41 { X86::VANDPDZrr, X86::VANDPDZrmb, TB_BCAST_SD }, 42 { X86::VANDPSZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS }, 43 { X86::VANDPSZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS }, 44 { X86::VANDPSZrr, X86::VANDPSZrmb, TB_BCAST_SS }, 45 { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD }, 46 { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD }, 47 { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD }, 48 { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS }, 49 { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS }, 50 { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS }, 51 { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD }, 52 { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD }, 53 { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD }, 54 { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS }, 55 { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS }, 56 { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS }, 57 { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD }, 58 { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD }, 59 { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD }, 60 { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS }, 61 { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS }, 62 { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS }, 63 { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD }, 64 { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD }, 65 { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD }, 66 { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS }, 67 { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS }, 68 { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS }, 69 { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD }, 70 { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD }, 71 { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD }, 72 { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS }, 73 { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS }, 74 { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS }, 75 { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD }, 76 { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD }, 77 { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD }, 78 { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS }, 79 { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS }, 80 { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS }, 81 { X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD }, 82 { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD }, 83 { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD }, 84 { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS }, 85 { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS }, 86 { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS }, 87 { X86::VORPDZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD }, 88 { X86::VORPDZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD }, 89 { X86::VORPDZrr, X86::VORPDZrmb, TB_BCAST_SD }, 90 { X86::VORPSZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS }, 91 { X86::VORPSZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS }, 92 { X86::VORPSZrr, X86::VORPSZrmb, TB_BCAST_SS }, 93 { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D }, 94 { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D }, 95 { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D }, 96 { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q }, 97 { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q }, 98 { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q }, 99 { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, 100 { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, 101 { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D }, 102 { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, 103 { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, 104 { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D }, 105 { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, 106 { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, 107 { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, 108 { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, 109 { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, 110 { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q }, 111 { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D }, 112 { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D }, 113 { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D }, 114 { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D }, 115 { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D }, 116 { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D }, 117 { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q }, 118 { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q }, 119 { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q }, 120 { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D }, 121 { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D }, 122 { X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D }, 123 { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q }, 124 { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q }, 125 { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q }, 126 { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q }, 127 { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q }, 128 { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q }, 129 { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D }, 130 { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D }, 131 { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D }, 132 { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q }, 133 { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q }, 134 { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q }, 135 { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D }, 136 { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D }, 137 { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D }, 138 { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q }, 139 { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q }, 140 { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q }, 141 { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D }, 142 { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D }, 143 { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D }, 144 { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q }, 145 { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q }, 146 { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q }, 147 { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D }, 148 { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D }, 149 { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D }, 150 { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q }, 151 { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q }, 152 { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q }, 153 { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D }, 154 { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D }, 155 { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D }, 156 { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q }, 157 { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q }, 158 { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q }, 159 { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D }, 160 { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D }, 161 { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D }, 162 { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q }, 163 { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q }, 164 { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q }, 165 { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, 166 { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, 167 { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D }, 168 { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, 169 { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, 170 { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q }, 171 { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D }, 172 { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D }, 173 { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D }, 174 { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q }, 175 { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q }, 176 { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q }, 177 { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D }, 178 { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D }, 179 { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D }, 180 { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q }, 181 { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q }, 182 { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q }, 183 { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, 184 { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, 185 { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D }, 186 { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, 187 { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, 188 { X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q }, 189 { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD }, 190 { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD }, 191 { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD }, 192 { X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS }, 193 { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS }, 194 { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS }, 195 { X86::VXORPDZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD }, 196 { X86::VXORPDZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD }, 197 { X86::VXORPDZrr, X86::VXORPDZrmb, TB_BCAST_SD }, 198 { X86::VXORPSZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS }, 199 { X86::VXORPSZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS }, 200 { X86::VXORPSZrr, X86::VXORPSZrmb, TB_BCAST_SS }, 201 }; 202 203 static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = { 204 { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD }, 205 { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD }, 206 { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD }, 207 { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS }, 208 { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS }, 209 { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS }, 210 { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD }, 211 { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD }, 212 { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD }, 213 { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS }, 214 { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS }, 215 { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS }, 216 { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD }, 217 { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD }, 218 { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD }, 219 { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS }, 220 { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS }, 221 { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS }, 222 { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD }, 223 { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD }, 224 { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD }, 225 { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS }, 226 { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS }, 227 { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS }, 228 { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD }, 229 { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD }, 230 { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD }, 231 { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS }, 232 { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS }, 233 { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS }, 234 { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD }, 235 { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD }, 236 { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD }, 237 { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS }, 238 { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS }, 239 { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS }, 240 { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD }, 241 { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD }, 242 { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD }, 243 { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS }, 244 { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS }, 245 { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS }, 246 { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD }, 247 { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD }, 248 { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD }, 249 { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS }, 250 { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS }, 251 { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS }, 252 { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD }, 253 { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD }, 254 { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD }, 255 { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS }, 256 { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS }, 257 { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS }, 258 { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD }, 259 { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD }, 260 { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD }, 261 { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS }, 262 { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS }, 263 { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS }, 264 { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD }, 265 { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD }, 266 { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD }, 267 { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS }, 268 { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS }, 269 { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS }, 270 { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD }, 271 { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD }, 272 { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD }, 273 { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS }, 274 { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS }, 275 { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS }, 276 { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD }, 277 { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD }, 278 { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD }, 279 { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS }, 280 { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS }, 281 { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS }, 282 { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD }, 283 { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD }, 284 { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD }, 285 { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS }, 286 { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS }, 287 { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS }, 288 { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD }, 289 { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD }, 290 { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD }, 291 { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS }, 292 { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS }, 293 { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS }, 294 { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD }, 295 { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD }, 296 { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD }, 297 { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS }, 298 { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS }, 299 { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS }, 300 { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD }, 301 { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD }, 302 { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD }, 303 { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS }, 304 { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS }, 305 { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS }, 306 { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD }, 307 { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD }, 308 { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD }, 309 { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS }, 310 { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS }, 311 { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS }, 312 { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, 313 { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, 314 { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, 315 { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, 316 { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, 317 { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, 318 }; 319 320 // Table to map instructions safe to broadcast using a different width from the 321 // element width. 322 static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = { 323 { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS }, 324 { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS }, 325 { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS }, 326 { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD }, 327 { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD }, 328 { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD }, 329 { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS }, 330 { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS }, 331 { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS }, 332 { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD }, 333 { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD }, 334 { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD }, 335 { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS }, 336 { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS }, 337 { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS }, 338 { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD }, 339 { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD }, 340 { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD }, 341 { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, 342 { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, 343 { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q }, 344 { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, 345 { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, 346 { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, 347 { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, 348 { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, 349 { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D }, 350 { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, 351 { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, 352 { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D }, 353 { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, 354 { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, 355 { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q }, 356 { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, 357 { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, 358 { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D }, 359 { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, 360 { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, 361 { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q }, 362 { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, 363 { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, 364 { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D }, 365 { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS }, 366 { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS }, 367 { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS }, 368 { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD }, 369 { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD }, 370 { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD }, 371 }; 372 373 static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = { 374 { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, 375 { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, 376 { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, 377 { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, 378 { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, 379 { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, 380 }; 381 382 static const X86MemoryFoldTableEntry * 383 lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) { 384 #ifndef NDEBUG 385 // Make sure the tables are sorted. 386 static std::atomic<bool> FoldTablesChecked(false); 387 if (!FoldTablesChecked.load(std::memory_order_relaxed)) { 388 assert(llvm::is_sorted(MemoryFoldTable2Addr) && 389 std::adjacent_find(std::begin(MemoryFoldTable2Addr), 390 std::end(MemoryFoldTable2Addr)) == 391 std::end(MemoryFoldTable2Addr) && 392 "MemoryFoldTable2Addr is not sorted and unique!"); 393 assert(llvm::is_sorted(MemoryFoldTable0) && 394 std::adjacent_find(std::begin(MemoryFoldTable0), 395 std::end(MemoryFoldTable0)) == 396 std::end(MemoryFoldTable0) && 397 "MemoryFoldTable0 is not sorted and unique!"); 398 assert(llvm::is_sorted(MemoryFoldTable1) && 399 std::adjacent_find(std::begin(MemoryFoldTable1), 400 std::end(MemoryFoldTable1)) == 401 std::end(MemoryFoldTable1) && 402 "MemoryFoldTable1 is not sorted and unique!"); 403 assert(llvm::is_sorted(MemoryFoldTable2) && 404 std::adjacent_find(std::begin(MemoryFoldTable2), 405 std::end(MemoryFoldTable2)) == 406 std::end(MemoryFoldTable2) && 407 "MemoryFoldTable2 is not sorted and unique!"); 408 assert(llvm::is_sorted(MemoryFoldTable3) && 409 std::adjacent_find(std::begin(MemoryFoldTable3), 410 std::end(MemoryFoldTable3)) == 411 std::end(MemoryFoldTable3) && 412 "MemoryFoldTable3 is not sorted and unique!"); 413 assert(llvm::is_sorted(MemoryFoldTable4) && 414 std::adjacent_find(std::begin(MemoryFoldTable4), 415 std::end(MemoryFoldTable4)) == 416 std::end(MemoryFoldTable4) && 417 "MemoryFoldTable4 is not sorted and unique!"); 418 assert(llvm::is_sorted(BroadcastFoldTable2) && 419 std::adjacent_find(std::begin(BroadcastFoldTable2), 420 std::end(BroadcastFoldTable2)) == 421 std::end(BroadcastFoldTable2) && 422 "BroadcastFoldTable2 is not sorted and unique!"); 423 assert(llvm::is_sorted(BroadcastFoldTable3) && 424 std::adjacent_find(std::begin(BroadcastFoldTable3), 425 std::end(BroadcastFoldTable3)) == 426 std::end(BroadcastFoldTable3) && 427 "BroadcastFoldTable3 is not sorted and unique!"); 428 assert(llvm::is_sorted(BroadcastSizeFoldTable2) && 429 std::adjacent_find(std::begin(BroadcastSizeFoldTable2), 430 std::end(BroadcastSizeFoldTable2)) == 431 std::end(BroadcastSizeFoldTable2) && 432 "BroadcastSizeFoldTable2 is not sorted and unique!"); 433 assert(llvm::is_sorted(BroadcastSizeFoldTable3) && 434 std::adjacent_find(std::begin(BroadcastSizeFoldTable3), 435 std::end(BroadcastSizeFoldTable3)) == 436 std::end(BroadcastSizeFoldTable3) && 437 "BroadcastSizeFoldTable3 is not sorted and unique!"); 438 FoldTablesChecked.store(true, std::memory_order_relaxed); 439 } 440 #endif 441 442 const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp); 443 if (Data != Table.end() && Data->KeyOp == RegOp && 444 !(Data->Flags & TB_NO_FORWARD)) 445 return Data; 446 return nullptr; 447 } 448 449 const X86MemoryFoldTableEntry * 450 llvm::lookupTwoAddrFoldTable(unsigned RegOp) { 451 return lookupFoldTableImpl(MemoryFoldTable2Addr, RegOp); 452 } 453 454 const X86MemoryFoldTableEntry * 455 llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { 456 ArrayRef<X86MemoryFoldTableEntry> FoldTable; 457 if (OpNum == 0) 458 FoldTable = ArrayRef(MemoryFoldTable0); 459 else if (OpNum == 1) 460 FoldTable = ArrayRef(MemoryFoldTable1); 461 else if (OpNum == 2) 462 FoldTable = ArrayRef(MemoryFoldTable2); 463 else if (OpNum == 3) 464 FoldTable = ArrayRef(MemoryFoldTable3); 465 else if (OpNum == 4) 466 FoldTable = ArrayRef(MemoryFoldTable4); 467 else 468 return nullptr; 469 470 return lookupFoldTableImpl(FoldTable, RegOp); 471 } 472 473 namespace { 474 475 // This class stores the memory unfolding tables. It is instantiated as a 476 // function scope static variable to lazily init the unfolding table. 477 struct X86MemUnfoldTable { 478 // Stores memory unfolding tables entries sorted by opcode. 479 std::vector<X86MemoryFoldTableEntry> Table; 480 481 X86MemUnfoldTable() { 482 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2Addr) 483 // Index 0, folded load and store, no alignment requirement. 484 addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); 485 486 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable0) 487 // Index 0, mix of loads and stores. 488 addTableEntry(Entry, TB_INDEX_0); 489 490 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable1) 491 // Index 1, folded load 492 addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); 493 494 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2) 495 // Index 2, folded load 496 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); 497 498 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable3) 499 // Index 3, folded load 500 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); 501 502 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable4) 503 // Index 4, folded load 504 addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); 505 506 // Broadcast tables. 507 for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2) 508 // Index 2, folded broadcast 509 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); 510 511 for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3) 512 // Index 3, folded broadcast 513 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); 514 515 // Sort the memory->reg unfold table. 516 array_pod_sort(Table.begin(), Table.end()); 517 518 // Now that it's sorted, ensure its unique. 519 assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && 520 "Memory unfolding table is not unique!"); 521 } 522 523 void addTableEntry(const X86MemoryFoldTableEntry &Entry, 524 uint16_t ExtraFlags) { 525 // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it. 526 if ((Entry.Flags & TB_NO_REVERSE) == 0) 527 Table.push_back({Entry.DstOp, Entry.KeyOp, 528 static_cast<uint16_t>(Entry.Flags | ExtraFlags) }); 529 } 530 }; 531 } 532 533 const X86MemoryFoldTableEntry * 534 llvm::lookupUnfoldTable(unsigned MemOp) { 535 static X86MemUnfoldTable MemUnfoldTable; 536 auto &Table = MemUnfoldTable.Table; 537 auto I = llvm::lower_bound(Table, MemOp); 538 if (I != Table.end() && I->KeyOp == MemOp) 539 return &*I; 540 return nullptr; 541 } 542 543 namespace { 544 545 // This class stores the memory -> broadcast folding tables. It is instantiated 546 // as a function scope static variable to lazily init the folding table. 547 struct X86MemBroadcastFoldTable { 548 // Stores memory broadcast folding tables entries sorted by opcode. 549 std::vector<X86MemoryFoldTableEntry> Table; 550 551 X86MemBroadcastFoldTable() { 552 // Broadcast tables. 553 for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable2) { 554 unsigned RegOp = Reg2Bcst.KeyOp; 555 unsigned BcstOp = Reg2Bcst.DstOp; 556 if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 557 unsigned MemOp = Reg2Mem->DstOp; 558 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | 559 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 560 Table.push_back({MemOp, BcstOp, Flags}); 561 } 562 } 563 for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable2) { 564 unsigned RegOp = Reg2Bcst.KeyOp; 565 unsigned BcstOp = Reg2Bcst.DstOp; 566 if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 567 unsigned MemOp = Reg2Mem->DstOp; 568 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | 569 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 570 Table.push_back({MemOp, BcstOp, Flags}); 571 } 572 } 573 574 for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable3) { 575 unsigned RegOp = Reg2Bcst.KeyOp; 576 unsigned BcstOp = Reg2Bcst.DstOp; 577 if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 578 unsigned MemOp = Reg2Mem->DstOp; 579 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | 580 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 581 Table.push_back({MemOp, BcstOp, Flags}); 582 } 583 } 584 for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable3) { 585 unsigned RegOp = Reg2Bcst.KeyOp; 586 unsigned BcstOp = Reg2Bcst.DstOp; 587 if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 588 unsigned MemOp = Reg2Mem->DstOp; 589 uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | 590 TB_FOLDED_LOAD | TB_FOLDED_BCAST; 591 Table.push_back({MemOp, BcstOp, Flags}); 592 } 593 } 594 595 // Sort the memory->broadcast fold table. 596 array_pod_sort(Table.begin(), Table.end()); 597 } 598 }; 599 } // namespace 600 601 static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry, 602 unsigned BroadcastBits) { 603 switch (Entry.Flags & TB_BCAST_MASK) { 604 case TB_BCAST_SD: 605 case TB_BCAST_Q: 606 return BroadcastBits == 64; 607 case TB_BCAST_SS: 608 case TB_BCAST_D: 609 return BroadcastBits == 32; 610 } 611 return false; 612 } 613 614 const X86MemoryFoldTableEntry * 615 llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) { 616 static X86MemBroadcastFoldTable MemBroadcastFoldTable; 617 auto &Table = MemBroadcastFoldTable.Table; 618 for (auto I = llvm::lower_bound(Table, MemOp); 619 I != Table.end() && I->KeyOp == MemOp; ++I) { 620 if (matchBroadcastSize(*I, BroadcastBits)) 621 return &*I; 622 } 623 return nullptr; 624 } 625