1 //===-- X86FixupInstTunings.cpp - replace instructions -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file does a tuning pass replacing slower machine instructions 10 // with faster ones. We do this here, as opposed to during normal ISel, as 11 // attempting to get the "right" instruction can break patterns. This pass 12 // is not meant search for special cases where an instruction can be transformed 13 // to another, it is only meant to do transformations where the old instruction 14 // is always replacable with the new instructions. For example: 15 // 16 // `vpermq ymm` -> `vshufd ymm` 17 // -- BAD, not always valid (lane cross/non-repeated mask) 18 // 19 // `vpermilps ymm` -> `vshufd ymm` 20 // -- GOOD, always replaceable 21 // 22 //===----------------------------------------------------------------------===// 23 24 #include "X86.h" 25 #include "X86InstrInfo.h" 26 #include "X86Subtarget.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "x86-fixup-inst-tuning" 35 36 STATISTIC(NumInstChanges, "Number of instructions changes"); 37 38 namespace { 39 class X86FixupInstTuningPass : public MachineFunctionPass { 40 public: 41 static char ID; 42 43 X86FixupInstTuningPass() : MachineFunctionPass(ID) {} 44 45 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; } 46 47 bool runOnMachineFunction(MachineFunction &MF) override; 48 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 49 MachineBasicBlock::iterator &I); 50 51 // This pass runs after regalloc and doesn't support VReg operands. 52 MachineFunctionProperties getRequiredProperties() const override { 53 return MachineFunctionProperties().set( 54 MachineFunctionProperties::Property::NoVRegs); 55 } 56 57 private: 58 const X86InstrInfo *TII = nullptr; 59 const X86Subtarget *ST = nullptr; 60 const MCSchedModel *SM = nullptr; 61 }; 62 } // end anonymous namespace 63 64 char X86FixupInstTuningPass::ID = 0; 65 66 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 67 68 FunctionPass *llvm::createX86FixupInstTuning() { 69 return new X86FixupInstTuningPass(); 70 } 71 72 template <typename T> 73 static std::optional<bool> CmpOptionals(T NewVal, T CurVal) { 74 if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal) 75 return *NewVal < *CurVal; 76 77 return std::nullopt; 78 } 79 80 bool X86FixupInstTuningPass::processInstruction( 81 MachineFunction &MF, MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator &I) { 83 MachineInstr &MI = *I; 84 unsigned Opc = MI.getOpcode(); 85 unsigned NumOperands = MI.getDesc().getNumOperands(); 86 87 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> { 88 // We already checked that SchedModel exists in `NewOpcPreferable`. 89 return MCSchedModel::getReciprocalThroughput( 90 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 91 }; 92 93 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> { 94 // We already checked that SchedModel exists in `NewOpcPreferable`. 95 return MCSchedModel::computeInstrLatency( 96 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 97 }; 98 99 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> { 100 if (unsigned Size = TII->get(Opcode).getSize()) 101 return Size; 102 // Zero size means we where unable to compute it. 103 return std::nullopt; 104 }; 105 106 auto NewOpcPreferable = [&](unsigned NewOpc, 107 bool ReplaceInTie = true) -> bool { 108 std::optional<bool> Res; 109 if (SM->hasInstrSchedModel()) { 110 // Compare tput -> lat -> code size. 111 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc)); 112 if (Res.has_value()) 113 return *Res; 114 115 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc)); 116 if (Res.has_value()) 117 return *Res; 118 } 119 120 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc)); 121 if (Res.has_value()) 122 return *Res; 123 124 // We either have either were unable to get tput/lat/codesize or all values 125 // were equal. Return specified option for a tie. 126 return ReplaceInTie; 127 }; 128 129 // `vpermilpd r, i` -> `vshufpd r, r, i` 130 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k` 131 // `vshufpd` is always as fast or faster than `vpermilpd` and takes 132 // 1 less byte of code size for VEX and EVEX encoding. 133 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool { 134 if (!NewOpcPreferable(NewOpc)) 135 return false; 136 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 137 MI.removeOperand(NumOperands - 1); 138 MI.addOperand(MI.getOperand(NumOperands - 2)); 139 MI.setDesc(TII->get(NewOpc)); 140 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 141 return true; 142 }; 143 144 // `vpermilps r, i` -> `vshufps r, r, i` 145 // `vpermilps r, i, k` -> `vshufps r, r, i, k` 146 // `vshufps` is always as fast or faster than `vpermilps` and takes 147 // 1 less byte of code size for VEX and EVEX encoding. 148 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool { 149 if (!NewOpcPreferable(NewOpc)) 150 return false; 151 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 152 MI.removeOperand(NumOperands - 1); 153 MI.addOperand(MI.getOperand(NumOperands - 2)); 154 MI.setDesc(TII->get(NewOpc)); 155 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 156 return true; 157 }; 158 159 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles. 160 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less 161 // byte of code size. 162 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool { 163 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as 164 // `vpshufd` saves a byte of code size. 165 if (!ST->hasNoDomainDelayShuffle() || 166 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 167 return false; 168 MI.setDesc(TII->get(NewOpc)); 169 return true; 170 }; 171 172 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00` 173 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff` 174 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00` 175 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff` 176 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k` 177 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k` 178 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k` 179 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k` 180 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd` 181 // -> `vunpck{l|h}qdq` 182 // 2) If `vshufpd` faster than `vunpck{l|h}pd` 183 // -> `vshufpd` 184 // 185 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay) 186 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool { 187 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 188 return false; 189 190 MI.setDesc(TII->get(NewOpc)); 191 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 192 return true; 193 }; 194 195 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool { 196 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real 197 // downside to the integer unpck, but if someone doesn't specify exact 198 // target we won't find it faster. 199 if (!ST->hasNoDomainDelayShuffle() || 200 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 201 return false; 202 MI.setDesc(TII->get(NewOpc)); 203 return true; 204 }; 205 206 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain, 207 unsigned NewOpc) -> bool { 208 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 209 return true; 210 return ProcessUNPCK(NewOpc, 0x00); 211 }; 212 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain, 213 unsigned NewOpc) -> bool { 214 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 215 return true; 216 return ProcessUNPCK(NewOpc, 0xff); 217 }; 218 219 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool { 220 return ProcessUNPCKToIntDomain(NewOpcIntDomain); 221 }; 222 223 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool { 224 return ProcessUNPCKToIntDomain(NewOpc); 225 }; 226 227 switch (Opc) { 228 case X86::VPERMILPDri: 229 return ProcessVPERMILPDri(X86::VSHUFPDrri); 230 case X86::VPERMILPDYri: 231 return ProcessVPERMILPDri(X86::VSHUFPDYrri); 232 case X86::VPERMILPDZ128ri: 233 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri); 234 case X86::VPERMILPDZ256ri: 235 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri); 236 case X86::VPERMILPDZri: 237 return ProcessVPERMILPDri(X86::VSHUFPDZrri); 238 case X86::VPERMILPDZ128rikz: 239 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz); 240 case X86::VPERMILPDZ256rikz: 241 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz); 242 case X86::VPERMILPDZrikz: 243 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz); 244 case X86::VPERMILPDZ128rik: 245 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik); 246 case X86::VPERMILPDZ256rik: 247 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik); 248 case X86::VPERMILPDZrik: 249 return ProcessVPERMILPDri(X86::VSHUFPDZrrik); 250 251 case X86::VPERMILPSri: 252 return ProcessVPERMILPSri(X86::VSHUFPSrri); 253 case X86::VPERMILPSYri: 254 return ProcessVPERMILPSri(X86::VSHUFPSYrri); 255 case X86::VPERMILPSZ128ri: 256 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri); 257 case X86::VPERMILPSZ256ri: 258 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri); 259 case X86::VPERMILPSZri: 260 return ProcessVPERMILPSri(X86::VSHUFPSZrri); 261 case X86::VPERMILPSZ128rikz: 262 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz); 263 case X86::VPERMILPSZ256rikz: 264 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz); 265 case X86::VPERMILPSZrikz: 266 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz); 267 case X86::VPERMILPSZ128rik: 268 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik); 269 case X86::VPERMILPSZ256rik: 270 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik); 271 case X86::VPERMILPSZrik: 272 return ProcessVPERMILPSri(X86::VSHUFPSZrrik); 273 case X86::VPERMILPSmi: 274 return ProcessVPERMILPSmi(X86::VPSHUFDmi); 275 case X86::VPERMILPSYmi: 276 // TODO: See if there is a more generic way we can test if the replacement 277 // instruction is supported. 278 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false; 279 case X86::VPERMILPSZ128mi: 280 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi); 281 case X86::VPERMILPSZ256mi: 282 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi); 283 case X86::VPERMILPSZmi: 284 return ProcessVPERMILPSmi(X86::VPSHUFDZmi); 285 case X86::VPERMILPSZ128mikz: 286 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz); 287 case X86::VPERMILPSZ256mikz: 288 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz); 289 case X86::VPERMILPSZmikz: 290 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz); 291 case X86::VPERMILPSZ128mik: 292 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik); 293 case X86::VPERMILPSZ256mik: 294 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik); 295 case X86::VPERMILPSZmik: 296 return ProcessVPERMILPSmi(X86::VPSHUFDZmik); 297 298 case X86::MOVLHPSrr: 299 case X86::UNPCKLPDrr: 300 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri); 301 case X86::VMOVLHPSrr: 302 case X86::VUNPCKLPDrr: 303 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri); 304 case X86::VUNPCKLPDYrr: 305 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri); 306 // VMOVLHPS is always 128 bits. 307 case X86::VMOVLHPSZrr: 308 case X86::VUNPCKLPDZ128rr: 309 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri); 310 case X86::VUNPCKLPDZ256rr: 311 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri); 312 case X86::VUNPCKLPDZrr: 313 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri); 314 case X86::VUNPCKLPDZ128rrk: 315 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik); 316 case X86::VUNPCKLPDZ256rrk: 317 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik); 318 case X86::VUNPCKLPDZrrk: 319 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik); 320 case X86::VUNPCKLPDZ128rrkz: 321 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 322 case X86::VUNPCKLPDZ256rrkz: 323 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 324 case X86::VUNPCKLPDZrrkz: 325 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz); 326 case X86::UNPCKHPDrr: 327 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri); 328 case X86::VUNPCKHPDrr: 329 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri); 330 case X86::VUNPCKHPDYrr: 331 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri); 332 case X86::VUNPCKHPDZ128rr: 333 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri); 334 case X86::VUNPCKHPDZ256rr: 335 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri); 336 case X86::VUNPCKHPDZrr: 337 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri); 338 case X86::VUNPCKHPDZ128rrk: 339 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik); 340 case X86::VUNPCKHPDZ256rrk: 341 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik); 342 case X86::VUNPCKHPDZrrk: 343 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik); 344 case X86::VUNPCKHPDZ128rrkz: 345 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 346 case X86::VUNPCKHPDZ256rrkz: 347 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 348 case X86::VUNPCKHPDZrrkz: 349 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz); 350 case X86::UNPCKLPDrm: 351 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm); 352 case X86::VUNPCKLPDrm: 353 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm); 354 case X86::VUNPCKLPDYrm: 355 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm); 356 case X86::VUNPCKLPDZ128rm: 357 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm); 358 case X86::VUNPCKLPDZ256rm: 359 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm); 360 case X86::VUNPCKLPDZrm: 361 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm); 362 case X86::VUNPCKLPDZ128rmk: 363 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk); 364 case X86::VUNPCKLPDZ256rmk: 365 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk); 366 case X86::VUNPCKLPDZrmk: 367 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk); 368 case X86::VUNPCKLPDZ128rmkz: 369 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz); 370 case X86::VUNPCKLPDZ256rmkz: 371 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz); 372 case X86::VUNPCKLPDZrmkz: 373 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz); 374 case X86::UNPCKHPDrm: 375 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm); 376 case X86::VUNPCKHPDrm: 377 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm); 378 case X86::VUNPCKHPDYrm: 379 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm); 380 case X86::VUNPCKHPDZ128rm: 381 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm); 382 case X86::VUNPCKHPDZ256rm: 383 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm); 384 case X86::VUNPCKHPDZrm: 385 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm); 386 case X86::VUNPCKHPDZ128rmk: 387 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk); 388 case X86::VUNPCKHPDZ256rmk: 389 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk); 390 case X86::VUNPCKHPDZrmk: 391 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk); 392 case X86::VUNPCKHPDZ128rmkz: 393 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz); 394 case X86::VUNPCKHPDZ256rmkz: 395 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz); 396 case X86::VUNPCKHPDZrmkz: 397 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz); 398 399 case X86::UNPCKLPSrr: 400 return ProcessUNPCKPS(X86::PUNPCKLDQrr); 401 case X86::VUNPCKLPSrr: 402 return ProcessUNPCKPS(X86::VPUNPCKLDQrr); 403 case X86::VUNPCKLPSYrr: 404 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr); 405 case X86::VUNPCKLPSZ128rr: 406 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr); 407 case X86::VUNPCKLPSZ256rr: 408 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr); 409 case X86::VUNPCKLPSZrr: 410 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr); 411 case X86::VUNPCKLPSZ128rrk: 412 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk); 413 case X86::VUNPCKLPSZ256rrk: 414 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk); 415 case X86::VUNPCKLPSZrrk: 416 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk); 417 case X86::VUNPCKLPSZ128rrkz: 418 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz); 419 case X86::VUNPCKLPSZ256rrkz: 420 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz); 421 case X86::VUNPCKLPSZrrkz: 422 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz); 423 case X86::UNPCKHPSrr: 424 return ProcessUNPCKPS(X86::PUNPCKHDQrr); 425 case X86::VUNPCKHPSrr: 426 return ProcessUNPCKPS(X86::VPUNPCKHDQrr); 427 case X86::VUNPCKHPSYrr: 428 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr); 429 case X86::VUNPCKHPSZ128rr: 430 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr); 431 case X86::VUNPCKHPSZ256rr: 432 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr); 433 case X86::VUNPCKHPSZrr: 434 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr); 435 case X86::VUNPCKHPSZ128rrk: 436 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk); 437 case X86::VUNPCKHPSZ256rrk: 438 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk); 439 case X86::VUNPCKHPSZrrk: 440 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk); 441 case X86::VUNPCKHPSZ128rrkz: 442 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz); 443 case X86::VUNPCKHPSZ256rrkz: 444 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz); 445 case X86::VUNPCKHPSZrrkz: 446 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz); 447 case X86::UNPCKLPSrm: 448 return ProcessUNPCKPS(X86::PUNPCKLDQrm); 449 case X86::VUNPCKLPSrm: 450 return ProcessUNPCKPS(X86::VPUNPCKLDQrm); 451 case X86::VUNPCKLPSYrm: 452 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm); 453 case X86::VUNPCKLPSZ128rm: 454 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm); 455 case X86::VUNPCKLPSZ256rm: 456 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm); 457 case X86::VUNPCKLPSZrm: 458 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm); 459 case X86::VUNPCKLPSZ128rmk: 460 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk); 461 case X86::VUNPCKLPSZ256rmk: 462 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk); 463 case X86::VUNPCKLPSZrmk: 464 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk); 465 case X86::VUNPCKLPSZ128rmkz: 466 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz); 467 case X86::VUNPCKLPSZ256rmkz: 468 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz); 469 case X86::VUNPCKLPSZrmkz: 470 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz); 471 case X86::UNPCKHPSrm: 472 return ProcessUNPCKPS(X86::PUNPCKHDQrm); 473 case X86::VUNPCKHPSrm: 474 return ProcessUNPCKPS(X86::VPUNPCKHDQrm); 475 case X86::VUNPCKHPSYrm: 476 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm); 477 case X86::VUNPCKHPSZ128rm: 478 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm); 479 case X86::VUNPCKHPSZ256rm: 480 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm); 481 case X86::VUNPCKHPSZrm: 482 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm); 483 case X86::VUNPCKHPSZ128rmk: 484 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk); 485 case X86::VUNPCKHPSZ256rmk: 486 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk); 487 case X86::VUNPCKHPSZrmk: 488 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk); 489 case X86::VUNPCKHPSZ128rmkz: 490 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz); 491 case X86::VUNPCKHPSZ256rmkz: 492 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz); 493 case X86::VUNPCKHPSZrmkz: 494 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz); 495 default: 496 return false; 497 } 498 } 499 500 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) { 501 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";); 502 bool Changed = false; 503 ST = &MF.getSubtarget<X86Subtarget>(); 504 TII = ST->getInstrInfo(); 505 SM = &ST->getSchedModel(); 506 507 for (MachineBasicBlock &MBB : MF) { 508 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 509 if (processInstruction(MF, MBB, I)) { 510 ++NumInstChanges; 511 Changed = true; 512 } 513 } 514 } 515 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";); 516 return Changed; 517 } 518