xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FixupInstTuning.cpp (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file does a tuning pass replacing slower machine instructions
10 // with faster ones. We do this here, as opposed to during normal ISel, as
11 // attempting to get the "right" instruction can break patterns. This pass
12 // is not meant to search for special cases where an instruction can be
13 // transformed to another; it is only meant to do transformations where the
14 // old instruction is always replaceable with the new instruction. For example:
15 //
16 //      `vpermq ymm` -> `vshufd ymm`
17 //          -- BAD, not always valid (lane cross/non-repeated mask)
18 //
19 //      `vpermilps ymm` -> `vshufps ymm`
20 //          -- GOOD, always replaceable
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "X86.h"
25 #include "X86InstrInfo.h"
26 #include "X86Subtarget.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "x86-fixup-inst-tuning"
35 
36 STATISTIC(NumInstChanges, "Number of instructions changes");
37 
38 namespace {
39 class X86FixupInstTuningPass : public MachineFunctionPass {
40 public:
41   static char ID;
42 
43   X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
44 
45   StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
46 
47   bool runOnMachineFunction(MachineFunction &MF) override;
48   bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
49                           MachineBasicBlock::iterator &I);
50 
51   // This pass runs after regalloc and doesn't support VReg operands.
52   MachineFunctionProperties getRequiredProperties() const override {
53     return MachineFunctionProperties().set(
54         MachineFunctionProperties::Property::NoVRegs);
55   }
56 
57 private:
58   const X86InstrInfo *TII = nullptr;
59   const X86Subtarget *ST = nullptr;
60   const MCSchedModel *SM = nullptr;
61 };
62 } // end anonymous namespace
63 
64 char X86FixupInstTuningPass::ID = 0;
65 
66 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
67 
68 FunctionPass *llvm::createX86FixupInstTuning() {
69   return new X86FixupInstTuningPass();
70 }
71 
/// Three-way "is the new value strictly smaller?" query over two optionals.
///
/// \returns `true`/`false` when both values are present and differ (the result
/// of `*NewVal < *CurVal`), and `std::nullopt` when either value is missing or
/// the two values do not compare unequal, i.e. when no verdict can be given.
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // A missing operand means there is nothing to compare.
  const bool BothKnown = NewVal.has_value() && CurVal.has_value();
  if (!BothKnown)
    return std::nullopt;

  // Only a real difference yields a verdict; equal values are a tie.
  if (*NewVal != *CurVal)
    return std::optional<bool>(*NewVal < *CurVal);

  return std::nullopt;
}
79 
80 bool X86FixupInstTuningPass::processInstruction(
81     MachineFunction &MF, MachineBasicBlock &MBB,
82     MachineBasicBlock::iterator &I) {
83   MachineInstr &MI = *I;
84   unsigned Opc = MI.getOpcode();
85   unsigned NumOperands = MI.getDesc().getNumOperands();
86 
87   auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
88     // We already checked that SchedModel exists in `NewOpcPreferable`.
89     return MCSchedModel::getReciprocalThroughput(
90         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
91   };
92 
93   auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
94     // We already checked that SchedModel exists in `NewOpcPreferable`.
95     return MCSchedModel::computeInstrLatency(
96         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
97   };
98 
99   auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
100     if (unsigned Size = TII->get(Opcode).getSize())
101       return Size;
102     // Zero size means we where unable to compute it.
103     return std::nullopt;
104   };
105 
106   auto NewOpcPreferable = [&](unsigned NewOpc,
107                               bool ReplaceInTie = true) -> bool {
108     std::optional<bool> Res;
109     if (SM->hasInstrSchedModel()) {
110       // Compare tput -> lat -> code size.
111       Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
112       if (Res.has_value())
113         return *Res;
114 
115       Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
116       if (Res.has_value())
117         return *Res;
118     }
119 
120     Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
121     if (Res.has_value())
122       return *Res;
123 
124     // We either have either were unable to get tput/lat/codesize or all values
125     // were equal. Return specified option for a tie.
126     return ReplaceInTie;
127   };
128 
129   // `vpermilpd r, i` -> `vshufpd r, r, i`
130   // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
131   // `vshufpd` is always as fast or faster than `vpermilpd` and takes
132   // 1 less byte of code size for VEX and EVEX encoding.
133   auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
134     if (!NewOpcPreferable(NewOpc))
135       return false;
136     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
137     MI.removeOperand(NumOperands - 1);
138     MI.addOperand(MI.getOperand(NumOperands - 2));
139     MI.setDesc(TII->get(NewOpc));
140     MI.addOperand(MachineOperand::CreateImm(MaskImm));
141     return true;
142   };
143 
144   // `vpermilps r, i` -> `vshufps r, r, i`
145   // `vpermilps r, i, k` -> `vshufps r, r, i, k`
146   // `vshufps` is always as fast or faster than `vpermilps` and takes
147   // 1 less byte of code size for VEX and EVEX encoding.
148   auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
149     if (!NewOpcPreferable(NewOpc))
150       return false;
151     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
152     MI.removeOperand(NumOperands - 1);
153     MI.addOperand(MI.getOperand(NumOperands - 2));
154     MI.setDesc(TII->get(NewOpc));
155     MI.addOperand(MachineOperand::CreateImm(MaskImm));
156     return true;
157   };
158 
159   // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
160   // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
161   // byte of code size.
162   auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
163     // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
164     // `vpshufd` saves a byte of code size.
165     if (!ST->hasNoDomainDelayShuffle() ||
166         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
167       return false;
168     MI.setDesc(TII->get(NewOpc));
169     return true;
170   };
171 
172   // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
173   // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
174   // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
175   // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
176   // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
177   // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
178   // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
179   // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
180   // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
181   //        -> `vunpck{l|h}qdq`
182   // 2) If `vshufpd` faster than `vunpck{l|h}pd`
183   //        -> `vshufpd`
184   //
185   // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
186   auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
187     if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
188       return false;
189 
190     MI.setDesc(TII->get(NewOpc));
191     MI.addOperand(MachineOperand::CreateImm(MaskImm));
192     return true;
193   };
194 
195   auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
196     // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
197     // downside to the integer unpck, but if someone doesn't specify exact
198     // target we won't find it faster.
199     if (!ST->hasNoDomainDelayShuffle() ||
200         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
201       return false;
202     MI.setDesc(TII->get(NewOpc));
203     return true;
204   };
205 
206   auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
207                                unsigned NewOpc) -> bool {
208     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
209       return true;
210     return ProcessUNPCK(NewOpc, 0x00);
211   };
212   auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
213                                unsigned NewOpc) -> bool {
214     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
215       return true;
216     return ProcessUNPCK(NewOpc, 0xff);
217   };
218 
219   auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
220     return ProcessUNPCKToIntDomain(NewOpcIntDomain);
221   };
222 
223   auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
224     return ProcessUNPCKToIntDomain(NewOpc);
225   };
226 
227   switch (Opc) {
228   case X86::VPERMILPDri:
229     return ProcessVPERMILPDri(X86::VSHUFPDrri);
230   case X86::VPERMILPDYri:
231     return ProcessVPERMILPDri(X86::VSHUFPDYrri);
232   case X86::VPERMILPDZ128ri:
233     return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
234   case X86::VPERMILPDZ256ri:
235     return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
236   case X86::VPERMILPDZri:
237     return ProcessVPERMILPDri(X86::VSHUFPDZrri);
238   case X86::VPERMILPDZ128rikz:
239     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
240   case X86::VPERMILPDZ256rikz:
241     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
242   case X86::VPERMILPDZrikz:
243     return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
244   case X86::VPERMILPDZ128rik:
245     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
246   case X86::VPERMILPDZ256rik:
247     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
248   case X86::VPERMILPDZrik:
249     return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
250 
251   case X86::VPERMILPSri:
252     return ProcessVPERMILPSri(X86::VSHUFPSrri);
253   case X86::VPERMILPSYri:
254     return ProcessVPERMILPSri(X86::VSHUFPSYrri);
255   case X86::VPERMILPSZ128ri:
256     return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
257   case X86::VPERMILPSZ256ri:
258     return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
259   case X86::VPERMILPSZri:
260     return ProcessVPERMILPSri(X86::VSHUFPSZrri);
261   case X86::VPERMILPSZ128rikz:
262     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
263   case X86::VPERMILPSZ256rikz:
264     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
265   case X86::VPERMILPSZrikz:
266     return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
267   case X86::VPERMILPSZ128rik:
268     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
269   case X86::VPERMILPSZ256rik:
270     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
271   case X86::VPERMILPSZrik:
272     return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
273   case X86::VPERMILPSmi:
274     return ProcessVPERMILPSmi(X86::VPSHUFDmi);
275   case X86::VPERMILPSYmi:
276     // TODO: See if there is a more generic way we can test if the replacement
277     // instruction is supported.
278     return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
279   case X86::VPERMILPSZ128mi:
280     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
281   case X86::VPERMILPSZ256mi:
282     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
283   case X86::VPERMILPSZmi:
284     return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
285   case X86::VPERMILPSZ128mikz:
286     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
287   case X86::VPERMILPSZ256mikz:
288     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
289   case X86::VPERMILPSZmikz:
290     return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
291   case X86::VPERMILPSZ128mik:
292     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
293   case X86::VPERMILPSZ256mik:
294     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
295   case X86::VPERMILPSZmik:
296     return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
297 
298   case X86::MOVLHPSrr:
299   case X86::UNPCKLPDrr:
300     return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
301   case X86::VMOVLHPSrr:
302   case X86::VUNPCKLPDrr:
303     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
304   case X86::VUNPCKLPDYrr:
305     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
306     // VMOVLHPS is always 128 bits.
307   case X86::VMOVLHPSZrr:
308   case X86::VUNPCKLPDZ128rr:
309     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
310   case X86::VUNPCKLPDZ256rr:
311     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
312   case X86::VUNPCKLPDZrr:
313     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
314   case X86::VUNPCKLPDZ128rrk:
315     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
316   case X86::VUNPCKLPDZ256rrk:
317     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
318   case X86::VUNPCKLPDZrrk:
319     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
320   case X86::VUNPCKLPDZ128rrkz:
321     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
322   case X86::VUNPCKLPDZ256rrkz:
323     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
324   case X86::VUNPCKLPDZrrkz:
325     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
326   case X86::UNPCKHPDrr:
327     return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
328   case X86::VUNPCKHPDrr:
329     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
330   case X86::VUNPCKHPDYrr:
331     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
332   case X86::VUNPCKHPDZ128rr:
333     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
334   case X86::VUNPCKHPDZ256rr:
335     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
336   case X86::VUNPCKHPDZrr:
337     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
338   case X86::VUNPCKHPDZ128rrk:
339     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
340   case X86::VUNPCKHPDZ256rrk:
341     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
342   case X86::VUNPCKHPDZrrk:
343     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
344   case X86::VUNPCKHPDZ128rrkz:
345     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
346   case X86::VUNPCKHPDZ256rrkz:
347     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
348   case X86::VUNPCKHPDZrrkz:
349     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
350   case X86::UNPCKLPDrm:
351     return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
352   case X86::VUNPCKLPDrm:
353     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
354   case X86::VUNPCKLPDYrm:
355     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
356   case X86::VUNPCKLPDZ128rm:
357     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
358   case X86::VUNPCKLPDZ256rm:
359     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
360   case X86::VUNPCKLPDZrm:
361     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
362   case X86::VUNPCKLPDZ128rmk:
363     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
364   case X86::VUNPCKLPDZ256rmk:
365     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
366   case X86::VUNPCKLPDZrmk:
367     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
368   case X86::VUNPCKLPDZ128rmkz:
369     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
370   case X86::VUNPCKLPDZ256rmkz:
371     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
372   case X86::VUNPCKLPDZrmkz:
373     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
374   case X86::UNPCKHPDrm:
375     return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
376   case X86::VUNPCKHPDrm:
377     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
378   case X86::VUNPCKHPDYrm:
379     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
380   case X86::VUNPCKHPDZ128rm:
381     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
382   case X86::VUNPCKHPDZ256rm:
383     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
384   case X86::VUNPCKHPDZrm:
385     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
386   case X86::VUNPCKHPDZ128rmk:
387     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
388   case X86::VUNPCKHPDZ256rmk:
389     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
390   case X86::VUNPCKHPDZrmk:
391     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
392   case X86::VUNPCKHPDZ128rmkz:
393     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
394   case X86::VUNPCKHPDZ256rmkz:
395     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
396   case X86::VUNPCKHPDZrmkz:
397     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
398 
399   case X86::UNPCKLPSrr:
400     return ProcessUNPCKPS(X86::PUNPCKLDQrr);
401   case X86::VUNPCKLPSrr:
402     return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
403   case X86::VUNPCKLPSYrr:
404     return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
405   case X86::VUNPCKLPSZ128rr:
406     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
407   case X86::VUNPCKLPSZ256rr:
408     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
409   case X86::VUNPCKLPSZrr:
410     return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
411   case X86::VUNPCKLPSZ128rrk:
412     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
413   case X86::VUNPCKLPSZ256rrk:
414     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
415   case X86::VUNPCKLPSZrrk:
416     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
417   case X86::VUNPCKLPSZ128rrkz:
418     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
419   case X86::VUNPCKLPSZ256rrkz:
420     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
421   case X86::VUNPCKLPSZrrkz:
422     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
423   case X86::UNPCKHPSrr:
424     return ProcessUNPCKPS(X86::PUNPCKHDQrr);
425   case X86::VUNPCKHPSrr:
426     return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
427   case X86::VUNPCKHPSYrr:
428     return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
429   case X86::VUNPCKHPSZ128rr:
430     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
431   case X86::VUNPCKHPSZ256rr:
432     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
433   case X86::VUNPCKHPSZrr:
434     return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
435   case X86::VUNPCKHPSZ128rrk:
436     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
437   case X86::VUNPCKHPSZ256rrk:
438     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
439   case X86::VUNPCKHPSZrrk:
440     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
441   case X86::VUNPCKHPSZ128rrkz:
442     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
443   case X86::VUNPCKHPSZ256rrkz:
444     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
445   case X86::VUNPCKHPSZrrkz:
446     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
447   case X86::UNPCKLPSrm:
448     return ProcessUNPCKPS(X86::PUNPCKLDQrm);
449   case X86::VUNPCKLPSrm:
450     return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
451   case X86::VUNPCKLPSYrm:
452     return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
453   case X86::VUNPCKLPSZ128rm:
454     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
455   case X86::VUNPCKLPSZ256rm:
456     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
457   case X86::VUNPCKLPSZrm:
458     return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
459   case X86::VUNPCKLPSZ128rmk:
460     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
461   case X86::VUNPCKLPSZ256rmk:
462     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
463   case X86::VUNPCKLPSZrmk:
464     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
465   case X86::VUNPCKLPSZ128rmkz:
466     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
467   case X86::VUNPCKLPSZ256rmkz:
468     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
469   case X86::VUNPCKLPSZrmkz:
470     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
471   case X86::UNPCKHPSrm:
472     return ProcessUNPCKPS(X86::PUNPCKHDQrm);
473   case X86::VUNPCKHPSrm:
474     return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
475   case X86::VUNPCKHPSYrm:
476     return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
477   case X86::VUNPCKHPSZ128rm:
478     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
479   case X86::VUNPCKHPSZ256rm:
480     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
481   case X86::VUNPCKHPSZrm:
482     return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
483   case X86::VUNPCKHPSZ128rmk:
484     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
485   case X86::VUNPCKHPSZ256rmk:
486     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
487   case X86::VUNPCKHPSZrmk:
488     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
489   case X86::VUNPCKHPSZ128rmkz:
490     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
491   case X86::VUNPCKHPSZ256rmkz:
492     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
493   case X86::VUNPCKHPSZrmkz:
494     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
495   default:
496     return false;
497   }
498 }
499 
500 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
501   LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
502   bool Changed = false;
503   ST = &MF.getSubtarget<X86Subtarget>();
504   TII = ST->getInstrInfo();
505   SM = &ST->getSchedModel();
506 
507   for (MachineBasicBlock &MBB : MF) {
508     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
509       if (processInstruction(MF, MBB, I)) {
510         ++NumInstChanges;
511         Changed = true;
512       }
513     }
514   }
515   LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
516   return Changed;
517 }
518