xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the various vector pseudo instructions used by the
10// compiler, as well as Pat patterns used during instruction selection.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15//  Non-instruction patterns
16//===----------------------------------------------------------------------===//
17
// Reading element 0 of a 128-bit f16/f32/f64 vector is selected as a
// COPY_TO_REGCLASS from VR128 into the matching scalar FP register class.
// Each table row is: scalar type, vector type, scalar register class.
let Predicates = [NoAVX512] in
foreach Row = [["f16", "v8f16", "FR16"],
               ["f32", "v4f32", "FR32"],
               ["f64", "v2f64", "FR64"]] in {
  defvar EltVT = !cast<ValueType>(Row[0]);
  defvar VecVT = !cast<ValueType>(Row[1]);
  defvar EltRC = !cast<RegisterClass>(Row[2]);
  def : Pat<(EltVT (extractelt (VecVT VR128:$vec), (iPTR 0))),
            (COPY_TO_REGCLASS (VecVT VR128:$vec), EltRC)>;
}
27
// AVX512 flavour of the element-0 extract: same COPY_TO_REGCLASS lowering,
// but using the VR128X vector class and the FR16X/FR32X/FR64X scalar classes.
// Each table row is: scalar type, vector type, scalar register class.
let Predicates = [HasAVX512] in
foreach Row = [["f16", "v8f16", "FR16X"],
               ["f32", "v4f32", "FR32X"],
               ["f64", "v2f64", "FR64X"]] in {
  defvar EltVT = !cast<ValueType>(Row[0]);
  defvar VecVT = !cast<ValueType>(Row[1]);
  defvar EltRC = !cast<RegisterClass>(Row[2]);
  def : Pat<(EltVT (extractelt (VecVT VR128X:$vec), (iPTR 0))),
            (COPY_TO_REGCLASS (VecVT VR128X:$vec), EltRC)>;
}
37
// Implicitly promote a 16/32/64-bit FP scalar to a 128-bit vector by copying
// it into VR128. Each table row is: vector type, scalar register class.
let Predicates = [NoVLX] in
foreach Row = [["v8f16", "FR16"],
               ["v4f32", "FR32"],
               ["v2f64", "FR64"]] in {
  defvar VecVT = !cast<ValueType>(Row[0]);
  defvar EltRC = !cast<RegisterClass>(Row[1]);
  def : Pat<(VecVT (scalar_to_vector EltRC:$elt)),
            (COPY_TO_REGCLASS EltRC:$elt, VR128)>;
}
48
// VLX flavour of the implicit scalar-to-vector promotion: the scalar lives in
// FR16X/FR32X/FR64X and is copied into VR128X.
// Each table row is: vector type, scalar register class.
let Predicates = [HasVLX] in
foreach Row = [["v8f16", "FR16X"],
               ["v4f32", "FR32X"],
               ["v2f64", "FR64X"]] in {
  defvar VecVT = !cast<ValueType>(Row[0]);
  defvar EltRC = !cast<RegisterClass>(Row[1]);
  def : Pat<(VecVT (scalar_to_vector EltRC:$elt)),
            (COPY_TO_REGCLASS EltRC:$elt, VR128X)>;
}
59
60//===----------------------------------------------------------------------===//
61// Subvector tricks
62//===----------------------------------------------------------------------===//
63
// Lowering of insert_subvector/extract_subvector at index 0 to subregister
// operations.
//   subRC, subVT - register class and value type of the narrow subvector.
//   RC, VT       - register class and value type of the wide vector.
//   subIdx       - subregister index naming the narrow part of the wide reg.
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
                                     RegisterClass RC, ValueType VT,
                                     SubRegIndex subIdx> {
  // Extracting the index-0 subvector is an EXTRACT_SUBREG of the wide value.
  def : Pat<(subVT (extract_subvector (VT RC:$wide), (iPTR 0))),
            (subVT (EXTRACT_SUBREG RC:$wide, subIdx))>;

  // Inserting at index 0 into undef (or a frozen undef) is an INSERT_SUBREG
  // into an IMPLICIT_DEF.
  def : Pat<(VT (insert_subvector undef_or_freeze_undef,
                                  subRC:$narrow, (iPTR 0))),
            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$narrow, subIdx))>;
}
74
// Index-0 128-bit subvector extract from, or insert into, a 256-bit vector is
// a sub_xmm subregister copy that needs no instruction.
// Each pair is: 128-bit type, 256-bit type.
foreach Tys = [[v4i32,  v8i32],  [v4f32, v8f32],  [v2i64, v4i64],
               [v2f64,  v4f64],  [v8i16, v16i16], [v16i8, v32i8],
               [v8f16,  v16f16], [v8bf16, v16bf16]] in
  defm : subvector_subreg_lowering<VR128, Tys[0], VR256, Tys[1], sub_xmm>;
87
// Index-0 128-bit subvector extract from, or insert into, a 512-bit vector is
// a sub_xmm subregister copy that needs no instruction.
// Each pair is: 128-bit type, 512-bit type.
foreach Tys = [[v4i32,  v16i32], [v4f32, v16f32], [v2i64, v8i64],
               [v2f64,  v8f64],  [v8i16, v32i16], [v16i8, v64i8],
               [v8f16,  v32f16], [v8bf16, v32bf16]] in
  defm : subvector_subreg_lowering<VR128, Tys[0], VR512, Tys[1], sub_xmm>;
100
// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
// Each pair is: VR256 subvector type, VR512 vector type (joined via sub_ymm).
foreach Tys = [[v8i32,  v16i32], [v8f32,  v16f32], [v4i64, v8i64],
               [v4f64,  v8f64],  [v16i16, v32i16], [v32i8, v64i8],
               [v16f16, v32f16], [v16bf16, v32bf16]] in
  defm : subvector_subreg_lowering<VR256, Tys[0], VR512, Tys[1], sub_ymm>;
113
114
// Inserting a subvector at index 0 of an all-zeros vector is selected as a
// plain register-to-register move, relied upon to zero the upper bits, wrapped
// in SUBREG_TO_REG. A post-isel hook removes moves that are provably
// unnecessary.
//   MoveStr - suffix spliced into "VMOV<MoveStr>rr" to name the move.
//   RC      - register class of the inserted subvector.
//   DstTy   - type of the widened result.
//   SrcTy   - type of the inserted subvector.
//   SubIdx  - subregister index the subvector occupies in the result.
multiclass subvec_zero_lowering<string MoveStr,
                                RegisterClass RC, ValueType DstTy,
                                ValueType SrcTy, SubRegIndex SubIdx> {
  defvar MoveInst = !cast<Instruction>("VMOV" # MoveStr # "rr");
  def : Pat<(DstTy (insert_subvector immAllZerosV, (SrcTy RC:$sub),
                                     (iPTR 0))),
            (SUBREG_TO_REG (i64 0), (SrcTy (MoveInst RC:$sub)), SubIdx)>;
}
126
// AVX without VLX: 128-bit subvector into a zeroed 256-bit vector.
let Predicates = [HasAVX, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, sub_xmm>;
  // All integer element types share VMOVDQArr. Pairs are: wide, narrow.
  foreach Tys = [[v4i64, v2i64], [v8i32, v4i32],
                 [v16i16, v8i16], [v32i8, v16i8]] in
    defm : subvec_zero_lowering<"DQA", VR128, Tys[0], Tys[1], sub_xmm>;
}
135
// bf16 vectors (AVX-NE-CONVERT, no VLX): 128-bit subvector into a zeroed
// 256-bit vector, using the VMOVDQArr move.
let Predicates = [HasAVXNECONVERT, NoVLX] in {
  defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>;
}
138
// VLX: use the EVEX-encoded Z128/Z256 moves. In every group the integer
// element types share one VMOVDQA64 form. Pairs are: wide type, narrow type.
let Predicates = [HasVLX] in {
  // 128-bit subvector into a zeroed 256-bit vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
  foreach Tys = [[v4i64, v2i64], [v8i32, v4i32],
                 [v16i16, v8i16], [v32i8, v16i8]] in
    defm : subvec_zero_lowering<"DQA64Z128", VR128X, Tys[0], Tys[1], sub_xmm>;

  // 128-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, sub_xmm>;
  foreach Tys = [[v8i64, v2i64], [v16i32, v4i32],
                 [v32i16, v8i16], [v64i8, v16i8]] in
    defm : subvec_zero_lowering<"DQA64Z128", VR128X, Tys[0], Tys[1], sub_xmm>;

  // 256-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, sub_ymm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, sub_ymm>;
  foreach Tys = [[v8i64, v4i64], [v16i32, v8i32],
                 [v32i16, v16i16], [v64i8, v32i8]] in
    defm : subvec_zero_lowering<"DQA64Z256", VR256X, Tys[0], Tys[1], sub_ymm>;
}
161
// AVX512 without VLX: inserts into a zeroed 512-bit vector use the
// VR128/VR256 moves (VMOVAPD/VMOVAPS/VMOVDQA and their Y forms).
// Pairs are: wide type, narrow type.
let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, sub_xmm>;
  foreach Tys = [[v8i64, v2i64], [v16i32, v4i32],
                 [v32i16, v8i16], [v64i8, v16i8]] in
    defm : subvec_zero_lowering<"DQA", VR128, Tys[0], Tys[1], sub_xmm>;

  // 256-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, sub_ymm>;
  defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, sub_ymm>;
  foreach Tys = [[v8i64, v4i64], [v16i32, v8i32],
                 [v32i16, v16i16], [v64i8, v32i8]] in
    defm : subvec_zero_lowering<"DQAY", VR256, Tys[0], Tys[1], sub_ymm>;
}
177
// f16 vectors (FP16 + VLX) reuse the VMOVAPS Z128/Z256 moves.
let Predicates = [HasFP16, HasVLX] in {
  // v8f16 into a zeroed 256-bit, then 512-bit, vector.
  foreach WideTy = [v16f16, v32f16] in
    defm : subvec_zero_lowering<"APSZ128", VR128X, WideTy, v8f16, sub_xmm>;
  // v16f16 into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
183
// bf16 vectors (BF16 + VLX) reuse the VMOVAPS Z128/Z256 moves.
let Predicates = [HasBF16, HasVLX] in {
  // v8bf16 into a zeroed 256-bit, then 512-bit, vector.
  foreach WideTy = [v16bf16, v32bf16] in
    defm : subvec_zero_lowering<"APSZ128", VR128X, WideTy, v8bf16, sub_xmm>;
  // v16bf16 into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>;
}
189
// Pattern leaf matching a mask value of type vt in register class RC, accepted
// only when the C++ predicate isMaskZeroExtended(N) returns true for the node.
class maskzeroupper<ValueType vt, RegisterClass RC>
    : PatLeaf<(vt RC:$src), [{ return isMaskZeroExtended(N); }]>;
194
// One leaf per mask width: maskzeroupperv<N>i1 pairs type v<N>i1 with register
// class VK<N>, for N in {1, 2, 4, 8, 16, 32}.
foreach Elts = ["1", "2", "4", "8", "16", "32"] in
  def "maskzeroupperv" # Elts # "i1"
      : maskzeroupper<!cast<ValueType>("v" # Elts # "i1"),
                      !cast<RegisterClass>("VK" # Elts)>;
201
// When the producer of a narrow mask is known (via the maskzeroupper leaves)
// to have zeroed the upper bits of the mask register, inserting it into an
// all-zeros wider mask needs no explicit zeroing: a register-class copy is
// enough.
let Predicates = [HasBWI] in {
  // v1i1/v8i1/v16i1 into v32i1.
  foreach Elts = ["1", "8", "16"] in {
    defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
    defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
    def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                       SrcLeaf:$k, (iPTR 0))),
              (COPY_TO_REGCLASS SrcRC:$k, VK32)>;
  }

  // v1i1/v8i1/v16i1/v32i1 into v64i1.
  foreach Elts = ["1", "8", "16", "32"] in {
    defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
    defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
    def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                       SrcLeaf:$k, (iPTR 0))),
              (COPY_TO_REGCLASS SrcRC:$k, VK64)>;
  }
}
229
// Already-zero-extended v1i1/v8i1 into an all-zeros v16i1: copy to VK16.
let Predicates = [HasAVX512] in
foreach Elts = ["1", "8"] in {
  defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
  defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     SrcLeaf:$k, (iPTR 0))),
            (COPY_TO_REGCLASS SrcRC:$k, VK16)>;
}
238
// Already-zero-extended v1i1 into an all-zeros v8i1: copy to VK8.
let Predicates = [HasDQI] in
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                  maskzeroupperv1i1:$k, (iPTR 0))),
          (COPY_TO_REGCLASS VK1:$k, VK8)>;
244
// Already-zero-extended v2i1/v4i1 into an all-zeros v8i1: copy to VK8.
let Predicates = [HasVLX, HasDQI] in
foreach Elts = ["2", "4"] in {
  defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
  defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    SrcLeaf:$k, (iPTR 0))),
            (COPY_TO_REGCLASS SrcRC:$k, VK8)>;
}
253
// Already-zero-extended v2i1/v4i1 into an all-zeros v16i1: copy to VK16.
let Predicates = [HasVLX] in
foreach Elts = ["2", "4"] in {
  defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
  defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     SrcLeaf:$k, (iPTR 0))),
            (COPY_TO_REGCLASS SrcRC:$k, VK16)>;
}
262
// Already-zero-extended v2i1/v4i1 into an all-zeros v32i1 or v64i1: copy to
// the wide mask class. The outer loop is the destination width, so the
// definition order is v32i1 (2, 4) followed by v64i1 (2, 4).
let Predicates = [HasBWI, HasVLX] in
foreach WideElts = ["32", "64"] in
foreach Elts = ["2", "4"] in {
  defvar DstVT   = !cast<ValueType>("v" # WideElts # "i1");
  defvar DstRC   = !cast<RegisterClass>("VK" # WideElts);
  defvar SrcLeaf = !cast<PatLeaf>("maskzeroupperv" # Elts # "i1");
  defvar SrcRC   = !cast<RegisterClass>("VK" # Elts);
  def : Pat<(DstVT (insert_subvector (DstVT immAllZerosV),
                                     SrcLeaf:$k, (iPTR 0))),
            (COPY_TO_REGCLASS SrcRC:$k, DstRC)>;
}
277
// When the upper bits are not known to be zero, zero them explicitly: copy to
// VK16, shift left by (16 - N), then shift right by the same amount, where N
// is the number of mask elements being inserted.
let Predicates = [HasAVX512] in
foreach Elts = [1, 2, 4] in {
  defvar SrcVT = !cast<ValueType>("v" # Elts # "i1");
  defvar SrcRC = !cast<RegisterClass>("VK" # Elts);
  defvar Amt   = !sub(16, Elts);
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (SrcVT SrcRC:$k), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS SrcRC:$k, VK16),
                                    (i8 Amt)),
                        (i8 Amt))>;
}
296
// Without DQI, v8i1 into an all-zeros v16i1 is zeroed with a 16-bit shift
// pair: left by 8, then right by 8.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                   (v8i1 VK8:$k), (iPTR 0))),
          (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$k, VK16),
                                  (i8 8)),
                      (i8 8))>;
303
let Predicates = [HasDQI] in {
  // With DQI, v8i1 into an all-zeros v16i1 is a KMOVBkk whose result is then
  // copied to VK16.
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$k), VK16)>;

  // v1i1/v2i1/v4i1 into an all-zeros v8i1: copy to VK8, then an 8-bit shift
  // pair, left and right by (8 - N).
  foreach Elts = [1, 2, 4] in {
    defvar SrcVT = !cast<ValueType>("v" # Elts # "i1");
    defvar SrcRC = !cast<RegisterClass>("VK" # Elts);
    defvar Amt   = !sub(8, Elts);
    def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                      (SrcVT SrcRC:$k), (iPTR 0))),
              (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS SrcRC:$k, VK8),
                                      (i8 Amt)),
                          (i8 Amt))>;
  }
}
322
// With BWI, a v16i1 or v32i1 mask inserted into an all-zeros wider mask is
// selected as a KMOVW/KMOVD mask-to-mask move, followed by a copy to the wide
// mask register class.
let Predicates = [HasBWI] in {
  // v16i1 -> v32i1.
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v16i1 VK16:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$k), VK32)>;

  // v16i1 -> v64i1.
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v16i1 VK16:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$k), VK64)>;

  // v32i1 -> v64i1.
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v32i1 VK32:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVDkk VK32:$k), VK64)>;
}
335
// BWI without DQI: v8i1 into an all-zeros v32i1/v64i1 is zeroed with a shift
// pair at the destination width (by 32 - 8 = 24 and 64 - 8 = 56).
let Predicates = [HasBWI, NoDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$k), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$k, VK32),
                                    (i8 24)),
                        (i8 24))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$k), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$k, VK64),
                                    (i8 56)),
                        (i8 56))>;
}
347
// BWI with DQI: v8i1 into an all-zeros v32i1/v64i1 is a KMOVBkk whose result
// is then copied to the wide mask register class.
let Predicates = [HasBWI, HasDQI] in {
  // v8i1 -> v32i1.
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$k), VK32)>;

  // v8i1 -> v64i1.
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$k), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$k), VK64)>;
}
357
// BWI: v1i1/v2i1/v4i1 into an all-zeros v32i1 or v64i1. Copy to the wide mask
// class, then shift left and right by (width - N) at the destination width.
let Predicates = [HasBWI] in {
  // Destination v32i1: 32-bit shifts.
  foreach Elts = [1, 2, 4] in {
    defvar SrcVT = !cast<ValueType>("v" # Elts # "i1");
    defvar SrcRC = !cast<RegisterClass>("VK" # Elts);
    defvar Amt   = !sub(32, Elts);
    def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                       (SrcVT SrcRC:$k), (iPTR 0))),
              (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS SrcRC:$k, VK32),
                                      (i8 Amt)),
                          (i8 Amt))>;
  }

  // Destination v64i1: 64-bit shifts.
  foreach Elts = [1, 2, 4] in {
    defvar SrcVT = !cast<ValueType>("v" # Elts # "i1");
    defvar SrcRC = !cast<RegisterClass>("VK" # Elts);
    defvar Amt   = !sub(64, Elts);
    def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                       (SrcVT SrcRC:$k), (iPTR 0))),
              (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS SrcRC:$k, VK64),
                                      (i8 Amt)),
                          (i8 Amt))>;
  }
}
385
386//===----------------------------------------------------------------------===//
387// Extra selection patterns for f128, f128mem
388
// f128 loads and stores without AVX use MOVAPS (aligned) / MOVUPS (unaligned):
// movaps is shorter than movdqa, and movaps is in SSE while movdqa is in SSE2.
let Predicates = [NoAVX] in {
  // Stores.
  def : Pat<(alignedstore (f128 VR128:$val), addr:$ptr),
            (MOVAPSmr addr:$ptr, VR128:$val)>;
  def : Pat<(store (f128 VR128:$val), addr:$ptr),
            (MOVUPSmr addr:$ptr, VR128:$val)>;

  // Loads.
  def : Pat<(alignedloadf128 addr:$ptr), (MOVAPSrm addr:$ptr)>;
  def : Pat<(loadf128 addr:$ptr), (MOVUPSrm addr:$ptr)>;
}
401
// f128 loads and stores with AVX but no VLX use VMOVAPS (aligned) /
// VMOVUPS (unaligned).
let Predicates = [HasAVX, NoVLX] in {
  // Stores.
  def : Pat<(alignedstore (f128 VR128:$val), addr:$ptr),
            (VMOVAPSmr addr:$ptr, VR128:$val)>;
  def : Pat<(store (f128 VR128:$val), addr:$ptr),
            (VMOVUPSmr addr:$ptr, VR128:$val)>;

  // Loads.
  def : Pat<(alignedloadf128 addr:$ptr), (VMOVAPSrm addr:$ptr)>;
  def : Pat<(loadf128 addr:$ptr), (VMOVUPSrm addr:$ptr)>;
}
413
// f128 loads and stores with VLX use the Z128 forms of VMOVAPS (aligned) /
// VMOVUPS (unaligned) on VR128X.
let Predicates = [HasVLX] in {
  // Stores.
  def : Pat<(alignedstore (f128 VR128X:$val), addr:$ptr),
            (VMOVAPSZ128mr addr:$ptr, VR128X:$val)>;
  def : Pat<(store (f128 VR128X:$val), addr:$ptr),
            (VMOVUPSZ128mr addr:$ptr, VR128X:$val)>;

  // Loads.
  def : Pat<(alignedloadf128 addr:$ptr), (VMOVAPSZ128rm addr:$ptr)>;
  def : Pat<(loadf128 addr:$ptr), (VMOVUPSZ128rm addr:$ptr)>;
}
425
// f128 and/or/xor with SSE1 use the PS forms: andps is shorter than andpd or
// pand, and andps is in SSE while andpd/pand are in SSE2.
// Each row is: SelectionDAG node, instruction base name. For every row the
// register-memory pattern is defined first, then register-register.
let Predicates = [UseSSE1] in
foreach Row = [["X86fand", "ANDPS"],
               ["X86for",  "ORPS"],
               ["X86fxor", "XORPS"]] in {
  defvar LogicOp = !cast<SDPatternOperator>(Row[0]);
  def : Pat<(f128 (LogicOp VR128:$lhs, (memopf128 addr:$rhs))),
            (!cast<Instruction>(Row[1] # "rm") VR128:$lhs, f128mem:$rhs)>;

  def : Pat<(f128 (LogicOp VR128:$lhs, VR128:$rhs)),
            (!cast<Instruction>(Row[1] # "rr") VR128:$lhs, VR128:$rhs)>;
}
446
// f128 and/or/xor with AVX but no VLX use the V-prefixed PS forms
// (VANDPS/VORPS/VXORPS), matching the PS choice of the SSE1 patterns.
// Each row is: SelectionDAG node, instruction base name. For every row the
// register-memory pattern is defined first, then register-register.
let Predicates = [HasAVX, NoVLX] in
foreach Row = [["X86fand", "VANDPS"],
               ["X86for",  "VORPS"],
               ["X86fxor", "VXORPS"]] in {
  defvar LogicOp = !cast<SDPatternOperator>(Row[0]);
  def : Pat<(f128 (LogicOp VR128:$lhs, (loadf128 addr:$rhs))),
            (!cast<Instruction>(Row[1] # "rm") VR128:$lhs, f128mem:$rhs)>;

  def : Pat<(f128 (LogicOp VR128:$lhs, VR128:$rhs)),
            (!cast<Instruction>(Row[1] # "rr") VR128:$lhs, VR128:$rhs)>;
}
467
// f128 and/or/xor with VLX use the Z128 PS forms
// (VANDPSZ128/VORPSZ128/VXORPSZ128) on VR128X.
// Each row is: SelectionDAG node, instruction base name. For every row the
// register-memory pattern is defined first, then register-register.
let Predicates = [HasVLX] in
foreach Row = [["X86fand", "VANDPSZ128"],
               ["X86for",  "VORPSZ128"],
               ["X86fxor", "VXORPSZ128"]] in {
  defvar LogicOp = !cast<SDPatternOperator>(Row[0]);
  def : Pat<(f128 (LogicOp VR128X:$lhs, (loadf128 addr:$rhs))),
            (!cast<Instruction>(Row[1] # "rm") VR128X:$lhs, f128mem:$rhs)>;

  def : Pat<(f128 (LogicOp VR128X:$lhs, VR128X:$rhs)),
            (!cast<Instruction>(Row[1] # "rr") VR128X:$lhs, VR128X:$rhs)>;
}
488