xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/Analysis/MemoryLocation.h"
15 #include "llvm/CodeGen/MachineBasicBlock.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineInstr.h"
18 #include "llvm/CodeGen/MachineOperand.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/IR/IntrinsicsHexagon.h"
22 #include "llvm/Support/CommandLine.h"
23 
24 #include <algorithm>
25 #include <string>
26 #include <utility>
27 
28 using namespace llvm;
29 
30 static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
31   cl::Hidden, cl::init(16),
32   cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33 
34 static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16,  MVT::v16i32 };
35 static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
36 static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
37 static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38 
getIEEEProperties(MVT Ty)39 static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40   // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41   MVT ElemTy = Ty.getScalarType();
42   switch (ElemTy.SimpleTy) {
43     case MVT::f16:
44       return std::make_tuple(5, 15, 10);
45     case MVT::f32:
46       return std::make_tuple(8, 127, 23);
47     case MVT::f64:
48       return std::make_tuple(11, 1023, 52);
49     default:
50       break;
51   }
52   llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53 }
54 
55 void
initializeHVXLowering()56 HexagonTargetLowering::initializeHVXLowering() {
57   if (Subtarget.useHVX64BOps()) {
58     addRegisterClass(MVT::v64i8,  &Hexagon::HvxVRRegClass);
59     addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60     addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61     addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62     addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63     addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64     // These "short" boolean vector types should be legal because
65     // they will appear as results of vector compares. If they were
66     // not legal, type legalization would try to make them legal
67     // and that would require using operations that do not use or
68     // produce such types. That, in turn, would imply using custom
69     // nodes, which would be unoptimizable by the DAG combiner.
70     // The idea is to rely on target-independent operations as much
71     // as possible.
72     addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75   } else if (Subtarget.useHVX128BOps()) {
76     addRegisterClass(MVT::v128i8,  &Hexagon::HvxVRRegClass);
77     addRegisterClass(MVT::v64i16,  &Hexagon::HvxVRRegClass);
78     addRegisterClass(MVT::v32i32,  &Hexagon::HvxVRRegClass);
79     addRegisterClass(MVT::v256i8,  &Hexagon::HvxWRRegClass);
80     addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81     addRegisterClass(MVT::v64i32,  &Hexagon::HvxWRRegClass);
82     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84     addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85     if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86       addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87       addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88       addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89       addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90     }
91   }
92 
93   // Set up operation actions.
94 
95   bool Use64b = Subtarget.useHVX64BOps();
96   ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97   ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98   MVT ByteV = Use64b ?  MVT::v64i8 : MVT::v128i8;
99   MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100   MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101 
102   auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103     setOperationAction(Opc, FromTy, Promote);
104     AddPromotedToType(Opc, FromTy, ToTy);
105   };
106 
107   // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108   // Note: v16i1 -> i16 is handled in type legalization instead of op
109   // legalization.
110   setOperationAction(ISD::BITCAST,              MVT::i16, Custom);
111   setOperationAction(ISD::BITCAST,              MVT::i32, Custom);
112   setOperationAction(ISD::BITCAST,              MVT::i64, Custom);
113   setOperationAction(ISD::BITCAST,            MVT::v16i1, Custom);
114   setOperationAction(ISD::BITCAST,           MVT::v128i1, Custom);
115   setOperationAction(ISD::BITCAST,             MVT::i128, Custom);
116   setOperationAction(ISD::VECTOR_SHUFFLE,          ByteV, Legal);
117   setOperationAction(ISD::VECTOR_SHUFFLE,          ByteW, Legal);
118   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
119 
120   if (Subtarget.useHVX128BOps())
121     setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
122   if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123       Subtarget.useHVXFloatingPoint()) {
124 
125     static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
126     static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
127 
128     for (MVT T : FloatV) {
129       setOperationAction(ISD::FADD,              T, Legal);
130       setOperationAction(ISD::FSUB,              T, Legal);
131       setOperationAction(ISD::FMUL,              T, Legal);
132       setOperationAction(ISD::FMINIMUMNUM, T, Legal);
133       setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
134 
135       setOperationAction(ISD::INSERT_SUBVECTOR,  T, Custom);
136       setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
137 
138       setOperationAction(ISD::SPLAT_VECTOR,      T, Legal);
139       setOperationAction(ISD::SPLAT_VECTOR,      T, Legal);
140 
141       setOperationAction(ISD::MLOAD,             T, Custom);
142       setOperationAction(ISD::MSTORE,            T, Custom);
143       // Custom-lower BUILD_VECTOR. The standard (target-independent)
144       // handling of it would convert it to a load, which is not always
145       // the optimal choice.
146       setOperationAction(ISD::BUILD_VECTOR,      T, Custom);
147     }
148 
149 
150     // BUILD_VECTOR with f16 operands cannot be promoted without
151     // promoting the result, so lower the node to vsplat or constant pool
152     setOperationAction(ISD::BUILD_VECTOR,      MVT::f16, Custom);
153     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
154     setOperationAction(ISD::SPLAT_VECTOR,      MVT::f16, Custom);
155 
156     // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
157     // generated.
158     setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
159     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v64f16, ByteV);
160     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v64f32, ByteW);
161     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v32f32, ByteV);
162 
163     for (MVT P : FloatW) {
164       setOperationAction(ISD::LOAD,           P, Custom);
165       setOperationAction(ISD::STORE,          P, Custom);
166       setOperationAction(ISD::FADD,           P, Custom);
167       setOperationAction(ISD::FSUB,           P, Custom);
168       setOperationAction(ISD::FMUL,           P, Custom);
169       setOperationAction(ISD::FMINIMUMNUM, P, Custom);
170       setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
171       setOperationAction(ISD::SETCC,          P, Custom);
172       setOperationAction(ISD::VSELECT,        P, Custom);
173 
174       // Custom-lower BUILD_VECTOR. The standard (target-independent)
175       // handling of it would convert it to a load, which is not always
176       // the optimal choice.
177       setOperationAction(ISD::BUILD_VECTOR,   P, Custom);
178       // Make concat-vectors custom to handle concats of more than 2 vectors.
179       setOperationAction(ISD::CONCAT_VECTORS, P, Custom);
180 
181       setOperationAction(ISD::MLOAD,          P, Custom);
182       setOperationAction(ISD::MSTORE,         P, Custom);
183     }
184 
185     if (Subtarget.useHVXQFloatOps()) {
186       setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
187       setOperationAction(ISD::FP_ROUND,  MVT::v64f16, Legal);
188     } else if (Subtarget.useHVXIEEEFPOps()) {
189       setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
190       setOperationAction(ISD::FP_ROUND,  MVT::v64f16, Legal);
191     }
192   }
193 
194   for (MVT T : LegalV) {
195     setIndexedLoadAction(ISD::POST_INC,  T, Legal);
196     setIndexedStoreAction(ISD::POST_INC, T, Legal);
197 
198     setOperationAction(ISD::ABS,            T, Legal);
199     setOperationAction(ISD::AND,            T, Legal);
200     setOperationAction(ISD::OR,             T, Legal);
201     setOperationAction(ISD::XOR,            T, Legal);
202     setOperationAction(ISD::ADD,            T, Legal);
203     setOperationAction(ISD::SUB,            T, Legal);
204     setOperationAction(ISD::MUL,            T, Legal);
205     setOperationAction(ISD::CTPOP,          T, Legal);
206     setOperationAction(ISD::CTLZ,           T, Legal);
207     setOperationAction(ISD::SELECT,         T, Legal);
208     setOperationAction(ISD::SPLAT_VECTOR,   T, Legal);
209     setOperationAction(ISD::UADDSAT, T, Legal);
210     setOperationAction(ISD::SADDSAT, T, Legal);
211     if (T != ByteV) {
212       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
213       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
214       setOperationAction(ISD::BSWAP,                    T, Legal);
215     }
216 
217     setOperationAction(ISD::SMIN,           T, Legal);
218     setOperationAction(ISD::SMAX,           T, Legal);
219     if (T.getScalarType() != MVT::i32) {
220       setOperationAction(ISD::UMIN,         T, Legal);
221       setOperationAction(ISD::UMAX,         T, Legal);
222     }
223 
224     setOperationAction(ISD::CTTZ,               T, Custom);
225     setOperationAction(ISD::LOAD,               T, Custom);
226     setOperationAction(ISD::MLOAD,              T, Custom);
227     setOperationAction(ISD::MSTORE,             T, Custom);
228     if (T.getScalarType() != MVT::i32) {
229       setOperationAction(ISD::MULHS,              T, Legal);
230       setOperationAction(ISD::MULHU,              T, Legal);
231     }
232 
233     setOperationAction(ISD::BUILD_VECTOR,       T, Custom);
234     // Make concat-vectors custom to handle concats of more than 2 vectors.
235     setOperationAction(ISD::CONCAT_VECTORS,     T, Custom);
236     setOperationAction(ISD::INSERT_SUBVECTOR,   T, Custom);
237     setOperationAction(ISD::INSERT_VECTOR_ELT,  T, Custom);
238     setOperationAction(ISD::EXTRACT_SUBVECTOR,  T, Custom);
239     setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
240     setOperationAction(ISD::ANY_EXTEND,         T, Custom);
241     setOperationAction(ISD::SIGN_EXTEND,        T, Custom);
242     setOperationAction(ISD::ZERO_EXTEND,        T, Custom);
243     setOperationAction(ISD::FSHL,               T, Custom);
244     setOperationAction(ISD::FSHR,               T, Custom);
245     if (T != ByteV) {
246       setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
247       // HVX only has shifts of words and halfwords.
248       setOperationAction(ISD::SRA,                     T, Custom);
249       setOperationAction(ISD::SHL,                     T, Custom);
250       setOperationAction(ISD::SRL,                     T, Custom);
251 
252       // Promote all shuffles to operate on vectors of bytes.
253       setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
254     }
255 
256     if (Subtarget.useHVXFloatingPoint()) {
257       // Same action for both QFloat and IEEE.
258       setOperationAction(ISD::SINT_TO_FP, T, Custom);
259       setOperationAction(ISD::UINT_TO_FP, T, Custom);
260       setOperationAction(ISD::FP_TO_SINT, T, Custom);
261       setOperationAction(ISD::FP_TO_UINT, T, Custom);
262     }
263 
264     setCondCodeAction(ISD::SETNE,  T, Expand);
265     setCondCodeAction(ISD::SETLE,  T, Expand);
266     setCondCodeAction(ISD::SETGE,  T, Expand);
267     setCondCodeAction(ISD::SETLT,  T, Expand);
268     setCondCodeAction(ISD::SETULE, T, Expand);
269     setCondCodeAction(ISD::SETUGE, T, Expand);
270     setCondCodeAction(ISD::SETULT, T, Expand);
271   }
272 
273   for (MVT T : LegalW) {
274     // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
275     // independent) handling of it would convert it to a load, which is
276     // not always the optimal choice.
277     setOperationAction(ISD::BUILD_VECTOR,   T, Custom);
278     // Make concat-vectors custom to handle concats of more than 2 vectors.
279     setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
280 
281     // Custom-lower these operations for pairs. Expand them into a concat
282     // of the corresponding operations on individual vectors.
283     setOperationAction(ISD::ANY_EXTEND,               T, Custom);
284     setOperationAction(ISD::SIGN_EXTEND,              T, Custom);
285     setOperationAction(ISD::ZERO_EXTEND,              T, Custom);
286     setOperationAction(ISD::SIGN_EXTEND_INREG,        T, Custom);
287     setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  T, Custom);
288     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
289     setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
290     setOperationAction(ISD::SPLAT_VECTOR,             T, Custom);
291 
292     setOperationAction(ISD::LOAD,     T, Custom);
293     setOperationAction(ISD::STORE,    T, Custom);
294     setOperationAction(ISD::MLOAD,    T, Custom);
295     setOperationAction(ISD::MSTORE,   T, Custom);
296     setOperationAction(ISD::ABS,      T, Custom);
297     setOperationAction(ISD::CTLZ,     T, Custom);
298     setOperationAction(ISD::CTTZ,     T, Custom);
299     setOperationAction(ISD::CTPOP,    T, Custom);
300 
301     setOperationAction(ISD::ADD,      T, Legal);
302     setOperationAction(ISD::UADDSAT, T, Legal);
303     setOperationAction(ISD::SADDSAT, T, Legal);
304     setOperationAction(ISD::SUB,      T, Legal);
305     setOperationAction(ISD::MUL,      T, Custom);
306     setOperationAction(ISD::MULHS,    T, Custom);
307     setOperationAction(ISD::MULHU,    T, Custom);
308     setOperationAction(ISD::AND,      T, Custom);
309     setOperationAction(ISD::OR,       T, Custom);
310     setOperationAction(ISD::XOR,      T, Custom);
311     setOperationAction(ISD::SETCC,    T, Custom);
312     setOperationAction(ISD::VSELECT,  T, Custom);
313     if (T != ByteW) {
314       setOperationAction(ISD::SRA,      T, Custom);
315       setOperationAction(ISD::SHL,      T, Custom);
316       setOperationAction(ISD::SRL,      T, Custom);
317 
318       // Promote all shuffles to operate on vectors of bytes.
319       setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
320     }
321     setOperationAction(ISD::FSHL,     T, Custom);
322     setOperationAction(ISD::FSHR,     T, Custom);
323 
324     setOperationAction(ISD::SMIN,     T, Custom);
325     setOperationAction(ISD::SMAX,     T, Custom);
326     if (T.getScalarType() != MVT::i32) {
327       setOperationAction(ISD::UMIN,     T, Custom);
328       setOperationAction(ISD::UMAX,     T, Custom);
329     }
330 
331     if (Subtarget.useHVXFloatingPoint()) {
332       // Same action for both QFloat and IEEE.
333       setOperationAction(ISD::SINT_TO_FP, T, Custom);
334       setOperationAction(ISD::UINT_TO_FP, T, Custom);
335       setOperationAction(ISD::FP_TO_SINT, T, Custom);
336       setOperationAction(ISD::FP_TO_UINT, T, Custom);
337     }
338   }
339 
340   // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
341   setOperationAction(ISD::MULHS,      WordV, Custom); // -> _LOHI
342   setOperationAction(ISD::MULHU,      WordV, Custom); // -> _LOHI
343   setOperationAction(ISD::SMUL_LOHI,  WordV, Custom);
344   setOperationAction(ISD::UMUL_LOHI,  WordV, Custom);
345 
346   setCondCodeAction(ISD::SETNE,  MVT::v64f16, Expand);
347   setCondCodeAction(ISD::SETLE,  MVT::v64f16, Expand);
348   setCondCodeAction(ISD::SETGE,  MVT::v64f16, Expand);
349   setCondCodeAction(ISD::SETLT,  MVT::v64f16, Expand);
350   setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
351   setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
352   setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
353   setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
354   setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
355   setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
356   setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
357   setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
358   setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
359   setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
360 
361   setCondCodeAction(ISD::SETNE,  MVT::v32f32, Expand);
362   setCondCodeAction(ISD::SETLE,  MVT::v32f32, Expand);
363   setCondCodeAction(ISD::SETGE,  MVT::v32f32, Expand);
364   setCondCodeAction(ISD::SETLT,  MVT::v32f32, Expand);
365   setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
366   setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
367   setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
368   setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
369   setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
370   setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
371   setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
372   setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
373   setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
374   setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
375 
376   // Boolean vectors.
377 
378   for (MVT T : LegalW) {
379     // Boolean types for vector pairs will overlap with the boolean
380     // types for single vectors, e.g.
381     //   v64i8  -> v64i1 (single)
382     //   v64i16 -> v64i1 (pair)
383     // Set these actions first, and allow the single actions to overwrite
384     // any duplicates.
385     MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
386     setOperationAction(ISD::SETCC,              BoolW, Custom);
387     setOperationAction(ISD::AND,                BoolW, Custom);
388     setOperationAction(ISD::OR,                 BoolW, Custom);
389     setOperationAction(ISD::XOR,                BoolW, Custom);
390     // Masked load/store takes a mask that may need splitting.
391     setOperationAction(ISD::MLOAD,              BoolW, Custom);
392     setOperationAction(ISD::MSTORE,             BoolW, Custom);
393   }
394 
395   for (MVT T : LegalV) {
396     MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
397     setOperationAction(ISD::BUILD_VECTOR,       BoolV, Custom);
398     setOperationAction(ISD::CONCAT_VECTORS,     BoolV, Custom);
399     setOperationAction(ISD::INSERT_SUBVECTOR,   BoolV, Custom);
400     setOperationAction(ISD::INSERT_VECTOR_ELT,  BoolV, Custom);
401     setOperationAction(ISD::EXTRACT_SUBVECTOR,  BoolV, Custom);
402     setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
403     setOperationAction(ISD::SELECT,             BoolV, Custom);
404     setOperationAction(ISD::AND,                BoolV, Legal);
405     setOperationAction(ISD::OR,                 BoolV, Legal);
406     setOperationAction(ISD::XOR,                BoolV, Legal);
407   }
408 
409   if (Use64b) {
410     for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
411       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
412   } else {
413     for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
414       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
415   }
416 
417   // Handle store widening for short vectors.
418   unsigned HwLen = Subtarget.getVectorLength();
419   for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
420     if (ElemTy == MVT::i1)
421       continue;
422     int ElemWidth = ElemTy.getFixedSizeInBits();
423     int MaxElems = (8*HwLen) / ElemWidth;
424     for (int N = 2; N < MaxElems; N *= 2) {
425       MVT VecTy = MVT::getVectorVT(ElemTy, N);
426       auto Action = getPreferredVectorAction(VecTy);
427       if (Action == TargetLoweringBase::TypeWidenVector) {
428         setOperationAction(ISD::LOAD,         VecTy, Custom);
429         setOperationAction(ISD::STORE,        VecTy, Custom);
430         setOperationAction(ISD::SETCC,        VecTy, Custom);
431         setOperationAction(ISD::TRUNCATE,     VecTy, Custom);
432         setOperationAction(ISD::ANY_EXTEND,   VecTy, Custom);
433         setOperationAction(ISD::SIGN_EXTEND,  VecTy, Custom);
434         setOperationAction(ISD::ZERO_EXTEND,  VecTy, Custom);
435         if (Subtarget.useHVXFloatingPoint()) {
436           setOperationAction(ISD::FP_TO_SINT,   VecTy, Custom);
437           setOperationAction(ISD::FP_TO_UINT,   VecTy, Custom);
438           setOperationAction(ISD::SINT_TO_FP,   VecTy, Custom);
439           setOperationAction(ISD::UINT_TO_FP,   VecTy, Custom);
440         }
441 
442         MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
443         if (!isTypeLegal(BoolTy))
444           setOperationAction(ISD::SETCC, BoolTy, Custom);
445       }
446     }
447   }
448 
449   setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
450 }
451 
452 unsigned
getPreferredHvxVectorAction(MVT VecTy) const453 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
454   MVT ElemTy = VecTy.getVectorElementType();
455   unsigned VecLen = VecTy.getVectorNumElements();
456   unsigned HwLen = Subtarget.getVectorLength();
457 
458   // Split vectors of i1 that exceed byte vector length.
459   if (ElemTy == MVT::i1 && VecLen > HwLen)
460     return TargetLoweringBase::TypeSplitVector;
461 
462   ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
463   // For shorter vectors of i1, widen them if any of the corresponding
464   // vectors of integers needs to be widened.
465   if (ElemTy == MVT::i1) {
466     for (MVT T : Tys) {
467       assert(T != MVT::i1);
468       auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
469       if (A != ~0u)
470         return A;
471     }
472     return ~0u;
473   }
474 
475   // If the size of VecTy is at least half of the vector length,
476   // widen the vector. Note: the threshold was not selected in
477   // any scientific way.
478   if (llvm::is_contained(Tys, ElemTy)) {
479     unsigned VecWidth = VecTy.getSizeInBits();
480     unsigned HwWidth = 8*HwLen;
481     if (VecWidth > 2*HwWidth)
482       return TargetLoweringBase::TypeSplitVector;
483 
484     bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
485     if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
486       return TargetLoweringBase::TypeWidenVector;
487     if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
488       return TargetLoweringBase::TypeWidenVector;
489   }
490 
491   // Defer to default.
492   return ~0u;
493 }
494 
495 unsigned
getCustomHvxOperationAction(SDNode & Op) const496 HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
497   unsigned Opc = Op.getOpcode();
498   switch (Opc) {
499   case HexagonISD::SMUL_LOHI:
500   case HexagonISD::UMUL_LOHI:
501   case HexagonISD::USMUL_LOHI:
502     return TargetLoweringBase::Custom;
503   }
504   return TargetLoweringBase::Legal;
505 }
506 
507 SDValue
getInt(unsigned IntId,MVT ResTy,ArrayRef<SDValue> Ops,const SDLoc & dl,SelectionDAG & DAG) const508 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
509                               const SDLoc &dl, SelectionDAG &DAG) const {
510   SmallVector<SDValue,4> IntOps;
511   IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
512   append_range(IntOps, Ops);
513   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
514 }
515 
516 MVT
typeJoin(const TypePair & Tys) const517 HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
518   assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
519 
520   MVT ElemTy = Tys.first.getVectorElementType();
521   return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
522                                   Tys.second.getVectorNumElements());
523 }
524 
525 HexagonTargetLowering::TypePair
typeSplit(MVT VecTy) const526 HexagonTargetLowering::typeSplit(MVT VecTy) const {
527   assert(VecTy.isVector());
528   unsigned NumElem = VecTy.getVectorNumElements();
529   assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
530   MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
531   return { HalfTy, HalfTy };
532 }
533 
534 MVT
typeExtElem(MVT VecTy,unsigned Factor) const535 HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
536   MVT ElemTy = VecTy.getVectorElementType();
537   MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
538   return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
539 }
540 
541 MVT
typeTruncElem(MVT VecTy,unsigned Factor) const542 HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
543   MVT ElemTy = VecTy.getVectorElementType();
544   MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
545   return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
546 }
547 
548 SDValue
opCastElem(SDValue Vec,MVT ElemTy,SelectionDAG & DAG) const549 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
550                                   SelectionDAG &DAG) const {
551   if (ty(Vec).getVectorElementType() == ElemTy)
552     return Vec;
553   MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
554   return DAG.getBitcast(CastTy, Vec);
555 }
556 
557 SDValue
opJoin(const VectorPair & Ops,const SDLoc & dl,SelectionDAG & DAG) const558 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
559                               SelectionDAG &DAG) const {
560   return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
561                      Ops.first, Ops.second);
562 }
563 
564 HexagonTargetLowering::VectorPair
opSplit(SDValue Vec,const SDLoc & dl,SelectionDAG & DAG) const565 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
566                                SelectionDAG &DAG) const {
567   TypePair Tys = typeSplit(ty(Vec));
568   if (Vec.getOpcode() == HexagonISD::QCAT)
569     return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
570   return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
571 }
572 
573 bool
isHvxSingleTy(MVT Ty) const574 HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
575   return Subtarget.isHVXVectorType(Ty) &&
576          Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
577 }
578 
579 bool
isHvxPairTy(MVT Ty) const580 HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
581   return Subtarget.isHVXVectorType(Ty) &&
582          Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
583 }
584 
585 bool
isHvxBoolTy(MVT Ty) const586 HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
587   return Subtarget.isHVXVectorType(Ty, true) &&
588          Ty.getVectorElementType() == MVT::i1;
589 }
590 
allowsHvxMemoryAccess(MVT VecTy,MachineMemOperand::Flags Flags,unsigned * Fast) const591 bool HexagonTargetLowering::allowsHvxMemoryAccess(
592     MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
593   // Bool vectors are excluded by default, but make it explicit to
594   // emphasize that bool vectors cannot be loaded or stored.
595   // Also, disallow double vector stores (to prevent unnecessary
596   // store widening in DAG combiner).
597   if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
598     return false;
599   if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
600     return false;
601   if (Fast)
602     *Fast = 1;
603   return true;
604 }
605 
allowsHvxMisalignedMemoryAccesses(MVT VecTy,MachineMemOperand::Flags Flags,unsigned * Fast) const606 bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
607     MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
608   if (!Subtarget.isHVXVectorType(VecTy))
609     return false;
610   // XXX Should this be false?  vmemu are a bit slower than vmem.
611   if (Fast)
612     *Fast = 1;
613   return true;
614 }
615 
AdjustHvxInstrPostInstrSelection(MachineInstr & MI,SDNode * Node) const616 void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
617     MachineInstr &MI, SDNode *Node) const {
618   unsigned Opc = MI.getOpcode();
619   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
620   MachineBasicBlock &MB = *MI.getParent();
621   MachineFunction &MF = *MB.getParent();
622   MachineRegisterInfo &MRI = MF.getRegInfo();
623   DebugLoc DL = MI.getDebugLoc();
624   auto At = MI.getIterator();
625 
626   switch (Opc) {
627   case Hexagon::PS_vsplatib:
628     if (Subtarget.useHVXV62Ops()) {
629       // SplatV = A2_tfrsi #imm
630       // OutV = V6_lvsplatb SplatV
631       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
632       BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
633         .add(MI.getOperand(1));
634       Register OutV = MI.getOperand(0).getReg();
635       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
636         .addReg(SplatV);
637     } else {
638       // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
639       // OutV = V6_lvsplatw SplatV
640       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
641       const MachineOperand &InpOp = MI.getOperand(1);
642       assert(InpOp.isImm());
643       uint32_t V = InpOp.getImm() & 0xFF;
644       BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
645           .addImm(V << 24 | V << 16 | V << 8 | V);
646       Register OutV = MI.getOperand(0).getReg();
647       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
648     }
649     MB.erase(At);
650     break;
651   case Hexagon::PS_vsplatrb:
652     if (Subtarget.useHVXV62Ops()) {
653       // OutV = V6_lvsplatb Inp
654       Register OutV = MI.getOperand(0).getReg();
655       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
656         .add(MI.getOperand(1));
657     } else {
658       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
659       const MachineOperand &InpOp = MI.getOperand(1);
660       BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
661           .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
662       Register OutV = MI.getOperand(0).getReg();
663       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
664           .addReg(SplatV);
665     }
666     MB.erase(At);
667     break;
668   case Hexagon::PS_vsplatih:
669     if (Subtarget.useHVXV62Ops()) {
670       // SplatV = A2_tfrsi #imm
671       // OutV = V6_lvsplath SplatV
672       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
673       BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
674         .add(MI.getOperand(1));
675       Register OutV = MI.getOperand(0).getReg();
676       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
677         .addReg(SplatV);
678     } else {
679       // SplatV = A2_tfrsi #imm:#imm
680       // OutV = V6_lvsplatw SplatV
681       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
682       const MachineOperand &InpOp = MI.getOperand(1);
683       assert(InpOp.isImm());
684       uint32_t V = InpOp.getImm() & 0xFFFF;
685       BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
686           .addImm(V << 16 | V);
687       Register OutV = MI.getOperand(0).getReg();
688       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
689     }
690     MB.erase(At);
691     break;
692   case Hexagon::PS_vsplatrh:
693     if (Subtarget.useHVXV62Ops()) {
694       // OutV = V6_lvsplath Inp
695       Register OutV = MI.getOperand(0).getReg();
696       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
697         .add(MI.getOperand(1));
698     } else {
699       // SplatV = A2_combine_ll Inp, Inp
700       // OutV = V6_lvsplatw SplatV
701       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
702       const MachineOperand &InpOp = MI.getOperand(1);
703       BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
704           .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
705           .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
706       Register OutV = MI.getOperand(0).getReg();
707       BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
708     }
709     MB.erase(At);
710     break;
711   case Hexagon::PS_vsplatiw:
712   case Hexagon::PS_vsplatrw:
713     if (Opc == Hexagon::PS_vsplatiw) {
714       // SplatV = A2_tfrsi #imm
715       Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
716       BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
717         .add(MI.getOperand(1));
718       MI.getOperand(1).ChangeToRegister(SplatV, false);
719     }
720     // OutV = V6_lvsplatw SplatV/Inp
721     MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
722     break;
723   }
724 }
725 
726 SDValue
convertToByteIndex(SDValue ElemIdx,MVT ElemTy,SelectionDAG & DAG) const727 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
728                                           SelectionDAG &DAG) const {
729   if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
730     ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
731 
732   unsigned ElemWidth = ElemTy.getSizeInBits();
733   if (ElemWidth == 8)
734     return ElemIdx;
735 
736   unsigned L = Log2_32(ElemWidth/8);
737   const SDLoc &dl(ElemIdx);
738   return DAG.getNode(ISD::SHL, dl, MVT::i32,
739                      {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
740 }
741 
742 SDValue
getIndexInWord32(SDValue Idx,MVT ElemTy,SelectionDAG & DAG) const743 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
744                                         SelectionDAG &DAG) const {
745   unsigned ElemWidth = ElemTy.getSizeInBits();
746   assert(ElemWidth >= 8 && ElemWidth <= 32);
747   if (ElemWidth == 32)
748     return Idx;
749 
750   if (ty(Idx) != MVT::i32)
751     Idx = DAG.getBitcast(MVT::i32, Idx);
752   const SDLoc &dl(Idx);
753   SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
754   SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
755   return SubIdx;
756 }
757 
758 SDValue
getByteShuffle(const SDLoc & dl,SDValue Op0,SDValue Op1,ArrayRef<int> Mask,SelectionDAG & DAG) const759 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
760                                       SDValue Op1, ArrayRef<int> Mask,
761                                       SelectionDAG &DAG) const {
762   MVT OpTy = ty(Op0);
763   assert(OpTy == ty(Op1));
764 
765   MVT ElemTy = OpTy.getVectorElementType();
766   if (ElemTy == MVT::i8)
767     return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
768   assert(ElemTy.getSizeInBits() >= 8);
769 
770   MVT ResTy = tyVector(OpTy, MVT::i8);
771   unsigned ElemSize = ElemTy.getSizeInBits() / 8;
772 
773   SmallVector<int,128> ByteMask;
774   for (int M : Mask) {
775     if (M < 0) {
776       for (unsigned I = 0; I != ElemSize; ++I)
777         ByteMask.push_back(-1);
778     } else {
779       int NewM = M*ElemSize;
780       for (unsigned I = 0; I != ElemSize; ++I)
781         ByteMask.push_back(NewM+I);
782     }
783   }
784   assert(ResTy.getVectorNumElements() == ByteMask.size());
785   return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
786                               opCastElem(Op1, MVT::i8, DAG), ByteMask);
787 }
788 
789 SDValue
buildHvxVectorReg(ArrayRef<SDValue> Values,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const790 HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
791                                          const SDLoc &dl, MVT VecTy,
792                                          SelectionDAG &DAG) const {
793   unsigned VecLen = Values.size();
794   MachineFunction &MF = DAG.getMachineFunction();
795   MVT ElemTy = VecTy.getVectorElementType();
796   unsigned ElemWidth = ElemTy.getSizeInBits();
797   unsigned HwLen = Subtarget.getVectorLength();
798 
799   unsigned ElemSize = ElemWidth / 8;
800   assert(ElemSize*VecLen == HwLen);
801   SmallVector<SDValue,32> Words;
802 
803   if (VecTy.getVectorElementType() != MVT::i32 &&
804       !(Subtarget.useHVXFloatingPoint() &&
805       VecTy.getVectorElementType() == MVT::f32)) {
806     assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
807     unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
808     MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
809     for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
810       SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
811       Words.push_back(DAG.getBitcast(MVT::i32, W));
812     }
813   } else {
814     for (SDValue V : Values)
815       Words.push_back(DAG.getBitcast(MVT::i32, V));
816   }
817   auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
818     unsigned NumValues = Values.size();
819     assert(NumValues > 0);
820     bool IsUndef = true;
821     for (unsigned i = 0; i != NumValues; ++i) {
822       if (Values[i].isUndef())
823         continue;
824       IsUndef = false;
825       if (!SplatV.getNode())
826         SplatV = Values[i];
827       else if (SplatV != Values[i])
828         return false;
829     }
830     if (IsUndef)
831       SplatV = Values[0];
832     return true;
833   };
834 
835   unsigned NumWords = Words.size();
836   SDValue SplatV;
837   bool IsSplat = isSplat(Words, SplatV);
838   if (IsSplat && isUndef(SplatV))
839     return DAG.getUNDEF(VecTy);
840   if (IsSplat) {
841     assert(SplatV.getNode());
842     if (isNullConstant(SplatV))
843       return getZero(dl, VecTy, DAG);
844     MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
845     SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
846     return DAG.getBitcast(VecTy, S);
847   }
848 
849   // Delay recognizing constant vectors until here, so that we can generate
850   // a vsplat.
851   SmallVector<ConstantInt*, 128> Consts(VecLen);
852   bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
853   if (AllConst) {
854     ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
855                             (Constant**)Consts.end());
856     Constant *CV = ConstantVector::get(Tmp);
857     Align Alignment(HwLen);
858     SDValue CP =
859         LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
860     return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
861                        MachinePointerInfo::getConstantPool(MF), Alignment);
862   }
863 
864   // A special case is a situation where the vector is built entirely from
865   // elements extracted from another vector. This could be done via a shuffle
866   // more efficiently, but typically, the size of the source vector will not
867   // match the size of the vector being built (which precludes the use of a
868   // shuffle directly).
869   // This only handles a single source vector, and the vector being built
870   // should be of a sub-vector type of the source vector type.
871   auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
872                                              SmallVectorImpl<int> &SrcIdx) {
873     SDValue Vec;
874     for (SDValue V : Values) {
875       if (isUndef(V)) {
876         SrcIdx.push_back(-1);
877         continue;
878       }
879       if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
880         return false;
881       // All extracts should come from the same vector.
882       SDValue T = V.getOperand(0);
883       if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
884         return false;
885       Vec = T;
886       ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
887       if (C == nullptr)
888         return false;
889       int I = C->getSExtValue();
890       assert(I >= 0 && "Negative element index");
891       SrcIdx.push_back(I);
892     }
893     SrcVec = Vec;
894     return true;
895   };
896 
897   SmallVector<int,128> ExtIdx;
898   SDValue ExtVec;
899   if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
900     MVT ExtTy = ty(ExtVec);
901     unsigned ExtLen = ExtTy.getVectorNumElements();
902     if (ExtLen == VecLen || ExtLen == 2*VecLen) {
903       // Construct a new shuffle mask that will produce a vector with the same
904       // number of elements as the input vector, and such that the vector we
905       // want will be the initial subvector of it.
906       SmallVector<int,128> Mask;
907       BitVector Used(ExtLen);
908 
909       for (int M : ExtIdx) {
910         Mask.push_back(M);
911         if (M >= 0)
912           Used.set(M);
913       }
914       // Fill the rest of the mask with the unused elements of ExtVec in hopes
915       // that it will result in a permutation of ExtVec's elements. It's still
916       // fine if it doesn't (e.g. if undefs are present, or elements are
917       // repeated), but permutations can always be done efficiently via vdelta
918       // and vrdelta.
919       for (unsigned I = 0; I != ExtLen; ++I) {
920         if (Mask.size() == ExtLen)
921           break;
922         if (!Used.test(I))
923           Mask.push_back(I);
924       }
925 
926       SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
927                                        DAG.getUNDEF(ExtTy), Mask);
928       return ExtLen == VecLen ? S : LoHalf(S, DAG);
929     }
930   }
931 
932   // Find most common element to initialize vector with. This is to avoid
933   // unnecessary vinsert/valign for cases where the same value is present
934   // many times. Creates a histogram of the vector's elements to find the
935   // most common element n.
936   assert(4*Words.size() == Subtarget.getVectorLength());
937   int VecHist[32];
938   int n = 0;
939   for (unsigned i = 0; i != NumWords; ++i) {
940     VecHist[i] = 0;
941     if (Words[i].isUndef())
942       continue;
943     for (unsigned j = i; j != NumWords; ++j)
944       if (Words[i] == Words[j])
945         VecHist[i]++;
946 
947     if (VecHist[i] > VecHist[n])
948       n = i;
949   }
950 
951   SDValue HalfV = getZero(dl, VecTy, DAG);
952   if (VecHist[n] > 1) {
953     SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
954     HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
955                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
956   }
957   SDValue HalfV0 = HalfV;
958   SDValue HalfV1 = HalfV;
959 
960   // Construct two halves in parallel, then or them together. Rn and Rm count
961   // number of rotations needed before the next element. One last rotation is
962   // performed post-loop to position the last element.
963   int Rn = 0, Rm = 0;
964   SDValue Sn, Sm;
965   SDValue N = HalfV0;
966   SDValue M = HalfV1;
967   for (unsigned i = 0; i != NumWords/2; ++i) {
968     // Rotate by element count since last insertion.
969     if (Words[i] != Words[n] || VecHist[n] <= 1) {
970       Sn = DAG.getConstant(Rn, dl, MVT::i32);
971       HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
972       N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
973                       {HalfV0, Words[i]});
974       Rn = 0;
975     }
976     if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
977       Sm = DAG.getConstant(Rm, dl, MVT::i32);
978       HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
979       M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
980                       {HalfV1, Words[i+NumWords/2]});
981       Rm = 0;
982     }
983     Rn += 4;
984     Rm += 4;
985   }
986   // Perform last rotation.
987   Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
988   Sm = DAG.getConstant(Rm, dl, MVT::i32);
989   HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
990   HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
991 
992   SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
993   SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
994 
995   SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
996 
997   SDValue OutV =
998       DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
999   return OutV;
1000 }
1001 
1002 SDValue
createHvxPrefixPred(SDValue PredV,const SDLoc & dl,unsigned BitBytes,bool ZeroFill,SelectionDAG & DAG) const1003 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1004       unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1005   MVT PredTy = ty(PredV);
1006   unsigned HwLen = Subtarget.getVectorLength();
1007   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1008 
1009   if (Subtarget.isHVXVectorType(PredTy, true)) {
1010     // Move the vector predicate SubV to a vector register, and scale it
1011     // down to match the representation (bytes per type element) that VecV
1012     // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1013     // in general) element and put them at the front of the resulting
1014     // vector. This subvector will then be inserted into the Q2V of VecV.
1015     // To avoid having an operation that generates an illegal type (short
1016     // vector), generate a full size vector.
1017     //
1018     SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1019     SmallVector<int,128> Mask(HwLen);
1020     // Scale = BitBytes(PredV) / Given BitBytes.
1021     unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1022     unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1023 
1024     for (unsigned i = 0; i != HwLen; ++i) {
1025       unsigned Num = i % Scale;
1026       unsigned Off = i / Scale;
1027       Mask[BlockLen*Num + Off] = i;
1028     }
1029     SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1030     if (!ZeroFill)
1031       return S;
1032     // Fill the bytes beyond BlockLen with 0s.
1033     // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1034     // when BlockLen < HwLen.
1035     assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1036     MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1037     SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1038                          {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1039     SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1040     return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1041   }
1042 
1043   // Make sure that this is a valid scalar predicate.
1044   assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1045 
1046   unsigned Bytes = 8 / PredTy.getVectorNumElements();
1047   SmallVector<SDValue,4> Words[2];
1048   unsigned IdxW = 0;
1049 
1050   SDValue W0 = isUndef(PredV)
1051                   ? DAG.getUNDEF(MVT::i64)
1052                   : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1053   Words[IdxW].push_back(HiHalf(W0, DAG));
1054   Words[IdxW].push_back(LoHalf(W0, DAG));
1055 
1056   while (Bytes < BitBytes) {
1057     IdxW ^= 1;
1058     Words[IdxW].clear();
1059 
1060     if (Bytes < 4) {
1061       for (const SDValue &W : Words[IdxW ^ 1]) {
1062         SDValue T = expandPredicate(W, dl, DAG);
1063         Words[IdxW].push_back(HiHalf(T, DAG));
1064         Words[IdxW].push_back(LoHalf(T, DAG));
1065       }
1066     } else {
1067       for (const SDValue &W : Words[IdxW ^ 1]) {
1068         Words[IdxW].push_back(W);
1069         Words[IdxW].push_back(W);
1070       }
1071     }
1072     Bytes *= 2;
1073   }
1074 
1075   assert(Bytes == BitBytes);
1076 
1077   SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1078   SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1079   for (const SDValue &W : Words[IdxW]) {
1080     Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1081     Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1082   }
1083 
1084   return Vec;
1085 }
1086 
1087 SDValue
buildHvxVectorPred(ArrayRef<SDValue> Values,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const1088 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1089                                           const SDLoc &dl, MVT VecTy,
1090                                           SelectionDAG &DAG) const {
1091   // Construct a vector V of bytes, such that a comparison V >u 0 would
1092   // produce the required vector predicate.
1093   unsigned VecLen = Values.size();
1094   unsigned HwLen = Subtarget.getVectorLength();
1095   assert(VecLen <= HwLen || VecLen == 8*HwLen);
1096   SmallVector<SDValue,128> Bytes;
1097   bool AllT = true, AllF = true;
1098 
1099   auto IsTrue = [] (SDValue V) {
1100     if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1101       return !N->isZero();
1102     return false;
1103   };
1104   auto IsFalse = [] (SDValue V) {
1105     if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1106       return N->isZero();
1107     return false;
1108   };
1109 
1110   if (VecLen <= HwLen) {
1111     // In the hardware, each bit of a vector predicate corresponds to a byte
1112     // of a vector register. Calculate how many bytes does a bit of VecTy
1113     // correspond to.
1114     assert(HwLen % VecLen == 0);
1115     unsigned BitBytes = HwLen / VecLen;
1116     for (SDValue V : Values) {
1117       AllT &= IsTrue(V);
1118       AllF &= IsFalse(V);
1119 
1120       SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1121                                  : DAG.getUNDEF(MVT::i8);
1122       for (unsigned B = 0; B != BitBytes; ++B)
1123         Bytes.push_back(Ext);
1124     }
1125   } else {
1126     // There are as many i1 values, as there are bits in a vector register.
1127     // Divide the values into groups of 8 and check that each group consists
1128     // of the same value (ignoring undefs).
1129     for (unsigned I = 0; I != VecLen; I += 8) {
1130       unsigned B = 0;
1131       // Find the first non-undef value in this group.
1132       for (; B != 8; ++B) {
1133         if (!Values[I+B].isUndef())
1134           break;
1135       }
1136       SDValue F = Values[I+B];
1137       AllT &= IsTrue(F);
1138       AllF &= IsFalse(F);
1139 
1140       SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1141                             : DAG.getUNDEF(MVT::i8);
1142       Bytes.push_back(Ext);
1143       // Verify that the rest of values in the group are the same as the
1144       // first.
1145       for (; B != 8; ++B)
1146         assert(Values[I+B].isUndef() || Values[I+B] == F);
1147     }
1148   }
1149 
1150   if (AllT)
1151     return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1152   if (AllF)
1153     return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1154 
1155   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1156   SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1157   return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1158 }
1159 
1160 SDValue
extractHvxElementReg(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const1161 HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1162       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1163   MVT ElemTy = ty(VecV).getVectorElementType();
1164 
1165   unsigned ElemWidth = ElemTy.getSizeInBits();
1166   assert(ElemWidth >= 8 && ElemWidth <= 32);
1167   (void)ElemWidth;
1168 
1169   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1170   SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1171                                {VecV, ByteIdx});
1172   if (ElemTy == MVT::i32)
1173     return ExWord;
1174 
1175   // Have an extracted word, need to extract the smaller element out of it.
1176   // 1. Extract the bits of (the original) IdxV that correspond to the index
1177   //    of the desired element in the 32-bit word.
1178   SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1179   // 2. Extract the element from the word.
1180   SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1181   return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1182 }
1183 
1184 SDValue
extractHvxElementPred(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const1185 HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1186       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1187   // Implement other return types if necessary.
1188   assert(ResTy == MVT::i1);
1189 
1190   unsigned HwLen = Subtarget.getVectorLength();
1191   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1192   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1193 
1194   unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1195   SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1196   IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1197 
1198   SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1199   SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1200   return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1201 }
1202 
1203 SDValue
insertHvxElementReg(SDValue VecV,SDValue IdxV,SDValue ValV,const SDLoc & dl,SelectionDAG & DAG) const1204 HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1205       SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1206   MVT ElemTy = ty(VecV).getVectorElementType();
1207 
1208   unsigned ElemWidth = ElemTy.getSizeInBits();
1209   assert(ElemWidth >= 8 && ElemWidth <= 32);
1210   (void)ElemWidth;
1211 
1212   auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1213                                      SDValue ByteIdxV) {
1214     MVT VecTy = ty(VecV);
1215     unsigned HwLen = Subtarget.getVectorLength();
1216     SDValue MaskV =
1217         DAG.getNode(ISD::AND, dl, MVT::i32,
1218                     {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1219     SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1220     SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1221     SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1222                                {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1223     SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1224     return TorV;
1225   };
1226 
1227   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1228   if (ElemTy == MVT::i32)
1229     return InsertWord(VecV, ValV, ByteIdx);
1230 
1231   // If this is not inserting a 32-bit word, convert it into such a thing.
1232   // 1. Extract the existing word from the target vector.
1233   SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1234                                 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1235   SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1236                                      dl, MVT::i32, DAG);
1237 
1238   // 2. Treating the extracted word as a 32-bit vector, insert the given
1239   //    value into it.
1240   SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1241   MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1242   SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1243                              ValV, SubIdx, dl, ElemTy, DAG);
1244 
1245   // 3. Insert the 32-bit word back into the original vector.
1246   return InsertWord(VecV, Ins, ByteIdx);
1247 }
1248 
1249 SDValue
insertHvxElementPred(SDValue VecV,SDValue IdxV,SDValue ValV,const SDLoc & dl,SelectionDAG & DAG) const1250 HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1251       SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1252   unsigned HwLen = Subtarget.getVectorLength();
1253   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1254   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1255 
1256   unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1257   SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1258   IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1259   ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1260 
1261   SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1262   return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1263 }
1264 
1265 SDValue
extractHvxSubvectorReg(SDValue OrigOp,SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const1266 HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1267       SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1268   MVT VecTy = ty(VecV);
1269   unsigned HwLen = Subtarget.getVectorLength();
1270   unsigned Idx = IdxV.getNode()->getAsZExtVal();
1271   MVT ElemTy = VecTy.getVectorElementType();
1272   unsigned ElemWidth = ElemTy.getSizeInBits();
1273 
1274   // If the source vector is a vector pair, get the single vector containing
1275   // the subvector of interest. The subvector will never overlap two single
1276   // vectors.
1277   if (isHvxPairTy(VecTy)) {
1278     unsigned SubIdx = Hexagon::vsub_lo;
1279     if (Idx * ElemWidth >= 8 * HwLen) {
1280       SubIdx = Hexagon::vsub_hi;
1281       Idx -= VecTy.getVectorNumElements() / 2;
1282     }
1283 
1284     VecTy = typeSplit(VecTy).first;
1285     VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1286     if (VecTy == ResTy)
1287       return VecV;
1288   }
1289 
1290   // The only meaningful subvectors of a single HVX vector are those that
1291   // fit in a scalar register.
1292   assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1293 
1294   MVT WordTy = tyVector(VecTy, MVT::i32);
1295   SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1296   unsigned WordIdx = (Idx*ElemWidth) / 32;
1297 
1298   SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1299   SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1300   if (ResTy.getSizeInBits() == 32)
1301     return DAG.getBitcast(ResTy, W0);
1302 
1303   SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1304   SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1305   SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1306   return DAG.getBitcast(ResTy, WW);
1307 }
1308 
1309 SDValue
extractHvxSubvectorPred(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const1310 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1311       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1312   MVT VecTy = ty(VecV);
1313   unsigned HwLen = Subtarget.getVectorLength();
1314   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1315   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1316   // IdxV is required to be a constant.
1317   unsigned Idx = IdxV.getNode()->getAsZExtVal();
1318 
1319   unsigned ResLen = ResTy.getVectorNumElements();
1320   unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1321   unsigned Offset = Idx * BitBytes;
1322   SDValue Undef = DAG.getUNDEF(ByteTy);
1323   SmallVector<int,128> Mask;
1324 
1325   if (Subtarget.isHVXVectorType(ResTy, true)) {
1326     // Converting between two vector predicates. Since the result is shorter
1327     // than the source, it will correspond to a vector predicate with the
1328     // relevant bits replicated. The replication count is the ratio of the
1329     // source and target vector lengths.
1330     unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1331     assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1332     for (unsigned i = 0; i != HwLen/Rep; ++i) {
1333       for (unsigned j = 0; j != Rep; ++j)
1334         Mask.push_back(i + Offset);
1335     }
1336     SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1337     return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1338   }
1339 
1340   // Converting between a vector predicate and a scalar predicate. In the
1341   // vector predicate, a group of BitBytes bits will correspond to a single
1342   // i1 element of the source vector type. Those bits will all have the same
1343   // value. The same will be true for ByteVec, where each byte corresponds
1344   // to a bit in the vector predicate.
1345   // The algorithm is to traverse the ByteVec, going over the i1 values from
1346   // the source vector, and generate the corresponding representation in an
1347   // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1348   // elements so that the interesting 8 bytes will be in the low end of the
1349   // vector.
1350   unsigned Rep = 8 / ResLen;
1351   // Make sure the output fill the entire vector register, so repeat the
1352   // 8-byte groups as many times as necessary.
1353   for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1354     // This will generate the indexes of the 8 interesting bytes.
1355     for (unsigned i = 0; i != ResLen; ++i) {
1356       for (unsigned j = 0; j != Rep; ++j)
1357         Mask.push_back(Offset + i*BitBytes);
1358     }
1359   }
1360 
1361   SDValue Zero = getZero(dl, MVT::i32, DAG);
1362   SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1363   // Combine the two low words from ShuffV into a v8i8, and byte-compare
1364   // them against 0.
1365   SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1366   SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1367                            {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1368   SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1369   return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1370                   {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1371 }
1372 
1373 SDValue
insertHvxSubvectorReg(SDValue VecV,SDValue SubV,SDValue IdxV,const SDLoc & dl,SelectionDAG & DAG) const1374 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1375       SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1376   MVT VecTy = ty(VecV);
1377   MVT SubTy = ty(SubV);
1378   unsigned HwLen = Subtarget.getVectorLength();
1379   MVT ElemTy = VecTy.getVectorElementType();
1380   unsigned ElemWidth = ElemTy.getSizeInBits();
1381 
1382   bool IsPair = isHvxPairTy(VecTy);
1383   MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1384   // The two single vectors that VecV consists of, if it's a pair.
1385   SDValue V0, V1;
1386   SDValue SingleV = VecV;
1387   SDValue PickHi;
1388 
1389   if (IsPair) {
1390     V0 = LoHalf(VecV, DAG);
1391     V1 = HiHalf(VecV, DAG);
1392 
1393     SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1394                                     dl, MVT::i32);
1395     PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1396     if (isHvxSingleTy(SubTy)) {
1397       if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1398         unsigned Idx = CN->getZExtValue();
1399         assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1400         unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1401         return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1402       }
1403       // If IdxV is not a constant, generate the two variants: with the
1404       // SubV as the high and as the low subregister, and select the right
1405       // pair based on the IdxV.
1406       SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1407       SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1408       return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1409     }
1410     // The subvector being inserted must be entirely contained in one of
1411     // the vectors V0 or V1. Set SingleV to the correct one, and update
1412     // IdxV to be the index relative to the beginning of that vector.
1413     SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1414     IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1415     SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1416   }
1417 
1418   // The only meaningful subvectors of a single HVX vector are those that
1419   // fit in a scalar register.
1420   assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1421   // Convert IdxV to be index in bytes.
1422   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1423   if (!IdxN || !IdxN->isZero()) {
1424     IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1425                        DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1426     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1427   }
1428   // When inserting a single word, the rotation back to the original position
1429   // would be by HwLen-Idx, but if two words are inserted, it will need to be
1430   // by (HwLen-4)-Idx.
1431   unsigned RolBase = HwLen;
1432   if (SubTy.getSizeInBits() == 32) {
1433     SDValue V = DAG.getBitcast(MVT::i32, SubV);
1434     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1435   } else {
1436     SDValue V = DAG.getBitcast(MVT::i64, SubV);
1437     SDValue R0 = LoHalf(V, DAG);
1438     SDValue R1 = HiHalf(V, DAG);
1439     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1440     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1441                           DAG.getConstant(4, dl, MVT::i32));
1442     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1443     RolBase = HwLen-4;
1444   }
1445   // If the vector wasn't ror'ed, don't ror it back.
1446   if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1447     SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1448                                DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1449     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1450   }
1451 
1452   if (IsPair) {
1453     SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1454     SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1455     return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1456   }
1457   return SingleV;
1458 }
1459 
1460 SDValue
insertHvxSubvectorPred(SDValue VecV,SDValue SubV,SDValue IdxV,const SDLoc & dl,SelectionDAG & DAG) const1461 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1462       SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1463   MVT VecTy = ty(VecV);
1464   MVT SubTy = ty(SubV);
1465   assert(Subtarget.isHVXVectorType(VecTy, true));
1466   // VecV is an HVX vector predicate. SubV may be either an HVX vector
1467   // predicate as well, or it can be a scalar predicate.
1468 
1469   unsigned VecLen = VecTy.getVectorNumElements();
1470   unsigned HwLen = Subtarget.getVectorLength();
1471   assert(HwLen % VecLen == 0 && "Unexpected vector type");
1472 
1473   unsigned Scale = VecLen / SubTy.getVectorNumElements();
1474   unsigned BitBytes = HwLen / VecLen;
1475   unsigned BlockLen = HwLen / Scale;
1476 
1477   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1478   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1479   SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1480   SDValue ByteIdx;
1481 
1482   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1483   if (!IdxN || !IdxN->isZero()) {
1484     ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1485                           DAG.getConstant(BitBytes, dl, MVT::i32));
1486     ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1487   }
1488 
1489   // ByteVec is the target vector VecV rotated in such a way that the
1490   // subvector should be inserted at index 0. Generate a predicate mask
1491   // and use vmux to do the insertion.
1492   assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1493   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1494   SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1495                        {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1496   ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1497   // Rotate ByteVec back, and convert to a vector predicate.
1498   if (!IdxN || !IdxN->isZero()) {
1499     SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1500     SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1501     ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1502   }
1503   return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1504 }
1505 
1506 SDValue
extendHvxVectorPred(SDValue VecV,const SDLoc & dl,MVT ResTy,bool ZeroExt,SelectionDAG & DAG) const1507 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1508       MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1509   // Sign- and any-extending of a vector predicate to a vector register is
1510   // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1511   // a vector of 1s (where the 1s are of type matching the vector type).
1512   assert(Subtarget.isHVXVectorType(ResTy));
1513   if (!ZeroExt)
1514     return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1515 
1516   assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1517   SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1518                              DAG.getConstant(1, dl, MVT::i32));
1519   SDValue False = getZero(dl, ResTy, DAG);
1520   return DAG.getSelect(dl, ResTy, VecV, True, False);
1521 }
1522 
1523 SDValue
compressHvxPred(SDValue VecQ,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const1524 HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1525       MVT ResTy, SelectionDAG &DAG) const {
1526   // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1527   // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1528   // vector register. The remaining bits of the vector register are
1529   // unspecified.
1530 
1531   MachineFunction &MF = DAG.getMachineFunction();
1532   unsigned HwLen = Subtarget.getVectorLength();
1533   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1534   MVT PredTy = ty(VecQ);
1535   unsigned PredLen = PredTy.getVectorNumElements();
1536   assert(HwLen % PredLen == 0);
1537   MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1538 
1539   Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1540   SmallVector<Constant*, 128> Tmp;
1541   // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1542   // These are bytes with the LSB rotated left with respect to their index.
1543   for (unsigned i = 0; i != HwLen/8; ++i) {
1544     for (unsigned j = 0; j != 8; ++j)
1545       Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1546   }
1547   Constant *CV = ConstantVector::get(Tmp);
1548   Align Alignment(HwLen);
1549   SDValue CP =
1550       LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1551   SDValue Bytes =
1552       DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1553                   MachinePointerInfo::getConstantPool(MF), Alignment);
1554 
1555   // Select the bytes that correspond to true bits in the vector predicate.
1556   SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1557       getZero(dl, VecTy, DAG));
1558   // Calculate the OR of all bytes in each group of 8. That will compress
1559   // all the individual bits into a single byte.
1560   // First, OR groups of 4, via vrmpy with 0x01010101.
1561   SDValue All1 =
1562       DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1563   SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1564   // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1565   SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1566       {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1567   SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1568 
1569   // Pick every 8th byte and coalesce them at the beginning of the output.
1570   // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1571   // byte and so on.
1572   SmallVector<int,128> Mask;
1573   for (unsigned i = 0; i != HwLen; ++i)
1574     Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1575   SDValue Collect =
1576       DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1577   return DAG.getBitcast(ResTy, Collect);
1578 }
1579 
1580 SDValue
resizeToWidth(SDValue VecV,MVT ResTy,bool Signed,const SDLoc & dl,SelectionDAG & DAG) const1581 HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1582                                      const SDLoc &dl, SelectionDAG &DAG) const {
1583   // Take a vector and resize the element type to match the given type.
1584   MVT InpTy = ty(VecV);
1585   if (InpTy == ResTy)
1586     return VecV;
1587 
1588   unsigned InpWidth = InpTy.getSizeInBits();
1589   unsigned ResWidth = ResTy.getSizeInBits();
1590 
1591   if (InpTy.isFloatingPoint()) {
1592     return InpWidth < ResWidth
1593                ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1594                : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1595                              DAG.getTargetConstant(0, dl, MVT::i32));
1596   }
1597 
1598   assert(InpTy.isInteger());
1599 
1600   if (InpWidth < ResWidth) {
1601     unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1602     return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1603   } else {
1604     unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1605     return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1606   }
1607 }
1608 
1609 SDValue
extractSubvector(SDValue Vec,MVT SubTy,unsigned SubIdx,SelectionDAG & DAG) const1610 HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1611       SelectionDAG &DAG) const {
1612   assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1613 
1614   const SDLoc &dl(Vec);
1615   unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1616   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1617                      {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1618 }
1619 
1620 SDValue
LowerHvxBuildVector(SDValue Op,SelectionDAG & DAG) const1621 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1622       const {
1623   const SDLoc &dl(Op);
1624   MVT VecTy = ty(Op);
1625 
1626   unsigned Size = Op.getNumOperands();
1627   SmallVector<SDValue,128> Ops;
1628   for (unsigned i = 0; i != Size; ++i)
1629     Ops.push_back(Op.getOperand(i));
1630 
1631   if (VecTy.getVectorElementType() == MVT::i1)
1632     return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1633 
1634   // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1635   // not a legal type, just bitcast the node to use i16
1636   // types and bitcast the result back to f16
1637   if (VecTy.getVectorElementType() == MVT::f16) {
1638     SmallVector<SDValue,64> NewOps;
1639     for (unsigned i = 0; i != Size; i++)
1640       NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1641 
1642     SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1643         tyVector(VecTy, MVT::i16), NewOps);
1644     return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1645   }
1646 
1647   // First, split the BUILD_VECTOR for vector pairs. We could generate
1648   // some pairs directly (via splat), but splats should be generated
1649   // by the combiner prior to getting here.
1650   if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1651     ArrayRef<SDValue> A(Ops);
1652     MVT SingleTy = typeSplit(VecTy).first;
1653     SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1654     SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1655     return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1656   }
1657 
1658   return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1659 }
1660 
1661 SDValue
LowerHvxSplatVector(SDValue Op,SelectionDAG & DAG) const1662 HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1663       const {
1664   const SDLoc &dl(Op);
1665   MVT VecTy = ty(Op);
1666   MVT ArgTy = ty(Op.getOperand(0));
1667 
1668   if (ArgTy == MVT::f16) {
1669     MVT SplatTy =  MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1670     SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1671     SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1672     SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1673     return DAG.getBitcast(VecTy, Splat);
1674   }
1675 
1676   return SDValue();
1677 }
1678 
1679 SDValue
LowerHvxConcatVectors(SDValue Op,SelectionDAG & DAG) const1680 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1681       const {
1682   // Vector concatenation of two integer (non-bool) vectors does not need
1683   // special lowering. Custom-lower concats of bool vectors and expand
1684   // concats of more than 2 vectors.
1685   MVT VecTy = ty(Op);
1686   const SDLoc &dl(Op);
1687   unsigned NumOp = Op.getNumOperands();
1688   if (VecTy.getVectorElementType() != MVT::i1) {
1689     if (NumOp == 2)
1690       return Op;
1691     // Expand the other cases into a build-vector.
1692     SmallVector<SDValue,8> Elems;
1693     for (SDValue V : Op.getNode()->ops())
1694       DAG.ExtractVectorElements(V, Elems);
1695     // A vector of i16 will be broken up into a build_vector of i16's.
1696     // This is a problem, since at the time of operation legalization,
1697     // all operations are expected to be type-legalized, and i16 is not
1698     // a legal type. If any of the extracted elements is not of a valid
1699     // type, sign-extend it to a valid one.
1700     for (SDValue &V : Elems) {
1701       MVT Ty = ty(V);
1702       if (!isTypeLegal(Ty)) {
1703         MVT NTy = typeLegalize(Ty, DAG);
1704         if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1705           V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1706                           DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1707                                       V.getOperand(0), V.getOperand(1)),
1708                           DAG.getValueType(Ty));
1709           continue;
1710         }
1711         // A few less complicated cases.
1712         switch (V.getOpcode()) {
1713           case ISD::Constant:
1714             V = DAG.getSExtOrTrunc(V, dl, NTy);
1715             break;
1716           case ISD::UNDEF:
1717             V = DAG.getUNDEF(NTy);
1718             break;
1719           case ISD::TRUNCATE:
1720             V = V.getOperand(0);
1721             break;
1722           default:
1723             llvm_unreachable("Unexpected vector element");
1724         }
1725       }
1726     }
1727     return DAG.getBuildVector(VecTy, dl, Elems);
1728   }
1729 
1730   assert(VecTy.getVectorElementType() == MVT::i1);
1731   unsigned HwLen = Subtarget.getVectorLength();
1732   assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1733 
1734   SDValue Op0 = Op.getOperand(0);
1735 
1736   // If the operands are HVX types (i.e. not scalar predicates), then
1737   // defer the concatenation, and create QCAT instead.
1738   if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1739     if (NumOp == 2)
1740       return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1741 
1742     ArrayRef<SDUse> U(Op.getNode()->ops());
1743     SmallVector<SDValue, 4> SV(U);
1744     ArrayRef<SDValue> Ops(SV);
1745 
1746     MVT HalfTy = typeSplit(VecTy).first;
1747     SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1748                              Ops.take_front(NumOp/2));
1749     SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1750                              Ops.take_back(NumOp/2));
1751     return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1752   }
1753 
1754   // Count how many bytes (in a vector register) each bit in VecTy
1755   // corresponds to.
1756   unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1757 
1758   SmallVector<SDValue,8> Prefixes;
1759   for (SDValue V : Op.getNode()->op_values()) {
1760     SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1761     Prefixes.push_back(P);
1762   }
1763 
1764   unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1765   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1766   SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1767   SDValue Res = getZero(dl, ByteTy, DAG);
1768   for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1769     Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1770     Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1771   }
1772   return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1773 }
1774 
1775 SDValue
LowerHvxExtractElement(SDValue Op,SelectionDAG & DAG) const1776 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1777       const {
1778   // Change the type of the extracted element to i32.
1779   SDValue VecV = Op.getOperand(0);
1780   MVT ElemTy = ty(VecV).getVectorElementType();
1781   const SDLoc &dl(Op);
1782   SDValue IdxV = Op.getOperand(1);
1783   if (ElemTy == MVT::i1)
1784     return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1785 
1786   return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1787 }
1788 
1789 SDValue
LowerHvxInsertElement(SDValue Op,SelectionDAG & DAG) const1790 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1791       const {
1792   const SDLoc &dl(Op);
1793   MVT VecTy = ty(Op);
1794   SDValue VecV = Op.getOperand(0);
1795   SDValue ValV = Op.getOperand(1);
1796   SDValue IdxV = Op.getOperand(2);
1797   MVT ElemTy = ty(VecV).getVectorElementType();
1798   if (ElemTy == MVT::i1)
1799     return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1800 
1801   if (ElemTy == MVT::f16) {
1802     SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1803         tyVector(VecTy, MVT::i16),
1804         DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1805         DAG.getBitcast(MVT::i16, ValV), IdxV);
1806     return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1807   }
1808 
1809   return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1810 }
1811 
1812 SDValue
LowerHvxExtractSubvector(SDValue Op,SelectionDAG & DAG) const1813 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1814       const {
1815   SDValue SrcV = Op.getOperand(0);
1816   MVT SrcTy = ty(SrcV);
1817   MVT DstTy = ty(Op);
1818   SDValue IdxV = Op.getOperand(1);
1819   unsigned Idx = IdxV.getNode()->getAsZExtVal();
1820   assert(Idx % DstTy.getVectorNumElements() == 0);
1821   (void)Idx;
1822   const SDLoc &dl(Op);
1823 
1824   MVT ElemTy = SrcTy.getVectorElementType();
1825   if (ElemTy == MVT::i1)
1826     return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1827 
1828   return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1829 }
1830 
1831 SDValue
LowerHvxInsertSubvector(SDValue Op,SelectionDAG & DAG) const1832 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1833       const {
1834   // Idx does not need to be a constant.
1835   SDValue VecV = Op.getOperand(0);
1836   SDValue ValV = Op.getOperand(1);
1837   SDValue IdxV = Op.getOperand(2);
1838 
1839   const SDLoc &dl(Op);
1840   MVT VecTy = ty(VecV);
1841   MVT ElemTy = VecTy.getVectorElementType();
1842   if (ElemTy == MVT::i1)
1843     return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1844 
1845   return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1846 }
1847 
1848 SDValue
LowerHvxAnyExt(SDValue Op,SelectionDAG & DAG) const1849 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1850   // Lower any-extends of boolean vectors to sign-extends, since they
1851   // translate directly to Q2V. Zero-extending could also be done equally
1852   // fast, but Q2V is used/recognized in more places.
1853   // For all other vectors, use zero-extend.
1854   MVT ResTy = ty(Op);
1855   SDValue InpV = Op.getOperand(0);
1856   MVT ElemTy = ty(InpV).getVectorElementType();
1857   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1858     return LowerHvxSignExt(Op, DAG);
1859   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1860 }
1861 
1862 SDValue
LowerHvxSignExt(SDValue Op,SelectionDAG & DAG) const1863 HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1864   MVT ResTy = ty(Op);
1865   SDValue InpV = Op.getOperand(0);
1866   MVT ElemTy = ty(InpV).getVectorElementType();
1867   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1868     return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1869   return Op;
1870 }
1871 
1872 SDValue
LowerHvxZeroExt(SDValue Op,SelectionDAG & DAG) const1873 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1874   MVT ResTy = ty(Op);
1875   SDValue InpV = Op.getOperand(0);
1876   MVT ElemTy = ty(InpV).getVectorElementType();
1877   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1878     return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1879   return Op;
1880 }
1881 
1882 SDValue
LowerHvxCttz(SDValue Op,SelectionDAG & DAG) const1883 HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1884   // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1885   // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1886   const SDLoc &dl(Op);
1887   MVT ResTy = ty(Op);
1888   SDValue InpV = Op.getOperand(0);
1889   assert(ResTy == ty(InpV));
1890 
1891   // Calculate the vectors of 1 and bitwidth(x).
1892   MVT ElemTy = ty(InpV).getVectorElementType();
1893   unsigned ElemWidth = ElemTy.getSizeInBits();
1894 
1895   SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1896                              DAG.getConstant(1, dl, MVT::i32));
1897   SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1898                              DAG.getConstant(ElemWidth, dl, MVT::i32));
1899   SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1900                               DAG.getAllOnesConstant(dl, MVT::i32));
1901 
1902   // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1903   // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1904   // it separately in custom combine or selection).
1905   SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1906                           {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1907                            DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1908   return DAG.getNode(ISD::SUB, dl, ResTy,
1909                      {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1910 }
1911 
1912 SDValue
LowerHvxMulh(SDValue Op,SelectionDAG & DAG) const1913 HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1914   const SDLoc &dl(Op);
1915   MVT ResTy = ty(Op);
1916   assert(ResTy.getVectorElementType() == MVT::i32);
1917 
1918   SDValue Vs = Op.getOperand(0);
1919   SDValue Vt = Op.getOperand(1);
1920 
1921   SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1922   unsigned Opc = Op.getOpcode();
1923 
1924   // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1925   if (Opc == ISD::MULHU)
1926     return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1927   if (Opc == ISD::MULHS)
1928     return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1929 
1930 #ifndef NDEBUG
1931   Op.dump(&DAG);
1932 #endif
1933   llvm_unreachable("Unexpected mulh operation");
1934 }
1935 
1936 SDValue
LowerHvxMulLoHi(SDValue Op,SelectionDAG & DAG) const1937 HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1938   const SDLoc &dl(Op);
1939   unsigned Opc = Op.getOpcode();
1940   SDValue Vu = Op.getOperand(0);
1941   SDValue Vv = Op.getOperand(1);
1942 
1943   // If the HI part is not used, convert it to a regular MUL.
1944   if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1945     // Need to preserve the types and the number of values.
1946     SDValue Hi = DAG.getUNDEF(ty(HiVal));
1947     SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1948     return DAG.getMergeValues({Lo, Hi}, dl);
1949   }
1950 
1951   bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1952   bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1953 
1954   // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1955   // valued nodes.
1956   if (Subtarget.useHVXV62Ops())
1957     return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1958 
1959   if (Opc == HexagonISD::SMUL_LOHI) {
1960     // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1961     // for other signedness LOHI is cheaper.
1962     if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1963       SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1964       SDValue Lo = DAG.getUNDEF(ty(LoVal));
1965       return DAG.getMergeValues({Lo, Hi}, dl);
1966     }
1967   }
1968 
1969   return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1970 }
1971 
1972 SDValue
LowerHvxBitcast(SDValue Op,SelectionDAG & DAG) const1973 HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1974   SDValue Val = Op.getOperand(0);
1975   MVT ResTy = ty(Op);
1976   MVT ValTy = ty(Val);
1977   const SDLoc &dl(Op);
1978 
1979   if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1980     unsigned HwLen = Subtarget.getVectorLength();
1981     MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1982     SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1983     unsigned BitWidth = ResTy.getSizeInBits();
1984 
1985     if (BitWidth < 64) {
1986       SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1987           dl, MVT::i32, DAG);
1988       if (BitWidth == 32)
1989         return W0;
1990       assert(BitWidth < 32u);
1991       return DAG.getZExtOrTrunc(W0, dl, ResTy);
1992     }
1993 
1994     // The result is >= 64 bits. The only options are 64 or 128.
1995     assert(BitWidth == 64 || BitWidth == 128);
1996     SmallVector<SDValue,4> Words;
1997     for (unsigned i = 0; i != BitWidth/32; ++i) {
1998       SDValue W = extractHvxElementReg(
1999           VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2000       Words.push_back(W);
2001     }
2002     SmallVector<SDValue,2> Combines;
2003     assert(Words.size() % 2 == 0);
2004     for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2005       SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2006       Combines.push_back(C);
2007     }
2008 
2009     if (BitWidth == 64)
2010       return Combines[0];
2011 
2012     return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2013   }
2014 
2015   // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2016   // Splat the input into a 32-element i32 vector, then AND each element
2017   // with a unique bitmask to isolate individual bits.
2018   if (ResTy == MVT::v32i1 &&
2019       (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2020       Subtarget.useHVX128BOps()) {
2021     SDValue Val32 = Val;
2022     if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2023       Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2024 
2025     MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2026     SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2027     SmallVector<SDValue, 32> Mask;
2028     for (unsigned i = 0; i < 32; ++i)
2029       Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2030 
2031     SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2032     SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2033     return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2034   }
2035 
2036   if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2037     // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2038     unsigned BitWidth = ValTy.getSizeInBits();
2039     unsigned HwLen = Subtarget.getVectorLength();
2040     assert(BitWidth == HwLen);
2041 
2042     MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2043     SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2044     // Splat each byte of Val 8 times.
2045     // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2046     // where b0, b1,..., b15 are least to most significant bytes of I.
2047     SmallVector<SDValue, 128> Bytes;
2048     // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2049     // These are bytes with the LSB rotated left with respect to their index.
2050     SmallVector<SDValue, 128> Tmp;
2051     for (unsigned I = 0; I != HwLen / 8; ++I) {
2052       SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2053       SDValue Byte =
2054           DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2055       for (unsigned J = 0; J != 8; ++J) {
2056         Bytes.push_back(Byte);
2057         Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2058       }
2059     }
2060 
2061     MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2062     SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2063     SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2064 
2065     // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2066     I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2067     return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2068   }
2069 
2070   return Op;
2071 }
2072 
2073 SDValue
LowerHvxExtend(SDValue Op,SelectionDAG & DAG) const2074 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2075   // Sign- and zero-extends are legal.
2076   assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2077   return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2078                      Op.getOperand(0));
2079 }
2080 
2081 SDValue
LowerHvxSelect(SDValue Op,SelectionDAG & DAG) const2082 HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2083   MVT ResTy = ty(Op);
2084   if (ResTy.getVectorElementType() != MVT::i1)
2085     return Op;
2086 
2087   const SDLoc &dl(Op);
2088   unsigned HwLen = Subtarget.getVectorLength();
2089   unsigned VecLen = ResTy.getVectorNumElements();
2090   assert(HwLen % VecLen == 0);
2091   unsigned ElemSize = HwLen / VecLen;
2092 
2093   MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2094   SDValue S =
2095       DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2096                   DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2097                   DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2098   return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2099 }
2100 
2101 SDValue
LowerHvxShift(SDValue Op,SelectionDAG & DAG) const2102 HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2103   if (SDValue S = getVectorShiftByInt(Op, DAG))
2104     return S;
2105   return Op;
2106 }
2107 
2108 SDValue
LowerHvxFunnelShift(SDValue Op,SelectionDAG & DAG) const2109 HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2110                                            SelectionDAG &DAG) const {
2111   unsigned Opc = Op.getOpcode();
2112   assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2113 
2114   // Make sure the shift amount is within the range of the bitwidth
2115   // of the element type.
2116   SDValue A = Op.getOperand(0);
2117   SDValue B = Op.getOperand(1);
2118   SDValue S = Op.getOperand(2);
2119 
2120   MVT InpTy = ty(A);
2121   MVT ElemTy = InpTy.getVectorElementType();
2122 
2123   const SDLoc &dl(Op);
2124   unsigned ElemWidth = ElemTy.getSizeInBits();
2125   bool IsLeft = Opc == ISD::FSHL;
2126 
2127   // The expansion into regular shifts produces worse code for i8 and for
2128   // right shift of i32 on v65+.
2129   bool UseShifts = ElemTy != MVT::i8;
2130   if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2131     UseShifts = false;
2132 
2133   if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2134     // If this is a funnel shift by a scalar, lower it into regular shifts.
2135     SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2136     SDValue ModS =
2137         DAG.getNode(ISD::AND, dl, MVT::i32,
2138                     {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2139     SDValue NegS =
2140         DAG.getNode(ISD::SUB, dl, MVT::i32,
2141                     {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2142     SDValue IsZero =
2143         DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2144     // FSHL A, B  =>  A <<  | B >>n
2145     // FSHR A, B  =>  A <<n | B >>
2146     SDValue Part1 =
2147         DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2148     SDValue Part2 =
2149         DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2150     SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2151     // If the shift amount was 0, pick A or B, depending on the direction.
2152     // The opposite shift will also be by 0, so the "Or" will be incorrect.
2153     return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2154   }
2155 
2156   SDValue Mask = DAG.getSplatBuildVector(
2157       InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2158 
2159   unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2160   return DAG.getNode(MOpc, dl, ty(Op),
2161                      {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2162 }
2163 
2164 SDValue
LowerHvxIntrinsic(SDValue Op,SelectionDAG & DAG) const2165 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2166   const SDLoc &dl(Op);
2167   unsigned IntNo = Op.getConstantOperandVal(0);
2168   SmallVector<SDValue> Ops(Op->ops());
2169 
2170   auto Swap = [&](SDValue P) {
2171     return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2172   };
2173 
2174   switch (IntNo) {
2175   case Intrinsic::hexagon_V6_pred_typecast:
2176   case Intrinsic::hexagon_V6_pred_typecast_128B: {
2177     MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2178     if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2179       if (ResTy == InpTy)
2180         return Ops[1];
2181       return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2182     }
2183     break;
2184   }
2185   case Intrinsic::hexagon_V6_vmpyss_parts:
2186   case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2187     return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2188                             {Ops[1], Ops[2]}));
2189   case Intrinsic::hexagon_V6_vmpyuu_parts:
2190   case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2191     return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2192                             {Ops[1], Ops[2]}));
2193   case Intrinsic::hexagon_V6_vmpyus_parts:
2194   case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2195     return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2196                             {Ops[1], Ops[2]}));
2197   }
2198   } // switch
2199 
2200   return Op;
2201 }
2202 
2203 SDValue
LowerHvxMaskedOp(SDValue Op,SelectionDAG & DAG) const2204 HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2205   const SDLoc &dl(Op);
2206   unsigned HwLen = Subtarget.getVectorLength();
2207   MachineFunction &MF = DAG.getMachineFunction();
2208   auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2209   SDValue Mask = MaskN->getMask();
2210   SDValue Chain = MaskN->getChain();
2211   SDValue Base = MaskN->getBasePtr();
2212   auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2213 
2214   unsigned Opc = Op->getOpcode();
2215   assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2216 
2217   if (Opc == ISD::MLOAD) {
2218     MVT ValTy = ty(Op);
2219     SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2220     SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2221     if (isUndef(Thru))
2222       return Load;
2223     SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2224     return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2225   }
2226 
2227   // MSTORE
2228   // HVX only has aligned masked stores.
2229 
2230   // TODO: Fold negations of the mask into the store.
2231   unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2232   SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2233   SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2234 
2235   if (MaskN->getAlign().value() % HwLen == 0) {
2236     SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2237                              {Mask, Base, Offset0, Value, Chain}, DAG);
2238     DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2239     return Store;
2240   }
2241 
2242   // Unaligned case.
2243   auto StoreAlign = [&](SDValue V, SDValue A) {
2244     SDValue Z = getZero(dl, ty(V), DAG);
2245     // TODO: use funnel shifts?
2246     // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2247     // upper half.
2248     SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2249     SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2250     return std::make_pair(LoV, HiV);
2251   };
2252 
2253   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2254   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2255   SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2256   VectorPair Tmp = StoreAlign(MaskV, Base);
2257   VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2258                       DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2259   VectorPair ValueU = StoreAlign(Value, Base);
2260 
2261   SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2262   SDValue StoreLo =
2263       getInstr(StoreOpc, dl, MVT::Other,
2264                {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2265   SDValue StoreHi =
2266       getInstr(StoreOpc, dl, MVT::Other,
2267                {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2268   DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2269   DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2270   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2271 }
2272 
LowerHvxFpExtend(SDValue Op,SelectionDAG & DAG) const2273 SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2274                                                 SelectionDAG &DAG) const {
2275   // This conversion only applies to QFloat. IEEE extension from f16 to f32
2276   // is legal (done via a pattern).
2277   assert(Subtarget.useHVXQFloatOps());
2278 
2279   assert(Op->getOpcode() == ISD::FP_EXTEND);
2280 
2281   MVT VecTy = ty(Op);
2282   MVT ArgTy = ty(Op.getOperand(0));
2283   const SDLoc &dl(Op);
2284   assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2285 
2286   SDValue F16Vec = Op.getOperand(0);
2287 
2288   APFloat FloatVal = APFloat(1.0f);
2289   bool Ignored;
2290   FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2291   SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2292   SDValue VmpyVec =
2293       getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2294 
2295   MVT HalfTy = typeSplit(VecTy).first;
2296   VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2297   SDValue LoVec =
2298       getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2299   SDValue HiVec =
2300       getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2301 
2302   SDValue ShuffVec =
2303       getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2304                {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2305 
2306   return ShuffVec;
2307 }
2308 
2309 SDValue
LowerHvxFpToInt(SDValue Op,SelectionDAG & DAG) const2310 HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2311   // Catch invalid conversion ops (just in case).
2312   assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2313          Op.getOpcode() == ISD::FP_TO_UINT);
2314 
2315   MVT ResTy = ty(Op);
2316   MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2317   MVT IntTy = ResTy.getVectorElementType();
2318 
2319   if (Subtarget.useHVXIEEEFPOps()) {
2320     // There are only conversions from f16.
2321     if (FpTy == MVT::f16) {
2322       // Other int types aren't legal in HVX, so we shouldn't see them here.
2323       assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2324       // Conversions to i8 and i16 are legal.
2325       if (IntTy == MVT::i8 || IntTy == MVT::i16)
2326         return Op;
2327     }
2328   }
2329 
2330   if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2331     return EqualizeFpIntConversion(Op, DAG);
2332 
2333   return ExpandHvxFpToInt(Op, DAG);
2334 }
2335 
2336 SDValue
LowerHvxIntToFp(SDValue Op,SelectionDAG & DAG) const2337 HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2338   // Catch invalid conversion ops (just in case).
2339   assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2340          Op.getOpcode() == ISD::UINT_TO_FP);
2341 
2342   MVT ResTy = ty(Op);
2343   MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2344   MVT FpTy = ResTy.getVectorElementType();
2345 
2346   if (Subtarget.useHVXIEEEFPOps()) {
2347     // There are only conversions to f16.
2348     if (FpTy == MVT::f16) {
2349       // Other int types aren't legal in HVX, so we shouldn't see them here.
2350       assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2351       // i8, i16 -> f16 is legal.
2352       if (IntTy == MVT::i8 || IntTy == MVT::i16)
2353         return Op;
2354     }
2355   }
2356 
2357   if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2358     return EqualizeFpIntConversion(Op, DAG);
2359 
2360   return ExpandHvxIntToFp(Op, DAG);
2361 }
2362 
2363 HexagonTargetLowering::TypePair
typeExtendToWider(MVT Ty0,MVT Ty1) const2364 HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2365   // Compare the widths of elements of the two types, and extend the narrower
2366   // type to match the with of the wider type. For vector types, apply this
2367   // to the element type.
2368   assert(Ty0.isVector() == Ty1.isVector());
2369 
2370   MVT ElemTy0 = Ty0.getScalarType();
2371   MVT ElemTy1 = Ty1.getScalarType();
2372 
2373   unsigned Width0 = ElemTy0.getSizeInBits();
2374   unsigned Width1 = ElemTy1.getSizeInBits();
2375   unsigned MaxWidth = std::max(Width0, Width1);
2376 
2377   auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2378     if (ScalarTy.isInteger())
2379       return MVT::getIntegerVT(Width);
2380     assert(ScalarTy.isFloatingPoint());
2381     return MVT::getFloatingPointVT(Width);
2382   };
2383 
2384   MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2385   MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2386 
2387   if (!Ty0.isVector()) {
2388     // Both types are scalars.
2389     return {WideETy0, WideETy1};
2390   }
2391 
2392   // Vector types.
2393   unsigned NumElem = Ty0.getVectorNumElements();
2394   assert(NumElem == Ty1.getVectorNumElements());
2395 
2396   return {MVT::getVectorVT(WideETy0, NumElem),
2397           MVT::getVectorVT(WideETy1, NumElem)};
2398 }
2399 
2400 HexagonTargetLowering::TypePair
typeWidenToWider(MVT Ty0,MVT Ty1) const2401 HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2402   // Compare the numbers of elements of two vector types, and widen the
2403   // narrower one to match the number of elements in the wider one.
2404   assert(Ty0.isVector() && Ty1.isVector());
2405 
2406   unsigned Len0 = Ty0.getVectorNumElements();
2407   unsigned Len1 = Ty1.getVectorNumElements();
2408   if (Len0 == Len1)
2409     return {Ty0, Ty1};
2410 
2411   unsigned MaxLen = std::max(Len0, Len1);
2412   return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2413           MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2414 }
2415 
2416 MVT
typeLegalize(MVT Ty,SelectionDAG & DAG) const2417 HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2418   EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2419   assert(LegalTy.isSimple());
2420   return LegalTy.getSimpleVT();
2421 }
2422 
2423 MVT
typeWidenToHvx(MVT Ty) const2424 HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2425   unsigned HwWidth = 8 * Subtarget.getVectorLength();
2426   assert(Ty.getSizeInBits() <= HwWidth);
2427   if (Ty.getSizeInBits() == HwWidth)
2428     return Ty;
2429 
2430   MVT ElemTy = Ty.getScalarType();
2431   return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2432 }
2433 
2434 HexagonTargetLowering::VectorPair
emitHvxAddWithOverflow(SDValue A,SDValue B,const SDLoc & dl,bool Signed,SelectionDAG & DAG) const2435 HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2436       const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2437   // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2438   // whether an overflow has occurred.
2439   MVT ResTy = ty(A);
2440   assert(ResTy == ty(B));
2441   MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2442 
2443   if (!Signed) {
2444     // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2445     // save any instructions.
2446     SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2447     SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2448     return {Add, Ovf};
2449   }
2450 
2451   // Signed overflow has happened, if:
2452   // (A, B have the same sign) and (A+B has a different sign from either)
2453   // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2454   SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2455   SDValue NotA =
2456       DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2457   SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2458   SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2459   SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2460   SDValue MSB =
2461       DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2462   return {Add, MSB};
2463 }
2464 
2465 HexagonTargetLowering::VectorPair
emitHvxShiftRightRnd(SDValue Val,unsigned Amt,bool Signed,SelectionDAG & DAG) const2466 HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2467       bool Signed, SelectionDAG &DAG) const {
2468   // Shift Val right by Amt bits, round the result to the nearest integer,
2469   // tie-break by rounding halves to even integer.
2470 
2471   const SDLoc &dl(Val);
2472   MVT ValTy = ty(Val);
2473 
2474   // This should also work for signed integers.
2475   //
2476   //   uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2477   //   bool ovf = (inp > tmp0);
2478   //   uint rup = inp & (1 << (Amt+1));
2479   //
2480   //   uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
2481   //   uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
2482   //   uint tmp3 = tmp2 + rup;
2483   //   uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2484   unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2485   MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2486   MVT IntTy = tyVector(ValTy, ElemTy);
2487   MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2488   unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2489 
2490   SDValue Inp = DAG.getBitcast(IntTy, Val);
2491   SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2492 
2493   SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2494   SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2495   SDValue Zero = getZero(dl, IntTy, DAG);
2496   SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2497   SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2498   auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2499 
2500   SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2501   SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2502   SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2503   SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2504 
2505   SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2506   SDValue One = DAG.getConstant(1, dl, IntTy);
2507   SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2508   SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2509   SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2510   return {Mux, Ovf};
2511 }
2512 
2513 SDValue
emitHvxMulHsV60(SDValue A,SDValue B,const SDLoc & dl,SelectionDAG & DAG) const2514 HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2515                                        SelectionDAG &DAG) const {
2516   MVT VecTy = ty(A);
2517   MVT PairTy = typeJoin({VecTy, VecTy});
2518   assert(VecTy.getVectorElementType() == MVT::i32);
2519 
2520   SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2521 
2522   // mulhs(A,B) =
2523   //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2524   //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2525   //      + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2526   //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2527   // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2528   // anything, so it cannot produce any carry over to higher bits),
2529   // so everything in [] can be shifted by 16 without loss of precision.
2530   //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2531   //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2532   // The final additions need to make sure to properly maintain any carry-
2533   // out bits.
2534   //
2535   //                Hi(B) Lo(B)
2536   //                Hi(A) Lo(A)
2537   //               --------------
2538   //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
2539   //         Hi(B)*Lo(A)         |      + dropping the low 16 bits
2540   //         Hi(A)*Lo(B)   | T2
2541   //  Hi(B)*Hi(A)
2542 
2543   SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2544   // T1 = get Hi(A) into low halves.
2545   SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2546   // P0 = interleaved T1.h*B.uh (full precision product)
2547   SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2548   // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2549   SDValue T2 = LoHalf(P0, DAG);
2550   // We need to add T0+T2, recording the carry-out, which will be 1<<16
2551   // added to the final sum.
2552   // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2553   SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2554   // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2555   SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2556   // T3 = full-precision(T0+T2) >> 16
2557   // The low halves are added-unsigned, the high ones are added-signed.
2558   SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2559                         {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2560   SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2561   // P3 = interleaved Hi(B)*Hi(A) (full precision),
2562   // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2563   SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2564   SDValue T5 = LoHalf(P3, DAG);
2565   // Add:
2566   SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2567   return T6;
2568 }
2569 
2570 SDValue
emitHvxMulLoHiV60(SDValue A,bool SignedA,SDValue B,bool SignedB,const SDLoc & dl,SelectionDAG & DAG) const2571 HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2572                                          bool SignedB, const SDLoc &dl,
2573                                          SelectionDAG &DAG) const {
2574   MVT VecTy = ty(A);
2575   MVT PairTy = typeJoin({VecTy, VecTy});
2576   assert(VecTy.getVectorElementType() == MVT::i32);
2577 
2578   SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2579 
2580   if (SignedA && !SignedB) {
2581     // Make A:unsigned, B:signed.
2582     std::swap(A, B);
2583     std::swap(SignedA, SignedB);
2584   }
2585 
2586   // Do halfword-wise multiplications for unsigned*unsigned product, then
2587   // add corrections for signed and unsigned*signed.
2588 
2589   SDValue Lo, Hi;
2590 
2591   // P0:lo = (uu) products of low halves of A and B,
2592   // P0:hi = (uu) products of high halves.
2593   SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2594 
2595   // Swap low/high halves in B
2596   SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2597                         {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2598   SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2599   // P1 = products of even/odd halfwords.
2600   // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2601   // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2602   SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2603 
2604   // P2:lo = low halves of P1:lo + P1:hi,
2605   // P2:hi = high halves of P1:lo + P1:hi.
2606   SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2607                         {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2608   // Still need to add the high halves of P0:lo to P2:lo
2609   SDValue T2 =
2610       getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2611   SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2612 
2613   // The high halves of T3 will contribute to the HI part of LOHI.
2614   SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2615                         {HiHalf(P2, DAG), T3, S16}, DAG);
2616 
2617   // The low halves of P2 need to be added to high halves of the LO part.
2618   Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2619                 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2620   Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2621 
2622   if (SignedA) {
2623     assert(SignedB && "Signed A and unsigned B should have been inverted");
2624 
2625     MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2626     SDValue Zero = getZero(dl, VecTy, DAG);
2627     SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2628     SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2629     SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2630     SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2631     Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2632   } else if (SignedB) {
2633     // Same correction as for mulhus:
2634     // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2635     MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2636     SDValue Zero = getZero(dl, VecTy, DAG);
2637     SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2638     Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2639   } else {
2640     assert(!SignedA && !SignedB);
2641   }
2642 
2643   return DAG.getMergeValues({Lo, Hi}, dl);
2644 }
2645 
2646 SDValue
emitHvxMulLoHiV62(SDValue A,bool SignedA,SDValue B,bool SignedB,const SDLoc & dl,SelectionDAG & DAG) const2647 HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2648                                          SDValue B, bool SignedB,
2649                                          const SDLoc &dl,
2650                                          SelectionDAG &DAG) const {
2651   MVT VecTy = ty(A);
2652   MVT PairTy = typeJoin({VecTy, VecTy});
2653   assert(VecTy.getVectorElementType() == MVT::i32);
2654 
2655   if (SignedA && !SignedB) {
2656     // Make A:unsigned, B:signed.
2657     std::swap(A, B);
2658     std::swap(SignedA, SignedB);
2659   }
2660 
2661   // Do S*S first, then make corrections for U*S or U*U if needed.
2662   SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2663   SDValue P1 =
2664       getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2665   SDValue Lo = LoHalf(P1, DAG);
2666   SDValue Hi = HiHalf(P1, DAG);
2667 
2668   if (!SignedB) {
2669     assert(!SignedA && "Signed A and unsigned B should have been inverted");
2670     SDValue Zero = getZero(dl, VecTy, DAG);
2671     MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2672 
2673     // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2674     // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2675     //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
2676     //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2677     //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2678     //                               $A))>;
2679     SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2680     SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2681     SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2682     SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2683     Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2684   } else if (!SignedA) {
2685     SDValue Zero = getZero(dl, VecTy, DAG);
2686     MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2687 
2688     // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2689     // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2690     //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2691     //                     (HiHalf (Muls64O $A, $B)),
2692     //                     $B)>;
2693     SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2694     Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2695   }
2696 
2697   return DAG.getMergeValues({Lo, Hi}, dl);
2698 }
2699 
2700 SDValue
EqualizeFpIntConversion(SDValue Op,SelectionDAG & DAG) const2701 HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2702       const {
2703   // Rewrite conversion between integer and floating-point in such a way that
2704   // the integer type is extended/narrowed to match the bitwidth of the
2705   // floating-point type, combined with additional integer-integer extensions
2706   // or narrowings to match the original input/result types.
2707   // E.g.  f32 -> i8  ==>  f32 -> i32 -> i8
2708   //
2709   // The input/result types are not required to be legal, but if they are
2710   // legal, this function should not introduce illegal types.
2711 
2712   unsigned Opc = Op.getOpcode();
2713   assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2714          Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2715 
2716   SDValue Inp = Op.getOperand(0);
2717   MVT InpTy = ty(Inp);
2718   MVT ResTy = ty(Op);
2719 
2720   if (InpTy == ResTy)
2721     return Op;
2722 
2723   const SDLoc &dl(Op);
2724   bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2725 
2726   auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2727   SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2728   SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2729   SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2730   return Res;
2731 }
2732 
2733 SDValue
ExpandHvxFpToInt(SDValue Op,SelectionDAG & DAG) const2734 HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2735   unsigned Opc = Op.getOpcode();
2736   assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2737 
2738   const SDLoc &dl(Op);
2739   SDValue Op0 = Op.getOperand(0);
2740   MVT InpTy = ty(Op0);
2741   MVT ResTy = ty(Op);
2742   assert(InpTy.changeTypeToInteger() == ResTy);
2743 
2744   // int32_t conv_f32_to_i32(uint32_t inp) {
2745   //   // s | exp8 | frac23
2746   //
2747   //   int neg = (int32_t)inp < 0;
2748   //
2749   //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2750   //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2751   //   // produce a large positive "expm1", which will result in max u/int.
2752   //   // In all IEEE formats, bias is the largest positive number that can be
2753   //   // represented in bias-width bits (i.e. 011..1).
2754   //   int32_t expm1 = (inp << 1) - 0x80000000;
2755   //   expm1 >>= 24;
2756   //
2757   //   // Always insert the "implicit 1". Subnormal numbers will become 0
2758   //   // regardless.
2759   //   uint32_t frac = (inp << 8) | 0x80000000;
2760   //
2761   //   // "frac" is the fraction part represented as Q1.31. If it was
2762   //   // interpreted as uint32_t, it would be the fraction part multiplied
2763   //   // by 2^31.
2764   //
2765   //   // Calculate the amount of right shift, since shifting further to the
2766   //   // left would lose significant bits. Limit it to 32, because we want
2767   //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2768   //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2769   //   // left by 31). "rsh" can be negative.
2770   //   int32_t rsh = min(31 - (expm1 + 1), 32);
2771   //
2772   //   frac >>= rsh;   // rsh == 32 will produce 0
2773   //
2774   //   // Everything up to this point is the same for conversion to signed
2775   //   // unsigned integer.
2776   //
2777   //   if (neg)                 // Only for signed int
2778   //     frac = -frac;          //
2779   //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
2780   //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
2781   //   if (rsh <= 0 && !neg)    //
2782   //     frac = 0x7fffffff;     //
2783   //
2784   //   if (neg)                 // Only for unsigned int
2785   //     frac = 0;              //
2786   //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
2787   //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
2788   //
2789   //   return frac;
2790   // }
2791 
2792   MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2793 
2794   // Zero = V6_vd0();
2795   // Neg = V6_vgtw(Zero, Inp);
2796   // One = V6_lvsplatw(1);
2797   // M80 = V6_lvsplatw(0x80000000);
2798   // Exp00 = V6_vaslwv(Inp, One);
2799   // Exp01 = V6_vsubw(Exp00, M80);
2800   // ExpM1 = V6_vasrw(Exp01, 24);
2801   // Frc00 = V6_vaslw(Inp, 8);
2802   // Frc01 = V6_vor(Frc00, M80);
2803   // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2804   // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2805   // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2806 
2807   // if signed int:
2808   // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2809   // Pos = V6_vgtw(Rsh01, Zero);
2810   // Frc13 = V6_vsubw(Zero, Frc02);
2811   // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2812   // Int = V6_vmux(Pos, Frc14, Bnd);
2813   //
2814   // if unsigned int:
2815   // Rsn = V6_vgtw(Zero, Rsh01)
2816   // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2817   // Int = V6_vmux(Neg, Zero, Frc23)
2818 
2819   auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2820   unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2821   assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2822 
2823   SDValue Inp = DAG.getBitcast(ResTy, Op0);
2824   SDValue Zero = getZero(dl, ResTy, DAG);
2825   SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2826   SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2827   SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2828   SDValue One = DAG.getConstant(1, dl, ResTy);
2829   SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2830   SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2831   SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2832   SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2833 
2834   SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2835   SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2836   SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2837 
2838   SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2839   SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2840   SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2841   SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2842   SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2843 
2844   SDValue Int;
2845 
2846   if (Opc == ISD::FP_TO_SINT) {
2847     SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2848     SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2849     SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2850     SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2851     Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2852   } else {
2853     assert(Opc == ISD::FP_TO_UINT);
2854     SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2855     SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2856     Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2857   }
2858 
2859   return Int;
2860 }
2861 
2862 SDValue
ExpandHvxIntToFp(SDValue Op,SelectionDAG & DAG) const2863 HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2864   unsigned Opc = Op.getOpcode();
2865   assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2866 
2867   const SDLoc &dl(Op);
2868   SDValue Op0 = Op.getOperand(0);
2869   MVT InpTy = ty(Op0);
2870   MVT ResTy = ty(Op);
2871   assert(ResTy.changeTypeToInteger() == InpTy);
2872 
2873   // uint32_t vnoc1_rnd(int32_t w) {
2874   //   int32_t iszero = w == 0;
2875   //   int32_t isneg = w < 0;
2876   //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
2877   //
2878   //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2879   //   uint32_t frac0 = (uint64_t)u << norm_left;
2880   //
2881   //   // Rounding:
2882   //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
2883   //   uint32_t renorm = (frac0 > frac1);
2884   //   uint32_t rup = (int)(frac0 << 22) < 0;
2885   //
2886   //   uint32_t frac2 = frac0 >> 8;
2887   //   uint32_t frac3 = frac1 >> 8;
2888   //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2889   //
2890   //   int32_t exp = 32 - norm_left + renorm + 127;
2891   //   exp <<= 23;
2892   //
2893   //   uint32_t sign = 0x80000000 * isneg;
2894   //   uint32_t f = sign | exp | frac;
2895   //   return iszero ? 0 : f;
2896   // }
2897 
2898   MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
2899   bool Signed = Opc == ISD::SINT_TO_FP;
2900 
2901   auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
2902   unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2903 
2904   SDValue Zero = getZero(dl, InpTy, DAG);
2905   SDValue One = DAG.getConstant(1, dl, InpTy);
2906   SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
2907   SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
2908   SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
2909   SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
2910   SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
2911 
2912   auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
2913   if (Signed) {
2914     SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
2915     SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
2916     SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
2917     Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
2918   }
2919 
2920   SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
2921   SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
2922   SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
2923   SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
2924   SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
2925                              {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
2926   SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
2927   SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
2928   SDValue Flt = DAG.getBitcast(ResTy, Flt1);
2929 
2930   return Flt;
2931 }
2932 
2933 SDValue
CreateTLWrapper(SDValue Op,SelectionDAG & DAG) const2934 HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2935   unsigned Opc = Op.getOpcode();
2936   unsigned TLOpc;
2937   switch (Opc) {
2938   case ISD::ANY_EXTEND:
2939   case ISD::SIGN_EXTEND:
2940   case ISD::ZERO_EXTEND:
2941     TLOpc = HexagonISD::TL_EXTEND;
2942     break;
2943   case ISD::TRUNCATE:
2944     TLOpc = HexagonISD::TL_TRUNCATE;
2945     break;
2946 #ifndef NDEBUG
2947     Op.dump(&DAG);
2948 #endif
2949     llvm_unreachable("Unexpected operator");
2950   }
2951 
2952   const SDLoc &dl(Op);
2953   return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
2954                      DAG.getUNDEF(MVT::i128), // illegal type
2955                      DAG.getConstant(Opc, dl, MVT::i32));
2956 }
2957 
2958 SDValue
RemoveTLWrapper(SDValue Op,SelectionDAG & DAG) const2959 HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2960   assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2961          Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2962   unsigned Opc = Op.getConstantOperandVal(2);
2963   return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
2964 }
2965 
2966 HexagonTargetLowering::VectorPair
SplitVectorOp(SDValue Op,SelectionDAG & DAG) const2967 HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2968   assert(!Op.isMachineOpcode());
2969   SmallVector<SDValue, 2> OpsL, OpsH;
2970   const SDLoc &dl(Op);
2971 
2972   auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2973     MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2974     SDValue TV = DAG.getValueType(Ty);
2975     return std::make_pair(TV, TV);
2976   };
2977 
2978   for (SDValue A : Op.getNode()->ops()) {
2979     auto [Lo, Hi] =
2980         ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
2981     // Special case for type operand.
2982     switch (Op.getOpcode()) {
2983       case ISD::SIGN_EXTEND_INREG:
2984       case HexagonISD::SSAT:
2985       case HexagonISD::USAT:
2986         if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2987           std::tie(Lo, Hi) = SplitVTNode(N);
2988       break;
2989     }
2990     OpsL.push_back(Lo);
2991     OpsH.push_back(Hi);
2992   }
2993 
2994   MVT ResTy = ty(Op);
2995   MVT HalfTy = typeSplit(ResTy).first;
2996   SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2997   SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2998   return {L, H};
2999 }
3000 
3001 SDValue
SplitHvxMemOp(SDValue Op,SelectionDAG & DAG) const3002 HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
3003   auto *MemN = cast<MemSDNode>(Op.getNode());
3004 
3005   MVT MemTy = MemN->getMemoryVT().getSimpleVT();
3006   if (!isHvxPairTy(MemTy))
3007     return Op;
3008 
3009   const SDLoc &dl(Op);
3010   unsigned HwLen = Subtarget.getVectorLength();
3011   MVT SingleTy = typeSplit(MemTy).first;
3012   SDValue Chain = MemN->getChain();
3013   SDValue Base0 = MemN->getBasePtr();
3014   SDValue Base1 =
3015       DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
3016   unsigned MemOpc = MemN->getOpcode();
3017 
3018   MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3019   if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3020     MachineFunction &MF = DAG.getMachineFunction();
3021     uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3022                            ? (uint64_t)MemoryLocation::UnknownSize
3023                            : HwLen;
3024     MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
3025     MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
3026   }
3027 
3028   if (MemOpc == ISD::LOAD) {
3029     assert(cast<LoadSDNode>(Op)->isUnindexed());
3030     SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
3031     SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
3032     return DAG.getMergeValues(
3033         { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
3034           DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3035                       Load0.getValue(1), Load1.getValue(1)) }, dl);
3036   }
3037   if (MemOpc == ISD::STORE) {
3038     assert(cast<StoreSDNode>(Op)->isUnindexed());
3039     VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3040     SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3041     SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3042     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3043   }
3044 
3045   assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3046 
3047   auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3048   assert(MaskN->isUnindexed());
3049   VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3050   SDValue Offset = DAG.getUNDEF(MVT::i32);
3051 
3052   if (MemOpc == ISD::MLOAD) {
3053     VectorPair Thru =
3054         opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3055     SDValue MLoad0 =
3056         DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3057                           Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3058                           ISD::NON_EXTLOAD, false);
3059     SDValue MLoad1 =
3060         DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3061                           Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3062                           ISD::NON_EXTLOAD, false);
3063     return DAG.getMergeValues(
3064         { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3065           DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3066                       MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3067   }
3068   if (MemOpc == ISD::MSTORE) {
3069     VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3070     SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3071                                          Masks.first, SingleTy, MOp0,
3072                                          ISD::UNINDEXED, false, false);
3073     SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3074                                          Masks.second, SingleTy, MOp1,
3075                                          ISD::UNINDEXED, false, false);
3076     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3077   }
3078 
3079   std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3080   llvm_unreachable(Name.c_str());
3081 }
3082 
3083 SDValue
WidenHvxLoad(SDValue Op,SelectionDAG & DAG) const3084 HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3085   const SDLoc &dl(Op);
3086   auto *LoadN = cast<LoadSDNode>(Op.getNode());
3087   assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3088   assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3089          "Not widening loads of i1 yet");
3090 
3091   SDValue Chain = LoadN->getChain();
3092   SDValue Base = LoadN->getBasePtr();
3093   SDValue Offset = DAG.getUNDEF(MVT::i32);
3094 
3095   MVT ResTy = ty(Op);
3096   unsigned HwLen = Subtarget.getVectorLength();
3097   unsigned ResLen = ResTy.getStoreSize();
3098   assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3099 
3100   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3101   SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3102                           {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3103 
3104   MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3105   MachineFunction &MF = DAG.getMachineFunction();
3106   auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3107 
3108   SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3109                                    DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3110                                    ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
3111   SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3112   return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3113 }
3114 
3115 SDValue
WidenHvxStore(SDValue Op,SelectionDAG & DAG) const3116 HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3117   const SDLoc &dl(Op);
3118   auto *StoreN = cast<StoreSDNode>(Op.getNode());
3119   assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3120   assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3121          "Not widening stores of i1 yet");
3122 
3123   SDValue Chain = StoreN->getChain();
3124   SDValue Base = StoreN->getBasePtr();
3125   SDValue Offset = DAG.getUNDEF(MVT::i32);
3126 
3127   SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3128   MVT ValueTy = ty(Value);
3129   unsigned ValueLen = ValueTy.getVectorNumElements();
3130   unsigned HwLen = Subtarget.getVectorLength();
3131   assert(isPowerOf2_32(ValueLen));
3132 
3133   for (unsigned Len = ValueLen; Len < HwLen; ) {
3134     Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3135     Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3136   }
3137   assert(ty(Value).getVectorNumElements() == HwLen);  // Paranoia
3138 
3139   assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3140   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3141   SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3142                           {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3143   MachineFunction &MF = DAG.getMachineFunction();
3144   auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3145   return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3146                             MemOp, ISD::UNINDEXED, false, false);
3147 }
3148 
3149 SDValue
WidenHvxSetCC(SDValue Op,SelectionDAG & DAG) const3150 HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3151   const SDLoc &dl(Op);
3152   SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3153   MVT ElemTy = ty(Op0).getVectorElementType();
3154   unsigned HwLen = Subtarget.getVectorLength();
3155 
3156   unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3157   assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3158   MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3159   if (!Subtarget.isHVXVectorType(WideOpTy, true))
3160     return SDValue();
3161 
3162   SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3163   SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3164   EVT ResTy =
3165       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3166   SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3167                               {WideOp0, WideOp1, Op.getOperand(2)});
3168 
3169   EVT RetTy = typeLegalize(ty(Op), DAG);
3170   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3171                      {SetCC, getZero(dl, MVT::i32, DAG)});
3172 }
3173 
3174 SDValue
LowerHvxOperation(SDValue Op,SelectionDAG & DAG) const3175 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3176   unsigned Opc = Op.getOpcode();
3177   bool IsPairOp = isHvxPairTy(ty(Op)) ||
3178                   llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3179                     return isHvxPairTy(ty(V));
3180                   });
3181 
3182   if (IsPairOp) {
3183     switch (Opc) {
3184       default:
3185         break;
3186       case ISD::LOAD:
3187       case ISD::STORE:
3188       case ISD::MLOAD:
3189       case ISD::MSTORE:
3190         return SplitHvxMemOp(Op, DAG);
3191       case ISD::SINT_TO_FP:
3192       case ISD::UINT_TO_FP:
3193       case ISD::FP_TO_SINT:
3194       case ISD::FP_TO_UINT:
3195         if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3196           return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3197         break;
3198       case ISD::ABS:
3199       case ISD::CTPOP:
3200       case ISD::CTLZ:
3201       case ISD::CTTZ:
3202       case ISD::MUL:
3203       case ISD::FADD:
3204       case ISD::FSUB:
3205       case ISD::FMUL:
3206       case ISD::FMINIMUMNUM:
3207       case ISD::FMAXIMUMNUM:
3208       case ISD::MULHS:
3209       case ISD::MULHU:
3210       case ISD::AND:
3211       case ISD::OR:
3212       case ISD::XOR:
3213       case ISD::SRA:
3214       case ISD::SHL:
3215       case ISD::SRL:
3216       case ISD::FSHL:
3217       case ISD::FSHR:
3218       case ISD::SMIN:
3219       case ISD::SMAX:
3220       case ISD::UMIN:
3221       case ISD::UMAX:
3222       case ISD::SETCC:
3223       case ISD::VSELECT:
3224       case ISD::SIGN_EXTEND_INREG:
3225       case ISD::SPLAT_VECTOR:
3226         return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3227       case ISD::SIGN_EXTEND:
3228       case ISD::ZERO_EXTEND:
3229         // In general, sign- and zero-extends can't be split and still
3230         // be legal. The only exception is extending bool vectors.
3231         if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3232           return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3233         break;
3234     }
3235   }
3236 
3237   switch (Opc) {
3238     default:
3239       break;
3240     case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
3241     case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
3242     case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
3243     case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
3244     case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
3245     case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
3246     case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
3247     case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
3248     case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
3249     case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
3250     case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
3251     case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
3252     case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
3253     case ISD::SRA:
3254     case ISD::SHL:
3255     case ISD::SRL:                     return LowerHvxShift(Op, DAG);
3256     case ISD::FSHL:
3257     case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
3258     case ISD::MULHS:
3259     case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
3260     case ISD::SMUL_LOHI:
3261     case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
3262     case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3263     case ISD::SETCC:
3264     case ISD::INTRINSIC_VOID:          return Op;
3265     case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
3266     case ISD::MLOAD:
3267     case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
3268     // Unaligned loads will be handled by the default lowering.
3269     case ISD::LOAD:                    return SDValue();
3270     case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
3271     case ISD::FP_TO_SINT:
3272     case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
3273     case ISD::SINT_TO_FP:
3274     case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);
3275 
3276     // Special nodes:
3277     case HexagonISD::SMUL_LOHI:
3278     case HexagonISD::UMUL_LOHI:
3279     case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
3280   }
3281 #ifndef NDEBUG
3282   Op.dumpr(&DAG);
3283 #endif
3284   llvm_unreachable("Unhandled HVX operation");
3285 }
3286 
3287 SDValue
ExpandHvxResizeIntoSteps(SDValue Op,SelectionDAG & DAG) const3288 HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3289       const {
3290   // Rewrite the extension/truncation/saturation op into steps where each
3291   // step changes the type widths by a factor of 2.
3292   // E.g.  i8 -> i16 remains unchanged, but i8 -> i32  ==>  i8 -> i16 -> i32.
3293   //
3294   // Some of the vector types in Op may not be legal.
3295 
3296   unsigned Opc = Op.getOpcode();
3297   switch (Opc) {
3298     case HexagonISD::SSAT:
3299     case HexagonISD::USAT:
3300     case HexagonISD::TL_EXTEND:
3301     case HexagonISD::TL_TRUNCATE:
3302       break;
3303     case ISD::ANY_EXTEND:
3304     case ISD::ZERO_EXTEND:
3305     case ISD::SIGN_EXTEND:
3306     case ISD::TRUNCATE:
3307       llvm_unreachable("ISD:: ops will be auto-folded");
3308       break;
3309 #ifndef NDEBUG
3310     Op.dump(&DAG);
3311 #endif
3312     llvm_unreachable("Unexpected operation");
3313   }
3314 
3315   SDValue Inp = Op.getOperand(0);
3316   MVT InpTy = ty(Inp);
3317   MVT ResTy = ty(Op);
3318 
3319   unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3320   unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3321   assert(InpWidth != ResWidth);
3322 
3323   if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3324     return Op;
3325 
3326   const SDLoc &dl(Op);
3327   unsigned NumElems = InpTy.getVectorNumElements();
3328   assert(NumElems == ResTy.getVectorNumElements());
3329 
3330   auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3331     MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3332     switch (Opc) {
3333       case HexagonISD::SSAT:
3334       case HexagonISD::USAT:
3335         return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3336       case HexagonISD::TL_EXTEND:
3337       case HexagonISD::TL_TRUNCATE:
3338         return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3339       default:
3340         llvm_unreachable("Unexpected opcode");
3341     }
3342   };
3343 
3344   SDValue S = Inp;
3345   if (InpWidth < ResWidth) {
3346     assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3347     while (InpWidth * 2 <= ResWidth)
3348       S = repeatOp(InpWidth *= 2, S);
3349   } else {
3350     // InpWidth > ResWidth
3351     assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3352     while (InpWidth / 2 >= ResWidth)
3353       S = repeatOp(InpWidth /= 2, S);
3354   }
3355   return S;
3356 }
3357 
3358 SDValue
LegalizeHvxResize(SDValue Op,SelectionDAG & DAG) const3359 HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3360   SDValue Inp0 = Op.getOperand(0);
3361   MVT InpTy = ty(Inp0);
3362   MVT ResTy = ty(Op);
3363   unsigned InpWidth = InpTy.getSizeInBits();
3364   unsigned ResWidth = ResTy.getSizeInBits();
3365   unsigned Opc = Op.getOpcode();
3366 
3367   if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3368     // First, make sure that the narrower type is widened to HVX.
3369     // This may cause the result to be wider than what the legalizer
3370     // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3371     // desired type.
3372     auto [WInpTy, WResTy] =
3373         InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3374                             : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3375     SDValue W = appendUndef(Inp0, WInpTy, DAG);
3376     SDValue S;
3377     if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3378       S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3379                       Op.getOperand(2));
3380     } else {
3381       S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3382     }
3383     SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3384     return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3385   } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3386     return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3387   } else {
3388     assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3389     return RemoveTLWrapper(Op, DAG);
3390   }
3391   llvm_unreachable("Unexpected situation");
3392 }
3393 
3394 void
LowerHvxOperationWrapper(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3395 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3396       SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3397   unsigned Opc = N->getOpcode();
3398   SDValue Op(N, 0);
3399   SDValue Inp0;   // Optional first argument.
3400   if (N->getNumOperands() > 0)
3401     Inp0 = Op.getOperand(0);
3402 
3403   switch (Opc) {
3404     case ISD::ANY_EXTEND:
3405     case ISD::SIGN_EXTEND:
3406     case ISD::ZERO_EXTEND:
3407     case ISD::TRUNCATE:
3408       if (Subtarget.isHVXElementType(ty(Op)) &&
3409           Subtarget.isHVXElementType(ty(Inp0))) {
3410         Results.push_back(CreateTLWrapper(Op, DAG));
3411       }
3412       break;
3413     case ISD::SETCC:
3414       if (shouldWidenToHvx(ty(Inp0), DAG)) {
3415         if (SDValue T = WidenHvxSetCC(Op, DAG))
3416           Results.push_back(T);
3417       }
3418       break;
3419     case ISD::STORE: {
3420       if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3421         SDValue Store = WidenHvxStore(Op, DAG);
3422         Results.push_back(Store);
3423       }
3424       break;
3425     }
3426     case ISD::MLOAD:
3427       if (isHvxPairTy(ty(Op))) {
3428         SDValue S = SplitHvxMemOp(Op, DAG);
3429         assert(S->getOpcode() == ISD::MERGE_VALUES);
3430         Results.push_back(S.getOperand(0));
3431         Results.push_back(S.getOperand(1));
3432       }
3433       break;
3434     case ISD::MSTORE:
3435       if (isHvxPairTy(ty(Op->getOperand(1)))) {    // Stored value
3436         SDValue S = SplitHvxMemOp(Op, DAG);
3437         Results.push_back(S);
3438       }
3439       break;
3440     case ISD::SINT_TO_FP:
3441     case ISD::UINT_TO_FP:
3442     case ISD::FP_TO_SINT:
3443     case ISD::FP_TO_UINT:
3444       if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3445         SDValue T = EqualizeFpIntConversion(Op, DAG);
3446         Results.push_back(T);
3447       }
3448       break;
3449     case HexagonISD::SSAT:
3450     case HexagonISD::USAT:
3451     case HexagonISD::TL_EXTEND:
3452     case HexagonISD::TL_TRUNCATE:
3453       Results.push_back(LegalizeHvxResize(Op, DAG));
3454       break;
3455     default:
3456       break;
3457   }
3458 }
3459 
3460 void
ReplaceHvxNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3461 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3462       SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3463   unsigned Opc = N->getOpcode();
3464   SDValue Op(N, 0);
3465   SDValue Inp0;   // Optional first argument.
3466   if (N->getNumOperands() > 0)
3467     Inp0 = Op.getOperand(0);
3468 
3469   switch (Opc) {
3470     case ISD::ANY_EXTEND:
3471     case ISD::SIGN_EXTEND:
3472     case ISD::ZERO_EXTEND:
3473     case ISD::TRUNCATE:
3474       if (Subtarget.isHVXElementType(ty(Op)) &&
3475           Subtarget.isHVXElementType(ty(Inp0))) {
3476         Results.push_back(CreateTLWrapper(Op, DAG));
3477       }
3478       break;
3479     case ISD::SETCC:
3480       if (shouldWidenToHvx(ty(Op), DAG)) {
3481         if (SDValue T = WidenHvxSetCC(Op, DAG))
3482           Results.push_back(T);
3483       }
3484       break;
3485     case ISD::LOAD: {
3486       if (shouldWidenToHvx(ty(Op), DAG)) {
3487         SDValue Load = WidenHvxLoad(Op, DAG);
3488         assert(Load->getOpcode() == ISD::MERGE_VALUES);
3489         Results.push_back(Load.getOperand(0));
3490         Results.push_back(Load.getOperand(1));
3491       }
3492       break;
3493     }
3494     case ISD::BITCAST:
3495       if (isHvxBoolTy(ty(Inp0))) {
3496         SDValue C = LowerHvxBitcast(Op, DAG);
3497         Results.push_back(C);
3498       }
3499       break;
3500     case ISD::FP_TO_SINT:
3501     case ISD::FP_TO_UINT:
3502       if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3503         SDValue T = EqualizeFpIntConversion(Op, DAG);
3504         Results.push_back(T);
3505       }
3506       break;
3507     case HexagonISD::SSAT:
3508     case HexagonISD::USAT:
3509     case HexagonISD::TL_EXTEND:
3510     case HexagonISD::TL_TRUNCATE:
3511       Results.push_back(LegalizeHvxResize(Op, DAG));
3512       break;
3513     default:
3514       break;
3515   }
3516 }
3517 
3518 SDValue
combineTruncateBeforeLegal(SDValue Op,DAGCombinerInfo & DCI) const3519 HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3520                                                   DAGCombinerInfo &DCI) const {
3521   // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3522   // to extract-subvector (shuffle V, pick even, pick odd)
3523 
3524   assert(Op.getOpcode() == ISD::TRUNCATE);
3525   SelectionDAG &DAG = DCI.DAG;
3526   const SDLoc &dl(Op);
3527 
3528   if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3529     return SDValue();
3530   SDValue Cast = Op.getOperand(0);
3531   SDValue Src = Cast.getOperand(0);
3532 
3533   EVT TruncTy = Op.getValueType();
3534   EVT CastTy = Cast.getValueType();
3535   EVT SrcTy = Src.getValueType();
3536   if (SrcTy.isSimple())
3537     return SDValue();
3538   if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3539     return SDValue();
3540   unsigned SrcLen = SrcTy.getVectorNumElements();
3541   unsigned CastLen = CastTy.getVectorNumElements();
3542   if (2 * CastLen != SrcLen)
3543     return SDValue();
3544 
3545   SmallVector<int, 128> Mask(SrcLen);
3546   for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3547     Mask[i] = 2 * i;
3548     Mask[i + CastLen] = 2 * i + 1;
3549   }
3550   SDValue Deal =
3551       DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3552   return opSplit(Deal, dl, DAG).first;
3553 }
3554 
3555 SDValue
combineConcatVectorsBeforeLegal(SDValue Op,DAGCombinerInfo & DCI) const3556 HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3557     SDValue Op, DAGCombinerInfo &DCI) const {
3558   // Fold
3559   //   concat (shuffle x, y, m1), (shuffle x, y, m2)
3560   // into
3561   //   shuffle (concat x, y), undef, m3
3562   if (Op.getNumOperands() != 2)
3563     return SDValue();
3564 
3565   SelectionDAG &DAG = DCI.DAG;
3566   const SDLoc &dl(Op);
3567   SDValue V0 = Op.getOperand(0);
3568   SDValue V1 = Op.getOperand(1);
3569 
3570   if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3571     return SDValue();
3572   if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3573     return SDValue();
3574 
3575   SetVector<SDValue> Order;
3576   Order.insert(V0.getOperand(0));
3577   Order.insert(V0.getOperand(1));
3578   Order.insert(V1.getOperand(0));
3579   Order.insert(V1.getOperand(1));
3580 
3581   if (Order.size() > 2)
3582     return SDValue();
3583 
3584   // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3585   // result must be the same.
3586   EVT InpTy = V0.getValueType();
3587   assert(InpTy.isVector());
3588   unsigned InpLen = InpTy.getVectorNumElements();
3589 
3590   SmallVector<int, 128> LongMask;
3591   auto AppendToMask = [&](SDValue Shuffle) {
3592     auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3593     ArrayRef<int> Mask = SV->getMask();
3594     SDValue X = Shuffle.getOperand(0);
3595     SDValue Y = Shuffle.getOperand(1);
3596     for (int M : Mask) {
3597       if (M == -1) {
3598         LongMask.push_back(M);
3599         continue;
3600       }
3601       SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3602       if (static_cast<unsigned>(M) >= InpLen)
3603         M -= InpLen;
3604 
3605       int OutOffset = Order[0] == Src ? 0 : InpLen;
3606       LongMask.push_back(M + OutOffset);
3607     }
3608   };
3609 
3610   AppendToMask(V0);
3611   AppendToMask(V1);
3612 
3613   SDValue C0 = Order.front();
3614   SDValue C1 = Order.back();  // Can be same as front
3615   EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3616 
3617   SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3618   return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3619 }
3620 
3621 SDValue
PerformHvxDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const3622 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3623       const {
3624   const SDLoc &dl(N);
3625   SelectionDAG &DAG = DCI.DAG;
3626   SDValue Op(N, 0);
3627   unsigned Opc = Op.getOpcode();
3628 
3629   SmallVector<SDValue, 4> Ops(N->ops());
3630 
3631   if (Opc == ISD::TRUNCATE)
3632     return combineTruncateBeforeLegal(Op, DCI);
3633   if (Opc == ISD::CONCAT_VECTORS)
3634     return combineConcatVectorsBeforeLegal(Op, DCI);
3635 
3636   if (DCI.isBeforeLegalizeOps())
3637     return SDValue();
3638 
3639   switch (Opc) {
3640     case ISD::VSELECT: {
3641       // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3642       SDValue Cond = Ops[0];
3643       if (Cond->getOpcode() == ISD::XOR) {
3644         SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3645         if (C1->getOpcode() == HexagonISD::QTRUE)
3646           return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3647       }
3648       break;
3649     }
3650     case HexagonISD::V2Q:
3651       if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3652         if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3653           return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3654                              : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3655       }
3656       break;
3657     case HexagonISD::Q2V:
3658       if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3659         return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3660                            DAG.getAllOnesConstant(dl, MVT::i32));
3661       if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3662         return getZero(dl, ty(Op), DAG);
3663       break;
3664     case HexagonISD::VINSERTW0:
3665       if (isUndef(Ops[1]))
3666         return Ops[0];
3667       break;
3668     case HexagonISD::VROR: {
3669       if (Ops[0].getOpcode() == HexagonISD::VROR) {
3670         SDValue Vec = Ops[0].getOperand(0);
3671         SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3672         SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3673         return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3674       }
3675       break;
3676     }
3677   }
3678 
3679   return SDValue();
3680 }
3681 
3682 bool
shouldSplitToHvx(MVT Ty,SelectionDAG & DAG) const3683 HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3684   if (Subtarget.isHVXVectorType(Ty, true))
3685     return false;
3686   auto Action = getPreferredHvxVectorAction(Ty);
3687   if (Action == TargetLoweringBase::TypeSplitVector)
3688     return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3689   return false;
3690 }
3691 
3692 bool
shouldWidenToHvx(MVT Ty,SelectionDAG & DAG) const3693 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3694   if (Subtarget.isHVXVectorType(Ty, true))
3695     return false;
3696   auto Action = getPreferredHvxVectorAction(Ty);
3697   if (Action == TargetLoweringBase::TypeWidenVector)
3698     return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3699   return false;
3700 }
3701 
3702 bool
isHvxOperation(SDNode * N,SelectionDAG & DAG) const3703 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3704   if (!Subtarget.useHVXOps())
3705     return false;
3706   // If the type of any result, or any operand type are HVX vector types,
3707   // this is an HVX operation.
3708   auto IsHvxTy = [this](EVT Ty) {
3709     return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3710   };
3711   auto IsHvxOp = [this](SDValue Op) {
3712     return Op.getValueType().isSimple() &&
3713            Subtarget.isHVXVectorType(ty(Op), true);
3714   };
3715   if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3716     return true;
3717 
3718   // Check if this could be an HVX operation after type widening.
3719   auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3720     if (!Op.getValueType().isSimple())
3721       return false;
3722     MVT ValTy = ty(Op);
3723     return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3724   };
3725 
3726   for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3727     if (IsWidenedToHvx(SDValue(N, i)))
3728       return true;
3729   }
3730   return llvm::any_of(N->ops(), IsWidenedToHvx);
3731 }
3732