xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "MCTargetDesc/LoongArchBaseInfo.h"
20 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/IntrinsicsLoongArch.h"
30 #include "llvm/Support/CodeGen.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include <llvm/Analysis/VectorUtils.h>
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "loongarch-isel-lowering"
40 
// Statistic counter named NumTailCalls, described as "Number of tail calls".
// NOTE(review): the code that increments it is not visible in this part of
// the file.
41 STATISTIC(NumTailCalls, "Number of tail calls");
42 
// Hidden boolean command-line option "loongarch-check-zero-division"; it
// defaults to false and is described as trapping on integer division by zero.
43 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
44                                   cl::desc("Trap on integer division by zero."),
45                                   cl::init(false));
46 
// Constructs the LoongArch lowering description for the given subtarget.
//
// The constructor
//   * registers a register class for every value type the subtarget supports
//     (GPR for the native integer width, FPR32/FPR64 with the F/D features,
//     LSX128/LASX256 with the vector extensions),
//   * declares, per operation and value type, whether the operation is Legal,
//     Custom, Expand, Promote or LibCall,
//   * registers the opcodes for which target DAG combines are requested, and
//   * sets miscellaneous properties (stack pointer register, boolean contents,
//     atomic size limits, function/loop alignment).
//
// Statement order matters: several later calls deliberately override actions
// that were set earlier in this constructor.
LoongArchTargetLowering(const TargetMachine & TM,const LoongArchSubtarget & STI)47 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
48                                                  const LoongArchSubtarget &STI)
49     : TargetLowering(TM), Subtarget(STI) {
50 
51   MVT GRLenVT = Subtarget.getGRLenVT();
52 
53   // Set up the register classes.
54 
55   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
56   if (Subtarget.hasBasicF())
57     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
58   if (Subtarget.hasBasicD())
59     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
60 
  // Vector value types handled by LSX (first table) and LASX (second table);
  // each table is registered below against the matching vector register class.
61   static const MVT::SimpleValueType LSXVTs[] = {
62       MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
63   static const MVT::SimpleValueType LASXVTs[] = {
64       MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
65 
66   if (Subtarget.hasExtLSX())
67     for (MVT VT : LSXVTs)
68       addRegisterClass(VT, &LoongArch::LSX128RegClass);
69 
70   if (Subtarget.hasExtLASX())
71     for (MVT VT : LASXVTs)
72       addRegisterClass(VT, &LoongArch::LASX256RegClass);
73 
74   // Set operations for LA32 and LA64.
75 
76   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
77                    MVT::i1, Promote);
78 
79   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
80   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
81   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
82   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
83   setOperationAction(ISD::ROTL, GRLenVT, Expand);
84   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
85 
86   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
87                       ISD::JumpTable, ISD::GlobalTLSAddress},
88                      GRLenVT, Custom);
89 
90   setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
91 
92   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
93   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
94   setOperationAction(ISD::VASTART, MVT::Other, Custom);
95   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
96 
97   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
98   setOperationAction(ISD::TRAP, MVT::Other, Legal);
99 
100   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
101   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
102   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
103 
104   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
105 
106   // BITREV/REVB requires the 32S feature.
107   if (STI.has32S()) {
108     // Expand bitreverse.i16 with native-width bitrev and shift for now, before
109     // we get to know which of sll and revb.2h is faster.
110     setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
111     setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
112 
113     // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
114     // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
115     // and i32 could still be byte-swapped relatively cheaply.
116     setOperationAction(ISD::BSWAP, MVT::i16, Custom);
117   } else {
    // Without 32S these native-width operations are expanded, and SELECT is
    // custom-lowered.
118     setOperationAction(ISD::BSWAP, GRLenVT, Expand);
119     setOperationAction(ISD::CTTZ, GRLenVT, Expand);
120     setOperationAction(ISD::CTLZ, GRLenVT, Expand);
121     setOperationAction(ISD::ROTR, GRLenVT, Expand);
122     setOperationAction(ISD::SELECT, GRLenVT, Custom);
123     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
124     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
125   }
126 
127   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
128   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
129   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
130   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
131   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
132 
133   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
134   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
135 
136   // Set operations for LA64 only.
137 
138   if (Subtarget.is64Bit()) {
139     setOperationAction(ISD::ADD, MVT::i32, Custom);
140     setOperationAction(ISD::SUB, MVT::i32, Custom);
141     setOperationAction(ISD::SHL, MVT::i32, Custom);
142     setOperationAction(ISD::SRA, MVT::i32, Custom);
143     setOperationAction(ISD::SRL, MVT::i32, Custom);
144     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
145     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
146     setOperationAction(ISD::ROTR, MVT::i32, Custom);
147     setOperationAction(ISD::ROTL, MVT::i32, Custom);
148     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
149     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
150     setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
151     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
152     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
153     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
154     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
155     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
156 
157     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
158     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
159     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
160                        Custom);
161     setOperationAction(ISD::LROUND, MVT::i32, Custom);
162   }
163 
164   // Set operations for LA32 only.
165 
166   if (!Subtarget.is64Bit()) {
167     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
168     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
169     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
170     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
171     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
172     if (Subtarget.hasBasicD())
173       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
174   }
175 
176   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
177 
  // Floating-point condition codes marked Expand for f32/f64 below.
178   static const ISD::CondCode FPCCToExpand[] = {
179       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
180       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
181 
182   // Set operations for 'F' feature.
183 
184   if (Subtarget.hasBasicF()) {
185     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
186     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
187     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
188     setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
189     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
190 
191     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
192     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
193     setOperationAction(ISD::FMA, MVT::f32, Legal);
194     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
195     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
196     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
197     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
198     setOperationAction(ISD::FCANONICALIZE, MVT::f32, Legal);
199     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
200     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
201     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
202     setOperationAction(ISD::FSIN, MVT::f32, Expand);
203     setOperationAction(ISD::FCOS, MVT::f32, Expand);
204     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
205     setOperationAction(ISD::FPOW, MVT::f32, Expand);
206     setOperationAction(ISD::FREM, MVT::f32, Expand);
    // Half/bfloat conversions: LibCall under the soft-float ABI, Custom
    // otherwise (BF16_TO_FP is Custom in both cases).
207     setOperationAction(ISD::FP16_TO_FP, MVT::f32,
208                        Subtarget.isSoftFPABI() ? LibCall : Custom);
209     setOperationAction(ISD::FP_TO_FP16, MVT::f32,
210                        Subtarget.isSoftFPABI() ? LibCall : Custom);
211     setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
212     setOperationAction(ISD::FP_TO_BF16, MVT::f32,
213                        Subtarget.isSoftFPABI() ? LibCall : Custom);
214 
215     if (Subtarget.is64Bit())
216       setOperationAction(ISD::FRINT, MVT::f32, Legal);
217 
218     if (!Subtarget.hasBasicD()) {
219       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
220       if (Subtarget.is64Bit()) {
221         setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
222         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
223       }
224     }
225   }
226 
227   // Set operations for 'D' feature.
228 
229   if (Subtarget.hasBasicD()) {
230     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
231     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
232     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
233     setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
234     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
235     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
237 
238     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
239     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
240     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
241     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
242     setOperationAction(ISD::FMA, MVT::f64, Legal);
243     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
244     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
245     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
246     setOperationAction(ISD::FCANONICALIZE, MVT::f64, Legal);
247     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
248     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
249     setOperationAction(ISD::FSIN, MVT::f64, Expand);
250     setOperationAction(ISD::FCOS, MVT::f64, Expand);
251     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
252     setOperationAction(ISD::FPOW, MVT::f64, Expand);
253     setOperationAction(ISD::FREM, MVT::f64, Expand);
254     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
255     setOperationAction(ISD::FP_TO_FP16, MVT::f64,
256                        Subtarget.isSoftFPABI() ? LibCall : Custom);
257     setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
258     setOperationAction(ISD::FP_TO_BF16, MVT::f64,
259                        Subtarget.isSoftFPABI() ? LibCall : Custom);
260 
261     if (Subtarget.is64Bit())
262       setOperationAction(ISD::FRINT, MVT::f64, Legal);
263   }
264 
265   // Set operations for 'LSX' feature.
266 
267   if (Subtarget.hasExtLSX()) {
268     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
269       // Expand all truncating stores and extending loads.
270       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
271         setTruncStoreAction(VT, InnerVT, Expand);
272         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
273         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
274         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
275       }
276       // By default everything must be expanded. Then we will selectively turn
277       // on ones that can be effectively codegen'd.
278       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
279         setOperationAction(Op, VT, Expand);
280     }
281 
    // Operations common to every LSX vector type (integer and floating point).
282     for (MVT VT : LSXVTs) {
283       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
284       setOperationAction(ISD::BITCAST, VT, Legal);
285       setOperationAction(ISD::UNDEF, VT, Legal);
286 
287       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
288       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
289       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
290 
291       setOperationAction(ISD::SETCC, VT, Legal);
292       setOperationAction(ISD::VSELECT, VT, Legal);
293       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
294       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
295     }
    // Integer LSX vector types.
296     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
297       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
298       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
299                          Legal);
300       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
301                          VT, Legal);
302       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
303       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
304       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
305       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
306       setCondCodeAction(
307           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
308           Expand);
309       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
310       setOperationAction(ISD::ABDS, VT, Legal);
311       setOperationAction(ISD::ABDU, VT, Legal);
312     }
313     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
314       setOperationAction(ISD::BITREVERSE, VT, Custom);
315     for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
316       setOperationAction(ISD::BSWAP, VT, Legal);
317     for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
318       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
319       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
320     }
    // Floating-point LSX vector types.
321     for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
322       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
323       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
324       setOperationAction(ISD::FMA, VT, Legal);
325       setOperationAction(ISD::FSQRT, VT, Legal);
326       setOperationAction(ISD::FNEG, VT, Legal);
327       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
328                          ISD::SETUGE, ISD::SETUGT},
329                         VT, Expand);
330       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
331     }
    // With LSX, scalar CTPOP becomes Legal; this overrides the Expand action
    // set for GRLenVT earlier in this constructor. Scalar f32/f64 rounding
    // operations are also marked Legal here.
332     setOperationAction(ISD::CTPOP, GRLenVT, Legal);
333     setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
334     setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
335     setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
336     setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
337 
338     for (MVT VT :
339          {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
340           MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
341       setOperationAction(ISD::TRUNCATE, VT, Custom);
342     }
343   }
344 
345   // Set operations for 'LASX' feature.
346 
347   if (Subtarget.hasExtLASX()) {
    // Operations common to every LASX vector type (integer and floating
    // point).
348     for (MVT VT : LASXVTs) {
349       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
350       setOperationAction(ISD::BITCAST, VT, Legal);
351       setOperationAction(ISD::UNDEF, VT, Legal);
352 
353       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
354       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
355       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
356       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
357       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
358 
359       setOperationAction(ISD::SETCC, VT, Legal);
360       setOperationAction(ISD::VSELECT, VT, Legal);
361       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
362     }
    // Integer LASX vector types.
363     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
364       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
365       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
366                          Legal);
367       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
368                          VT, Legal);
369       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
370       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
371       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
372       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
373       setCondCodeAction(
374           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
375           Expand);
376       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
377       setOperationAction(ISD::ABDS, VT, Legal);
378       setOperationAction(ISD::ABDU, VT, Legal);
379     }
380     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
381       setOperationAction(ISD::BITREVERSE, VT, Custom);
382     for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
383       setOperationAction(ISD::BSWAP, VT, Legal);
    // NOTE(review): v4i32 is a 128-bit type yet is listed in this LASX-only
    // loop; presumably this covers conversions between v4i32 and v4f64 --
    // confirm against the instruction patterns.
384     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
385       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
386       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
387     }
    // Floating-point LASX vector types.
388     for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
389       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
390       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
391       setOperationAction(ISD::FMA, VT, Legal);
392       setOperationAction(ISD::FSQRT, VT, Legal);
393       setOperationAction(ISD::FNEG, VT, Legal);
394       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
395                          ISD::SETUGE, ISD::SETUGT},
396                         VT, Expand);
397       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
398     }
399   }
400 
401   // Set DAG combine for LA32 and LA64.
402 
403   setTargetDAGCombine(ISD::AND);
404   setTargetDAGCombine(ISD::OR);
405   setTargetDAGCombine(ISD::SRL);
406   setTargetDAGCombine(ISD::SETCC);
407 
408   // Set DAG combine for 'LSX' feature.
409 
410   if (Subtarget.hasExtLSX()) {
411     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
412     setTargetDAGCombine(ISD::BITCAST);
413   }
414 
415   // Compute derived properties from the register classes.
416   computeRegisterProperties(Subtarget.getRegisterInfo());
417 
418   setStackPointerRegisterToSaveRestore(LoongArch::R3);
419 
420   setBooleanContents(ZeroOrOneBooleanContent);
421   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
422 
423   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
424 
  // Default minimum cmpxchg width; lowered to 8 further down when LAMCAS is
  // available.
425   setMinCmpXchgSizeInBits(32);
426 
427   // Function alignments.
428   setMinFunctionAlignment(Align(4));
429   // Set preferred alignments.
430   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
431   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
432   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
433 
434   // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
435   if (Subtarget.hasLAMCAS())
436     setMinCmpXchgSizeInBits(8);
437 
  // With SCQ, raise the supported atomic width to 128 bits (overriding the
  // GRLen-based limit set above) and custom-lower i128 compare-and-swap.
438   if (Subtarget.hasSCQ()) {
439     setMaxAtomicSizeInBitsSupported(128);
440     setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
441   }
442 }
443 
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const444 bool LoongArchTargetLowering::isOffsetFoldingLegal(
445     const GlobalAddressSDNode *GA) const {
446   // In order to maximise the opportunity for common subexpression elimination,
447   // keep a separate ADD node for the global address offset instead of folding
448   // it in the global address node. Later peephole optimisations may choose to
449   // fold it back in when profitable.
450   return false;
451 }
452 
LowerOperation(SDValue Op,SelectionDAG & DAG) const453 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
454                                                 SelectionDAG &DAG) const {
455   switch (Op.getOpcode()) {
456   case ISD::ATOMIC_FENCE:
457     return lowerATOMIC_FENCE(Op, DAG);
458   case ISD::EH_DWARF_CFA:
459     return lowerEH_DWARF_CFA(Op, DAG);
460   case ISD::GlobalAddress:
461     return lowerGlobalAddress(Op, DAG);
462   case ISD::GlobalTLSAddress:
463     return lowerGlobalTLSAddress(Op, DAG);
464   case ISD::INTRINSIC_WO_CHAIN:
465     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
466   case ISD::INTRINSIC_W_CHAIN:
467     return lowerINTRINSIC_W_CHAIN(Op, DAG);
468   case ISD::INTRINSIC_VOID:
469     return lowerINTRINSIC_VOID(Op, DAG);
470   case ISD::BlockAddress:
471     return lowerBlockAddress(Op, DAG);
472   case ISD::JumpTable:
473     return lowerJumpTable(Op, DAG);
474   case ISD::SHL_PARTS:
475     return lowerShiftLeftParts(Op, DAG);
476   case ISD::SRA_PARTS:
477     return lowerShiftRightParts(Op, DAG, true);
478   case ISD::SRL_PARTS:
479     return lowerShiftRightParts(Op, DAG, false);
480   case ISD::ConstantPool:
481     return lowerConstantPool(Op, DAG);
482   case ISD::FP_TO_SINT:
483     return lowerFP_TO_SINT(Op, DAG);
484   case ISD::BITCAST:
485     return lowerBITCAST(Op, DAG);
486   case ISD::UINT_TO_FP:
487     return lowerUINT_TO_FP(Op, DAG);
488   case ISD::SINT_TO_FP:
489     return lowerSINT_TO_FP(Op, DAG);
490   case ISD::VASTART:
491     return lowerVASTART(Op, DAG);
492   case ISD::FRAMEADDR:
493     return lowerFRAMEADDR(Op, DAG);
494   case ISD::RETURNADDR:
495     return lowerRETURNADDR(Op, DAG);
496   case ISD::WRITE_REGISTER:
497     return lowerWRITE_REGISTER(Op, DAG);
498   case ISD::INSERT_VECTOR_ELT:
499     return lowerINSERT_VECTOR_ELT(Op, DAG);
500   case ISD::EXTRACT_VECTOR_ELT:
501     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
502   case ISD::BUILD_VECTOR:
503     return lowerBUILD_VECTOR(Op, DAG);
504   case ISD::CONCAT_VECTORS:
505     return lowerCONCAT_VECTORS(Op, DAG);
506   case ISD::VECTOR_SHUFFLE:
507     return lowerVECTOR_SHUFFLE(Op, DAG);
508   case ISD::BITREVERSE:
509     return lowerBITREVERSE(Op, DAG);
510   case ISD::SCALAR_TO_VECTOR:
511     return lowerSCALAR_TO_VECTOR(Op, DAG);
512   case ISD::PREFETCH:
513     return lowerPREFETCH(Op, DAG);
514   case ISD::SELECT:
515     return lowerSELECT(Op, DAG);
516   case ISD::FP_TO_FP16:
517     return lowerFP_TO_FP16(Op, DAG);
518   case ISD::FP16_TO_FP:
519     return lowerFP16_TO_FP(Op, DAG);
520   case ISD::FP_TO_BF16:
521     return lowerFP_TO_BF16(Op, DAG);
522   case ISD::BF16_TO_FP:
523     return lowerBF16_TO_FP(Op, DAG);
524   }
525   return SDValue();
526 }
527 
lowerPREFETCH(SDValue Op,SelectionDAG & DAG) const528 SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
529                                                SelectionDAG &DAG) const {
530   unsigned IsData = Op.getConstantOperandVal(4);
531 
532   // We don't support non-data prefetch.
533   // Just preserve the chain.
534   if (!IsData)
535     return Op.getOperand(0);
536 
537   return Op;
538 }
539 
540 // Return true if Val is equal to (setcc LHS, RHS, CC).
541 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
542 // Otherwise, return std::nullopt.
matchSetCC(SDValue LHS,SDValue RHS,ISD::CondCode CC,SDValue Val)543 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
544                                       ISD::CondCode CC, SDValue Val) {
545   assert(Val->getOpcode() == ISD::SETCC);
546   SDValue LHS2 = Val.getOperand(0);
547   SDValue RHS2 = Val.getOperand(1);
548   ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
549 
550   if (LHS == LHS2 && RHS == RHS2) {
551     if (CC == CC2)
552       return true;
553     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
554       return false;
555   } else if (LHS == RHS2 && RHS == LHS2) {
556     CC2 = ISD::getSetCCSwappedOperands(CC2);
557     if (CC == CC2)
558       return true;
559     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
560       return false;
561   }
562 
563   return std::nullopt;
564 }
565 
combineSelectToBinOp(SDNode * N,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)566 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
567                                     const LoongArchSubtarget &Subtarget) {
568   SDValue CondV = N->getOperand(0);
569   SDValue TrueV = N->getOperand(1);
570   SDValue FalseV = N->getOperand(2);
571   MVT VT = N->getSimpleValueType(0);
572   SDLoc DL(N);
573 
574   // (select c, -1, y) -> -c | y
575   if (isAllOnesConstant(TrueV)) {
576     SDValue Neg = DAG.getNegative(CondV, DL, VT);
577     return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
578   }
579   // (select c, y, -1) -> (c-1) | y
580   if (isAllOnesConstant(FalseV)) {
581     SDValue Neg =
582         DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
583     return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
584   }
585 
586   // (select c, 0, y) -> (c-1) & y
587   if (isNullConstant(TrueV)) {
588     SDValue Neg =
589         DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
590     return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
591   }
592   // (select c, y, 0) -> -c & y
593   if (isNullConstant(FalseV)) {
594     SDValue Neg = DAG.getNegative(CondV, DL, VT);
595     return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
596   }
597 
598   // select c, ~x, x --> xor -c, x
599   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
600     const APInt &TrueVal = TrueV->getAsAPIntVal();
601     const APInt &FalseVal = FalseV->getAsAPIntVal();
602     if (~TrueVal == FalseVal) {
603       SDValue Neg = DAG.getNegative(CondV, DL, VT);
604       return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
605     }
606   }
607 
608   // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
609   // when both truev and falsev are also setcc.
610   if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
611       FalseV.getOpcode() == ISD::SETCC) {
612     SDValue LHS = CondV.getOperand(0);
613     SDValue RHS = CondV.getOperand(1);
614     ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
615 
616     // (select x, x, y) -> x | y
617     // (select !x, x, y) -> x & y
618     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
619       return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
620                          DAG.getFreeze(FalseV));
621     }
622     // (select x, y, x) -> x & y
623     // (select !x, y, x) -> x | y
624     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
625       return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
626                          DAG.getFreeze(TrueV), FalseV);
627     }
628   }
629 
630   return SDValue();
631 }
632 
633 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
634 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
635 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
636 // being `0` or `-1`. In such cases we can replace `select` with `and`.
637 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
638 // than `c0`?
639 static SDValue
foldBinOpIntoSelectIfProfitable(SDNode * BO,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)640 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
641                                 const LoongArchSubtarget &Subtarget) {
642   unsigned SelOpNo = 0;
643   SDValue Sel = BO->getOperand(0);
644   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
645     SelOpNo = 1;
646     Sel = BO->getOperand(1);
647   }
648 
649   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
650     return SDValue();
651 
652   unsigned ConstSelOpNo = 1;
653   unsigned OtherSelOpNo = 2;
654   if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
655     ConstSelOpNo = 2;
656     OtherSelOpNo = 1;
657   }
658   SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
659   ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
660   if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
661     return SDValue();
662 
663   SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
664   ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
665   if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
666     return SDValue();
667 
668   SDLoc DL(Sel);
669   EVT VT = BO->getValueType(0);
670 
671   SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
672   if (SelOpNo == 1)
673     std::swap(NewConstOps[0], NewConstOps[1]);
674 
675   SDValue NewConstOp =
676       DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
677   if (!NewConstOp)
678     return SDValue();
679 
680   const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
681   if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
682     return SDValue();
683 
684   SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
685   SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
686   if (SelOpNo == 1)
687     std::swap(NewNonConstOps[0], NewNonConstOps[1]);
688   SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
689 
690   SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
691   SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
692   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
693 }
694 
695 // Changes the condition code and swaps operands if necessary, so the SetCC
696 // operation matches one of the comparisons supported directly by branches
697 // in the LoongArch ISA. May adjust compares to favor compare with 0 over
698 // compare with 1/-1.
translateSetCCForBranch(const SDLoc & DL,SDValue & LHS,SDValue & RHS,ISD::CondCode & CC,SelectionDAG & DAG)699 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
700                                     ISD::CondCode &CC, SelectionDAG &DAG) {
701   // If this is a single bit test that can't be handled by ANDI, shift the
702   // bit to be tested to the MSB and perform a signed compare with 0.
703   if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
704       LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
705       isa<ConstantSDNode>(LHS.getOperand(1))) {
706     uint64_t Mask = LHS.getConstantOperandVal(1);
707     if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
708       unsigned ShAmt = 0;
709       if (isPowerOf2_64(Mask)) {
710         CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
711         ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
712       } else {
713         ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
714       }
715 
716       LHS = LHS.getOperand(0);
717       if (ShAmt != 0)
718         LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
719                           DAG.getConstant(ShAmt, DL, LHS.getValueType()));
720       return;
721     }
722   }
723 
724   if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
725     int64_t C = RHSC->getSExtValue();
726     switch (CC) {
727     default:
728       break;
729     case ISD::SETGT:
730       // Convert X > -1 to X >= 0.
731       if (C == -1) {
732         RHS = DAG.getConstant(0, DL, RHS.getValueType());
733         CC = ISD::SETGE;
734         return;
735       }
736       break;
737     case ISD::SETLT:
738       // Convert X < 1 to 0 >= X.
739       if (C == 1) {
740         RHS = LHS;
741         LHS = DAG.getConstant(0, DL, RHS.getValueType());
742         CC = ISD::SETGE;
743         return;
744       }
745       break;
746     }
747   }
748 
749   switch (CC) {
750   default:
751     break;
752   case ISD::SETGT:
753   case ISD::SETLE:
754   case ISD::SETUGT:
755   case ISD::SETULE:
756     CC = ISD::getSetCCSwappedOperands(CC);
757     std::swap(LHS, RHS);
758     break;
759   }
760 }
761 
lowerSELECT(SDValue Op,SelectionDAG & DAG) const762 SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
763                                              SelectionDAG &DAG) const {
764   SDValue CondV = Op.getOperand(0);
765   SDValue TrueV = Op.getOperand(1);
766   SDValue FalseV = Op.getOperand(2);
767   SDLoc DL(Op);
768   MVT VT = Op.getSimpleValueType();
769   MVT GRLenVT = Subtarget.getGRLenVT();
770 
771   if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
772     return V;
773 
774   if (Op.hasOneUse()) {
775     unsigned UseOpc = Op->user_begin()->getOpcode();
776     if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
777       SDNode *BinOp = *Op->user_begin();
778       if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
779                                                            DAG, Subtarget)) {
780         DAG.ReplaceAllUsesWith(BinOp, &NewSel);
781         // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
782         // may return a constant node and cause crash in lowerSELECT.
783         if (NewSel.getOpcode() == ISD::SELECT)
784           return lowerSELECT(NewSel, DAG);
785         return NewSel;
786       }
787     }
788   }
789 
790   // If the condition is not an integer SETCC which operates on GRLenVT, we need
791   // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
792   // (select condv, truev, falsev)
793   // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
794   if (CondV.getOpcode() != ISD::SETCC ||
795       CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
796     SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
797     SDValue SetNE = DAG.getCondCode(ISD::SETNE);
798 
799     SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
800 
801     return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
802   }
803 
804   // If the CondV is the output of a SETCC node which operates on GRLenVT
805   // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
806   // to take advantage of the integer compare+branch instructions. i.e.: (select
807   // (setcc lhs, rhs, cc), truev, falsev)
808   // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
809   SDValue LHS = CondV.getOperand(0);
810   SDValue RHS = CondV.getOperand(1);
811   ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
812 
813   // Special case for a select of 2 constants that have a difference of 1.
814   // Normally this is done by DAGCombine, but if the select is introduced by
815   // type legalization or op legalization, we miss it. Restricting to SETLT
816   // case for now because that is what signed saturating add/sub need.
817   // FIXME: We don't need the condition to be SETLT or even a SETCC,
818   // but we would probably want to swap the true/false values if the condition
819   // is SETGE/SETLE to avoid an XORI.
820   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
821       CCVal == ISD::SETLT) {
822     const APInt &TrueVal = TrueV->getAsAPIntVal();
823     const APInt &FalseVal = FalseV->getAsAPIntVal();
824     if (TrueVal - 1 == FalseVal)
825       return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
826     if (TrueVal + 1 == FalseVal)
827       return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
828   }
829 
830   translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
831   // 1 < x ? x : 1 -> 0 < x ? x : 1
832   if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
833       RHS == TrueV && LHS == FalseV) {
834     LHS = DAG.getConstant(0, DL, VT);
835     // 0 <u x is the same as x != 0.
836     if (CCVal == ISD::SETULT) {
837       std::swap(LHS, RHS);
838       CCVal = ISD::SETNE;
839     }
840   }
841 
842   // x <s -1 ? x : -1 -> x <s 0 ? x : -1
843   if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
844       RHS == FalseV) {
845     RHS = DAG.getConstant(0, DL, VT);
846   }
847 
848   SDValue TargetCC = DAG.getCondCode(CCVal);
849 
850   if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
851     // (select (setcc lhs, rhs, CC), constant, falsev)
852     // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
853     std::swap(TrueV, FalseV);
854     TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
855   }
856 
857   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
858   return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
859 }
860 
861 SDValue
lowerSCALAR_TO_VECTOR(SDValue Op,SelectionDAG & DAG) const862 LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
863                                                SelectionDAG &DAG) const {
864   SDLoc DL(Op);
865   MVT OpVT = Op.getSimpleValueType();
866 
867   SDValue Vector = DAG.getUNDEF(OpVT);
868   SDValue Val = Op.getOperand(0);
869   SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
870 
871   return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
872 }
873 
lowerBITREVERSE(SDValue Op,SelectionDAG & DAG) const874 SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
875                                                  SelectionDAG &DAG) const {
876   EVT ResTy = Op->getValueType(0);
877   SDValue Src = Op->getOperand(0);
878   SDLoc DL(Op);
879 
880   EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
881   unsigned int OrigEltNum = ResTy.getVectorNumElements();
882   unsigned int NewEltNum = NewVT.getVectorNumElements();
883 
884   SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
885 
886   SmallVector<SDValue, 8> Ops;
887   for (unsigned int i = 0; i < NewEltNum; i++) {
888     SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
889                              DAG.getConstant(i, DL, MVT::i64));
890     unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
891                          ? (unsigned)LoongArchISD::BITREV_8B
892                          : (unsigned)ISD::BITREVERSE;
893     Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
894   }
895   SDValue Res =
896       DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
897 
898   switch (ResTy.getSimpleVT().SimpleTy) {
899   default:
900     return SDValue();
901   case MVT::v16i8:
902   case MVT::v32i8:
903     return Res;
904   case MVT::v8i16:
905   case MVT::v16i16:
906   case MVT::v4i32:
907   case MVT::v8i32: {
908     SmallVector<int, 32> Mask;
909     for (unsigned int i = 0; i < NewEltNum; i++)
910       for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
911         Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
912     return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
913   }
914   }
915 }
916 
917 // Widen element type to get a new mask value (if possible).
918 // For example:
919 //  shufflevector <4 x i32> %a, <4 x i32> %b,
920 //                <4 x i32> <i32 6, i32 7, i32 2, i32 3>
921 // is equivalent to:
922 //  shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
923 // can be lowered to:
924 //  VPACKOD_D vr0, vr0, vr1
widenShuffleMask(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)925 static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
926                                 SDValue V1, SDValue V2, SelectionDAG &DAG) {
927   unsigned EltBits = VT.getScalarSizeInBits();
928 
929   if (EltBits > 32 || EltBits == 1)
930     return SDValue();
931 
932   SmallVector<int, 8> NewMask;
933   if (widenShuffleMaskElts(Mask, NewMask)) {
934     MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
935                                         : MVT::getIntegerVT(EltBits * 2);
936     MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
937     if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
938       SDValue NewV1 = DAG.getBitcast(NewVT, V1);
939       SDValue NewV2 = DAG.getBitcast(NewVT, V2);
940       return DAG.getBitcast(
941           VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
942     }
943   }
944 
945   return SDValue();
946 }
947 
948 /// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
949 /// instruction.
950 // The funciton matches elements from one of the input vector shuffled to the
951 // left or right with zeroable elements 'shifted in'. It handles both the
952 // strictly bit-wise element shifts and the byte shfit across an entire 128-bit
953 // lane.
954 // Mostly copied from X86.
matchShuffleAsShift(MVT & ShiftVT,unsigned & Opcode,unsigned ScalarSizeInBits,ArrayRef<int> Mask,int MaskOffset,const APInt & Zeroable)955 static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
956                                unsigned ScalarSizeInBits, ArrayRef<int> Mask,
957                                int MaskOffset, const APInt &Zeroable) {
958   int Size = Mask.size();
959   unsigned SizeInBits = Size * ScalarSizeInBits;
960 
961   auto CheckZeros = [&](int Shift, int Scale, bool Left) {
962     for (int i = 0; i < Size; i += Scale)
963       for (int j = 0; j < Shift; ++j)
964         if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
965           return false;
966 
967     return true;
968   };
969 
970   auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
971                                         int Step = 1) {
972     for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
973       if (!(Mask[i] == -1 || Mask[i] == Low))
974         return false;
975     return true;
976   };
977 
978   auto MatchShift = [&](int Shift, int Scale, bool Left) {
979     for (int i = 0; i != Size; i += Scale) {
980       unsigned Pos = Left ? i + Shift : i;
981       unsigned Low = Left ? i : i + Shift;
982       unsigned Len = Scale - Shift;
983       if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
984         return -1;
985     }
986 
987     int ShiftEltBits = ScalarSizeInBits * Scale;
988     bool ByteShift = ShiftEltBits > 64;
989     Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
990                   : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
991     int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
992 
993     // Normalize the scale for byte shifts to still produce an i64 element
994     // type.
995     Scale = ByteShift ? Scale / 2 : Scale;
996 
997     // We need to round trip through the appropriate type for the shift.
998     MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
999     ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1000                         : MVT::getVectorVT(ShiftSVT, Size / Scale);
1001     return (int)ShiftAmt;
1002   };
1003 
1004   unsigned MaxWidth = 128;
1005   for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1006     for (int Shift = 1; Shift != Scale; ++Shift)
1007       for (bool Left : {true, false})
1008         if (CheckZeros(Shift, Scale, Left)) {
1009           int ShiftAmt = MatchShift(Shift, Scale, Left);
1010           if (0 < ShiftAmt)
1011             return ShiftAmt;
1012         }
1013 
1014   // no match
1015   return -1;
1016 }
1017 
1018 /// Lower VECTOR_SHUFFLE as shift (if possible).
1019 ///
1020 /// For example:
1021 ///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1022 ///                      <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1023 /// is lowered to:
1024 ///     (VBSLL_V $v0, $v0, 4)
1025 ///
1026 ///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1027 ///                      <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1028 /// is lowered to:
1029 ///     (VSLLI_D $v0, $v0, 32)
lowerVECTOR_SHUFFLEAsShift(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG,const APInt & Zeroable)1030 static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1031                                           MVT VT, SDValue V1, SDValue V2,
1032                                           SelectionDAG &DAG,
1033                                           const APInt &Zeroable) {
1034   int Size = Mask.size();
1035   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1036 
1037   MVT ShiftVT;
1038   SDValue V = V1;
1039   unsigned Opcode;
1040 
1041   // Try to match shuffle against V1 shift.
1042   int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1043                                      Mask, 0, Zeroable);
1044 
1045   // If V1 failed, try to match shuffle against V2 shift.
1046   if (ShiftAmt < 0) {
1047     ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1048                                    Mask, Size, Zeroable);
1049     V = V2;
1050   }
1051 
1052   if (ShiftAmt < 0)
1053     return SDValue();
1054 
1055   assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1056          "Illegal integer vector type");
1057   V = DAG.getBitcast(ShiftVT, V);
1058   V = DAG.getNode(Opcode, DL, ShiftVT, V,
1059                   DAG.getConstant(ShiftAmt, DL, MVT::i64));
1060   return DAG.getBitcast(VT, V);
1061 }
1062 
1063 /// Determine whether a range fits a regular pattern of values.
1064 /// This function accounts for the possibility of jumping over the End iterator.
1065 template <typename ValType>
1066 static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,unsigned CheckStride,typename SmallVectorImpl<ValType>::const_iterator End,ValType ExpectedIndex,unsigned ExpectedIndexStride)1067 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1068                    unsigned CheckStride,
1069                    typename SmallVectorImpl<ValType>::const_iterator End,
1070                    ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1071   auto &I = Begin;
1072 
1073   while (I != End) {
1074     if (*I != -1 && *I != ExpectedIndex)
1075       return false;
1076     ExpectedIndex += ExpectedIndexStride;
1077 
1078     // Incrementing past End is undefined behaviour so we must increment one
1079     // step at a time and check for End at each step.
1080     for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1081       ; // Empty loop body.
1082   }
1083   return true;
1084 }
1085 
1086 /// Compute whether each element of a shuffle is zeroable.
1087 ///
1088 /// A "zeroable" vector shuffle element is one which can be lowered to zero.
computeZeroableShuffleElements(ArrayRef<int> Mask,SDValue V1,SDValue V2,APInt & KnownUndef,APInt & KnownZero)1089 static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1090                                            SDValue V2, APInt &KnownUndef,
1091                                            APInt &KnownZero) {
1092   int Size = Mask.size();
1093   KnownUndef = KnownZero = APInt::getZero(Size);
1094 
1095   V1 = peekThroughBitcasts(V1);
1096   V2 = peekThroughBitcasts(V2);
1097 
1098   bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1099   bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1100 
1101   int VectorSizeInBits = V1.getValueSizeInBits();
1102   int ScalarSizeInBits = VectorSizeInBits / Size;
1103   assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1104   (void)ScalarSizeInBits;
1105 
1106   for (int i = 0; i < Size; ++i) {
1107     int M = Mask[i];
1108     if (M < 0) {
1109       KnownUndef.setBit(i);
1110       continue;
1111     }
1112     if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1113       KnownZero.setBit(i);
1114       continue;
1115     }
1116   }
1117 }
1118 
1119 /// Test whether a shuffle mask is equivalent within each sub-lane.
1120 ///
1121 /// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1122 /// non-trivial to compute in the face of undef lanes. The representation is
1123 /// suitable for use with existing 128-bit shuffles as entries from the second
1124 /// vector have been remapped to [LaneSize, 2*LaneSize).
isRepeatedShuffleMask(unsigned LaneSizeInBits,MVT VT,ArrayRef<int> Mask,SmallVectorImpl<int> & RepeatedMask)1125 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1126                                   ArrayRef<int> Mask,
1127                                   SmallVectorImpl<int> &RepeatedMask) {
1128   auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1129   RepeatedMask.assign(LaneSize, -1);
1130   int Size = Mask.size();
1131   for (int i = 0; i < Size; ++i) {
1132     assert(Mask[i] == -1 || Mask[i] >= 0);
1133     if (Mask[i] < 0)
1134       continue;
1135     if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1136       // This entry crosses lanes, so there is no way to model this shuffle.
1137       return false;
1138 
1139     // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1140     // Adjust second vector indices to start at LaneSize instead of Size.
1141     int LocalM =
1142         Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1143     if (RepeatedMask[i % LaneSize] < 0)
1144       // This is the first non-undef entry in this slot of a 128-bit lane.
1145       RepeatedMask[i % LaneSize] = LocalM;
1146     else if (RepeatedMask[i % LaneSize] != LocalM)
1147       // Found a mismatch with the repeated mask.
1148       return false;
1149   }
1150   return true;
1151 }
1152 
1153 /// Attempts to match vector shuffle as byte rotation.
matchShuffleAsByteRotate(MVT VT,SDValue & V1,SDValue & V2,ArrayRef<int> Mask)1154 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1155                                     ArrayRef<int> Mask) {
1156 
1157   SDValue Lo, Hi;
1158   SmallVector<int, 16> RepeatedMask;
1159 
1160   if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1161     return -1;
1162 
1163   int NumElts = RepeatedMask.size();
1164   int Rotation = 0;
1165   int Scale = 16 / NumElts;
1166 
1167   for (int i = 0; i < NumElts; ++i) {
1168     int M = RepeatedMask[i];
1169     assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1170            "Unexpected mask index.");
1171     if (M < 0)
1172       continue;
1173 
1174     // Determine where a rotated vector would have started.
1175     int StartIdx = i - (M % NumElts);
1176     if (StartIdx == 0)
1177       return -1;
1178 
1179     // If we found the tail of a vector the rotation must be the missing
1180     // front. If we found the head of a vector, it must be how much of the
1181     // head.
1182     int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1183 
1184     if (Rotation == 0)
1185       Rotation = CandidateRotation;
1186     else if (Rotation != CandidateRotation)
1187       return -1;
1188 
1189     // Compute which value this mask is pointing at.
1190     SDValue MaskV = M < NumElts ? V1 : V2;
1191 
1192     // Compute which of the two target values this index should be assigned
1193     // to. This reflects whether the high elements are remaining or the low
1194     // elements are remaining.
1195     SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1196 
1197     // Either set up this value if we've not encountered it before, or check
1198     // that it remains consistent.
1199     if (!TargetV)
1200       TargetV = MaskV;
1201     else if (TargetV != MaskV)
1202       return -1;
1203   }
1204 
1205   // Check that we successfully analyzed the mask, and normalize the results.
1206   assert(Rotation != 0 && "Failed to locate a viable rotation!");
1207   assert((Lo || Hi) && "Failed to find a rotated input vector!");
1208   if (!Lo)
1209     Lo = Hi;
1210   else if (!Hi)
1211     Hi = Lo;
1212 
1213   V1 = Lo;
1214   V2 = Hi;
1215 
1216   return Rotation * Scale;
1217 }
1218 
1219 /// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1220 ///
1221 /// For example:
1222 ///   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1223 ///                            <2 x i32> <i32 3, i32 0>
1224 /// is lowered to:
1225 ///      (VBSRL_V $v1, $v1, 8)
1226 ///      (VBSLL_V $v0, $v0, 8)
1227 ///      (VOR_V $v0, $V0, $v1)
lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1228 static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
1229                                                ArrayRef<int> Mask, MVT VT,
1230                                                SDValue V1, SDValue V2,
1231                                                SelectionDAG &DAG) {
1232 
1233   SDValue Lo = V1, Hi = V2;
1234   int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1235   if (ByteRotation <= 0)
1236     return SDValue();
1237 
1238   MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1239   Lo = DAG.getBitcast(ByteVT, Lo);
1240   Hi = DAG.getBitcast(ByteVT, Hi);
1241 
1242   int LoByteShift = 16 - ByteRotation;
1243   int HiByteShift = ByteRotation;
1244 
1245   SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1246                                 DAG.getConstant(LoByteShift, DL, MVT::i64));
1247   SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1248                                 DAG.getConstant(HiByteShift, DL, MVT::i64));
1249   return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1250 }
1251 
1252 /// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1253 ///
1254 /// For example:
1255 ///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1256 ///                      <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1257 ///   %3 = bitcast <4 x i32> %2 to <2 x i64>
1258 /// is lowered to:
1259 ///     (VREPLI $v1, 0)
1260 ///     (VILVL $v0, $v1, $v0)
lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG,const APInt & Zeroable)1261 static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1262                                                     ArrayRef<int> Mask, MVT VT,
1263                                                     SDValue V1, SDValue V2,
1264                                                     SelectionDAG &DAG,
1265                                                     const APInt &Zeroable) {
1266   int Bits = VT.getSizeInBits();
1267   int EltBits = VT.getScalarSizeInBits();
1268   int NumElements = VT.getVectorNumElements();
1269 
1270   if (Zeroable.isAllOnes())
1271     return DAG.getConstant(0, DL, VT);
1272 
1273   // Define a helper function to check a particular ext-scale and lower to it if
1274   // valid.
1275   auto Lower = [&](int Scale) -> SDValue {
1276     SDValue InputV;
1277     bool AnyExt = true;
1278     int Offset = 0;
1279     for (int i = 0; i < NumElements; i++) {
1280       int M = Mask[i];
1281       if (M < 0)
1282         continue;
1283       if (i % Scale != 0) {
1284         // Each of the extended elements need to be zeroable.
1285         if (!Zeroable[i])
1286           return SDValue();
1287 
1288         AnyExt = false;
1289         continue;
1290       }
1291 
1292       // Each of the base elements needs to be consecutive indices into the
1293       // same input vector.
1294       SDValue V = M < NumElements ? V1 : V2;
1295       M = M % NumElements;
1296       if (!InputV) {
1297         InputV = V;
1298         Offset = M - (i / Scale);
1299 
1300         // These offset can't be handled
1301         if (Offset % (NumElements / Scale))
1302           return SDValue();
1303       } else if (InputV != V)
1304         return SDValue();
1305 
1306       if (M != (Offset + (i / Scale)))
1307         return SDValue(); // Non-consecutive strided elements.
1308     }
1309 
1310     // If we fail to find an input, we have a zero-shuffle which should always
1311     // have already been handled.
1312     if (!InputV)
1313       return SDValue();
1314 
1315     do {
1316       unsigned VilVLoHi = LoongArchISD::VILVL;
1317       if (Offset >= (NumElements / 2)) {
1318         VilVLoHi = LoongArchISD::VILVH;
1319         Offset -= (NumElements / 2);
1320       }
1321 
1322       MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1323       SDValue Ext =
1324           AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1325       InputV = DAG.getBitcast(InputVT, InputV);
1326       InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1327       Scale /= 2;
1328       EltBits *= 2;
1329       NumElements /= 2;
1330     } while (Scale > 1);
1331     return DAG.getBitcast(VT, InputV);
1332   };
1333 
1334   // Each iteration, try extending the elements half as much, but into twice as
1335   // many elements.
1336   for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1337        NumExtElements *= 2) {
1338     if (SDValue V = Lower(NumElements / NumExtElements))
1339       return V;
1340   }
1341   return SDValue();
1342 }
1343 
1344 /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1345 ///
1346 /// VREPLVEI performs vector broadcast based on an element specified by an
1347 /// integer immediate, with its mask being similar to:
1348 ///   <x, x, x, ...>
1349 /// where x is any valid index.
1350 ///
1351 /// When undef's appear in the mask they are treated as if they were whatever
1352 /// value is necessary in order to fit the above form.
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1353 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
1354                                             MVT VT, SDValue V1, SDValue V2,
1355                                             SelectionDAG &DAG) {
1356   int SplatIndex = -1;
1357   for (const auto &M : Mask) {
1358     if (M != -1) {
1359       SplatIndex = M;
1360       break;
1361     }
1362   }
1363 
1364   if (SplatIndex == -1)
1365     return DAG.getUNDEF(VT);
1366 
1367   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1368   if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1369     APInt Imm(64, SplatIndex);
1370     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1371                        DAG.getConstant(Imm, DL, MVT::i64));
1372   }
1373 
1374   return SDValue();
1375 }
1376 
1377 /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1378 ///
1379 /// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1380 /// elements according to a <4 x i2> constant (encoded as an integer immediate).
1381 ///
1382 /// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1383 ///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1384 /// When undef's appear they are treated as if they were whatever value is
1385 /// necessary in order to fit the above forms.
1386 ///
1387 /// For example:
1388 ///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1389 ///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1390 ///                                 i32 7, i32 6, i32 5, i32 4>
1391 /// is lowered to:
1392 ///   (VSHUF4I_H $v0, $v1, 27)
1393 /// where the 27 comes from:
1394 ///   3 + (2 << 2) + (1 << 4) + (0 << 6)
lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1395 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1396                                            MVT VT, SDValue V1, SDValue V2,
1397                                            SelectionDAG &DAG) {
1398 
1399   unsigned SubVecSize = 4;
1400   if (VT == MVT::v2f64 || VT == MVT::v2i64)
1401     SubVecSize = 2;
1402 
1403   int SubMask[4] = {-1, -1, -1, -1};
1404   for (unsigned i = 0; i < SubVecSize; ++i) {
1405     for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1406       int M = Mask[j];
1407 
1408       // Convert from vector index to 4-element subvector index
1409       // If an index refers to an element outside of the subvector then give up
1410       if (M != -1) {
1411         M -= 4 * (j / SubVecSize);
1412         if (M < 0 || M >= 4)
1413           return SDValue();
1414       }
1415 
1416       // If the mask has an undef, replace it with the current index.
1417       // Note that it might still be undef if the current index is also undef
1418       if (SubMask[i] == -1)
1419         SubMask[i] = M;
1420       // Check that non-undef values are the same as in the mask. If they
1421       // aren't then give up
1422       else if (M != -1 && M != SubMask[i])
1423         return SDValue();
1424     }
1425   }
1426 
1427   // Calculate the immediate. Replace any remaining undefs with zero
1428   APInt Imm(64, 0);
1429   for (int i = SubVecSize - 1; i >= 0; --i) {
1430     int M = SubMask[i];
1431 
1432     if (M == -1)
1433       M = 0;
1434 
1435     Imm <<= 2;
1436     Imm |= M & 0x3;
1437   }
1438 
1439   // Return vshuf4i.d
1440   if (VT == MVT::v2f64 || VT == MVT::v2i64)
1441     return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1442                        DAG.getConstant(Imm, DL, MVT::i64));
1443 
1444   return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1445                      DAG.getConstant(Imm, DL, MVT::i64));
1446 }
1447 
1448 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1449 ///
1450 /// VPACKEV interleaves the even elements from each vector.
1451 ///
1452 /// It is possible to lower into VPACKEV when the mask consists of two of the
1453 /// following forms interleaved:
1454 ///   <0, 2, 4, ...>
1455 ///   <n, n+2, n+4, ...>
1456 /// where n is the number of elements in the vector.
1457 /// For example:
1458 ///   <0, 0, 2, 2, 4, 4, ...>
1459 ///   <0, n, 2, n+2, 4, n+4, ...>
1460 ///
1461 /// When undef's appear in the mask they are treated as if they were whatever
1462 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1463 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1464                                            MVT VT, SDValue V1, SDValue V2,
1465                                            SelectionDAG &DAG) {
1466 
1467   const auto &Begin = Mask.begin();
1468   const auto &End = Mask.end();
1469   SDValue OriV1 = V1, OriV2 = V2;
1470 
1471   if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1472     V1 = OriV1;
1473   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1474     V1 = OriV2;
1475   else
1476     return SDValue();
1477 
1478   if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1479     V2 = OriV1;
1480   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1481     V2 = OriV2;
1482   else
1483     return SDValue();
1484 
1485   return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1486 }
1487 
1488 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1489 ///
1490 /// VPACKOD interleaves the odd elements from each vector.
1491 ///
1492 /// It is possible to lower into VPACKOD when the mask consists of two of the
1493 /// following forms interleaved:
1494 ///   <1, 3, 5, ...>
1495 ///   <n+1, n+3, n+5, ...>
1496 /// where n is the number of elements in the vector.
1497 /// For example:
1498 ///   <1, 1, 3, 3, 5, 5, ...>
1499 ///   <1, n+1, 3, n+3, 5, n+5, ...>
1500 ///
1501 /// When undef's appear in the mask they are treated as if they were whatever
1502 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1503 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1504                                            MVT VT, SDValue V1, SDValue V2,
1505                                            SelectionDAG &DAG) {
1506 
1507   const auto &Begin = Mask.begin();
1508   const auto &End = Mask.end();
1509   SDValue OriV1 = V1, OriV2 = V2;
1510 
1511   if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1512     V1 = OriV1;
1513   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1514     V1 = OriV2;
1515   else
1516     return SDValue();
1517 
1518   if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1519     V2 = OriV1;
1520   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1521     V2 = OriV2;
1522   else
1523     return SDValue();
1524 
1525   return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1526 }
1527 
1528 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
1529 ///
1530 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
1531 /// of each vector.
1532 ///
1533 /// It is possible to lower into VILVH when the mask consists of two of the
1534 /// following forms interleaved:
1535 ///   <x, x+1, x+2, ...>
1536 ///   <n+x, n+x+1, n+x+2, ...>
1537 /// where n is the number of elements in the vector and x is half n.
1538 /// For example:
1539 ///   <x, x, x+1, x+1, x+2, x+2, ...>
1540 ///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1541 ///
1542 /// When undef's appear in the mask they are treated as if they were whatever
1543 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VILVH(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1544 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1545                                          MVT VT, SDValue V1, SDValue V2,
1546                                          SelectionDAG &DAG) {
1547 
1548   const auto &Begin = Mask.begin();
1549   const auto &End = Mask.end();
1550   unsigned HalfSize = Mask.size() / 2;
1551   SDValue OriV1 = V1, OriV2 = V2;
1552 
1553   if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1554     V1 = OriV1;
1555   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1556     V1 = OriV2;
1557   else
1558     return SDValue();
1559 
1560   if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1561     V2 = OriV1;
1562   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1563                                    1))
1564     V2 = OriV2;
1565   else
1566     return SDValue();
1567 
1568   return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1569 }
1570 
1571 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
1572 ///
1573 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1574 /// of each vector.
1575 ///
1576 /// It is possible to lower into VILVL when the mask consists of two of the
1577 /// following forms interleaved:
1578 ///   <0, 1, 2, ...>
1579 ///   <n, n+1, n+2, ...>
1580 /// where n is the number of elements in the vector.
1581 /// For example:
1582 ///   <0, 0, 1, 1, 2, 2, ...>
1583 ///   <0, n, 1, n+1, 2, n+2, ...>
1584 ///
1585 /// When undef's appear in the mask they are treated as if they were whatever
1586 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VILVL(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1587 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1588                                          MVT VT, SDValue V1, SDValue V2,
1589                                          SelectionDAG &DAG) {
1590 
1591   const auto &Begin = Mask.begin();
1592   const auto &End = Mask.end();
1593   SDValue OriV1 = V1, OriV2 = V2;
1594 
1595   if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1596     V1 = OriV1;
1597   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1598     V1 = OriV2;
1599   else
1600     return SDValue();
1601 
1602   if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1603     V2 = OriV1;
1604   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1605     V2 = OriV2;
1606   else
1607     return SDValue();
1608 
1609   return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1610 }
1611 
1612 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1613 ///
1614 /// VPICKEV copies the even elements of each vector into the result vector.
1615 ///
1616 /// It is possible to lower into VPICKEV when the mask consists of two of the
1617 /// following forms concatenated:
1618 ///   <0, 2, 4, ...>
1619 ///   <n, n+2, n+4, ...>
1620 /// where n is the number of elements in the vector.
1621 /// For example:
1622 ///   <0, 2, 4, ..., 0, 2, 4, ...>
1623 ///   <0, 2, 4, ..., n, n+2, n+4, ...>
1624 ///
1625 /// When undef's appear in the mask they are treated as if they were whatever
1626 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1627 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1628                                            MVT VT, SDValue V1, SDValue V2,
1629                                            SelectionDAG &DAG) {
1630 
1631   const auto &Begin = Mask.begin();
1632   const auto &Mid = Mask.begin() + Mask.size() / 2;
1633   const auto &End = Mask.end();
1634   SDValue OriV1 = V1, OriV2 = V2;
1635 
1636   if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1637     V1 = OriV1;
1638   else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1639     V1 = OriV2;
1640   else
1641     return SDValue();
1642 
1643   if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1644     V2 = OriV1;
1645   else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1646     V2 = OriV2;
1647 
1648   else
1649     return SDValue();
1650 
1651   return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1652 }
1653 
1654 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1655 ///
1656 /// VPICKOD copies the odd elements of each vector into the result vector.
1657 ///
1658 /// It is possible to lower into VPICKOD when the mask consists of two of the
1659 /// following forms concatenated:
1660 ///   <1, 3, 5, ...>
1661 ///   <n+1, n+3, n+5, ...>
1662 /// where n is the number of elements in the vector.
1663 /// For example:
1664 ///   <1, 3, 5, ..., 1, 3, 5, ...>
1665 ///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
1666 ///
1667 /// When undef's appear in the mask they are treated as if they were whatever
1668 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1669 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1670                                            MVT VT, SDValue V1, SDValue V2,
1671                                            SelectionDAG &DAG) {
1672 
1673   const auto &Begin = Mask.begin();
1674   const auto &Mid = Mask.begin() + Mask.size() / 2;
1675   const auto &End = Mask.end();
1676   SDValue OriV1 = V1, OriV2 = V2;
1677 
1678   if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1679     V1 = OriV1;
1680   else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1681     V1 = OriV2;
1682   else
1683     return SDValue();
1684 
1685   if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1686     V2 = OriV1;
1687   else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1688     V2 = OriV2;
1689   else
1690     return SDValue();
1691 
1692   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1693 }
1694 
1695 /// Lower VECTOR_SHUFFLE into VSHUF.
1696 ///
1697 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1698 /// adding it as an operand to the resulting VSHUF.
lowerVECTOR_SHUFFLE_VSHUF(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1699 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1700                                          MVT VT, SDValue V1, SDValue V2,
1701                                          SelectionDAG &DAG) {
1702 
1703   SmallVector<SDValue, 16> Ops;
1704   for (auto M : Mask)
1705     Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1706 
1707   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1708   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1709 
1710   // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
1711   // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1712   // VSHF concatenates the vectors in a bitwise fashion:
1713   // <0b00, 0b01> + <0b10, 0b11> ->
1714   // 0b0100       + 0b1110       -> 0b01001110
1715   //                                <0b10, 0b11, 0b00, 0b01>
1716   // We must therefore swap the operands to get the correct result.
1717   return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1718 }
1719 
1720 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1721 ///
1722 /// This routine breaks down the specific type of 128-bit shuffle and
1723 /// dispatches to the lowering routines accordingly.
lower128BitShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1724 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1725                                   SDValue V1, SDValue V2, SelectionDAG &DAG) {
1726   assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1727           VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1728           VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1729          "Vector type is unsupported for lsx!");
1730   assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1731          "Two operands have different types!");
1732   assert(VT.getVectorNumElements() == Mask.size() &&
1733          "Unexpected mask size for shuffle!");
1734   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1735 
1736   APInt KnownUndef, KnownZero;
1737   computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1738   APInt Zeroable = KnownUndef | KnownZero;
1739 
1740   SDValue Result;
1741   // TODO: Add more comparison patterns.
1742   if (V2.isUndef()) {
1743     if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
1744       return Result;
1745     if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1746       return Result;
1747 
1748     // TODO: This comment may be enabled in the future to better match the
1749     // pattern for instruction selection.
1750     /* V2 = V1; */
1751   }
1752 
1753   // It is recommended not to change the pattern comparison order for better
1754   // performance.
1755   if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1756     return Result;
1757   if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1758     return Result;
1759   if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1760     return Result;
1761   if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1762     return Result;
1763   if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1764     return Result;
1765   if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1766     return Result;
1767   if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1768       (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1769     return Result;
1770   if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1771                                                      Zeroable)))
1772     return Result;
1773   if ((Result =
1774            lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
1775     return Result;
1776   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
1777     return Result;
1778   if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1779     return NewShuffle;
1780   if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1781     return Result;
1782   return SDValue();
1783 }
1784 
1785 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1786 ///
1787 /// It is a XVREPLVEI when the mask is:
1788 ///   <x, x, x, ..., x+n, x+n, x+n, ...>
1789 /// where the number of x is equal to n and n is half the length of vector.
1790 ///
1791 /// When undef's appear in the mask they are treated as if they were whatever
1792 /// value is necessary in order to fit the above form.
lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1793 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
1794                                              ArrayRef<int> Mask, MVT VT,
1795                                              SDValue V1, SDValue V2,
1796                                              SelectionDAG &DAG) {
1797   int SplatIndex = -1;
1798   for (const auto &M : Mask) {
1799     if (M != -1) {
1800       SplatIndex = M;
1801       break;
1802     }
1803   }
1804 
1805   if (SplatIndex == -1)
1806     return DAG.getUNDEF(VT);
1807 
1808   const auto &Begin = Mask.begin();
1809   const auto &End = Mask.end();
1810   unsigned HalfSize = Mask.size() / 2;
1811 
1812   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1813   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1814       fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1815                               0)) {
1816     APInt Imm(64, SplatIndex);
1817     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1818                        DAG.getConstant(Imm, DL, MVT::i64));
1819   }
1820 
1821   return SDValue();
1822 }
1823 
1824 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1825 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1826                                             MVT VT, SDValue V1, SDValue V2,
1827                                             SelectionDAG &DAG) {
1828   // When the size is less than or equal to 4, lower cost instructions may be
1829   // used.
1830   if (Mask.size() <= 4)
1831     return SDValue();
1832   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
1833 }
1834 
1835 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1836 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1837                                             MVT VT, SDValue V1, SDValue V2,
1838                                             SelectionDAG &DAG) {
1839   return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1840 }
1841 
1842 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1843 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1844                                             MVT VT, SDValue V1, SDValue V2,
1845                                             SelectionDAG &DAG) {
1846   return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1847 }
1848 
1849 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
lowerVECTOR_SHUFFLE_XVILVH(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1850 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1851                                           MVT VT, SDValue V1, SDValue V2,
1852                                           SelectionDAG &DAG) {
1853 
1854   const auto &Begin = Mask.begin();
1855   const auto &End = Mask.end();
1856   unsigned HalfSize = Mask.size() / 2;
1857   unsigned LeftSize = HalfSize / 2;
1858   SDValue OriV1 = V1, OriV2 = V2;
1859 
1860   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1861                               1) &&
1862       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1863     V1 = OriV1;
1864   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1865                                    Mask.size() + HalfSize - LeftSize, 1) &&
1866            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1867                                    Mask.size() + HalfSize + LeftSize, 1))
1868     V1 = OriV2;
1869   else
1870     return SDValue();
1871 
1872   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1873                               1) &&
1874       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1875                               1))
1876     V2 = OriV1;
1877   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1878                                    Mask.size() + HalfSize - LeftSize, 1) &&
1879            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1880                                    Mask.size() + HalfSize + LeftSize, 1))
1881     V2 = OriV2;
1882   else
1883     return SDValue();
1884 
1885   return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1886 }
1887 
1888 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
lowerVECTOR_SHUFFLE_XVILVL(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1889 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1890                                           MVT VT, SDValue V1, SDValue V2,
1891                                           SelectionDAG &DAG) {
1892 
1893   const auto &Begin = Mask.begin();
1894   const auto &End = Mask.end();
1895   unsigned HalfSize = Mask.size() / 2;
1896   SDValue OriV1 = V1, OriV2 = V2;
1897 
1898   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1899       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1900     V1 = OriV1;
1901   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1902            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1903                                    Mask.size() + HalfSize, 1))
1904     V1 = OriV2;
1905   else
1906     return SDValue();
1907 
1908   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1909       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1910     V2 = OriV1;
1911   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1912                                    1) &&
1913            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1914                                    Mask.size() + HalfSize, 1))
1915     V2 = OriV2;
1916   else
1917     return SDValue();
1918 
1919   return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1920 }
1921 
1922 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1923 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1924                                             MVT VT, SDValue V1, SDValue V2,
1925                                             SelectionDAG &DAG) {
1926 
1927   const auto &Begin = Mask.begin();
1928   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1929   const auto &Mid = Mask.begin() + Mask.size() / 2;
1930   const auto &RightMid = Mask.end() - Mask.size() / 4;
1931   const auto &End = Mask.end();
1932   unsigned HalfSize = Mask.size() / 2;
1933   SDValue OriV1 = V1, OriV2 = V2;
1934 
1935   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1936       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1937     V1 = OriV1;
1938   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1939            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1940     V1 = OriV2;
1941   else
1942     return SDValue();
1943 
1944   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1945       fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1946     V2 = OriV1;
1947   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1948            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1949     V2 = OriV2;
1950 
1951   else
1952     return SDValue();
1953 
1954   return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1955 }
1956 
1957 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1958 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1959                                             MVT VT, SDValue V1, SDValue V2,
1960                                             SelectionDAG &DAG) {
1961 
1962   const auto &Begin = Mask.begin();
1963   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1964   const auto &Mid = Mask.begin() + Mask.size() / 2;
1965   const auto &RightMid = Mask.end() - Mask.size() / 4;
1966   const auto &End = Mask.end();
1967   unsigned HalfSize = Mask.size() / 2;
1968   SDValue OriV1 = V1, OriV2 = V2;
1969 
1970   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1971       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1972     V1 = OriV1;
1973   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1974            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1975                                    2))
1976     V1 = OriV2;
1977   else
1978     return SDValue();
1979 
1980   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1981       fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1982     V2 = OriV1;
1983   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1984            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1985                                    2))
1986     V2 = OriV2;
1987   else
1988     return SDValue();
1989 
1990   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1991 }
1992 
1993 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1994 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1995                                           MVT VT, SDValue V1, SDValue V2,
1996                                           SelectionDAG &DAG) {
1997 
1998   int MaskSize = Mask.size();
1999   int HalfSize = Mask.size() / 2;
2000   const auto &Begin = Mask.begin();
2001   const auto &Mid = Mask.begin() + HalfSize;
2002   const auto &End = Mask.end();
2003 
2004   // VECTOR_SHUFFLE concatenates the vectors:
2005   //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2006   //  shuffling ->
2007   //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2008   //
2009   // XVSHUF concatenates the vectors:
2010   //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2011   //  shuffling ->
2012   //  <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2013   SmallVector<SDValue, 8> MaskAlloc;
2014   for (auto it = Begin; it < Mid; it++) {
2015     if (*it < 0) // UNDEF
2016       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2017     else if ((*it >= 0 && *it < HalfSize) ||
2018              (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2019       int M = *it < HalfSize ? *it : *it - HalfSize;
2020       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2021     } else
2022       return SDValue();
2023   }
2024   assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2025 
2026   for (auto it = Mid; it < End; it++) {
2027     if (*it < 0) // UNDEF
2028       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2029     else if ((*it >= HalfSize && *it < MaskSize) ||
2030              (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2031       int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2032       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2033     } else
2034       return SDValue();
2035   }
2036   assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2037 
2038   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2039   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2040   return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2041 }
2042 
2043 /// Shuffle vectors by lane to generate more optimized instructions.
2044 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2045 ///
2046 /// Therefore, except for the following four cases, other cases are regarded
2047 /// as cross-lane shuffles, where optimization is relatively limited.
2048 ///
2049 /// - Shuffle high, low lanes of two inputs vector
2050 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2051 /// - Shuffle low, high lanes of two inputs vector
2052 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2053 /// - Shuffle low, low lanes of two inputs vector
2054 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2055 /// - Shuffle high, high lanes of two inputs vector
2056 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2057 ///
2058 /// The first case is the closest to LoongArch instructions and the other
2059 /// cases need to be converted to it for processing.
2060 ///
2061 /// This function may modify V1, V2 and Mask
canonicalizeShuffleVectorByLane(const SDLoc & DL,MutableArrayRef<int> Mask,MVT VT,SDValue & V1,SDValue & V2,SelectionDAG & DAG)2062 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
2063                                             MutableArrayRef<int> Mask, MVT VT,
2064                                             SDValue &V1, SDValue &V2,
2065                                             SelectionDAG &DAG) {
2066 
2067   enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2068 
2069   int MaskSize = Mask.size();
2070   int HalfSize = Mask.size() / 2;
2071 
2072   HalfMaskType preMask = None, postMask = None;
2073 
2074   if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2075         return M < 0 || (M >= 0 && M < HalfSize) ||
2076                (M >= MaskSize && M < MaskSize + HalfSize);
2077       }))
2078     preMask = HighLaneTy;
2079   else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2080              return M < 0 || (M >= HalfSize && M < MaskSize) ||
2081                     (M >= MaskSize + HalfSize && M < MaskSize * 2);
2082            }))
2083     preMask = LowLaneTy;
2084 
2085   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2086         return M < 0 || (M >= 0 && M < HalfSize) ||
2087                (M >= MaskSize && M < MaskSize + HalfSize);
2088       }))
2089     postMask = HighLaneTy;
2090   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2091              return M < 0 || (M >= HalfSize && M < MaskSize) ||
2092                     (M >= MaskSize + HalfSize && M < MaskSize * 2);
2093            }))
2094     postMask = LowLaneTy;
2095 
2096   // The pre-half of mask is high lane type, and the post-half of mask
2097   // is low lane type, which is closest to the LoongArch instructions.
2098   //
2099   // Note: In the LoongArch architecture, the high lane of mask corresponds
2100   // to the lower 128-bit of vector register, and the low lane of mask
2101   // corresponds the higher 128-bit of vector register.
2102   if (preMask == HighLaneTy && postMask == LowLaneTy) {
2103     return;
2104   }
2105   if (preMask == LowLaneTy && postMask == HighLaneTy) {
2106     V1 = DAG.getBitcast(MVT::v4i64, V1);
2107     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2108                      DAG.getConstant(0b01001110, DL, MVT::i64));
2109     V1 = DAG.getBitcast(VT, V1);
2110 
2111     if (!V2.isUndef()) {
2112       V2 = DAG.getBitcast(MVT::v4i64, V2);
2113       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2114                        DAG.getConstant(0b01001110, DL, MVT::i64));
2115       V2 = DAG.getBitcast(VT, V2);
2116     }
2117 
2118     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2119       *it = *it < 0 ? *it : *it - HalfSize;
2120     }
2121     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2122       *it = *it < 0 ? *it : *it + HalfSize;
2123     }
2124   } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2125     V1 = DAG.getBitcast(MVT::v4i64, V1);
2126     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2127                      DAG.getConstant(0b11101110, DL, MVT::i64));
2128     V1 = DAG.getBitcast(VT, V1);
2129 
2130     if (!V2.isUndef()) {
2131       V2 = DAG.getBitcast(MVT::v4i64, V2);
2132       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2133                        DAG.getConstant(0b11101110, DL, MVT::i64));
2134       V2 = DAG.getBitcast(VT, V2);
2135     }
2136 
2137     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2138       *it = *it < 0 ? *it : *it - HalfSize;
2139     }
2140   } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2141     V1 = DAG.getBitcast(MVT::v4i64, V1);
2142     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2143                      DAG.getConstant(0b01000100, DL, MVT::i64));
2144     V1 = DAG.getBitcast(VT, V1);
2145 
2146     if (!V2.isUndef()) {
2147       V2 = DAG.getBitcast(MVT::v4i64, V2);
2148       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2149                        DAG.getConstant(0b01000100, DL, MVT::i64));
2150       V2 = DAG.getBitcast(VT, V2);
2151     }
2152 
2153     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2154       *it = *it < 0 ? *it : *it + HalfSize;
2155     }
2156   } else { // cross-lane
2157     return;
2158   }
2159 }
2160 
2161 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2162 /// Only for 256-bit vector.
2163 ///
2164 /// For example:
2165 /// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2166 ///                    <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2167 /// is lowerded to:
2168 ///     (XVPERMI $xr2, $xr0, 78)
2169 ///     (XVSHUF  $xr1, $xr2, $xr0)
2170 ///     (XVORI   $xr0, $xr1, 0)
lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)2171 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2172                                                           ArrayRef<int> Mask,
2173                                                           MVT VT, SDValue V1,
2174                                                           SDValue V2,
2175                                                           SelectionDAG &DAG) {
2176   assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2177   int Size = Mask.size();
2178   int LaneSize = Size / 2;
2179 
2180   bool LaneCrossing[2] = {false, false};
2181   for (int i = 0; i < Size; ++i)
2182     if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2183       LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2184 
2185   // Ensure that all lanes ared involved.
2186   if (!LaneCrossing[0] && !LaneCrossing[1])
2187     return SDValue();
2188 
2189   SmallVector<int> InLaneMask;
2190   InLaneMask.assign(Mask.begin(), Mask.end());
2191   for (int i = 0; i < Size; ++i) {
2192     int &M = InLaneMask[i];
2193     if (M < 0)
2194       continue;
2195     if (((M % Size) / LaneSize) != (i / LaneSize))
2196       M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2197   }
2198 
2199   SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2200   Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2201                                  DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2202   Flipped = DAG.getBitcast(VT, Flipped);
2203   return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2204 }
2205 
2206 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2207 ///
2208 /// This routine breaks down the specific type of 256-bit shuffle and
2209 /// dispatches to the lowering routines accordingly.
lower256BitShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)2210 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2211                                   SDValue V1, SDValue V2, SelectionDAG &DAG) {
2212   assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2213           VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2214           VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2215          "Vector type is unsupported for lasx!");
2216   assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2217          "Two operands have different types!");
2218   assert(VT.getVectorNumElements() == Mask.size() &&
2219          "Unexpected mask size for shuffle!");
2220   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2221   assert(Mask.size() >= 4 && "Mask size is less than 4.");
2222 
2223   // canonicalize non cross-lane shuffle vector
2224   SmallVector<int> NewMask(Mask);
2225   canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
2226 
2227   APInt KnownUndef, KnownZero;
2228   computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2229   APInt Zeroable = KnownUndef | KnownZero;
2230 
2231   SDValue Result;
2232   // TODO: Add more comparison patterns.
2233   if (V2.isUndef()) {
2234     if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
2235       return Result;
2236     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
2237       return Result;
2238     if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2239                                                              V1, V2, DAG)))
2240       return Result;
2241 
2242     // TODO: This comment may be enabled in the future to better match the
2243     // pattern for instruction selection.
2244     /* V2 = V1; */
2245   }
2246 
2247   // It is recommended not to change the pattern comparison order for better
2248   // performance.
2249   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2250     return Result;
2251   if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2252     return Result;
2253   if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2254     return Result;
2255   if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2256     return Result;
2257   if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2258     return Result;
2259   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2260     return Result;
2261   if ((Result =
2262            lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
2263     return Result;
2264   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
2265     return Result;
2266   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2267     return NewShuffle;
2268   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2269     return Result;
2270 
2271   return SDValue();
2272 }
2273 
lowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const2274 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2275                                                      SelectionDAG &DAG) const {
2276   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2277   ArrayRef<int> OrigMask = SVOp->getMask();
2278   SDValue V1 = Op.getOperand(0);
2279   SDValue V2 = Op.getOperand(1);
2280   MVT VT = Op.getSimpleValueType();
2281   int NumElements = VT.getVectorNumElements();
2282   SDLoc DL(Op);
2283 
2284   bool V1IsUndef = V1.isUndef();
2285   bool V2IsUndef = V2.isUndef();
2286   if (V1IsUndef && V2IsUndef)
2287     return DAG.getUNDEF(VT);
2288 
2289   // When we create a shuffle node we put the UNDEF node to second operand,
2290   // but in some cases the first operand may be transformed to UNDEF.
2291   // In this case we should just commute the node.
2292   if (V1IsUndef)
2293     return DAG.getCommutedVectorShuffle(*SVOp);
2294 
2295   // Check for non-undef masks pointing at an undef vector and make the masks
2296   // undef as well. This makes it easier to match the shuffle based solely on
2297   // the mask.
2298   if (V2IsUndef &&
2299       any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2300     SmallVector<int, 8> NewMask(OrigMask);
2301     for (int &M : NewMask)
2302       if (M >= NumElements)
2303         M = -1;
2304     return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2305   }
2306 
2307   // Check for illegal shuffle mask element index values.
2308   int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2309   (void)MaskUpperLimit;
2310   assert(llvm::all_of(OrigMask,
2311                       [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2312          "Out of bounds shuffle index");
2313 
2314   // For each vector width, delegate to a specialized lowering routine.
2315   if (VT.is128BitVector())
2316     return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2317 
2318   if (VT.is256BitVector())
2319     return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2320 
2321   return SDValue();
2322 }
2323 
lowerFP_TO_FP16(SDValue Op,SelectionDAG & DAG) const2324 SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2325                                                  SelectionDAG &DAG) const {
2326   // Custom lower to ensure the libcall return is passed in an FPR on hard
2327   // float ABIs.
2328   SDLoc DL(Op);
2329   MakeLibCallOptions CallOptions;
2330   SDValue Op0 = Op.getOperand(0);
2331   SDValue Chain = SDValue();
2332   RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2333   SDValue Res;
2334   std::tie(Res, Chain) =
2335       makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2336   if (Subtarget.is64Bit())
2337     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2338   return DAG.getBitcast(MVT::i32, Res);
2339 }
2340 
lowerFP16_TO_FP(SDValue Op,SelectionDAG & DAG) const2341 SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2342                                                  SelectionDAG &DAG) const {
2343   // Custom lower to ensure the libcall argument is passed in an FPR on hard
2344   // float ABIs.
2345   SDLoc DL(Op);
2346   MakeLibCallOptions CallOptions;
2347   SDValue Op0 = Op.getOperand(0);
2348   SDValue Chain = SDValue();
2349   SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2350                                                   DL, MVT::f32, Op0)
2351                                     : DAG.getBitcast(MVT::f32, Op0);
2352   SDValue Res;
2353   std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2354                                      CallOptions, DL, Chain);
2355   return Res;
2356 }
2357 
lowerFP_TO_BF16(SDValue Op,SelectionDAG & DAG) const2358 SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2359                                                  SelectionDAG &DAG) const {
2360   assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2361   SDLoc DL(Op);
2362   MakeLibCallOptions CallOptions;
2363   RTLIB::Libcall LC =
2364       RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2365   SDValue Res =
2366       makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2367   if (Subtarget.is64Bit())
2368     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2369   return DAG.getBitcast(MVT::i32, Res);
2370 }
2371 
lowerBF16_TO_FP(SDValue Op,SelectionDAG & DAG) const2372 SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2373                                                  SelectionDAG &DAG) const {
2374   assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2375   MVT VT = Op.getSimpleValueType();
2376   SDLoc DL(Op);
2377   Op = DAG.getNode(
2378       ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2379       DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2380   SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2381                                                   DL, MVT::f32, Op)
2382                                     : DAG.getBitcast(MVT::f32, Op);
2383   if (VT != MVT::f32)
2384     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2385   return Res;
2386 }
2387 
isConstantOrUndef(const SDValue Op)2388 static bool isConstantOrUndef(const SDValue Op) {
2389   if (Op->isUndef())
2390     return true;
2391   if (isa<ConstantSDNode>(Op))
2392     return true;
2393   if (isa<ConstantFPSDNode>(Op))
2394     return true;
2395   return false;
2396 }
2397 
isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode * Op)2398 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2399   for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2400     if (isConstantOrUndef(Op->getOperand(i)))
2401       return true;
2402   return false;
2403 }
2404 
2405 // Lower BUILD_VECTOR as broadcast load (if possible).
2406 // For example:
2407 //   %a = load i8, ptr %ptr
2408 //   %b = build_vector %a, %a, %a, %a
2409 // is lowered to :
2410 //   (VLDREPL_B $a0, 0)
lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode * BVOp,const SDLoc & DL,SelectionDAG & DAG)2411 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2412                                                 const SDLoc &DL,
2413                                                 SelectionDAG &DAG) {
2414   MVT VT = BVOp->getSimpleValueType(0);
2415   int NumOps = BVOp->getNumOperands();
2416 
2417   assert((VT.is128BitVector() || VT.is256BitVector()) &&
2418          "Unsupported vector type for broadcast.");
2419 
2420   SDValue IdentitySrc;
2421   bool IsIdeneity = true;
2422 
2423   for (int i = 0; i != NumOps; i++) {
2424     SDValue Op = BVOp->getOperand(i);
2425     if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2426       IsIdeneity = false;
2427       break;
2428     }
2429     IdentitySrc = BVOp->getOperand(0);
2430   }
2431 
2432   // make sure that this load is valid and only has one user.
2433   if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2434     return SDValue();
2435 
2436   auto *LN = cast<LoadSDNode>(IdentitySrc);
2437   auto ExtType = LN->getExtensionType();
2438 
2439   if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2440       VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2441     SDVTList Tys =
2442         LN->isIndexed()
2443             ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2444             : DAG.getVTList(VT, MVT::Other);
2445     SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2446     SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2447     DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2448     return BCast;
2449   }
2450   return SDValue();
2451 }
2452 
lowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const2453 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2454                                                    SelectionDAG &DAG) const {
2455   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2456   EVT ResTy = Op->getValueType(0);
2457   SDLoc DL(Op);
2458   APInt SplatValue, SplatUndef;
2459   unsigned SplatBitSize;
2460   bool HasAnyUndefs;
2461   bool Is128Vec = ResTy.is128BitVector();
2462   bool Is256Vec = ResTy.is256BitVector();
2463 
2464   if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2465       (!Subtarget.hasExtLASX() || !Is256Vec))
2466     return SDValue();
2467 
2468   if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2469     return Result;
2470 
2471   if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2472                             /*MinSplatBits=*/8) &&
2473       SplatBitSize <= 64) {
2474     // We can only cope with 8, 16, 32, or 64-bit elements.
2475     if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2476         SplatBitSize != 64)
2477       return SDValue();
2478 
2479     EVT ViaVecTy;
2480 
2481     switch (SplatBitSize) {
2482     default:
2483       return SDValue();
2484     case 8:
2485       ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2486       break;
2487     case 16:
2488       ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2489       break;
2490     case 32:
2491       ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2492       break;
2493     case 64:
2494       ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2495       break;
2496     }
2497 
2498     // SelectionDAG::getConstant will promote SplatValue appropriately.
2499     SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2500 
2501     // Bitcast to the type we originally wanted.
2502     if (ViaVecTy != ResTy)
2503       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2504 
2505     return Result;
2506   }
2507 
2508   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2509     return Op;
2510 
2511   if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2512     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2513     // The resulting code is the same length as the expansion, but it doesn't
2514     // use memory operations.
2515     EVT ResTy = Node->getValueType(0);
2516 
2517     assert(ResTy.isVector());
2518 
2519     unsigned NumElts = ResTy.getVectorNumElements();
2520     SDValue Vector = DAG.getUNDEF(ResTy);
2521     for (unsigned i = 0; i < NumElts; ++i) {
2522       Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2523                            Node->getOperand(i),
2524                            DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2525     }
2526     return Vector;
2527   }
2528 
2529   return SDValue();
2530 }
2531 
lowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const2532 SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2533                                                      SelectionDAG &DAG) const {
2534   SDLoc DL(Op);
2535   MVT ResVT = Op.getSimpleValueType();
2536   assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2537 
2538   unsigned NumOperands = Op.getNumOperands();
2539   unsigned NumFreezeUndef = 0;
2540   unsigned NumZero = 0;
2541   unsigned NumNonZero = 0;
2542   unsigned NonZeros = 0;
2543   SmallSet<SDValue, 4> Undefs;
2544   for (unsigned i = 0; i != NumOperands; ++i) {
2545     SDValue SubVec = Op.getOperand(i);
2546     if (SubVec.isUndef())
2547       continue;
2548     if (ISD::isFreezeUndef(SubVec.getNode())) {
2549       // If the freeze(undef) has multiple uses then we must fold to zero.
2550       if (SubVec.hasOneUse()) {
2551         ++NumFreezeUndef;
2552       } else {
2553         ++NumZero;
2554         Undefs.insert(SubVec);
2555       }
2556     } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2557       ++NumZero;
2558     else {
2559       assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2560       NonZeros |= 1 << i;
2561       ++NumNonZero;
2562     }
2563   }
2564 
2565   // If we have more than 2 non-zeros, build each half separately.
2566   if (NumNonZero > 2) {
2567     MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2568     ArrayRef<SDUse> Ops = Op->ops();
2569     SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2570                              Ops.slice(0, NumOperands / 2));
2571     SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2572                              Ops.slice(NumOperands / 2));
2573     return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2574   }
2575 
2576   // Otherwise, build it up through insert_subvectors.
2577   SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2578                         : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2579                                           : DAG.getUNDEF(ResVT));
2580 
2581   // Replace Undef operands with ZeroVector.
2582   for (SDValue U : Undefs)
2583     DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2584 
2585   MVT SubVT = Op.getOperand(0).getSimpleValueType();
2586   unsigned NumSubElems = SubVT.getVectorNumElements();
2587   for (unsigned i = 0; i != NumOperands; ++i) {
2588     if ((NonZeros & (1 << i)) == 0)
2589       continue;
2590 
2591     Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2592                       DAG.getVectorIdxConstant(i * NumSubElems, DL));
2593   }
2594 
2595   return Vec;
2596 }
2597 
2598 SDValue
lowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const2599 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2600                                                  SelectionDAG &DAG) const {
2601   EVT VecTy = Op->getOperand(0)->getValueType(0);
2602   SDValue Idx = Op->getOperand(1);
2603   EVT EltTy = VecTy.getVectorElementType();
2604   unsigned NumElts = VecTy.getVectorNumElements();
2605 
2606   if (isa<ConstantSDNode>(Idx) &&
2607       (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
2608        EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
2609     return Op;
2610 
2611   return SDValue();
2612 }
2613 
2614 SDValue
lowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const2615 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2616                                                 SelectionDAG &DAG) const {
2617   if (isa<ConstantSDNode>(Op->getOperand(2)))
2618     return Op;
2619   return SDValue();
2620 }
2621 
lowerATOMIC_FENCE(SDValue Op,SelectionDAG & DAG) const2622 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2623                                                    SelectionDAG &DAG) const {
2624   SDLoc DL(Op);
2625   SyncScope::ID FenceSSID =
2626       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2627 
2628   // singlethread fences only synchronize with signal handlers on the same
2629   // thread and thus only need to preserve instruction order, not actually
2630   // enforce memory ordering.
2631   if (FenceSSID == SyncScope::SingleThread)
2632     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2633     return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2634 
2635   return Op;
2636 }
2637 
lowerWRITE_REGISTER(SDValue Op,SelectionDAG & DAG) const2638 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2639                                                      SelectionDAG &DAG) const {
2640 
2641   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2642     DAG.getContext()->emitError(
2643         "On LA64, only 64-bit registers can be written.");
2644     return Op.getOperand(0);
2645   }
2646 
2647   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2648     DAG.getContext()->emitError(
2649         "On LA32, only 32-bit registers can be written.");
2650     return Op.getOperand(0);
2651   }
2652 
2653   return Op;
2654 }
2655 
lowerFRAMEADDR(SDValue Op,SelectionDAG & DAG) const2656 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2657                                                 SelectionDAG &DAG) const {
2658   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2659     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2660                                 "be a constant integer");
2661     return SDValue();
2662   }
2663 
2664   MachineFunction &MF = DAG.getMachineFunction();
2665   MF.getFrameInfo().setFrameAddressIsTaken(true);
2666   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2667   EVT VT = Op.getValueType();
2668   SDLoc DL(Op);
2669   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2670   unsigned Depth = Op.getConstantOperandVal(0);
2671   int GRLenInBytes = Subtarget.getGRLen() / 8;
2672 
2673   while (Depth--) {
2674     int Offset = -(GRLenInBytes * 2);
2675     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2676                               DAG.getSignedConstant(Offset, DL, VT));
2677     FrameAddr =
2678         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2679   }
2680   return FrameAddr;
2681 }
2682 
lowerRETURNADDR(SDValue Op,SelectionDAG & DAG) const2683 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2684                                                  SelectionDAG &DAG) const {
2685   // Currently only support lowering return address for current frame.
2686   if (Op.getConstantOperandVal(0) != 0) {
2687     DAG.getContext()->emitError(
2688         "return address can only be determined for the current frame");
2689     return SDValue();
2690   }
2691 
2692   MachineFunction &MF = DAG.getMachineFunction();
2693   MF.getFrameInfo().setReturnAddressIsTaken(true);
2694   MVT GRLenVT = Subtarget.getGRLenVT();
2695 
2696   // Return the value of the return address register, marking it an implicit
2697   // live-in.
2698   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
2699                               getRegClassFor(GRLenVT));
2700   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
2701 }
2702 
lowerEH_DWARF_CFA(SDValue Op,SelectionDAG & DAG) const2703 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
2704                                                    SelectionDAG &DAG) const {
2705   MachineFunction &MF = DAG.getMachineFunction();
2706   auto Size = Subtarget.getGRLen() / 8;
2707   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
2708   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2709 }
2710 
lowerVASTART(SDValue Op,SelectionDAG & DAG) const2711 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
2712                                               SelectionDAG &DAG) const {
2713   MachineFunction &MF = DAG.getMachineFunction();
2714   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
2715 
2716   SDLoc DL(Op);
2717   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2718                                  getPointerTy(MF.getDataLayout()));
2719 
2720   // vastart just stores the address of the VarArgsFrameIndex slot into the
2721   // memory location argument.
2722   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2723   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2724                       MachinePointerInfo(SV));
2725 }
2726 
lowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2727 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
2728                                                  SelectionDAG &DAG) const {
2729   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2730          !Subtarget.hasBasicD() && "unexpected target features");
2731 
2732   SDLoc DL(Op);
2733   SDValue Op0 = Op.getOperand(0);
2734   if (Op0->getOpcode() == ISD::AND) {
2735     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
2736     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
2737       return Op;
2738   }
2739 
2740   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
2741       Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
2742       Op0.getConstantOperandVal(2) == UINT64_C(0))
2743     return Op;
2744 
2745   if (Op0.getOpcode() == ISD::AssertZext &&
2746       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
2747     return Op;
2748 
2749   EVT OpVT = Op0.getValueType();
2750   EVT RetVT = Op.getValueType();
2751   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
2752   MakeLibCallOptions CallOptions;
2753   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
2754   SDValue Chain = SDValue();
2755   SDValue Result;
2756   std::tie(Result, Chain) =
2757       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2758   return Result;
2759 }
2760 
lowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2761 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
2762                                                  SelectionDAG &DAG) const {
2763   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2764          !Subtarget.hasBasicD() && "unexpected target features");
2765 
2766   SDLoc DL(Op);
2767   SDValue Op0 = Op.getOperand(0);
2768 
2769   if ((Op0.getOpcode() == ISD::AssertSext ||
2770        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
2771       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
2772     return Op;
2773 
2774   EVT OpVT = Op0.getValueType();
2775   EVT RetVT = Op.getValueType();
2776   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
2777   MakeLibCallOptions CallOptions;
2778   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
2779   SDValue Chain = SDValue();
2780   SDValue Result;
2781   std::tie(Result, Chain) =
2782       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2783   return Result;
2784 }
2785 
lowerBITCAST(SDValue Op,SelectionDAG & DAG) const2786 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
2787                                               SelectionDAG &DAG) const {
2788 
2789   SDLoc DL(Op);
2790   EVT VT = Op.getValueType();
2791   SDValue Op0 = Op.getOperand(0);
2792   EVT Op0VT = Op0.getValueType();
2793 
2794   if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
2795       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
2796     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2797     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
2798   }
2799   if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
2800     SDValue Lo, Hi;
2801     std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
2802     return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
2803   }
2804   return Op;
2805 }
2806 
lowerFP_TO_SINT(SDValue Op,SelectionDAG & DAG) const2807 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
2808                                                  SelectionDAG &DAG) const {
2809 
2810   SDLoc DL(Op);
2811   SDValue Op0 = Op.getOperand(0);
2812 
2813   if (Op0.getValueType() == MVT::f16)
2814     Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
2815 
2816   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
2817       !Subtarget.hasBasicD()) {
2818     SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
2819     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
2820   }
2821 
2822   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
2823   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
2824   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
2825 }
2826 
getTargetNode(GlobalAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2827 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2828                              SelectionDAG &DAG, unsigned Flags) {
2829   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2830 }
2831 
getTargetNode(BlockAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2832 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2833                              SelectionDAG &DAG, unsigned Flags) {
2834   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2835                                    Flags);
2836 }
2837 
getTargetNode(ConstantPoolSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2838 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2839                              SelectionDAG &DAG, unsigned Flags) {
2840   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2841                                    N->getOffset(), Flags);
2842 }
2843 
getTargetNode(JumpTableSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2844 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2845                              SelectionDAG &DAG, unsigned Flags) {
2846   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2847 }
2848 
2849 template <class NodeTy>
getAddr(NodeTy * N,SelectionDAG & DAG,CodeModel::Model M,bool IsLocal) const2850 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2851                                          CodeModel::Model M,
2852                                          bool IsLocal) const {
2853   SDLoc DL(N);
2854   EVT Ty = getPointerTy(DAG.getDataLayout());
2855   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2856   SDValue Load;
2857 
2858   switch (M) {
2859   default:
2860     report_fatal_error("Unsupported code model");
2861 
2862   case CodeModel::Large: {
2863     assert(Subtarget.is64Bit() && "Large code model requires LA64");
2864 
2865     // This is not actually used, but is necessary for successfully matching
2866     // the PseudoLA_*_LARGE nodes.
2867     SDValue Tmp = DAG.getConstant(0, DL, Ty);
2868     if (IsLocal) {
2869       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
2870       // eventually becomes the desired 5-insn code sequence.
2871       Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
2872                                         Tmp, Addr),
2873                      0);
2874     } else {
2875       // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
2876       // eventually becomes the desired 5-insn code sequence.
2877       Load = SDValue(
2878           DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
2879           0);
2880     }
2881     break;
2882   }
2883 
2884   case CodeModel::Small:
2885   case CodeModel::Medium:
2886     if (IsLocal) {
2887       // This generates the pattern (PseudoLA_PCREL sym), which expands to
2888       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
2889       Load = SDValue(
2890           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
2891     } else {
2892       // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
2893       // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
2894       Load =
2895           SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
2896     }
2897   }
2898 
2899   if (!IsLocal) {
2900     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2901     MachineFunction &MF = DAG.getMachineFunction();
2902     MachineMemOperand *MemOp = MF.getMachineMemOperand(
2903         MachinePointerInfo::getGOT(MF),
2904         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2905             MachineMemOperand::MOInvariant,
2906         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
2907     DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
2908   }
2909 
2910   return Load;
2911 }
2912 
lowerBlockAddress(SDValue Op,SelectionDAG & DAG) const2913 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
2914                                                    SelectionDAG &DAG) const {
2915   return getAddr(cast<BlockAddressSDNode>(Op), DAG,
2916                  DAG.getTarget().getCodeModel());
2917 }
2918 
lowerJumpTable(SDValue Op,SelectionDAG & DAG) const2919 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
2920                                                 SelectionDAG &DAG) const {
2921   return getAddr(cast<JumpTableSDNode>(Op), DAG,
2922                  DAG.getTarget().getCodeModel());
2923 }
2924 
lowerConstantPool(SDValue Op,SelectionDAG & DAG) const2925 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
2926                                                    SelectionDAG &DAG) const {
2927   return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
2928                  DAG.getTarget().getCodeModel());
2929 }
2930 
lowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const2931 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
2932                                                     SelectionDAG &DAG) const {
2933   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2934   assert(N->getOffset() == 0 && "unexpected offset in global node");
2935   auto CM = DAG.getTarget().getCodeModel();
2936   const GlobalValue *GV = N->getGlobal();
2937 
2938   if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
2939     if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
2940       CM = *GCM;
2941   }
2942 
2943   return getAddr(N, DAG, CM, GV->isDSOLocal());
2944 }
2945 
getStaticTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool UseGOT,bool Large) const2946 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2947                                                   SelectionDAG &DAG,
2948                                                   unsigned Opc, bool UseGOT,
2949                                                   bool Large) const {
2950   SDLoc DL(N);
2951   EVT Ty = getPointerTy(DAG.getDataLayout());
2952   MVT GRLenVT = Subtarget.getGRLenVT();
2953 
2954   // This is not actually used, but is necessary for successfully matching the
2955   // PseudoLA_*_LARGE nodes.
2956   SDValue Tmp = DAG.getConstant(0, DL, Ty);
2957   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
2958 
2959   // Only IE needs an extra argument for large code model.
2960   SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
2961                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
2962                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
2963 
2964   // If it is LE for normal/medium code model, the add tp operation will occur
2965   // during the pseudo-instruction expansion.
2966   if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
2967     return Offset;
2968 
2969   if (UseGOT) {
2970     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2971     MachineFunction &MF = DAG.getMachineFunction();
2972     MachineMemOperand *MemOp = MF.getMachineMemOperand(
2973         MachinePointerInfo::getGOT(MF),
2974         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2975             MachineMemOperand::MOInvariant,
2976         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
2977     DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
2978   }
2979 
2980   // Add the thread pointer.
2981   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
2982                      DAG.getRegister(LoongArch::R2, GRLenVT));
2983 }
2984 
getDynamicTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool Large) const2985 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2986                                                    SelectionDAG &DAG,
2987                                                    unsigned Opc,
2988                                                    bool Large) const {
2989   SDLoc DL(N);
2990   EVT Ty = getPointerTy(DAG.getDataLayout());
2991   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2992 
2993   // This is not actually used, but is necessary for successfully matching the
2994   // PseudoLA_*_LARGE nodes.
2995   SDValue Tmp = DAG.getConstant(0, DL, Ty);
2996 
2997   // Use a PC-relative addressing mode to access the dynamic GOT address.
2998   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
2999   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3000                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3001 
3002   // Prepare argument list to generate call.
3003   ArgListTy Args;
3004   ArgListEntry Entry;
3005   Entry.Node = Load;
3006   Entry.Ty = CallTy;
3007   Args.push_back(Entry);
3008 
3009   // Setup call to __tls_get_addr.
3010   TargetLowering::CallLoweringInfo CLI(DAG);
3011   CLI.setDebugLoc(DL)
3012       .setChain(DAG.getEntryNode())
3013       .setLibCallee(CallingConv::C, CallTy,
3014                     DAG.getExternalSymbol("__tls_get_addr", Ty),
3015                     std::move(Args));
3016 
3017   return LowerCallTo(CLI).first;
3018 }
3019 
getTLSDescAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool Large) const3020 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3021                                                 SelectionDAG &DAG, unsigned Opc,
3022                                                 bool Large) const {
3023   SDLoc DL(N);
3024   EVT Ty = getPointerTy(DAG.getDataLayout());
3025   const GlobalValue *GV = N->getGlobal();
3026 
3027   // This is not actually used, but is necessary for successfully matching the
3028   // PseudoLA_*_LARGE nodes.
3029   SDValue Tmp = DAG.getConstant(0, DL, Ty);
3030 
3031   // Use a PC-relative addressing mode to access the global dynamic GOT address.
3032   // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3033   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3034   return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3035                : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3036 }
3037 
3038 SDValue
lowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const3039 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3040                                                SelectionDAG &DAG) const {
3041   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3042       CallingConv::GHC)
3043     report_fatal_error("In GHC calling convention TLS is not supported");
3044 
3045   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3046   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3047 
3048   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3049   assert(N->getOffset() == 0 && "unexpected offset in global node");
3050 
3051   if (DAG.getTarget().useEmulatedTLS())
3052     reportFatalUsageError("the emulated TLS is prohibited");
3053 
3054   bool IsDesc = DAG.getTarget().useTLSDESC();
3055 
3056   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3057   case TLSModel::GeneralDynamic:
3058     // In this model, application code calls the dynamic linker function
3059     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3060     // runtime.
3061     if (!IsDesc)
3062       return getDynamicTLSAddr(N, DAG,
3063                                Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3064                                      : LoongArch::PseudoLA_TLS_GD,
3065                                Large);
3066     break;
3067   case TLSModel::LocalDynamic:
3068     // Same as GeneralDynamic, except for assembly modifiers and relocation
3069     // records.
3070     if (!IsDesc)
3071       return getDynamicTLSAddr(N, DAG,
3072                                Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3073                                      : LoongArch::PseudoLA_TLS_LD,
3074                                Large);
3075     break;
3076   case TLSModel::InitialExec:
3077     // This model uses the GOT to resolve TLS offsets.
3078     return getStaticTLSAddr(N, DAG,
3079                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3080                                   : LoongArch::PseudoLA_TLS_IE,
3081                             /*UseGOT=*/true, Large);
3082   case TLSModel::LocalExec:
3083     // This model is used when static linking as the TLS offsets are resolved
3084     // during program linking.
3085     //
3086     // This node doesn't need an extra argument for the large code model.
3087     return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3088                             /*UseGOT=*/false, Large);
3089   }
3090 
3091   return getTLSDescAddr(N, DAG,
3092                         Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3093                               : LoongArch::PseudoLA_TLS_DESC,
3094                         Large);
3095 }
3096 
3097 template <unsigned N>
checkIntrinsicImmArg(SDValue Op,unsigned ImmOp,SelectionDAG & DAG,bool IsSigned=false)3098 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3099                                     SelectionDAG &DAG, bool IsSigned = false) {
3100   auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3101   // Check the ImmArg.
3102   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3103       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3104     DAG.getContext()->emitError(Op->getOperationName(0) +
3105                                 ": argument out of range.");
3106     return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3107   }
3108   return SDValue();
3109 }
3110 
3111 SDValue
lowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const3112 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3113                                                  SelectionDAG &DAG) const {
3114   switch (Op.getConstantOperandVal(0)) {
3115   default:
3116     return SDValue(); // Don't custom lower most intrinsics.
3117   case Intrinsic::thread_pointer: {
3118     EVT PtrVT = getPointerTy(DAG.getDataLayout());
3119     return DAG.getRegister(LoongArch::R2, PtrVT);
3120   }
3121   case Intrinsic::loongarch_lsx_vpickve2gr_d:
3122   case Intrinsic::loongarch_lsx_vpickve2gr_du:
3123   case Intrinsic::loongarch_lsx_vreplvei_d:
3124   case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3125     return checkIntrinsicImmArg<1>(Op, 2, DAG);
3126   case Intrinsic::loongarch_lsx_vreplvei_w:
3127   case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3128   case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3129   case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3130   case Intrinsic::loongarch_lasx_xvpickve_d:
3131   case Intrinsic::loongarch_lasx_xvpickve_d_f:
3132     return checkIntrinsicImmArg<2>(Op, 2, DAG);
3133   case Intrinsic::loongarch_lasx_xvinsve0_d:
3134     return checkIntrinsicImmArg<2>(Op, 3, DAG);
3135   case Intrinsic::loongarch_lsx_vsat_b:
3136   case Intrinsic::loongarch_lsx_vsat_bu:
3137   case Intrinsic::loongarch_lsx_vrotri_b:
3138   case Intrinsic::loongarch_lsx_vsllwil_h_b:
3139   case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3140   case Intrinsic::loongarch_lsx_vsrlri_b:
3141   case Intrinsic::loongarch_lsx_vsrari_b:
3142   case Intrinsic::loongarch_lsx_vreplvei_h:
3143   case Intrinsic::loongarch_lasx_xvsat_b:
3144   case Intrinsic::loongarch_lasx_xvsat_bu:
3145   case Intrinsic::loongarch_lasx_xvrotri_b:
3146   case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3147   case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3148   case Intrinsic::loongarch_lasx_xvsrlri_b:
3149   case Intrinsic::loongarch_lasx_xvsrari_b:
3150   case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3151   case Intrinsic::loongarch_lasx_xvpickve_w:
3152   case Intrinsic::loongarch_lasx_xvpickve_w_f:
3153     return checkIntrinsicImmArg<3>(Op, 2, DAG);
3154   case Intrinsic::loongarch_lasx_xvinsve0_w:
3155     return checkIntrinsicImmArg<3>(Op, 3, DAG);
3156   case Intrinsic::loongarch_lsx_vsat_h:
3157   case Intrinsic::loongarch_lsx_vsat_hu:
3158   case Intrinsic::loongarch_lsx_vrotri_h:
3159   case Intrinsic::loongarch_lsx_vsllwil_w_h:
3160   case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3161   case Intrinsic::loongarch_lsx_vsrlri_h:
3162   case Intrinsic::loongarch_lsx_vsrari_h:
3163   case Intrinsic::loongarch_lsx_vreplvei_b:
3164   case Intrinsic::loongarch_lasx_xvsat_h:
3165   case Intrinsic::loongarch_lasx_xvsat_hu:
3166   case Intrinsic::loongarch_lasx_xvrotri_h:
3167   case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3168   case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3169   case Intrinsic::loongarch_lasx_xvsrlri_h:
3170   case Intrinsic::loongarch_lasx_xvsrari_h:
3171   case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3172     return checkIntrinsicImmArg<4>(Op, 2, DAG);
3173   case Intrinsic::loongarch_lsx_vsrlni_b_h:
3174   case Intrinsic::loongarch_lsx_vsrani_b_h:
3175   case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3176   case Intrinsic::loongarch_lsx_vsrarni_b_h:
3177   case Intrinsic::loongarch_lsx_vssrlni_b_h:
3178   case Intrinsic::loongarch_lsx_vssrani_b_h:
3179   case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3180   case Intrinsic::loongarch_lsx_vssrani_bu_h:
3181   case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3182   case Intrinsic::loongarch_lsx_vssrarni_b_h:
3183   case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3184   case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3185   case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3186   case Intrinsic::loongarch_lasx_xvsrani_b_h:
3187   case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3188   case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3189   case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3190   case Intrinsic::loongarch_lasx_xvssrani_b_h:
3191   case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3192   case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3193   case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3194   case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3195   case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3196   case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3197     return checkIntrinsicImmArg<4>(Op, 3, DAG);
3198   case Intrinsic::loongarch_lsx_vsat_w:
3199   case Intrinsic::loongarch_lsx_vsat_wu:
3200   case Intrinsic::loongarch_lsx_vrotri_w:
3201   case Intrinsic::loongarch_lsx_vsllwil_d_w:
3202   case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3203   case Intrinsic::loongarch_lsx_vsrlri_w:
3204   case Intrinsic::loongarch_lsx_vsrari_w:
3205   case Intrinsic::loongarch_lsx_vslei_bu:
3206   case Intrinsic::loongarch_lsx_vslei_hu:
3207   case Intrinsic::loongarch_lsx_vslei_wu:
3208   case Intrinsic::loongarch_lsx_vslei_du:
3209   case Intrinsic::loongarch_lsx_vslti_bu:
3210   case Intrinsic::loongarch_lsx_vslti_hu:
3211   case Intrinsic::loongarch_lsx_vslti_wu:
3212   case Intrinsic::loongarch_lsx_vslti_du:
3213   case Intrinsic::loongarch_lsx_vbsll_v:
3214   case Intrinsic::loongarch_lsx_vbsrl_v:
3215   case Intrinsic::loongarch_lasx_xvsat_w:
3216   case Intrinsic::loongarch_lasx_xvsat_wu:
3217   case Intrinsic::loongarch_lasx_xvrotri_w:
3218   case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3219   case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3220   case Intrinsic::loongarch_lasx_xvsrlri_w:
3221   case Intrinsic::loongarch_lasx_xvsrari_w:
3222   case Intrinsic::loongarch_lasx_xvslei_bu:
3223   case Intrinsic::loongarch_lasx_xvslei_hu:
3224   case Intrinsic::loongarch_lasx_xvslei_wu:
3225   case Intrinsic::loongarch_lasx_xvslei_du:
3226   case Intrinsic::loongarch_lasx_xvslti_bu:
3227   case Intrinsic::loongarch_lasx_xvslti_hu:
3228   case Intrinsic::loongarch_lasx_xvslti_wu:
3229   case Intrinsic::loongarch_lasx_xvslti_du:
3230   case Intrinsic::loongarch_lasx_xvbsll_v:
3231   case Intrinsic::loongarch_lasx_xvbsrl_v:
3232     return checkIntrinsicImmArg<5>(Op, 2, DAG);
3233   case Intrinsic::loongarch_lsx_vseqi_b:
3234   case Intrinsic::loongarch_lsx_vseqi_h:
3235   case Intrinsic::loongarch_lsx_vseqi_w:
3236   case Intrinsic::loongarch_lsx_vseqi_d:
3237   case Intrinsic::loongarch_lsx_vslei_b:
3238   case Intrinsic::loongarch_lsx_vslei_h:
3239   case Intrinsic::loongarch_lsx_vslei_w:
3240   case Intrinsic::loongarch_lsx_vslei_d:
3241   case Intrinsic::loongarch_lsx_vslti_b:
3242   case Intrinsic::loongarch_lsx_vslti_h:
3243   case Intrinsic::loongarch_lsx_vslti_w:
3244   case Intrinsic::loongarch_lsx_vslti_d:
3245   case Intrinsic::loongarch_lasx_xvseqi_b:
3246   case Intrinsic::loongarch_lasx_xvseqi_h:
3247   case Intrinsic::loongarch_lasx_xvseqi_w:
3248   case Intrinsic::loongarch_lasx_xvseqi_d:
3249   case Intrinsic::loongarch_lasx_xvslei_b:
3250   case Intrinsic::loongarch_lasx_xvslei_h:
3251   case Intrinsic::loongarch_lasx_xvslei_w:
3252   case Intrinsic::loongarch_lasx_xvslei_d:
3253   case Intrinsic::loongarch_lasx_xvslti_b:
3254   case Intrinsic::loongarch_lasx_xvslti_h:
3255   case Intrinsic::loongarch_lasx_xvslti_w:
3256   case Intrinsic::loongarch_lasx_xvslti_d:
3257     return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3258   case Intrinsic::loongarch_lsx_vsrlni_h_w:
3259   case Intrinsic::loongarch_lsx_vsrani_h_w:
3260   case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3261   case Intrinsic::loongarch_lsx_vsrarni_h_w:
3262   case Intrinsic::loongarch_lsx_vssrlni_h_w:
3263   case Intrinsic::loongarch_lsx_vssrani_h_w:
3264   case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3265   case Intrinsic::loongarch_lsx_vssrani_hu_w:
3266   case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3267   case Intrinsic::loongarch_lsx_vssrarni_h_w:
3268   case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3269   case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3270   case Intrinsic::loongarch_lsx_vfrstpi_b:
3271   case Intrinsic::loongarch_lsx_vfrstpi_h:
3272   case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3273   case Intrinsic::loongarch_lasx_xvsrani_h_w:
3274   case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3275   case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3276   case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3277   case Intrinsic::loongarch_lasx_xvssrani_h_w:
3278   case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3279   case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3280   case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3281   case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3282   case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3283   case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3284   case Intrinsic::loongarch_lasx_xvfrstpi_b:
3285   case Intrinsic::loongarch_lasx_xvfrstpi_h:
3286     return checkIntrinsicImmArg<5>(Op, 3, DAG);
3287   case Intrinsic::loongarch_lsx_vsat_d:
3288   case Intrinsic::loongarch_lsx_vsat_du:
3289   case Intrinsic::loongarch_lsx_vrotri_d:
3290   case Intrinsic::loongarch_lsx_vsrlri_d:
3291   case Intrinsic::loongarch_lsx_vsrari_d:
3292   case Intrinsic::loongarch_lasx_xvsat_d:
3293   case Intrinsic::loongarch_lasx_xvsat_du:
3294   case Intrinsic::loongarch_lasx_xvrotri_d:
3295   case Intrinsic::loongarch_lasx_xvsrlri_d:
3296   case Intrinsic::loongarch_lasx_xvsrari_d:
3297     return checkIntrinsicImmArg<6>(Op, 2, DAG);
3298   case Intrinsic::loongarch_lsx_vsrlni_w_d:
3299   case Intrinsic::loongarch_lsx_vsrani_w_d:
3300   case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3301   case Intrinsic::loongarch_lsx_vsrarni_w_d:
3302   case Intrinsic::loongarch_lsx_vssrlni_w_d:
3303   case Intrinsic::loongarch_lsx_vssrani_w_d:
3304   case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3305   case Intrinsic::loongarch_lsx_vssrani_wu_d:
3306   case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3307   case Intrinsic::loongarch_lsx_vssrarni_w_d:
3308   case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3309   case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3310   case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3311   case Intrinsic::loongarch_lasx_xvsrani_w_d:
3312   case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3313   case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3314   case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3315   case Intrinsic::loongarch_lasx_xvssrani_w_d:
3316   case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3317   case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3318   case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3319   case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3320   case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3321   case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3322     return checkIntrinsicImmArg<6>(Op, 3, DAG);
3323   case Intrinsic::loongarch_lsx_vsrlni_d_q:
3324   case Intrinsic::loongarch_lsx_vsrani_d_q:
3325   case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3326   case Intrinsic::loongarch_lsx_vsrarni_d_q:
3327   case Intrinsic::loongarch_lsx_vssrlni_d_q:
3328   case Intrinsic::loongarch_lsx_vssrani_d_q:
3329   case Intrinsic::loongarch_lsx_vssrlni_du_q:
3330   case Intrinsic::loongarch_lsx_vssrani_du_q:
3331   case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3332   case Intrinsic::loongarch_lsx_vssrarni_d_q:
3333   case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3334   case Intrinsic::loongarch_lsx_vssrarni_du_q:
3335   case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3336   case Intrinsic::loongarch_lasx_xvsrani_d_q:
3337   case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3338   case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3339   case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3340   case Intrinsic::loongarch_lasx_xvssrani_d_q:
3341   case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3342   case Intrinsic::loongarch_lasx_xvssrani_du_q:
3343   case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3344   case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3345   case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3346   case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3347     return checkIntrinsicImmArg<7>(Op, 3, DAG);
3348   case Intrinsic::loongarch_lsx_vnori_b:
3349   case Intrinsic::loongarch_lsx_vshuf4i_b:
3350   case Intrinsic::loongarch_lsx_vshuf4i_h:
3351   case Intrinsic::loongarch_lsx_vshuf4i_w:
3352   case Intrinsic::loongarch_lasx_xvnori_b:
3353   case Intrinsic::loongarch_lasx_xvshuf4i_b:
3354   case Intrinsic::loongarch_lasx_xvshuf4i_h:
3355   case Intrinsic::loongarch_lasx_xvshuf4i_w:
3356   case Intrinsic::loongarch_lasx_xvpermi_d:
3357     return checkIntrinsicImmArg<8>(Op, 2, DAG);
3358   case Intrinsic::loongarch_lsx_vshuf4i_d:
3359   case Intrinsic::loongarch_lsx_vpermi_w:
3360   case Intrinsic::loongarch_lsx_vbitseli_b:
3361   case Intrinsic::loongarch_lsx_vextrins_b:
3362   case Intrinsic::loongarch_lsx_vextrins_h:
3363   case Intrinsic::loongarch_lsx_vextrins_w:
3364   case Intrinsic::loongarch_lsx_vextrins_d:
3365   case Intrinsic::loongarch_lasx_xvshuf4i_d:
3366   case Intrinsic::loongarch_lasx_xvpermi_w:
3367   case Intrinsic::loongarch_lasx_xvpermi_q:
3368   case Intrinsic::loongarch_lasx_xvbitseli_b:
3369   case Intrinsic::loongarch_lasx_xvextrins_b:
3370   case Intrinsic::loongarch_lasx_xvextrins_h:
3371   case Intrinsic::loongarch_lasx_xvextrins_w:
3372   case Intrinsic::loongarch_lasx_xvextrins_d:
3373     return checkIntrinsicImmArg<8>(Op, 3, DAG);
3374   case Intrinsic::loongarch_lsx_vrepli_b:
3375   case Intrinsic::loongarch_lsx_vrepli_h:
3376   case Intrinsic::loongarch_lsx_vrepli_w:
3377   case Intrinsic::loongarch_lsx_vrepli_d:
3378   case Intrinsic::loongarch_lasx_xvrepli_b:
3379   case Intrinsic::loongarch_lasx_xvrepli_h:
3380   case Intrinsic::loongarch_lasx_xvrepli_w:
3381   case Intrinsic::loongarch_lasx_xvrepli_d:
3382     return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3383   case Intrinsic::loongarch_lsx_vldi:
3384   case Intrinsic::loongarch_lasx_xvldi:
3385     return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3386   }
3387 }
3388 
3389 // Helper function that emits error message for intrinsics with chain and return
3390 // merge values of a UNDEF and the chain.
emitIntrinsicWithChainErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)3391 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3392                                                   StringRef ErrorMsg,
3393                                                   SelectionDAG &DAG) {
3394   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3395   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3396                             SDLoc(Op));
3397 }
3398 
3399 SDValue
lowerINTRINSIC_W_CHAIN(SDValue Op,SelectionDAG & DAG) const3400 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3401                                                 SelectionDAG &DAG) const {
3402   SDLoc DL(Op);
3403   MVT GRLenVT = Subtarget.getGRLenVT();
3404   EVT VT = Op.getValueType();
3405   SDValue Chain = Op.getOperand(0);
3406   const StringRef ErrorMsgOOR = "argument out of range";
3407   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3408   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3409 
3410   switch (Op.getConstantOperandVal(1)) {
3411   default:
3412     return Op;
3413   case Intrinsic::loongarch_crc_w_b_w:
3414   case Intrinsic::loongarch_crc_w_h_w:
3415   case Intrinsic::loongarch_crc_w_w_w:
3416   case Intrinsic::loongarch_crc_w_d_w:
3417   case Intrinsic::loongarch_crcc_w_b_w:
3418   case Intrinsic::loongarch_crcc_w_h_w:
3419   case Intrinsic::loongarch_crcc_w_w_w:
3420   case Intrinsic::loongarch_crcc_w_d_w:
3421     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3422   case Intrinsic::loongarch_csrrd_w:
3423   case Intrinsic::loongarch_csrrd_d: {
3424     unsigned Imm = Op.getConstantOperandVal(2);
3425     return !isUInt<14>(Imm)
3426                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3427                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3428                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3429   }
3430   case Intrinsic::loongarch_csrwr_w:
3431   case Intrinsic::loongarch_csrwr_d: {
3432     unsigned Imm = Op.getConstantOperandVal(3);
3433     return !isUInt<14>(Imm)
3434                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3435                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3436                              {Chain, Op.getOperand(2),
3437                               DAG.getConstant(Imm, DL, GRLenVT)});
3438   }
3439   case Intrinsic::loongarch_csrxchg_w:
3440   case Intrinsic::loongarch_csrxchg_d: {
3441     unsigned Imm = Op.getConstantOperandVal(4);
3442     return !isUInt<14>(Imm)
3443                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3444                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3445                              {Chain, Op.getOperand(2), Op.getOperand(3),
3446                               DAG.getConstant(Imm, DL, GRLenVT)});
3447   }
3448   case Intrinsic::loongarch_iocsrrd_d: {
3449     return DAG.getNode(
3450         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3451         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3452   }
3453 #define IOCSRRD_CASE(NAME, NODE)                                               \
3454   case Intrinsic::loongarch_##NAME: {                                          \
3455     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
3456                        {Chain, Op.getOperand(2)});                             \
3457   }
3458     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3459     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3460     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3461 #undef IOCSRRD_CASE
3462   case Intrinsic::loongarch_cpucfg: {
3463     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3464                        {Chain, Op.getOperand(2)});
3465   }
3466   case Intrinsic::loongarch_lddir_d: {
3467     unsigned Imm = Op.getConstantOperandVal(3);
3468     return !isUInt<8>(Imm)
3469                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3470                : Op;
3471   }
3472   case Intrinsic::loongarch_movfcsr2gr: {
3473     if (!Subtarget.hasBasicF())
3474       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3475     unsigned Imm = Op.getConstantOperandVal(2);
3476     return !isUInt<2>(Imm)
3477                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3478                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3479                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3480   }
3481   case Intrinsic::loongarch_lsx_vld:
3482   case Intrinsic::loongarch_lsx_vldrepl_b:
3483   case Intrinsic::loongarch_lasx_xvld:
3484   case Intrinsic::loongarch_lasx_xvldrepl_b:
3485     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3486                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3487                : SDValue();
3488   case Intrinsic::loongarch_lsx_vldrepl_h:
3489   case Intrinsic::loongarch_lasx_xvldrepl_h:
3490     return !isShiftedInt<11, 1>(
3491                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3492                ? emitIntrinsicWithChainErrorMessage(
3493                      Op, "argument out of range or not a multiple of 2", DAG)
3494                : SDValue();
3495   case Intrinsic::loongarch_lsx_vldrepl_w:
3496   case Intrinsic::loongarch_lasx_xvldrepl_w:
3497     return !isShiftedInt<10, 2>(
3498                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3499                ? emitIntrinsicWithChainErrorMessage(
3500                      Op, "argument out of range or not a multiple of 4", DAG)
3501                : SDValue();
3502   case Intrinsic::loongarch_lsx_vldrepl_d:
3503   case Intrinsic::loongarch_lasx_xvldrepl_d:
3504     return !isShiftedInt<9, 3>(
3505                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3506                ? emitIntrinsicWithChainErrorMessage(
3507                      Op, "argument out of range or not a multiple of 8", DAG)
3508                : SDValue();
3509   }
3510 }
3511 
3512 // Helper function that emits error message for intrinsics with void return
3513 // value and return the chain.
emitIntrinsicErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)3514 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3515                                          SelectionDAG &DAG) {
3516 
3517   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3518   return Op.getOperand(0);
3519 }
3520 
lowerINTRINSIC_VOID(SDValue Op,SelectionDAG & DAG) const3521 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3522                                                      SelectionDAG &DAG) const {
3523   SDLoc DL(Op);
3524   MVT GRLenVT = Subtarget.getGRLenVT();
3525   SDValue Chain = Op.getOperand(0);
3526   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3527   SDValue Op2 = Op.getOperand(2);
3528   const StringRef ErrorMsgOOR = "argument out of range";
3529   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3530   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3531   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3532 
3533   switch (IntrinsicEnum) {
3534   default:
3535     // TODO: Add more Intrinsics.
3536     return SDValue();
3537   case Intrinsic::loongarch_cacop_d:
3538   case Intrinsic::loongarch_cacop_w: {
3539     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3540       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3541     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3542       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3543     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3544     unsigned Imm1 = Op2->getAsZExtVal();
3545     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3546     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3547       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3548     return Op;
3549   }
3550   case Intrinsic::loongarch_dbar: {
3551     unsigned Imm = Op2->getAsZExtVal();
3552     return !isUInt<15>(Imm)
3553                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3554                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3555                              DAG.getConstant(Imm, DL, GRLenVT));
3556   }
3557   case Intrinsic::loongarch_ibar: {
3558     unsigned Imm = Op2->getAsZExtVal();
3559     return !isUInt<15>(Imm)
3560                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3561                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3562                              DAG.getConstant(Imm, DL, GRLenVT));
3563   }
3564   case Intrinsic::loongarch_break: {
3565     unsigned Imm = Op2->getAsZExtVal();
3566     return !isUInt<15>(Imm)
3567                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3568                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3569                              DAG.getConstant(Imm, DL, GRLenVT));
3570   }
3571   case Intrinsic::loongarch_movgr2fcsr: {
3572     if (!Subtarget.hasBasicF())
3573       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3574     unsigned Imm = Op2->getAsZExtVal();
3575     return !isUInt<2>(Imm)
3576                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3577                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3578                              DAG.getConstant(Imm, DL, GRLenVT),
3579                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3580                                          Op.getOperand(3)));
3581   }
3582   case Intrinsic::loongarch_syscall: {
3583     unsigned Imm = Op2->getAsZExtVal();
3584     return !isUInt<15>(Imm)
3585                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3586                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3587                              DAG.getConstant(Imm, DL, GRLenVT));
3588   }
3589 #define IOCSRWR_CASE(NAME, NODE)                                               \
3590   case Intrinsic::loongarch_##NAME: {                                          \
3591     SDValue Op3 = Op.getOperand(3);                                            \
3592     return Subtarget.is64Bit()                                                 \
3593                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
3594                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
3595                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
3596                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
3597                              Op3);                                             \
3598   }
3599     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3600     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3601     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3602 #undef IOCSRWR_CASE
3603   case Intrinsic::loongarch_iocsrwr_d: {
3604     return !Subtarget.is64Bit()
3605                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3606                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3607                              Op2,
3608                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3609                                          Op.getOperand(3)));
3610   }
3611 #define ASRT_LE_GT_CASE(NAME)                                                  \
3612   case Intrinsic::loongarch_##NAME: {                                          \
3613     return !Subtarget.is64Bit()                                                \
3614                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
3615                : Op;                                                           \
3616   }
3617     ASRT_LE_GT_CASE(asrtle_d)
3618     ASRT_LE_GT_CASE(asrtgt_d)
3619 #undef ASRT_LE_GT_CASE
3620   case Intrinsic::loongarch_ldpte_d: {
3621     unsigned Imm = Op.getConstantOperandVal(3);
3622     return !Subtarget.is64Bit()
3623                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3624            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3625                              : Op;
3626   }
3627   case Intrinsic::loongarch_lsx_vst:
3628   case Intrinsic::loongarch_lasx_xvst:
3629     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3630                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3631                : SDValue();
3632   case Intrinsic::loongarch_lasx_xvstelm_b:
3633     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3634             !isUInt<5>(Op.getConstantOperandVal(5)))
3635                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3636                : SDValue();
3637   case Intrinsic::loongarch_lsx_vstelm_b:
3638     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3639             !isUInt<4>(Op.getConstantOperandVal(5)))
3640                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3641                : SDValue();
3642   case Intrinsic::loongarch_lasx_xvstelm_h:
3643     return (!isShiftedInt<8, 1>(
3644                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3645             !isUInt<4>(Op.getConstantOperandVal(5)))
3646                ? emitIntrinsicErrorMessage(
3647                      Op, "argument out of range or not a multiple of 2", DAG)
3648                : SDValue();
3649   case Intrinsic::loongarch_lsx_vstelm_h:
3650     return (!isShiftedInt<8, 1>(
3651                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3652             !isUInt<3>(Op.getConstantOperandVal(5)))
3653                ? emitIntrinsicErrorMessage(
3654                      Op, "argument out of range or not a multiple of 2", DAG)
3655                : SDValue();
3656   case Intrinsic::loongarch_lasx_xvstelm_w:
3657     return (!isShiftedInt<8, 2>(
3658                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3659             !isUInt<3>(Op.getConstantOperandVal(5)))
3660                ? emitIntrinsicErrorMessage(
3661                      Op, "argument out of range or not a multiple of 4", DAG)
3662                : SDValue();
3663   case Intrinsic::loongarch_lsx_vstelm_w:
3664     return (!isShiftedInt<8, 2>(
3665                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3666             !isUInt<2>(Op.getConstantOperandVal(5)))
3667                ? emitIntrinsicErrorMessage(
3668                      Op, "argument out of range or not a multiple of 4", DAG)
3669                : SDValue();
3670   case Intrinsic::loongarch_lasx_xvstelm_d:
3671     return (!isShiftedInt<8, 3>(
3672                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3673             !isUInt<2>(Op.getConstantOperandVal(5)))
3674                ? emitIntrinsicErrorMessage(
3675                      Op, "argument out of range or not a multiple of 8", DAG)
3676                : SDValue();
3677   case Intrinsic::loongarch_lsx_vstelm_d:
3678     return (!isShiftedInt<8, 3>(
3679                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3680             !isUInt<1>(Op.getConstantOperandVal(5)))
3681                ? emitIntrinsicErrorMessage(
3682                      Op, "argument out of range or not a multiple of 8", DAG)
3683                : SDValue();
3684   }
3685 }
3686 
lowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const3687 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3688                                                      SelectionDAG &DAG) const {
3689   SDLoc DL(Op);
3690   SDValue Lo = Op.getOperand(0);
3691   SDValue Hi = Op.getOperand(1);
3692   SDValue Shamt = Op.getOperand(2);
3693   EVT VT = Lo.getValueType();
3694 
3695   // if Shamt-GRLen < 0: // Shamt < GRLen
3696   //   Lo = Lo << Shamt
3697   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3698   // else:
3699   //   Lo = 0
3700   //   Hi = Lo << (Shamt-GRLen)
3701 
3702   SDValue Zero = DAG.getConstant(0, DL, VT);
3703   SDValue One = DAG.getConstant(1, DL, VT);
3704   SDValue MinusGRLen =
3705       DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3706   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3707   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3708   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3709 
3710   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3711   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3712   SDValue ShiftRightLo =
3713       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
3714   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3715   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3716   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
3717 
3718   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3719 
3720   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3721   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3722 
3723   SDValue Parts[2] = {Lo, Hi};
3724   return DAG.getMergeValues(Parts, DL);
3725 }
3726 
lowerShiftRightParts(SDValue Op,SelectionDAG & DAG,bool IsSRA) const3727 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
3728                                                       SelectionDAG &DAG,
3729                                                       bool IsSRA) const {
3730   SDLoc DL(Op);
3731   SDValue Lo = Op.getOperand(0);
3732   SDValue Hi = Op.getOperand(1);
3733   SDValue Shamt = Op.getOperand(2);
3734   EVT VT = Lo.getValueType();
3735 
3736   // SRA expansion:
3737   //   if Shamt-GRLen < 0: // Shamt < GRLen
3738   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3739   //     Hi = Hi >>s Shamt
3740   //   else:
3741   //     Lo = Hi >>s (Shamt-GRLen);
3742   //     Hi = Hi >>s (GRLen-1)
3743   //
3744   // SRL expansion:
3745   //   if Shamt-GRLen < 0: // Shamt < GRLen
3746   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3747   //     Hi = Hi >>u Shamt
3748   //   else:
3749   //     Lo = Hi >>u (Shamt-GRLen);
3750   //     Hi = 0;
3751 
3752   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3753 
3754   SDValue Zero = DAG.getConstant(0, DL, VT);
3755   SDValue One = DAG.getConstant(1, DL, VT);
3756   SDValue MinusGRLen =
3757       DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3758   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3759   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3760   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3761 
3762   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3763   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3764   SDValue ShiftLeftHi =
3765       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
3766   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3767   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3768   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
3769   SDValue HiFalse =
3770       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
3771 
3772   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3773 
3774   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3775   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3776 
3777   SDValue Parts[2] = {Lo, Hi};
3778   return DAG.getMergeValues(Parts, DL);
3779 }
3780 
3781 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3782 // form of the given Opcode.
getLoongArchWOpcode(unsigned Opcode)3783 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
3784   switch (Opcode) {
3785   default:
3786     llvm_unreachable("Unexpected opcode");
3787   case ISD::SDIV:
3788     return LoongArchISD::DIV_W;
3789   case ISD::UDIV:
3790     return LoongArchISD::DIV_WU;
3791   case ISD::SREM:
3792     return LoongArchISD::MOD_W;
3793   case ISD::UREM:
3794     return LoongArchISD::MOD_WU;
3795   case ISD::SHL:
3796     return LoongArchISD::SLL_W;
3797   case ISD::SRA:
3798     return LoongArchISD::SRA_W;
3799   case ISD::SRL:
3800     return LoongArchISD::SRL_W;
3801   case ISD::ROTL:
3802   case ISD::ROTR:
3803     return LoongArchISD::ROTR_W;
3804   case ISD::CTTZ:
3805     return LoongArchISD::CTZ_W;
3806   case ISD::CTLZ:
3807     return LoongArchISD::CLZ_W;
3808   }
3809 }
3810 
3811 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
3812 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
3813 // otherwise be promoted to i64, making it difficult to select the
3814 // SLL_W/.../*W later one because the fact the operation was originally of
3815 // type i8/i16/i32 is lost.
customLegalizeToWOp(SDNode * N,SelectionDAG & DAG,int NumOp,unsigned ExtOpc=ISD::ANY_EXTEND)3816 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
3817                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3818   SDLoc DL(N);
3819   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
3820   SDValue NewOp0, NewRes;
3821 
3822   switch (NumOp) {
3823   default:
3824     llvm_unreachable("Unexpected NumOp");
3825   case 1: {
3826     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3827     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
3828     break;
3829   }
3830   case 2: {
3831     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3832     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3833     if (N->getOpcode() == ISD::ROTL) {
3834       SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
3835       NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
3836     }
3837     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3838     break;
3839   }
3840     // TODO:Handle more NumOp.
3841   }
3842 
3843   // ReplaceNodeResults requires we maintain the same type for the return
3844   // value.
3845   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3846 }
3847 
3848 // Converts the given 32-bit operation to a i64 operation with signed extension
3849 // semantic to reduce the signed extension instructions.
customLegalizeToWOpWithSExt(SDNode * N,SelectionDAG & DAG)3850 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3851   SDLoc DL(N);
3852   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3853   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3854   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3855   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3856                                DAG.getValueType(MVT::i32));
3857   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3858 }
3859 
3860 // Helper function that emits error message for intrinsics with/without chain
3861 // and return a UNDEF or and the chain as the results.
emitErrorAndReplaceIntrinsicResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,StringRef ErrorMsg,bool WithChain=true)3862 static void emitErrorAndReplaceIntrinsicResults(
3863     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
3864     StringRef ErrorMsg, bool WithChain = true) {
3865   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
3866   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
3867   if (!WithChain)
3868     return;
3869   Results.push_back(N->getOperand(0));
3870 }
3871 
3872 template <unsigned N>
3873 static void
replaceVPICKVE2GRResults(SDNode * Node,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,unsigned ResOp)3874 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
3875                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
3876                          unsigned ResOp) {
3877   const StringRef ErrorMsgOOR = "argument out of range";
3878   unsigned Imm = Node->getConstantOperandVal(2);
3879   if (!isUInt<N>(Imm)) {
3880     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
3881                                         /*WithChain=*/false);
3882     return;
3883   }
3884   SDLoc DL(Node);
3885   SDValue Vec = Node->getOperand(1);
3886 
3887   SDValue PickElt =
3888       DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
3889                   DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
3890                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
3891   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
3892                                 PickElt.getValue(0)));
3893 }
3894 
replaceVecCondBranchResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,unsigned ResOp)3895 static void replaceVecCondBranchResults(SDNode *N,
3896                                         SmallVectorImpl<SDValue> &Results,
3897                                         SelectionDAG &DAG,
3898                                         const LoongArchSubtarget &Subtarget,
3899                                         unsigned ResOp) {
3900   SDLoc DL(N);
3901   SDValue Vec = N->getOperand(1);
3902 
3903   SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
3904   Results.push_back(
3905       DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
3906 }
3907 
3908 static void
replaceINTRINSIC_WO_CHAINResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)3909 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
3910                                  SelectionDAG &DAG,
3911                                  const LoongArchSubtarget &Subtarget) {
3912   switch (N->getConstantOperandVal(0)) {
3913   default:
3914     llvm_unreachable("Unexpected Intrinsic.");
3915   case Intrinsic::loongarch_lsx_vpickve2gr_b:
3916     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
3917                                 LoongArchISD::VPICK_SEXT_ELT);
3918     break;
3919   case Intrinsic::loongarch_lsx_vpickve2gr_h:
3920   case Intrinsic::loongarch_lasx_xvpickve2gr_w:
3921     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
3922                                 LoongArchISD::VPICK_SEXT_ELT);
3923     break;
3924   case Intrinsic::loongarch_lsx_vpickve2gr_w:
3925     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
3926                                 LoongArchISD::VPICK_SEXT_ELT);
3927     break;
3928   case Intrinsic::loongarch_lsx_vpickve2gr_bu:
3929     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
3930                                 LoongArchISD::VPICK_ZEXT_ELT);
3931     break;
3932   case Intrinsic::loongarch_lsx_vpickve2gr_hu:
3933   case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
3934     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
3935                                 LoongArchISD::VPICK_ZEXT_ELT);
3936     break;
3937   case Intrinsic::loongarch_lsx_vpickve2gr_wu:
3938     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
3939                                 LoongArchISD::VPICK_ZEXT_ELT);
3940     break;
3941   case Intrinsic::loongarch_lsx_bz_b:
3942   case Intrinsic::loongarch_lsx_bz_h:
3943   case Intrinsic::loongarch_lsx_bz_w:
3944   case Intrinsic::loongarch_lsx_bz_d:
3945   case Intrinsic::loongarch_lasx_xbz_b:
3946   case Intrinsic::loongarch_lasx_xbz_h:
3947   case Intrinsic::loongarch_lasx_xbz_w:
3948   case Intrinsic::loongarch_lasx_xbz_d:
3949     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3950                                 LoongArchISD::VALL_ZERO);
3951     break;
3952   case Intrinsic::loongarch_lsx_bz_v:
3953   case Intrinsic::loongarch_lasx_xbz_v:
3954     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3955                                 LoongArchISD::VANY_ZERO);
3956     break;
3957   case Intrinsic::loongarch_lsx_bnz_b:
3958   case Intrinsic::loongarch_lsx_bnz_h:
3959   case Intrinsic::loongarch_lsx_bnz_w:
3960   case Intrinsic::loongarch_lsx_bnz_d:
3961   case Intrinsic::loongarch_lasx_xbnz_b:
3962   case Intrinsic::loongarch_lasx_xbnz_h:
3963   case Intrinsic::loongarch_lasx_xbnz_w:
3964   case Intrinsic::loongarch_lasx_xbnz_d:
3965     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3966                                 LoongArchISD::VALL_NONZERO);
3967     break;
3968   case Intrinsic::loongarch_lsx_bnz_v:
3969   case Intrinsic::loongarch_lasx_xbnz_v:
3970     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3971                                 LoongArchISD::VANY_NONZERO);
3972     break;
3973   }
3974 }
3975 
replaceCMP_XCHG_128Results(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG)3976 static void replaceCMP_XCHG_128Results(SDNode *N,
3977                                        SmallVectorImpl<SDValue> &Results,
3978                                        SelectionDAG &DAG) {
3979   assert(N->getValueType(0) == MVT::i128 &&
3980          "AtomicCmpSwap on types less than 128 should be legal");
3981   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3982 
3983   unsigned Opcode;
3984   switch (MemOp->getMergedOrdering()) {
3985   case AtomicOrdering::Acquire:
3986   case AtomicOrdering::AcquireRelease:
3987   case AtomicOrdering::SequentiallyConsistent:
3988     Opcode = LoongArch::PseudoCmpXchg128Acquire;
3989     break;
3990   case AtomicOrdering::Monotonic:
3991   case AtomicOrdering::Release:
3992     Opcode = LoongArch::PseudoCmpXchg128;
3993     break;
3994   default:
3995     llvm_unreachable("Unexpected ordering!");
3996   }
3997 
3998   SDLoc DL(N);
3999   auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4000   auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4001   SDValue Ops[] = {N->getOperand(1), CmpVal.first,  CmpVal.second,
4002                    NewVal.first,     NewVal.second, N->getOperand(0)};
4003 
4004   SDNode *CmpSwap = DAG.getMachineNode(
4005       Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4006       Ops);
4007   DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4008   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4009                                 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4010   Results.push_back(SDValue(CmpSwap, 3));
4011 }
4012 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const4013 void LoongArchTargetLowering::ReplaceNodeResults(
4014     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4015   SDLoc DL(N);
4016   EVT VT = N->getValueType(0);
4017   switch (N->getOpcode()) {
4018   default:
4019     llvm_unreachable("Don't know how to legalize this operation");
4020   case ISD::ADD:
4021   case ISD::SUB:
4022     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4023            "Unexpected custom legalisation");
4024     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4025     break;
4026   case ISD::SDIV:
4027   case ISD::UDIV:
4028   case ISD::SREM:
4029   case ISD::UREM:
4030     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4031            "Unexpected custom legalisation");
4032     Results.push_back(customLegalizeToWOp(N, DAG, 2,
4033                                           Subtarget.hasDiv32() && VT == MVT::i32
4034                                               ? ISD::ANY_EXTEND
4035                                               : ISD::SIGN_EXTEND));
4036     break;
4037   case ISD::SHL:
4038   case ISD::SRA:
4039   case ISD::SRL:
4040     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4041            "Unexpected custom legalisation");
4042     if (N->getOperand(1).getOpcode() != ISD::Constant) {
4043       Results.push_back(customLegalizeToWOp(N, DAG, 2));
4044       break;
4045     }
4046     break;
4047   case ISD::ROTL:
4048   case ISD::ROTR:
4049     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4050            "Unexpected custom legalisation");
4051     Results.push_back(customLegalizeToWOp(N, DAG, 2));
4052     break;
4053   case ISD::FP_TO_SINT: {
4054     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4055            "Unexpected custom legalisation");
4056     SDValue Src = N->getOperand(0);
4057     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4058     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4059         TargetLowering::TypeSoftenFloat) {
4060       if (!isTypeLegal(Src.getValueType()))
4061         return;
4062       if (Src.getValueType() == MVT::f16)
4063         Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4064       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4065       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4066       return;
4067     }
4068     // If the FP type needs to be softened, emit a library call using the 'si'
4069     // version. If we left it to default legalization we'd end up with 'di'.
4070     RTLIB::Libcall LC;
4071     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4072     MakeLibCallOptions CallOptions;
4073     EVT OpVT = Src.getValueType();
4074     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
4075     SDValue Chain = SDValue();
4076     SDValue Result;
4077     std::tie(Result, Chain) =
4078         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4079     Results.push_back(Result);
4080     break;
4081   }
4082   case ISD::BITCAST: {
4083     SDValue Src = N->getOperand(0);
4084     EVT SrcVT = Src.getValueType();
4085     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4086         Subtarget.hasBasicF()) {
4087       SDValue Dst =
4088           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4089       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4090     } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4091       SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4092                                    DAG.getVTList(MVT::i32, MVT::i32), Src);
4093       SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4094                                    NewReg.getValue(0), NewReg.getValue(1));
4095       Results.push_back(RetReg);
4096     }
4097     break;
4098   }
4099   case ISD::FP_TO_UINT: {
4100     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4101            "Unexpected custom legalisation");
4102     auto &TLI = DAG.getTargetLoweringInfo();
4103     SDValue Tmp1, Tmp2;
4104     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4105     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4106     break;
4107   }
4108   case ISD::BSWAP: {
4109     SDValue Src = N->getOperand(0);
4110     assert((VT == MVT::i16 || VT == MVT::i32) &&
4111            "Unexpected custom legalization");
4112     MVT GRLenVT = Subtarget.getGRLenVT();
4113     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4114     SDValue Tmp;
4115     switch (VT.getSizeInBits()) {
4116     default:
4117       llvm_unreachable("Unexpected operand width");
4118     case 16:
4119       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4120       break;
4121     case 32:
4122       // Only LA64 will get to here due to the size mismatch between VT and
4123       // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4124       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4125       break;
4126     }
4127     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4128     break;
4129   }
4130   case ISD::BITREVERSE: {
4131     SDValue Src = N->getOperand(0);
4132     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4133            "Unexpected custom legalization");
4134     MVT GRLenVT = Subtarget.getGRLenVT();
4135     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4136     SDValue Tmp;
4137     switch (VT.getSizeInBits()) {
4138     default:
4139       llvm_unreachable("Unexpected operand width");
4140     case 8:
4141       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4142       break;
4143     case 32:
4144       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4145       break;
4146     }
4147     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4148     break;
4149   }
4150   case ISD::CTLZ:
4151   case ISD::CTTZ: {
4152     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4153            "Unexpected custom legalisation");
4154     Results.push_back(customLegalizeToWOp(N, DAG, 1));
4155     break;
4156   }
4157   case ISD::INTRINSIC_W_CHAIN: {
4158     SDValue Chain = N->getOperand(0);
4159     SDValue Op2 = N->getOperand(2);
4160     MVT GRLenVT = Subtarget.getGRLenVT();
4161     const StringRef ErrorMsgOOR = "argument out of range";
4162     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4163     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4164 
4165     switch (N->getConstantOperandVal(1)) {
4166     default:
4167       llvm_unreachable("Unexpected Intrinsic.");
4168     case Intrinsic::loongarch_movfcsr2gr: {
4169       if (!Subtarget.hasBasicF()) {
4170         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4171         return;
4172       }
4173       unsigned Imm = Op2->getAsZExtVal();
4174       if (!isUInt<2>(Imm)) {
4175         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4176         return;
4177       }
4178       SDValue MOVFCSR2GRResults = DAG.getNode(
4179           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4180           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4181       Results.push_back(
4182           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4183       Results.push_back(MOVFCSR2GRResults.getValue(1));
4184       break;
4185     }
4186 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
4187   case Intrinsic::loongarch_##NAME: {                                          \
4188     SDValue NODE = DAG.getNode(                                                \
4189         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
4190         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
4191          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
4192     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
4193     Results.push_back(NODE.getValue(1));                                       \
4194     break;                                                                     \
4195   }
4196       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4197       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4198       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4199       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4200       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4201       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4202 #undef CRC_CASE_EXT_BINARYOP
4203 
4204 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
4205   case Intrinsic::loongarch_##NAME: {                                          \
4206     SDValue NODE = DAG.getNode(                                                \
4207         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
4208         {Chain, Op2,                                                           \
4209          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
4210     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
4211     Results.push_back(NODE.getValue(1));                                       \
4212     break;                                                                     \
4213   }
4214       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4215       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4216 #undef CRC_CASE_EXT_UNARYOP
4217 #define CSR_CASE(ID)                                                           \
4218   case Intrinsic::loongarch_##ID: {                                            \
4219     if (!Subtarget.is64Bit())                                                  \
4220       emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
4221     break;                                                                     \
4222   }
4223       CSR_CASE(csrrd_d);
4224       CSR_CASE(csrwr_d);
4225       CSR_CASE(csrxchg_d);
4226       CSR_CASE(iocsrrd_d);
4227 #undef CSR_CASE
4228     case Intrinsic::loongarch_csrrd_w: {
4229       unsigned Imm = Op2->getAsZExtVal();
4230       if (!isUInt<14>(Imm)) {
4231         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4232         return;
4233       }
4234       SDValue CSRRDResults =
4235           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4236                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4237       Results.push_back(
4238           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4239       Results.push_back(CSRRDResults.getValue(1));
4240       break;
4241     }
4242     case Intrinsic::loongarch_csrwr_w: {
4243       unsigned Imm = N->getConstantOperandVal(3);
4244       if (!isUInt<14>(Imm)) {
4245         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4246         return;
4247       }
4248       SDValue CSRWRResults =
4249           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4250                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4251                        DAG.getConstant(Imm, DL, GRLenVT)});
4252       Results.push_back(
4253           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4254       Results.push_back(CSRWRResults.getValue(1));
4255       break;
4256     }
4257     case Intrinsic::loongarch_csrxchg_w: {
4258       unsigned Imm = N->getConstantOperandVal(4);
4259       if (!isUInt<14>(Imm)) {
4260         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4261         return;
4262       }
4263       SDValue CSRXCHGResults = DAG.getNode(
4264           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4265           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4266            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4267            DAG.getConstant(Imm, DL, GRLenVT)});
4268       Results.push_back(
4269           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4270       Results.push_back(CSRXCHGResults.getValue(1));
4271       break;
4272     }
4273 #define IOCSRRD_CASE(NAME, NODE)                                               \
4274   case Intrinsic::loongarch_##NAME: {                                          \
4275     SDValue IOCSRRDResults =                                                   \
4276         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
4277                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4278     Results.push_back(                                                         \
4279         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
4280     Results.push_back(IOCSRRDResults.getValue(1));                             \
4281     break;                                                                     \
4282   }
4283       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4284       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4285       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4286 #undef IOCSRRD_CASE
4287     case Intrinsic::loongarch_cpucfg: {
4288       SDValue CPUCFGResults =
4289           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4290                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4291       Results.push_back(
4292           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4293       Results.push_back(CPUCFGResults.getValue(1));
4294       break;
4295     }
4296     case Intrinsic::loongarch_lddir_d: {
4297       if (!Subtarget.is64Bit()) {
4298         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4299         return;
4300       }
4301       break;
4302     }
4303     }
4304     break;
4305   }
4306   case ISD::READ_REGISTER: {
4307     if (Subtarget.is64Bit())
4308       DAG.getContext()->emitError(
4309           "On LA64, only 64-bit registers can be read.");
4310     else
4311       DAG.getContext()->emitError(
4312           "On LA32, only 32-bit registers can be read.");
4313     Results.push_back(DAG.getUNDEF(VT));
4314     Results.push_back(N->getOperand(0));
4315     break;
4316   }
4317   case ISD::INTRINSIC_WO_CHAIN: {
4318     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4319     break;
4320   }
4321   case ISD::LROUND: {
4322     SDValue Op0 = N->getOperand(0);
4323     EVT OpVT = Op0.getValueType();
4324     RTLIB::Libcall LC =
4325         OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4326     MakeLibCallOptions CallOptions;
4327     CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
4328     SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4329     Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4330     Results.push_back(Result);
4331     break;
4332   }
4333   case ISD::ATOMIC_CMP_SWAP: {
4334     replaceCMP_XCHG_128Results(N, Results, DAG);
4335     break;
4336   }
4337   case ISD::TRUNCATE: {
4338     MVT VT = N->getSimpleValueType(0);
4339     if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4340       return;
4341 
4342     MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4343     SDValue In = N->getOperand(0);
4344     EVT InVT = In.getValueType();
4345     EVT InEltVT = InVT.getVectorElementType();
4346     EVT EltVT = VT.getVectorElementType();
4347     unsigned MinElts = VT.getVectorNumElements();
4348     unsigned WidenNumElts = WidenVT.getVectorNumElements();
4349     unsigned InBits = InVT.getSizeInBits();
4350 
4351     if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4352       if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4353         int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4354         SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4355         for (unsigned I = 0; I < MinElts; ++I)
4356           TruncMask[I] = Scale * I;
4357 
4358         unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4359         MVT SVT = In.getSimpleValueType().getScalarType();
4360         MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4361         SDValue WidenIn =
4362             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4363                         DAG.getVectorIdxConstant(0, DL));
4364         assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4365                "Illegal vector type in truncation");
4366         WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4367         Results.push_back(
4368             DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4369         return;
4370       }
4371     }
4372 
4373     break;
4374   }
4375   }
4376 }
4377 
performANDCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4378 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4379                                  TargetLowering::DAGCombinerInfo &DCI,
4380                                  const LoongArchSubtarget &Subtarget) {
4381   if (DCI.isBeforeLegalizeOps())
4382     return SDValue();
4383 
4384   SDValue FirstOperand = N->getOperand(0);
4385   SDValue SecondOperand = N->getOperand(1);
4386   unsigned FirstOperandOpc = FirstOperand.getOpcode();
4387   EVT ValTy = N->getValueType(0);
4388   SDLoc DL(N);
4389   uint64_t lsb, msb;
4390   unsigned SMIdx, SMLen;
4391   ConstantSDNode *CN;
4392   SDValue NewOperand;
4393   MVT GRLenVT = Subtarget.getGRLenVT();
4394 
4395   // BSTRPICK requires the 32S feature.
4396   if (!Subtarget.has32S())
4397     return SDValue();
4398 
4399   // Op's second operand must be a shifted mask.
4400   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4401       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4402     return SDValue();
4403 
4404   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4405     // Pattern match BSTRPICK.
4406     //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4407     //  => BSTRPICK $dst, $src, msb, lsb
4408     //  where msb = lsb + len - 1
4409 
4410     // The second operand of the shift must be an immediate.
4411     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4412       return SDValue();
4413 
4414     lsb = CN->getZExtValue();
4415 
4416     // Return if the shifted mask does not start at bit 0 or the sum of its
4417     // length and lsb exceeds the word's size.
4418     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4419       return SDValue();
4420 
4421     NewOperand = FirstOperand.getOperand(0);
4422   } else {
4423     // Pattern match BSTRPICK.
4424     //  $dst = and $src, (2**len- 1) , if len > 12
4425     //  => BSTRPICK $dst, $src, msb, lsb
4426     //  where lsb = 0 and msb = len - 1
4427 
4428     // If the mask is <= 0xfff, andi can be used instead.
4429     if (CN->getZExtValue() <= 0xfff)
4430       return SDValue();
4431 
4432     // Return if the MSB exceeds.
4433     if (SMIdx + SMLen > ValTy.getSizeInBits())
4434       return SDValue();
4435 
4436     if (SMIdx > 0) {
4437       // Omit if the constant has more than 2 uses. This a conservative
4438       // decision. Whether it is a win depends on the HW microarchitecture.
4439       // However it should always be better for 1 and 2 uses.
4440       if (CN->use_size() > 2)
4441         return SDValue();
4442       // Return if the constant can be composed by a single LU12I.W.
4443       if ((CN->getZExtValue() & 0xfff) == 0)
4444         return SDValue();
4445       // Return if the constand can be composed by a single ADDI with
4446       // the zero register.
4447       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4448         return SDValue();
4449     }
4450 
4451     lsb = SMIdx;
4452     NewOperand = FirstOperand;
4453   }
4454 
4455   msb = lsb + SMLen - 1;
4456   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4457                             DAG.getConstant(msb, DL, GRLenVT),
4458                             DAG.getConstant(lsb, DL, GRLenVT));
4459   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4460     return NR0;
4461   // Try to optimize to
4462   //   bstrpick $Rd, $Rs, msb, lsb
4463   //   slli     $Rd, $Rd, lsb
4464   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4465                      DAG.getConstant(lsb, DL, GRLenVT));
4466 }
4467 
performSRLCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4468 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4469                                  TargetLowering::DAGCombinerInfo &DCI,
4470                                  const LoongArchSubtarget &Subtarget) {
4471   // BSTRPICK requires the 32S feature.
4472   if (!Subtarget.has32S())
4473     return SDValue();
4474 
4475   if (DCI.isBeforeLegalizeOps())
4476     return SDValue();
4477 
4478   // $dst = srl (and $src, Mask), Shamt
4479   // =>
4480   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4481   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4482   //
4483 
4484   SDValue FirstOperand = N->getOperand(0);
4485   ConstantSDNode *CN;
4486   EVT ValTy = N->getValueType(0);
4487   SDLoc DL(N);
4488   MVT GRLenVT = Subtarget.getGRLenVT();
4489   unsigned MaskIdx, MaskLen;
4490   uint64_t Shamt;
4491 
4492   // The first operand must be an AND and the second operand of the AND must be
4493   // a shifted mask.
4494   if (FirstOperand.getOpcode() != ISD::AND ||
4495       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4496       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4497     return SDValue();
4498 
4499   // The second operand (shift amount) must be an immediate.
4500   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4501     return SDValue();
4502 
4503   Shamt = CN->getZExtValue();
4504   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4505     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4506                        FirstOperand->getOperand(0),
4507                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4508                        DAG.getConstant(Shamt, DL, GRLenVT));
4509 
4510   return SDValue();
4511 }
4512 
4513 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
4514 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
checkBitcastSrcVectorSize(SDValue Src,unsigned Size,unsigned Depth)4515 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4516                                       unsigned Depth) {
4517   // Limit recursion.
4518   if (Depth >= SelectionDAG::MaxRecursionDepth)
4519     return false;
4520   switch (Src.getOpcode()) {
4521   case ISD::SETCC:
4522   case ISD::TRUNCATE:
4523     return Src.getOperand(0).getValueSizeInBits() == Size;
4524   case ISD::FREEZE:
4525     return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4526   case ISD::AND:
4527   case ISD::XOR:
4528   case ISD::OR:
4529     return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4530            checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4531   case ISD::SELECT:
4532   case ISD::VSELECT:
4533     return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4534            checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4535            checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4536   case ISD::BUILD_VECTOR:
4537     return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4538            ISD::isBuildVectorAllOnes(Src.getNode());
4539   }
4540   return false;
4541 }
4542 
4543 // Helper to push sign extension of vXi1 SETCC result through bitops.
signExtendBitcastSrcVector(SelectionDAG & DAG,EVT SExtVT,SDValue Src,const SDLoc & DL)4544 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4545                                           SDValue Src, const SDLoc &DL) {
4546   switch (Src.getOpcode()) {
4547   case ISD::SETCC:
4548   case ISD::FREEZE:
4549   case ISD::TRUNCATE:
4550   case ISD::BUILD_VECTOR:
4551     return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4552   case ISD::AND:
4553   case ISD::XOR:
4554   case ISD::OR:
4555     return DAG.getNode(
4556         Src.getOpcode(), DL, SExtVT,
4557         signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4558         signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4559   case ISD::SELECT:
4560   case ISD::VSELECT:
4561     return DAG.getSelect(
4562         DL, SExtVT, Src.getOperand(0),
4563         signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4564         signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4565   }
4566   llvm_unreachable("Unexpected node type for vXi1 sign extension");
4567 }
4568 
4569 static SDValue
performSETCC_BITCASTCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4570 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4571                             TargetLowering::DAGCombinerInfo &DCI,
4572                             const LoongArchSubtarget &Subtarget) {
4573   SDLoc DL(N);
4574   EVT VT = N->getValueType(0);
4575   SDValue Src = N->getOperand(0);
4576   EVT SrcVT = Src.getValueType();
4577 
4578   if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4579     return SDValue();
4580 
4581   bool UseLASX;
4582   unsigned Opc = ISD::DELETED_NODE;
4583   EVT CmpVT = Src.getOperand(0).getValueType();
4584   EVT EltVT = CmpVT.getVectorElementType();
4585 
4586   if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4587     UseLASX = false;
4588   else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4589            CmpVT.getSizeInBits() == 256)
4590     UseLASX = true;
4591   else
4592     return SDValue();
4593 
4594   SDValue SrcN1 = Src.getOperand(1);
4595   switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4596   default:
4597     break;
4598   case ISD::SETEQ:
4599     // x == 0 => not (vmsknez.b x)
4600     if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4601       Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4602     break;
4603   case ISD::SETGT:
4604     // x > -1 => vmskgez.b x
4605     if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4606       Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4607     break;
4608   case ISD::SETGE:
4609     // x >= 0 => vmskgez.b x
4610     if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4611       Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4612     break;
4613   case ISD::SETLT:
4614     // x < 0 => vmskltz.{b,h,w,d} x
4615     if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4616         (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4617          EltVT == MVT::i64))
4618       Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4619     break;
4620   case ISD::SETLE:
4621     // x <= -1 => vmskltz.{b,h,w,d} x
4622     if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4623         (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4624          EltVT == MVT::i64))
4625       Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4626     break;
4627   case ISD::SETNE:
4628     // x != 0 => vmsknez.b x
4629     if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4630       Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4631     break;
4632   }
4633 
4634   if (Opc == ISD::DELETED_NODE)
4635     return SDValue();
4636 
4637   SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4638   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4639   V = DAG.getZExtOrTrunc(V, DL, T);
4640   return DAG.getBitcast(VT, V);
4641 }
4642 
performBITCASTCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4643 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4644                                      TargetLowering::DAGCombinerInfo &DCI,
4645                                      const LoongArchSubtarget &Subtarget) {
4646   SDLoc DL(N);
4647   EVT VT = N->getValueType(0);
4648   SDValue Src = N->getOperand(0);
4649   EVT SrcVT = Src.getValueType();
4650 
4651   if (!DCI.isBeforeLegalizeOps())
4652     return SDValue();
4653 
4654   if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4655     return SDValue();
4656 
4657   // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4658   SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4659   if (Res)
4660     return Res;
4661 
4662   // Generate vXi1 using [X]VMSKLTZ
4663   MVT SExtVT;
4664   unsigned Opc;
4665   bool UseLASX = false;
4666   bool PropagateSExt = false;
4667 
4668   if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4669     EVT CmpVT = Src.getOperand(0).getValueType();
4670     if (CmpVT.getSizeInBits() > 256)
4671       return SDValue();
4672   }
4673 
4674   switch (SrcVT.getSimpleVT().SimpleTy) {
4675   default:
4676     return SDValue();
4677   case MVT::v2i1:
4678     SExtVT = MVT::v2i64;
4679     break;
4680   case MVT::v4i1:
4681     SExtVT = MVT::v4i32;
4682     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4683       SExtVT = MVT::v4i64;
4684       UseLASX = true;
4685       PropagateSExt = true;
4686     }
4687     break;
4688   case MVT::v8i1:
4689     SExtVT = MVT::v8i16;
4690     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4691       SExtVT = MVT::v8i32;
4692       UseLASX = true;
4693       PropagateSExt = true;
4694     }
4695     break;
4696   case MVT::v16i1:
4697     SExtVT = MVT::v16i8;
4698     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4699       SExtVT = MVT::v16i16;
4700       UseLASX = true;
4701       PropagateSExt = true;
4702     }
4703     break;
4704   case MVT::v32i1:
4705     SExtVT = MVT::v32i8;
4706     UseLASX = true;
4707     break;
4708   };
4709   if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
4710     return SDValue();
4711   Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
4712                       : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4713   Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4714 
4715   SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
4716   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4717   V = DAG.getZExtOrTrunc(V, DL, T);
4718   return DAG.getBitcast(VT, V);
4719 }
4720 
performORCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4721 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
4722                                 TargetLowering::DAGCombinerInfo &DCI,
4723                                 const LoongArchSubtarget &Subtarget) {
4724   MVT GRLenVT = Subtarget.getGRLenVT();
4725   EVT ValTy = N->getValueType(0);
4726   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4727   ConstantSDNode *CN0, *CN1;
4728   SDLoc DL(N);
4729   unsigned ValBits = ValTy.getSizeInBits();
4730   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
4731   unsigned Shamt;
4732   bool SwapAndRetried = false;
4733 
4734   // BSTRPICK requires the 32S feature.
4735   if (!Subtarget.has32S())
4736     return SDValue();
4737 
4738   if (DCI.isBeforeLegalizeOps())
4739     return SDValue();
4740 
4741   if (ValBits != 32 && ValBits != 64)
4742     return SDValue();
4743 
4744 Retry:
4745   // 1st pattern to match BSTRINS:
4746   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
4747   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
4748   //  =>
4749   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4750   if (N0.getOpcode() == ISD::AND &&
4751       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4752       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4753       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
4754       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4755       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4756       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
4757       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4758       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4759       (MaskIdx0 + MaskLen0 <= ValBits)) {
4760     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
4761     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4762                        N1.getOperand(0).getOperand(0),
4763                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4764                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
4765   }
4766 
4767   // 2nd pattern to match BSTRINS:
4768   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
4769   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
4770   //  =>
4771   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4772   if (N0.getOpcode() == ISD::AND &&
4773       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4774       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4775       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4776       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4777       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4778       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4779       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4780       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
4781       (MaskIdx0 + MaskLen0 <= ValBits)) {
4782     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
4783     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4784                        N1.getOperand(0).getOperand(0),
4785                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4786                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
4787   }
4788 
4789   // 3rd pattern to match BSTRINS:
4790   //  R = or (and X, mask0), (and Y, mask1)
4791   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
4792   //  =>
4793   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
4794   //  where msb = lsb + size - 1
4795   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4796       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4797       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4798       (MaskIdx0 + MaskLen0 <= 64) &&
4799       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
4800       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4801     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
4802     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4803                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
4804                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
4805                        DAG.getConstant(ValBits == 32
4806                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4807                                            : (MaskIdx0 + MaskLen0 - 1),
4808                                        DL, GRLenVT),
4809                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
4810   }
4811 
4812   // 4th pattern to match BSTRINS:
4813   //  R = or (and X, mask), (shl Y, shamt)
4814   //  where mask = (2**shamt - 1)
4815   //  =>
4816   //  R = BSTRINS X, Y, ValBits - 1, shamt
4817   //  where ValBits = 32 or 64
4818   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
4819       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4820       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
4821       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4822       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
4823       (MaskIdx0 + MaskLen0 <= ValBits)) {
4824     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
4825     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4826                        N1.getOperand(0),
4827                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
4828                        DAG.getConstant(Shamt, DL, GRLenVT));
4829   }
4830 
4831   // 5th pattern to match BSTRINS:
4832   //  R = or (and X, mask), const
4833   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
4834   //  =>
4835   //  R = BSTRINS X, (const >> lsb), msb, lsb
4836   //  where msb = lsb + size - 1
4837   if (N0.getOpcode() == ISD::AND &&
4838       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4839       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4840       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
4841       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4842     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
4843     return DAG.getNode(
4844         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4845         DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
4846         DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4847                                       : (MaskIdx0 + MaskLen0 - 1),
4848                         DL, GRLenVT),
4849         DAG.getConstant(MaskIdx0, DL, GRLenVT));
4850   }
4851 
4852   // 6th pattern.
4853   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
4854   // by the incoming bits are known to be zero.
4855   // =>
4856   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
4857   //
4858   // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
4859   // pattern is more common than the 1st. So we put the 1st before the 6th in
4860   // order to match as many nodes as possible.
4861   ConstantSDNode *CNMask, *CNShamt;
4862   unsigned MaskIdx, MaskLen;
4863   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4864       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4865       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4866       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4867       CNShamt->getZExtValue() + MaskLen <= ValBits) {
4868     Shamt = CNShamt->getZExtValue();
4869     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
4870     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4871       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
4872       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4873                          N1.getOperand(0).getOperand(0),
4874                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
4875                          DAG.getConstant(Shamt, DL, GRLenVT));
4876     }
4877   }
4878 
4879   // 7th pattern.
4880   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
4881   // overwritten by the incoming bits are known to be zero.
4882   // =>
4883   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
4884   //
4885   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
4886   // before the 7th in order to match as many nodes as possible.
4887   if (N1.getOpcode() == ISD::AND &&
4888       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4889       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4890       N1.getOperand(0).getOpcode() == ISD::SHL &&
4891       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4892       CNShamt->getZExtValue() == MaskIdx) {
4893     APInt ShMask(ValBits, CNMask->getZExtValue());
4894     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4895       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
4896       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4897                          N1.getOperand(0).getOperand(0),
4898                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4899                          DAG.getConstant(MaskIdx, DL, GRLenVT));
4900     }
4901   }
4902 
4903   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
4904   if (!SwapAndRetried) {
4905     std::swap(N0, N1);
4906     SwapAndRetried = true;
4907     goto Retry;
4908   }
4909 
4910   SwapAndRetried = false;
4911 Retry2:
4912   // 8th pattern.
4913   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
4914   // the incoming bits are known to be zero.
4915   // =>
4916   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
4917   //
4918   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
4919   // we put it here in order to match as many nodes as possible or generate less
4920   // instructions.
4921   if (N1.getOpcode() == ISD::AND &&
4922       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4923       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
4924     APInt ShMask(ValBits, CNMask->getZExtValue());
4925     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4926       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
4927       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4928                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
4929                                      N1->getOperand(0),
4930                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
4931                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4932                          DAG.getConstant(MaskIdx, DL, GRLenVT));
4933     }
4934   }
4935   // Swap N0/N1 and retry.
4936   if (!SwapAndRetried) {
4937     std::swap(N0, N1);
4938     SwapAndRetried = true;
4939     goto Retry2;
4940   }
4941 
4942   return SDValue();
4943 }
4944 
checkValueWidth(SDValue V,ISD::LoadExtType & ExtType)4945 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
4946   ExtType = ISD::NON_EXTLOAD;
4947 
4948   switch (V.getNode()->getOpcode()) {
4949   case ISD::LOAD: {
4950     LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
4951     if ((LoadNode->getMemoryVT() == MVT::i8) ||
4952         (LoadNode->getMemoryVT() == MVT::i16)) {
4953       ExtType = LoadNode->getExtensionType();
4954       return true;
4955     }
4956     return false;
4957   }
4958   case ISD::AssertSext: {
4959     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
4960     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4961       ExtType = ISD::SEXTLOAD;
4962       return true;
4963     }
4964     return false;
4965   }
4966   case ISD::AssertZext: {
4967     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
4968     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4969       ExtType = ISD::ZEXTLOAD;
4970       return true;
4971     }
4972     return false;
4973   }
4974   default:
4975     return false;
4976   }
4977 
4978   return false;
4979 }
4980 
4981 // Eliminate redundant truncation and zero-extension nodes.
4982 // * Case 1:
4983 //  +------------+ +------------+ +------------+
4984 //  |   Input1   | |   Input2   | |     CC     |
4985 //  +------------+ +------------+ +------------+
4986 //         |              |              |
4987 //         V              V              +----+
4988 //  +------------+ +------------+             |
4989 //  |  TRUNCATE  | |  TRUNCATE  |             |
4990 //  +------------+ +------------+             |
4991 //         |              |                   |
4992 //         V              V                   |
4993 //  +------------+ +------------+             |
4994 //  |  ZERO_EXT  | |  ZERO_EXT  |             |
4995 //  +------------+ +------------+             |
4996 //         |              |                   |
4997 //         |              +-------------+     |
4998 //         V              V             |     |
4999 //        +----------------+            |     |
5000 //        |      AND       |            |     |
5001 //        +----------------+            |     |
5002 //                |                     |     |
5003 //                +---------------+     |     |
5004 //                                |     |     |
5005 //                                V     V     V
5006 //                               +-------------+
5007 //                               |     CMP     |
5008 //                               +-------------+
// * Case 2:
//  +------------+ +------------+ +-------------+ +------------+ +------------+
//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
//  +------------+ +------------+ +-------------+ +------------+ +------------+
//         |              |             |               |               |
//         V              V             |               |               |
//  +------------+ +------------+       |               |               |
//  |  TRUNCATE  | |  TRUNCATE  |       |               |               |
//  +------------+ +------------+       |               |               |
//         |              |             |               |               |
//         V              V             |               |               |
//  +------------+ +------------+       |               |               |
//  |  ZERO_EXT  | |  ZERO_EXT  |       |               |               |
//  +------------+ +------------+       |               |               |
//         |              |             |               |               |
//         V              |             |               |               |
//  +------------+        |             |               |               |
//  |     XOR    |<---------------------+               |               |
//  +------------+        |                             |               |
//         |              |                             |               |
//         V              V             +---------------+               |
//        +----------------+            |                               |
//        |      AND       |            |     +-------------------------+
//        +----------------+            |     |
//                |                     |     |
//                +---------------+     |     |
//                                |     |     |
//                                V     V     V
//                               +-------------+
//                               |     CMP     |
//                               +-------------+
performSETCCCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5040 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5041                                    TargetLowering::DAGCombinerInfo &DCI,
5042                                    const LoongArchSubtarget &Subtarget) {
5043   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5044 
5045   SDNode *AndNode = N->getOperand(0).getNode();
5046   if (AndNode->getOpcode() != ISD::AND)
5047     return SDValue();
5048 
5049   SDValue AndInputValue2 = AndNode->getOperand(1);
5050   if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5051     return SDValue();
5052 
5053   SDValue CmpInputValue = N->getOperand(1);
5054   SDValue AndInputValue1 = AndNode->getOperand(0);
5055   if (AndInputValue1.getOpcode() == ISD::XOR) {
5056     if (CC != ISD::SETEQ && CC != ISD::SETNE)
5057       return SDValue();
5058     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5059     if (!CN || CN->getSExtValue() != -1)
5060       return SDValue();
5061     CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5062     if (!CN || CN->getSExtValue() != 0)
5063       return SDValue();
5064     AndInputValue1 = AndInputValue1.getOperand(0);
5065     if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5066       return SDValue();
5067   } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5068     if (AndInputValue2 != CmpInputValue)
5069       return SDValue();
5070   } else {
5071     return SDValue();
5072   }
5073 
5074   SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5075   if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5076     return SDValue();
5077 
5078   SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5079   if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5080     return SDValue();
5081 
5082   SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5083   SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5084   ISD::LoadExtType ExtType1;
5085   ISD::LoadExtType ExtType2;
5086 
5087   if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5088       !checkValueWidth(TruncInputValue2, ExtType2))
5089     return SDValue();
5090 
5091   if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5092       AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5093     return SDValue();
5094 
5095   if ((ExtType2 != ISD::ZEXTLOAD) &&
5096       ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5097     return SDValue();
5098 
5099   // These truncation and zero-extension nodes are not necessary, remove them.
5100   SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5101                                TruncInputValue1, TruncInputValue2);
5102   SDValue NewSetCC =
5103       DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5104   DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5105   return SDValue(N, 0);
5106 }
5107 
5108 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
performBITREV_WCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5109 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5110                                       TargetLowering::DAGCombinerInfo &DCI,
5111                                       const LoongArchSubtarget &Subtarget) {
5112   if (DCI.isBeforeLegalizeOps())
5113     return SDValue();
5114 
5115   SDValue Src = N->getOperand(0);
5116   if (Src.getOpcode() != LoongArchISD::REVB_2W)
5117     return SDValue();
5118 
5119   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5120                      Src.getOperand(0));
5121 }
5122 
5123 template <unsigned N>
legalizeIntrinsicImmArg(SDNode * Node,unsigned ImmOp,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,bool IsSigned=false)5124 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5125                                        SelectionDAG &DAG,
5126                                        const LoongArchSubtarget &Subtarget,
5127                                        bool IsSigned = false) {
5128   SDLoc DL(Node);
5129   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5130   // Check the ImmArg.
5131   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5132       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5133     DAG.getContext()->emitError(Node->getOperationName(0) +
5134                                 ": argument out of range.");
5135     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5136   }
5137   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5138 }
5139 
5140 template <unsigned N>
lowerVectorSplatImm(SDNode * Node,unsigned ImmOp,SelectionDAG & DAG,bool IsSigned=false)5141 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5142                                    SelectionDAG &DAG, bool IsSigned = false) {
5143   SDLoc DL(Node);
5144   EVT ResTy = Node->getValueType(0);
5145   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5146 
5147   // Check the ImmArg.
5148   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5149       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5150     DAG.getContext()->emitError(Node->getOperationName(0) +
5151                                 ": argument out of range.");
5152     return DAG.getNode(ISD::UNDEF, DL, ResTy);
5153   }
5154   return DAG.getConstant(
5155       APInt(ResTy.getScalarType().getSizeInBits(),
5156             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5157       DL, ResTy);
5158 }
5159 
truncateVecElts(SDNode * Node,SelectionDAG & DAG)5160 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5161   SDLoc DL(Node);
5162   EVT ResTy = Node->getValueType(0);
5163   SDValue Vec = Node->getOperand(2);
5164   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5165   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5166 }
5167 
lowerVectorBitClear(SDNode * Node,SelectionDAG & DAG)5168 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5169   SDLoc DL(Node);
5170   EVT ResTy = Node->getValueType(0);
5171   SDValue One = DAG.getConstant(1, DL, ResTy);
5172   SDValue Bit =
5173       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5174 
5175   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5176                      DAG.getNOT(DL, Bit, ResTy));
5177 }
5178 
5179 template <unsigned N>
lowerVectorBitClearImm(SDNode * Node,SelectionDAG & DAG)5180 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5181   SDLoc DL(Node);
5182   EVT ResTy = Node->getValueType(0);
5183   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5184   // Check the unsigned ImmArg.
5185   if (!isUInt<N>(CImm->getZExtValue())) {
5186     DAG.getContext()->emitError(Node->getOperationName(0) +
5187                                 ": argument out of range.");
5188     return DAG.getNode(ISD::UNDEF, DL, ResTy);
5189   }
5190 
5191   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5192   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5193 
5194   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5195 }
5196 
5197 template <unsigned N>
lowerVectorBitSetImm(SDNode * Node,SelectionDAG & DAG)5198 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5199   SDLoc DL(Node);
5200   EVT ResTy = Node->getValueType(0);
5201   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5202   // Check the unsigned ImmArg.
5203   if (!isUInt<N>(CImm->getZExtValue())) {
5204     DAG.getContext()->emitError(Node->getOperationName(0) +
5205                                 ": argument out of range.");
5206     return DAG.getNode(ISD::UNDEF, DL, ResTy);
5207   }
5208 
5209   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5210   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5211   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5212 }
5213 
5214 template <unsigned N>
lowerVectorBitRevImm(SDNode * Node,SelectionDAG & DAG)5215 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5216   SDLoc DL(Node);
5217   EVT ResTy = Node->getValueType(0);
5218   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5219   // Check the unsigned ImmArg.
5220   if (!isUInt<N>(CImm->getZExtValue())) {
5221     DAG.getContext()->emitError(Node->getOperationName(0) +
5222                                 ": argument out of range.");
5223     return DAG.getNode(ISD::UNDEF, DL, ResTy);
5224   }
5225 
5226   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5227   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5228   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5229 }
5230 
5231 static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5232 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5233                                  TargetLowering::DAGCombinerInfo &DCI,
5234                                  const LoongArchSubtarget &Subtarget) {
5235   SDLoc DL(N);
5236   switch (N->getConstantOperandVal(0)) {
5237   default:
5238     break;
5239   case Intrinsic::loongarch_lsx_vadd_b:
5240   case Intrinsic::loongarch_lsx_vadd_h:
5241   case Intrinsic::loongarch_lsx_vadd_w:
5242   case Intrinsic::loongarch_lsx_vadd_d:
5243   case Intrinsic::loongarch_lasx_xvadd_b:
5244   case Intrinsic::loongarch_lasx_xvadd_h:
5245   case Intrinsic::loongarch_lasx_xvadd_w:
5246   case Intrinsic::loongarch_lasx_xvadd_d:
5247     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5248                        N->getOperand(2));
5249   case Intrinsic::loongarch_lsx_vaddi_bu:
5250   case Intrinsic::loongarch_lsx_vaddi_hu:
5251   case Intrinsic::loongarch_lsx_vaddi_wu:
5252   case Intrinsic::loongarch_lsx_vaddi_du:
5253   case Intrinsic::loongarch_lasx_xvaddi_bu:
5254   case Intrinsic::loongarch_lasx_xvaddi_hu:
5255   case Intrinsic::loongarch_lasx_xvaddi_wu:
5256   case Intrinsic::loongarch_lasx_xvaddi_du:
5257     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5258                        lowerVectorSplatImm<5>(N, 2, DAG));
5259   case Intrinsic::loongarch_lsx_vsub_b:
5260   case Intrinsic::loongarch_lsx_vsub_h:
5261   case Intrinsic::loongarch_lsx_vsub_w:
5262   case Intrinsic::loongarch_lsx_vsub_d:
5263   case Intrinsic::loongarch_lasx_xvsub_b:
5264   case Intrinsic::loongarch_lasx_xvsub_h:
5265   case Intrinsic::loongarch_lasx_xvsub_w:
5266   case Intrinsic::loongarch_lasx_xvsub_d:
5267     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5268                        N->getOperand(2));
5269   case Intrinsic::loongarch_lsx_vsubi_bu:
5270   case Intrinsic::loongarch_lsx_vsubi_hu:
5271   case Intrinsic::loongarch_lsx_vsubi_wu:
5272   case Intrinsic::loongarch_lsx_vsubi_du:
5273   case Intrinsic::loongarch_lasx_xvsubi_bu:
5274   case Intrinsic::loongarch_lasx_xvsubi_hu:
5275   case Intrinsic::loongarch_lasx_xvsubi_wu:
5276   case Intrinsic::loongarch_lasx_xvsubi_du:
5277     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5278                        lowerVectorSplatImm<5>(N, 2, DAG));
5279   case Intrinsic::loongarch_lsx_vneg_b:
5280   case Intrinsic::loongarch_lsx_vneg_h:
5281   case Intrinsic::loongarch_lsx_vneg_w:
5282   case Intrinsic::loongarch_lsx_vneg_d:
5283   case Intrinsic::loongarch_lasx_xvneg_b:
5284   case Intrinsic::loongarch_lasx_xvneg_h:
5285   case Intrinsic::loongarch_lasx_xvneg_w:
5286   case Intrinsic::loongarch_lasx_xvneg_d:
5287     return DAG.getNode(
5288         ISD::SUB, DL, N->getValueType(0),
5289         DAG.getConstant(
5290             APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5291                   /*isSigned=*/true),
5292             SDLoc(N), N->getValueType(0)),
5293         N->getOperand(1));
5294   case Intrinsic::loongarch_lsx_vmax_b:
5295   case Intrinsic::loongarch_lsx_vmax_h:
5296   case Intrinsic::loongarch_lsx_vmax_w:
5297   case Intrinsic::loongarch_lsx_vmax_d:
5298   case Intrinsic::loongarch_lasx_xvmax_b:
5299   case Intrinsic::loongarch_lasx_xvmax_h:
5300   case Intrinsic::loongarch_lasx_xvmax_w:
5301   case Intrinsic::loongarch_lasx_xvmax_d:
5302     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5303                        N->getOperand(2));
5304   case Intrinsic::loongarch_lsx_vmax_bu:
5305   case Intrinsic::loongarch_lsx_vmax_hu:
5306   case Intrinsic::loongarch_lsx_vmax_wu:
5307   case Intrinsic::loongarch_lsx_vmax_du:
5308   case Intrinsic::loongarch_lasx_xvmax_bu:
5309   case Intrinsic::loongarch_lasx_xvmax_hu:
5310   case Intrinsic::loongarch_lasx_xvmax_wu:
5311   case Intrinsic::loongarch_lasx_xvmax_du:
5312     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5313                        N->getOperand(2));
5314   case Intrinsic::loongarch_lsx_vmaxi_b:
5315   case Intrinsic::loongarch_lsx_vmaxi_h:
5316   case Intrinsic::loongarch_lsx_vmaxi_w:
5317   case Intrinsic::loongarch_lsx_vmaxi_d:
5318   case Intrinsic::loongarch_lasx_xvmaxi_b:
5319   case Intrinsic::loongarch_lasx_xvmaxi_h:
5320   case Intrinsic::loongarch_lasx_xvmaxi_w:
5321   case Intrinsic::loongarch_lasx_xvmaxi_d:
5322     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5323                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5324   case Intrinsic::loongarch_lsx_vmaxi_bu:
5325   case Intrinsic::loongarch_lsx_vmaxi_hu:
5326   case Intrinsic::loongarch_lsx_vmaxi_wu:
5327   case Intrinsic::loongarch_lsx_vmaxi_du:
5328   case Intrinsic::loongarch_lasx_xvmaxi_bu:
5329   case Intrinsic::loongarch_lasx_xvmaxi_hu:
5330   case Intrinsic::loongarch_lasx_xvmaxi_wu:
5331   case Intrinsic::loongarch_lasx_xvmaxi_du:
5332     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5333                        lowerVectorSplatImm<5>(N, 2, DAG));
5334   case Intrinsic::loongarch_lsx_vmin_b:
5335   case Intrinsic::loongarch_lsx_vmin_h:
5336   case Intrinsic::loongarch_lsx_vmin_w:
5337   case Intrinsic::loongarch_lsx_vmin_d:
5338   case Intrinsic::loongarch_lasx_xvmin_b:
5339   case Intrinsic::loongarch_lasx_xvmin_h:
5340   case Intrinsic::loongarch_lasx_xvmin_w:
5341   case Intrinsic::loongarch_lasx_xvmin_d:
5342     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5343                        N->getOperand(2));
5344   case Intrinsic::loongarch_lsx_vmin_bu:
5345   case Intrinsic::loongarch_lsx_vmin_hu:
5346   case Intrinsic::loongarch_lsx_vmin_wu:
5347   case Intrinsic::loongarch_lsx_vmin_du:
5348   case Intrinsic::loongarch_lasx_xvmin_bu:
5349   case Intrinsic::loongarch_lasx_xvmin_hu:
5350   case Intrinsic::loongarch_lasx_xvmin_wu:
5351   case Intrinsic::loongarch_lasx_xvmin_du:
5352     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5353                        N->getOperand(2));
5354   case Intrinsic::loongarch_lsx_vmini_b:
5355   case Intrinsic::loongarch_lsx_vmini_h:
5356   case Intrinsic::loongarch_lsx_vmini_w:
5357   case Intrinsic::loongarch_lsx_vmini_d:
5358   case Intrinsic::loongarch_lasx_xvmini_b:
5359   case Intrinsic::loongarch_lasx_xvmini_h:
5360   case Intrinsic::loongarch_lasx_xvmini_w:
5361   case Intrinsic::loongarch_lasx_xvmini_d:
5362     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5363                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5364   case Intrinsic::loongarch_lsx_vmini_bu:
5365   case Intrinsic::loongarch_lsx_vmini_hu:
5366   case Intrinsic::loongarch_lsx_vmini_wu:
5367   case Intrinsic::loongarch_lsx_vmini_du:
5368   case Intrinsic::loongarch_lasx_xvmini_bu:
5369   case Intrinsic::loongarch_lasx_xvmini_hu:
5370   case Intrinsic::loongarch_lasx_xvmini_wu:
5371   case Intrinsic::loongarch_lasx_xvmini_du:
5372     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5373                        lowerVectorSplatImm<5>(N, 2, DAG));
5374   case Intrinsic::loongarch_lsx_vmul_b:
5375   case Intrinsic::loongarch_lsx_vmul_h:
5376   case Intrinsic::loongarch_lsx_vmul_w:
5377   case Intrinsic::loongarch_lsx_vmul_d:
5378   case Intrinsic::loongarch_lasx_xvmul_b:
5379   case Intrinsic::loongarch_lasx_xvmul_h:
5380   case Intrinsic::loongarch_lasx_xvmul_w:
5381   case Intrinsic::loongarch_lasx_xvmul_d:
5382     return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5383                        N->getOperand(2));
5384   case Intrinsic::loongarch_lsx_vmadd_b:
5385   case Intrinsic::loongarch_lsx_vmadd_h:
5386   case Intrinsic::loongarch_lsx_vmadd_w:
5387   case Intrinsic::loongarch_lsx_vmadd_d:
5388   case Intrinsic::loongarch_lasx_xvmadd_b:
5389   case Intrinsic::loongarch_lasx_xvmadd_h:
5390   case Intrinsic::loongarch_lasx_xvmadd_w:
5391   case Intrinsic::loongarch_lasx_xvmadd_d: {
5392     EVT ResTy = N->getValueType(0);
5393     return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5394                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5395                                    N->getOperand(3)));
5396   }
5397   case Intrinsic::loongarch_lsx_vmsub_b:
5398   case Intrinsic::loongarch_lsx_vmsub_h:
5399   case Intrinsic::loongarch_lsx_vmsub_w:
5400   case Intrinsic::loongarch_lsx_vmsub_d:
5401   case Intrinsic::loongarch_lasx_xvmsub_b:
5402   case Intrinsic::loongarch_lasx_xvmsub_h:
5403   case Intrinsic::loongarch_lasx_xvmsub_w:
5404   case Intrinsic::loongarch_lasx_xvmsub_d: {
5405     EVT ResTy = N->getValueType(0);
5406     return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5407                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5408                                    N->getOperand(3)));
5409   }
5410   case Intrinsic::loongarch_lsx_vdiv_b:
5411   case Intrinsic::loongarch_lsx_vdiv_h:
5412   case Intrinsic::loongarch_lsx_vdiv_w:
5413   case Intrinsic::loongarch_lsx_vdiv_d:
5414   case Intrinsic::loongarch_lasx_xvdiv_b:
5415   case Intrinsic::loongarch_lasx_xvdiv_h:
5416   case Intrinsic::loongarch_lasx_xvdiv_w:
5417   case Intrinsic::loongarch_lasx_xvdiv_d:
5418     return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5419                        N->getOperand(2));
5420   case Intrinsic::loongarch_lsx_vdiv_bu:
5421   case Intrinsic::loongarch_lsx_vdiv_hu:
5422   case Intrinsic::loongarch_lsx_vdiv_wu:
5423   case Intrinsic::loongarch_lsx_vdiv_du:
5424   case Intrinsic::loongarch_lasx_xvdiv_bu:
5425   case Intrinsic::loongarch_lasx_xvdiv_hu:
5426   case Intrinsic::loongarch_lasx_xvdiv_wu:
5427   case Intrinsic::loongarch_lasx_xvdiv_du:
5428     return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5429                        N->getOperand(2));
5430   case Intrinsic::loongarch_lsx_vmod_b:
5431   case Intrinsic::loongarch_lsx_vmod_h:
5432   case Intrinsic::loongarch_lsx_vmod_w:
5433   case Intrinsic::loongarch_lsx_vmod_d:
5434   case Intrinsic::loongarch_lasx_xvmod_b:
5435   case Intrinsic::loongarch_lasx_xvmod_h:
5436   case Intrinsic::loongarch_lasx_xvmod_w:
5437   case Intrinsic::loongarch_lasx_xvmod_d:
5438     return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5439                        N->getOperand(2));
5440   case Intrinsic::loongarch_lsx_vmod_bu:
5441   case Intrinsic::loongarch_lsx_vmod_hu:
5442   case Intrinsic::loongarch_lsx_vmod_wu:
5443   case Intrinsic::loongarch_lsx_vmod_du:
5444   case Intrinsic::loongarch_lasx_xvmod_bu:
5445   case Intrinsic::loongarch_lasx_xvmod_hu:
5446   case Intrinsic::loongarch_lasx_xvmod_wu:
5447   case Intrinsic::loongarch_lasx_xvmod_du:
5448     return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5449                        N->getOperand(2));
5450   case Intrinsic::loongarch_lsx_vand_v:
5451   case Intrinsic::loongarch_lasx_xvand_v:
5452     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5453                        N->getOperand(2));
5454   case Intrinsic::loongarch_lsx_vor_v:
5455   case Intrinsic::loongarch_lasx_xvor_v:
5456     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5457                        N->getOperand(2));
5458   case Intrinsic::loongarch_lsx_vxor_v:
5459   case Intrinsic::loongarch_lasx_xvxor_v:
5460     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5461                        N->getOperand(2));
5462   case Intrinsic::loongarch_lsx_vnor_v:
5463   case Intrinsic::loongarch_lasx_xvnor_v: {
5464     SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5465                               N->getOperand(2));
5466     return DAG.getNOT(DL, Res, Res->getValueType(0));
5467   }
5468   case Intrinsic::loongarch_lsx_vandi_b:
5469   case Intrinsic::loongarch_lasx_xvandi_b:
5470     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5471                        lowerVectorSplatImm<8>(N, 2, DAG));
5472   case Intrinsic::loongarch_lsx_vori_b:
5473   case Intrinsic::loongarch_lasx_xvori_b:
5474     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5475                        lowerVectorSplatImm<8>(N, 2, DAG));
5476   case Intrinsic::loongarch_lsx_vxori_b:
5477   case Intrinsic::loongarch_lasx_xvxori_b:
5478     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5479                        lowerVectorSplatImm<8>(N, 2, DAG));
5480   case Intrinsic::loongarch_lsx_vsll_b:
5481   case Intrinsic::loongarch_lsx_vsll_h:
5482   case Intrinsic::loongarch_lsx_vsll_w:
5483   case Intrinsic::loongarch_lsx_vsll_d:
5484   case Intrinsic::loongarch_lasx_xvsll_b:
5485   case Intrinsic::loongarch_lasx_xvsll_h:
5486   case Intrinsic::loongarch_lasx_xvsll_w:
5487   case Intrinsic::loongarch_lasx_xvsll_d:
5488     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5489                        truncateVecElts(N, DAG));
5490   case Intrinsic::loongarch_lsx_vslli_b:
5491   case Intrinsic::loongarch_lasx_xvslli_b:
5492     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5493                        lowerVectorSplatImm<3>(N, 2, DAG));
5494   case Intrinsic::loongarch_lsx_vslli_h:
5495   case Intrinsic::loongarch_lasx_xvslli_h:
5496     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5497                        lowerVectorSplatImm<4>(N, 2, DAG));
5498   case Intrinsic::loongarch_lsx_vslli_w:
5499   case Intrinsic::loongarch_lasx_xvslli_w:
5500     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5501                        lowerVectorSplatImm<5>(N, 2, DAG));
5502   case Intrinsic::loongarch_lsx_vslli_d:
5503   case Intrinsic::loongarch_lasx_xvslli_d:
5504     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5505                        lowerVectorSplatImm<6>(N, 2, DAG));
5506   case Intrinsic::loongarch_lsx_vsrl_b:
5507   case Intrinsic::loongarch_lsx_vsrl_h:
5508   case Intrinsic::loongarch_lsx_vsrl_w:
5509   case Intrinsic::loongarch_lsx_vsrl_d:
5510   case Intrinsic::loongarch_lasx_xvsrl_b:
5511   case Intrinsic::loongarch_lasx_xvsrl_h:
5512   case Intrinsic::loongarch_lasx_xvsrl_w:
5513   case Intrinsic::loongarch_lasx_xvsrl_d:
5514     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5515                        truncateVecElts(N, DAG));
5516   case Intrinsic::loongarch_lsx_vsrli_b:
5517   case Intrinsic::loongarch_lasx_xvsrli_b:
5518     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5519                        lowerVectorSplatImm<3>(N, 2, DAG));
5520   case Intrinsic::loongarch_lsx_vsrli_h:
5521   case Intrinsic::loongarch_lasx_xvsrli_h:
5522     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5523                        lowerVectorSplatImm<4>(N, 2, DAG));
5524   case Intrinsic::loongarch_lsx_vsrli_w:
5525   case Intrinsic::loongarch_lasx_xvsrli_w:
5526     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5527                        lowerVectorSplatImm<5>(N, 2, DAG));
5528   case Intrinsic::loongarch_lsx_vsrli_d:
5529   case Intrinsic::loongarch_lasx_xvsrli_d:
5530     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5531                        lowerVectorSplatImm<6>(N, 2, DAG));
5532   case Intrinsic::loongarch_lsx_vsra_b:
5533   case Intrinsic::loongarch_lsx_vsra_h:
5534   case Intrinsic::loongarch_lsx_vsra_w:
5535   case Intrinsic::loongarch_lsx_vsra_d:
5536   case Intrinsic::loongarch_lasx_xvsra_b:
5537   case Intrinsic::loongarch_lasx_xvsra_h:
5538   case Intrinsic::loongarch_lasx_xvsra_w:
5539   case Intrinsic::loongarch_lasx_xvsra_d:
5540     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5541                        truncateVecElts(N, DAG));
5542   case Intrinsic::loongarch_lsx_vsrai_b:
5543   case Intrinsic::loongarch_lasx_xvsrai_b:
5544     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5545                        lowerVectorSplatImm<3>(N, 2, DAG));
5546   case Intrinsic::loongarch_lsx_vsrai_h:
5547   case Intrinsic::loongarch_lasx_xvsrai_h:
5548     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5549                        lowerVectorSplatImm<4>(N, 2, DAG));
5550   case Intrinsic::loongarch_lsx_vsrai_w:
5551   case Intrinsic::loongarch_lasx_xvsrai_w:
5552     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5553                        lowerVectorSplatImm<5>(N, 2, DAG));
5554   case Intrinsic::loongarch_lsx_vsrai_d:
5555   case Intrinsic::loongarch_lasx_xvsrai_d:
5556     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5557                        lowerVectorSplatImm<6>(N, 2, DAG));
5558   case Intrinsic::loongarch_lsx_vclz_b:
5559   case Intrinsic::loongarch_lsx_vclz_h:
5560   case Intrinsic::loongarch_lsx_vclz_w:
5561   case Intrinsic::loongarch_lsx_vclz_d:
5562   case Intrinsic::loongarch_lasx_xvclz_b:
5563   case Intrinsic::loongarch_lasx_xvclz_h:
5564   case Intrinsic::loongarch_lasx_xvclz_w:
5565   case Intrinsic::loongarch_lasx_xvclz_d:
5566     return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
5567   case Intrinsic::loongarch_lsx_vpcnt_b:
5568   case Intrinsic::loongarch_lsx_vpcnt_h:
5569   case Intrinsic::loongarch_lsx_vpcnt_w:
5570   case Intrinsic::loongarch_lsx_vpcnt_d:
5571   case Intrinsic::loongarch_lasx_xvpcnt_b:
5572   case Intrinsic::loongarch_lasx_xvpcnt_h:
5573   case Intrinsic::loongarch_lasx_xvpcnt_w:
5574   case Intrinsic::loongarch_lasx_xvpcnt_d:
5575     return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
5576   case Intrinsic::loongarch_lsx_vbitclr_b:
5577   case Intrinsic::loongarch_lsx_vbitclr_h:
5578   case Intrinsic::loongarch_lsx_vbitclr_w:
5579   case Intrinsic::loongarch_lsx_vbitclr_d:
5580   case Intrinsic::loongarch_lasx_xvbitclr_b:
5581   case Intrinsic::loongarch_lasx_xvbitclr_h:
5582   case Intrinsic::loongarch_lasx_xvbitclr_w:
5583   case Intrinsic::loongarch_lasx_xvbitclr_d:
5584     return lowerVectorBitClear(N, DAG);
5585   case Intrinsic::loongarch_lsx_vbitclri_b:
5586   case Intrinsic::loongarch_lasx_xvbitclri_b:
5587     return lowerVectorBitClearImm<3>(N, DAG);
5588   case Intrinsic::loongarch_lsx_vbitclri_h:
5589   case Intrinsic::loongarch_lasx_xvbitclri_h:
5590     return lowerVectorBitClearImm<4>(N, DAG);
5591   case Intrinsic::loongarch_lsx_vbitclri_w:
5592   case Intrinsic::loongarch_lasx_xvbitclri_w:
5593     return lowerVectorBitClearImm<5>(N, DAG);
5594   case Intrinsic::loongarch_lsx_vbitclri_d:
5595   case Intrinsic::loongarch_lasx_xvbitclri_d:
5596     return lowerVectorBitClearImm<6>(N, DAG);
5597   case Intrinsic::loongarch_lsx_vbitset_b:
5598   case Intrinsic::loongarch_lsx_vbitset_h:
5599   case Intrinsic::loongarch_lsx_vbitset_w:
5600   case Intrinsic::loongarch_lsx_vbitset_d:
5601   case Intrinsic::loongarch_lasx_xvbitset_b:
5602   case Intrinsic::loongarch_lasx_xvbitset_h:
5603   case Intrinsic::loongarch_lasx_xvbitset_w:
5604   case Intrinsic::loongarch_lasx_xvbitset_d: {
5605     EVT VecTy = N->getValueType(0);
5606     SDValue One = DAG.getConstant(1, DL, VecTy);
5607     return DAG.getNode(
5608         ISD::OR, DL, VecTy, N->getOperand(1),
5609         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5610   }
5611   case Intrinsic::loongarch_lsx_vbitseti_b:
5612   case Intrinsic::loongarch_lasx_xvbitseti_b:
5613     return lowerVectorBitSetImm<3>(N, DAG);
5614   case Intrinsic::loongarch_lsx_vbitseti_h:
5615   case Intrinsic::loongarch_lasx_xvbitseti_h:
5616     return lowerVectorBitSetImm<4>(N, DAG);
5617   case Intrinsic::loongarch_lsx_vbitseti_w:
5618   case Intrinsic::loongarch_lasx_xvbitseti_w:
5619     return lowerVectorBitSetImm<5>(N, DAG);
5620   case Intrinsic::loongarch_lsx_vbitseti_d:
5621   case Intrinsic::loongarch_lasx_xvbitseti_d:
5622     return lowerVectorBitSetImm<6>(N, DAG);
5623   case Intrinsic::loongarch_lsx_vbitrev_b:
5624   case Intrinsic::loongarch_lsx_vbitrev_h:
5625   case Intrinsic::loongarch_lsx_vbitrev_w:
5626   case Intrinsic::loongarch_lsx_vbitrev_d:
5627   case Intrinsic::loongarch_lasx_xvbitrev_b:
5628   case Intrinsic::loongarch_lasx_xvbitrev_h:
5629   case Intrinsic::loongarch_lasx_xvbitrev_w:
5630   case Intrinsic::loongarch_lasx_xvbitrev_d: {
5631     EVT VecTy = N->getValueType(0);
5632     SDValue One = DAG.getConstant(1, DL, VecTy);
5633     return DAG.getNode(
5634         ISD::XOR, DL, VecTy, N->getOperand(1),
5635         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5636   }
5637   case Intrinsic::loongarch_lsx_vbitrevi_b:
5638   case Intrinsic::loongarch_lasx_xvbitrevi_b:
5639     return lowerVectorBitRevImm<3>(N, DAG);
5640   case Intrinsic::loongarch_lsx_vbitrevi_h:
5641   case Intrinsic::loongarch_lasx_xvbitrevi_h:
5642     return lowerVectorBitRevImm<4>(N, DAG);
5643   case Intrinsic::loongarch_lsx_vbitrevi_w:
5644   case Intrinsic::loongarch_lasx_xvbitrevi_w:
5645     return lowerVectorBitRevImm<5>(N, DAG);
5646   case Intrinsic::loongarch_lsx_vbitrevi_d:
5647   case Intrinsic::loongarch_lasx_xvbitrevi_d:
5648     return lowerVectorBitRevImm<6>(N, DAG);
5649   case Intrinsic::loongarch_lsx_vfadd_s:
5650   case Intrinsic::loongarch_lsx_vfadd_d:
5651   case Intrinsic::loongarch_lasx_xvfadd_s:
5652   case Intrinsic::loongarch_lasx_xvfadd_d:
5653     return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
5654                        N->getOperand(2));
5655   case Intrinsic::loongarch_lsx_vfsub_s:
5656   case Intrinsic::loongarch_lsx_vfsub_d:
5657   case Intrinsic::loongarch_lasx_xvfsub_s:
5658   case Intrinsic::loongarch_lasx_xvfsub_d:
5659     return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
5660                        N->getOperand(2));
5661   case Intrinsic::loongarch_lsx_vfmul_s:
5662   case Intrinsic::loongarch_lsx_vfmul_d:
5663   case Intrinsic::loongarch_lasx_xvfmul_s:
5664   case Intrinsic::loongarch_lasx_xvfmul_d:
5665     return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
5666                        N->getOperand(2));
5667   case Intrinsic::loongarch_lsx_vfdiv_s:
5668   case Intrinsic::loongarch_lsx_vfdiv_d:
5669   case Intrinsic::loongarch_lasx_xvfdiv_s:
5670   case Intrinsic::loongarch_lasx_xvfdiv_d:
5671     return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
5672                        N->getOperand(2));
5673   case Intrinsic::loongarch_lsx_vfmadd_s:
5674   case Intrinsic::loongarch_lsx_vfmadd_d:
5675   case Intrinsic::loongarch_lasx_xvfmadd_s:
5676   case Intrinsic::loongarch_lasx_xvfmadd_d:
5677     return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
5678                        N->getOperand(2), N->getOperand(3));
5679   case Intrinsic::loongarch_lsx_vinsgr2vr_b:
5680     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5681                        N->getOperand(1), N->getOperand(2),
5682                        legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
5683   case Intrinsic::loongarch_lsx_vinsgr2vr_h:
5684   case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
5685     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5686                        N->getOperand(1), N->getOperand(2),
5687                        legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
5688   case Intrinsic::loongarch_lsx_vinsgr2vr_w:
5689   case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
5690     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5691                        N->getOperand(1), N->getOperand(2),
5692                        legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
5693   case Intrinsic::loongarch_lsx_vinsgr2vr_d:
5694     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5695                        N->getOperand(1), N->getOperand(2),
5696                        legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
5697   case Intrinsic::loongarch_lsx_vreplgr2vr_b:
5698   case Intrinsic::loongarch_lsx_vreplgr2vr_h:
5699   case Intrinsic::loongarch_lsx_vreplgr2vr_w:
5700   case Intrinsic::loongarch_lsx_vreplgr2vr_d:
5701   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
5702   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
5703   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
5704   case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
5705     return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
5706                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5707                                    N->getOperand(1)));
5708   case Intrinsic::loongarch_lsx_vreplve_b:
5709   case Intrinsic::loongarch_lsx_vreplve_h:
5710   case Intrinsic::loongarch_lsx_vreplve_w:
5711   case Intrinsic::loongarch_lsx_vreplve_d:
5712   case Intrinsic::loongarch_lasx_xvreplve_b:
5713   case Intrinsic::loongarch_lasx_xvreplve_h:
5714   case Intrinsic::loongarch_lasx_xvreplve_w:
5715   case Intrinsic::loongarch_lasx_xvreplve_d:
5716     return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
5717                        N->getOperand(1),
5718                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5719                                    N->getOperand(2)));
5720   }
5721   return SDValue();
5722 }
5723 
performMOVGR2FR_WCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5724 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
5725                                         TargetLowering::DAGCombinerInfo &DCI,
5726                                         const LoongArchSubtarget &Subtarget) {
5727   // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
5728   // conversion is unnecessary and can be replaced with the
5729   // MOVFR2GR_S_LA64 operand.
5730   SDValue Op0 = N->getOperand(0);
5731   if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
5732     return Op0.getOperand(0);
5733   return SDValue();
5734 }
5735 
performMOVFR2GR_SCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5736 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
5737                                         TargetLowering::DAGCombinerInfo &DCI,
5738                                         const LoongArchSubtarget &Subtarget) {
5739   // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
5740   // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
5741   // operand.
5742   SDValue Op0 = N->getOperand(0);
5743   if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
5744     assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
5745            "Unexpected value type!");
5746     return Op0.getOperand(0);
5747   }
5748   return SDValue();
5749 }
5750 
performVMSKLTZCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5751 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
5752                                      TargetLowering::DAGCombinerInfo &DCI,
5753                                      const LoongArchSubtarget &Subtarget) {
5754   MVT VT = N->getSimpleValueType(0);
5755   unsigned NumBits = VT.getScalarSizeInBits();
5756 
5757   // Simplify the inputs.
5758   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5759   APInt DemandedMask(APInt::getAllOnes(NumBits));
5760   if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
5761     return SDValue(N, 0);
5762 
5763   return SDValue();
5764 }
5765 
5766 static SDValue
performSPLIT_PAIR_F64Combine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5767 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
5768                              TargetLowering::DAGCombinerInfo &DCI,
5769                              const LoongArchSubtarget &Subtarget) {
5770   SDValue Op0 = N->getOperand(0);
5771   SDLoc DL(N);
5772 
5773   // If the input to SplitPairF64 is just BuildPairF64 then the operation is
5774   // redundant. Instead, use BuildPairF64's operands directly.
5775   if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
5776     return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
5777 
5778   if (Op0->isUndef()) {
5779     SDValue Lo = DAG.getUNDEF(MVT::i32);
5780     SDValue Hi = DAG.getUNDEF(MVT::i32);
5781     return DCI.CombineTo(N, Lo, Hi);
5782   }
5783 
5784   // It's cheaper to materialise two 32-bit integers than to load a double
5785   // from the constant pool and transfer it to integer registers through the
5786   // stack.
5787   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
5788     APInt V = C->getValueAPF().bitcastToAPInt();
5789     SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
5790     SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
5791     return DCI.CombineTo(N, Lo, Hi);
5792   }
5793 
5794   return SDValue();
5795 }
5796 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const5797 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
5798                                                    DAGCombinerInfo &DCI) const {
5799   SelectionDAG &DAG = DCI.DAG;
5800   switch (N->getOpcode()) {
5801   default:
5802     break;
5803   case ISD::AND:
5804     return performANDCombine(N, DAG, DCI, Subtarget);
5805   case ISD::OR:
5806     return performORCombine(N, DAG, DCI, Subtarget);
5807   case ISD::SETCC:
5808     return performSETCCCombine(N, DAG, DCI, Subtarget);
5809   case ISD::SRL:
5810     return performSRLCombine(N, DAG, DCI, Subtarget);
5811   case ISD::BITCAST:
5812     return performBITCASTCombine(N, DAG, DCI, Subtarget);
5813   case LoongArchISD::BITREV_W:
5814     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
5815   case ISD::INTRINSIC_WO_CHAIN:
5816     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
5817   case LoongArchISD::MOVGR2FR_W_LA64:
5818     return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
5819   case LoongArchISD::MOVFR2GR_S_LA64:
5820     return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
5821   case LoongArchISD::VMSKLTZ:
5822   case LoongArchISD::XVMSKLTZ:
5823     return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
5824   case LoongArchISD::SPLIT_PAIR_F64:
5825     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
5826   }
5827   return SDValue();
5828 }
5829 
insertDivByZeroTrap(MachineInstr & MI,MachineBasicBlock * MBB)5830 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
5831                                               MachineBasicBlock *MBB) {
5832   if (!ZeroDivCheck)
5833     return MBB;
5834 
5835   // Build instructions:
5836   // MBB:
5837   //   div(or mod)   $dst, $dividend, $divisor
5838   //   bne           $divisor, $zero, SinkMBB
5839   // BreakMBB:
5840   //   break         7 // BRK_DIVZERO
5841   // SinkMBB:
5842   //   fallthrough
5843   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
5844   MachineFunction::iterator It = ++MBB->getIterator();
5845   MachineFunction *MF = MBB->getParent();
5846   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5847   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5848   MF->insert(It, BreakMBB);
5849   MF->insert(It, SinkMBB);
5850 
5851   // Transfer the remainder of MBB and its successor edges to SinkMBB.
5852   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
5853   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
5854 
5855   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
5856   DebugLoc DL = MI.getDebugLoc();
5857   MachineOperand &Divisor = MI.getOperand(2);
5858   Register DivisorReg = Divisor.getReg();
5859 
5860   // MBB:
5861   BuildMI(MBB, DL, TII.get(LoongArch::BNE))
5862       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
5863       .addReg(LoongArch::R0)
5864       .addMBB(SinkMBB);
5865   MBB->addSuccessor(BreakMBB);
5866   MBB->addSuccessor(SinkMBB);
5867 
5868   // BreakMBB:
5869   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
5870   // definition of BRK_DIVZERO.
5871   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
5872   BreakMBB->addSuccessor(SinkMBB);
5873 
5874   // Clear Divisor's kill flag.
5875   Divisor.setIsKill(false);
5876 
5877   return SinkMBB;
5878 }
5879 
5880 static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)5881 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
5882                         const LoongArchSubtarget &Subtarget) {
5883   unsigned CondOpc;
5884   switch (MI.getOpcode()) {
5885   default:
5886     llvm_unreachable("Unexpected opcode");
5887   case LoongArch::PseudoVBZ:
5888     CondOpc = LoongArch::VSETEQZ_V;
5889     break;
5890   case LoongArch::PseudoVBZ_B:
5891     CondOpc = LoongArch::VSETANYEQZ_B;
5892     break;
5893   case LoongArch::PseudoVBZ_H:
5894     CondOpc = LoongArch::VSETANYEQZ_H;
5895     break;
5896   case LoongArch::PseudoVBZ_W:
5897     CondOpc = LoongArch::VSETANYEQZ_W;
5898     break;
5899   case LoongArch::PseudoVBZ_D:
5900     CondOpc = LoongArch::VSETANYEQZ_D;
5901     break;
5902   case LoongArch::PseudoVBNZ:
5903     CondOpc = LoongArch::VSETNEZ_V;
5904     break;
5905   case LoongArch::PseudoVBNZ_B:
5906     CondOpc = LoongArch::VSETALLNEZ_B;
5907     break;
5908   case LoongArch::PseudoVBNZ_H:
5909     CondOpc = LoongArch::VSETALLNEZ_H;
5910     break;
5911   case LoongArch::PseudoVBNZ_W:
5912     CondOpc = LoongArch::VSETALLNEZ_W;
5913     break;
5914   case LoongArch::PseudoVBNZ_D:
5915     CondOpc = LoongArch::VSETALLNEZ_D;
5916     break;
5917   case LoongArch::PseudoXVBZ:
5918     CondOpc = LoongArch::XVSETEQZ_V;
5919     break;
5920   case LoongArch::PseudoXVBZ_B:
5921     CondOpc = LoongArch::XVSETANYEQZ_B;
5922     break;
5923   case LoongArch::PseudoXVBZ_H:
5924     CondOpc = LoongArch::XVSETANYEQZ_H;
5925     break;
5926   case LoongArch::PseudoXVBZ_W:
5927     CondOpc = LoongArch::XVSETANYEQZ_W;
5928     break;
5929   case LoongArch::PseudoXVBZ_D:
5930     CondOpc = LoongArch::XVSETANYEQZ_D;
5931     break;
5932   case LoongArch::PseudoXVBNZ:
5933     CondOpc = LoongArch::XVSETNEZ_V;
5934     break;
5935   case LoongArch::PseudoXVBNZ_B:
5936     CondOpc = LoongArch::XVSETALLNEZ_B;
5937     break;
5938   case LoongArch::PseudoXVBNZ_H:
5939     CondOpc = LoongArch::XVSETALLNEZ_H;
5940     break;
5941   case LoongArch::PseudoXVBNZ_W:
5942     CondOpc = LoongArch::XVSETALLNEZ_W;
5943     break;
5944   case LoongArch::PseudoXVBNZ_D:
5945     CondOpc = LoongArch::XVSETALLNEZ_D;
5946     break;
5947   }
5948 
5949   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
5950   const BasicBlock *LLVM_BB = BB->getBasicBlock();
5951   DebugLoc DL = MI.getDebugLoc();
5952   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5953   MachineFunction::iterator It = ++BB->getIterator();
5954 
5955   MachineFunction *F = BB->getParent();
5956   MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
5957   MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
5958   MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
5959 
5960   F->insert(It, FalseBB);
5961   F->insert(It, TrueBB);
5962   F->insert(It, SinkBB);
5963 
5964   // Transfer the remainder of MBB and its successor edges to Sink.
5965   SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
5966   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
5967 
5968   // Insert the real instruction to BB.
5969   Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
5970   BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
5971 
5972   // Insert branch.
5973   BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
5974   BB->addSuccessor(FalseBB);
5975   BB->addSuccessor(TrueBB);
5976 
5977   // FalseBB.
5978   Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
5979   BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
5980       .addReg(LoongArch::R0)
5981       .addImm(0);
5982   BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
5983   FalseBB->addSuccessor(SinkBB);
5984 
5985   // TrueBB.
5986   Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
5987   BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
5988       .addReg(LoongArch::R0)
5989       .addImm(1);
5990   TrueBB->addSuccessor(SinkBB);
5991 
5992   // SinkBB: merge the results.
5993   BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
5994           MI.getOperand(0).getReg())
5995       .addReg(RD1)
5996       .addMBB(FalseBB)
5997       .addReg(RD2)
5998       .addMBB(TrueBB);
5999 
6000   // The pseudo instruction is gone now.
6001   MI.eraseFromParent();
6002   return SinkBB;
6003 }
6004 
6005 static MachineBasicBlock *
emitPseudoXVINSGR2VR(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6006 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6007                      const LoongArchSubtarget &Subtarget) {
6008   unsigned InsOp;
6009   unsigned HalfSize;
6010   switch (MI.getOpcode()) {
6011   default:
6012     llvm_unreachable("Unexpected opcode");
6013   case LoongArch::PseudoXVINSGR2VR_B:
6014     HalfSize = 16;
6015     InsOp = LoongArch::VINSGR2VR_B;
6016     break;
6017   case LoongArch::PseudoXVINSGR2VR_H:
6018     HalfSize = 8;
6019     InsOp = LoongArch::VINSGR2VR_H;
6020     break;
6021   }
6022   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6023   const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6024   const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6025   DebugLoc DL = MI.getDebugLoc();
6026   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6027   // XDst = vector_insert XSrc, Elt, Idx
6028   Register XDst = MI.getOperand(0).getReg();
6029   Register XSrc = MI.getOperand(1).getReg();
6030   Register Elt = MI.getOperand(2).getReg();
6031   unsigned Idx = MI.getOperand(3).getImm();
6032 
6033   Register ScratchReg1 = XSrc;
6034   if (Idx >= HalfSize) {
6035     ScratchReg1 = MRI.createVirtualRegister(RC);
6036     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
6037         .addReg(XSrc)
6038         .addReg(XSrc)
6039         .addImm(1);
6040   }
6041 
6042   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6043   Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6044   BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6045       .addReg(ScratchReg1, 0, LoongArch::sub_128);
6046   BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
6047       .addReg(ScratchSubReg1)
6048       .addReg(Elt)
6049       .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
6050 
6051   Register ScratchReg2 = XDst;
6052   if (Idx >= HalfSize)
6053     ScratchReg2 = MRI.createVirtualRegister(RC);
6054 
6055   BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
6056       .addImm(0)
6057       .addReg(ScratchSubReg2)
6058       .addImm(LoongArch::sub_128);
6059 
6060   if (Idx >= HalfSize)
6061     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
6062         .addReg(XSrc)
6063         .addReg(ScratchReg2)
6064         .addImm(2);
6065 
6066   MI.eraseFromParent();
6067   return BB;
6068 }
6069 
emitPseudoCTPOP(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6070 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
6071                                           MachineBasicBlock *BB,
6072                                           const LoongArchSubtarget &Subtarget) {
6073   assert(Subtarget.hasExtLSX());
6074   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6075   const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6076   DebugLoc DL = MI.getDebugLoc();
6077   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6078   Register Dst = MI.getOperand(0).getReg();
6079   Register Src = MI.getOperand(1).getReg();
6080   Register ScratchReg1 = MRI.createVirtualRegister(RC);
6081   Register ScratchReg2 = MRI.createVirtualRegister(RC);
6082   Register ScratchReg3 = MRI.createVirtualRegister(RC);
6083 
6084   BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6085   BuildMI(*BB, MI, DL,
6086           TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6087                                        : LoongArch::VINSGR2VR_W),
6088           ScratchReg2)
6089       .addReg(ScratchReg1)
6090       .addReg(Src)
6091       .addImm(0);
6092   BuildMI(
6093       *BB, MI, DL,
6094       TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6095       ScratchReg3)
6096       .addReg(ScratchReg2);
6097   BuildMI(*BB, MI, DL,
6098           TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6099                                        : LoongArch::VPICKVE2GR_W),
6100           Dst)
6101       .addReg(ScratchReg3)
6102       .addImm(0);
6103 
6104   MI.eraseFromParent();
6105   return BB;
6106 }
6107 
6108 static MachineBasicBlock *
emitPseudoVMSKCOND(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6109 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6110                    const LoongArchSubtarget &Subtarget) {
6111   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6112   const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6113   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6114   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6115   Register Dst = MI.getOperand(0).getReg();
6116   Register Src = MI.getOperand(1).getReg();
6117   DebugLoc DL = MI.getDebugLoc();
6118   unsigned EleBits = 8;
6119   unsigned NotOpc = 0;
6120   unsigned MskOpc;
6121 
6122   switch (MI.getOpcode()) {
6123   default:
6124     llvm_unreachable("Unexpected opcode");
6125   case LoongArch::PseudoVMSKLTZ_B:
6126     MskOpc = LoongArch::VMSKLTZ_B;
6127     break;
6128   case LoongArch::PseudoVMSKLTZ_H:
6129     MskOpc = LoongArch::VMSKLTZ_H;
6130     EleBits = 16;
6131     break;
6132   case LoongArch::PseudoVMSKLTZ_W:
6133     MskOpc = LoongArch::VMSKLTZ_W;
6134     EleBits = 32;
6135     break;
6136   case LoongArch::PseudoVMSKLTZ_D:
6137     MskOpc = LoongArch::VMSKLTZ_D;
6138     EleBits = 64;
6139     break;
6140   case LoongArch::PseudoVMSKGEZ_B:
6141     MskOpc = LoongArch::VMSKGEZ_B;
6142     break;
6143   case LoongArch::PseudoVMSKEQZ_B:
6144     MskOpc = LoongArch::VMSKNZ_B;
6145     NotOpc = LoongArch::VNOR_V;
6146     break;
6147   case LoongArch::PseudoVMSKNEZ_B:
6148     MskOpc = LoongArch::VMSKNZ_B;
6149     break;
6150   case LoongArch::PseudoXVMSKLTZ_B:
6151     MskOpc = LoongArch::XVMSKLTZ_B;
6152     RC = &LoongArch::LASX256RegClass;
6153     break;
6154   case LoongArch::PseudoXVMSKLTZ_H:
6155     MskOpc = LoongArch::XVMSKLTZ_H;
6156     RC = &LoongArch::LASX256RegClass;
6157     EleBits = 16;
6158     break;
6159   case LoongArch::PseudoXVMSKLTZ_W:
6160     MskOpc = LoongArch::XVMSKLTZ_W;
6161     RC = &LoongArch::LASX256RegClass;
6162     EleBits = 32;
6163     break;
6164   case LoongArch::PseudoXVMSKLTZ_D:
6165     MskOpc = LoongArch::XVMSKLTZ_D;
6166     RC = &LoongArch::LASX256RegClass;
6167     EleBits = 64;
6168     break;
6169   case LoongArch::PseudoXVMSKGEZ_B:
6170     MskOpc = LoongArch::XVMSKGEZ_B;
6171     RC = &LoongArch::LASX256RegClass;
6172     break;
6173   case LoongArch::PseudoXVMSKEQZ_B:
6174     MskOpc = LoongArch::XVMSKNZ_B;
6175     NotOpc = LoongArch::XVNOR_V;
6176     RC = &LoongArch::LASX256RegClass;
6177     break;
6178   case LoongArch::PseudoXVMSKNEZ_B:
6179     MskOpc = LoongArch::XVMSKNZ_B;
6180     RC = &LoongArch::LASX256RegClass;
6181     break;
6182   }
6183 
6184   Register Msk = MRI.createVirtualRegister(RC);
6185   if (NotOpc) {
6186     Register Tmp = MRI.createVirtualRegister(RC);
6187     BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6188     BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6189         .addReg(Tmp, RegState::Kill)
6190         .addReg(Tmp, RegState::Kill);
6191   } else {
6192     BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6193   }
6194 
6195   if (TRI->getRegSizeInBits(*RC) > 128) {
6196     Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6197     Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6198     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6199         .addReg(Msk)
6200         .addImm(0);
6201     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6202         .addReg(Msk, RegState::Kill)
6203         .addImm(4);
6204     BuildMI(*BB, MI, DL,
6205             TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6206                                          : LoongArch::BSTRINS_W),
6207             Dst)
6208         .addReg(Lo, RegState::Kill)
6209         .addReg(Hi, RegState::Kill)
6210         .addImm(256 / EleBits - 1)
6211         .addImm(128 / EleBits);
6212   } else {
6213     BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6214         .addReg(Msk, RegState::Kill)
6215         .addImm(0);
6216   }
6217 
6218   MI.eraseFromParent();
6219   return BB;
6220 }
6221 
6222 static MachineBasicBlock *
emitSplitPairF64Pseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6223 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6224                        const LoongArchSubtarget &Subtarget) {
6225   assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6226          "Unexpected instruction");
6227 
6228   MachineFunction &MF = *BB->getParent();
6229   DebugLoc DL = MI.getDebugLoc();
6230   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6231   Register LoReg = MI.getOperand(0).getReg();
6232   Register HiReg = MI.getOperand(1).getReg();
6233   Register SrcReg = MI.getOperand(2).getReg();
6234 
6235   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6236   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6237       .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6238   MI.eraseFromParent(); // The pseudo instruction is gone now.
6239   return BB;
6240 }
6241 
6242 static MachineBasicBlock *
emitBuildPairF64Pseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6243 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6244                        const LoongArchSubtarget &Subtarget) {
6245   assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6246          "Unexpected instruction");
6247 
6248   MachineFunction &MF = *BB->getParent();
6249   DebugLoc DL = MI.getDebugLoc();
6250   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6251   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6252   Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6253   Register DstReg = MI.getOperand(0).getReg();
6254   Register LoReg = MI.getOperand(1).getReg();
6255   Register HiReg = MI.getOperand(2).getReg();
6256 
6257   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6258       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6259   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6260       .addReg(TmpReg, RegState::Kill)
6261       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6262   MI.eraseFromParent(); // The pseudo instruction is gone now.
6263   return BB;
6264 }
6265 
isSelectPseudo(MachineInstr & MI)6266 static bool isSelectPseudo(MachineInstr &MI) {
6267   switch (MI.getOpcode()) {
6268   default:
6269     return false;
6270   case LoongArch::Select_GPR_Using_CC_GPR:
6271     return true;
6272   }
6273 }
6274 
6275 static MachineBasicBlock *
emitSelectPseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6276 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6277                  const LoongArchSubtarget &Subtarget) {
6278   // To "insert" Select_* instructions, we actually have to insert the triangle
6279   // control-flow pattern.  The incoming instructions know the destination vreg
6280   // to set, the condition code register to branch on, the true/false values to
6281   // select between, and the condcode to use to select the appropriate branch.
6282   //
6283   // We produce the following control flow:
6284   //     HeadMBB
6285   //     |  \
6286   //     |  IfFalseMBB
6287   //     | /
6288   //    TailMBB
6289   //
6290   // When we find a sequence of selects we attempt to optimize their emission
6291   // by sharing the control flow. Currently we only handle cases where we have
6292   // multiple selects with the exact same condition (same LHS, RHS and CC).
6293   // The selects may be interleaved with other instructions if the other
6294   // instructions meet some requirements we deem safe:
6295   // - They are not pseudo instructions.
6296   // - They are debug instructions. Otherwise,
6297   // - They do not have side-effects, do not access memory and their inputs do
6298   //   not depend on the results of the select pseudo-instructions.
6299   // The TrueV/FalseV operands of the selects cannot depend on the result of
6300   // previous selects in the sequence.
6301   // These conditions could be further relaxed. See the X86 target for a
6302   // related approach and more information.
6303 
6304   Register LHS = MI.getOperand(1).getReg();
6305   Register RHS;
6306   if (MI.getOperand(2).isReg())
6307     RHS = MI.getOperand(2).getReg();
6308   auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6309 
6310   SmallVector<MachineInstr *, 4> SelectDebugValues;
6311   SmallSet<Register, 4> SelectDests;
6312   SelectDests.insert(MI.getOperand(0).getReg());
6313 
6314   MachineInstr *LastSelectPseudo = &MI;
6315   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6316        SequenceMBBI != E; ++SequenceMBBI) {
6317     if (SequenceMBBI->isDebugInstr())
6318       continue;
6319     if (isSelectPseudo(*SequenceMBBI)) {
6320       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6321           !SequenceMBBI->getOperand(2).isReg() ||
6322           SequenceMBBI->getOperand(2).getReg() != RHS ||
6323           SequenceMBBI->getOperand(3).getImm() != CC ||
6324           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6325           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6326         break;
6327       LastSelectPseudo = &*SequenceMBBI;
6328       SequenceMBBI->collectDebugValues(SelectDebugValues);
6329       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6330       continue;
6331     }
6332     if (SequenceMBBI->hasUnmodeledSideEffects() ||
6333         SequenceMBBI->mayLoadOrStore() ||
6334         SequenceMBBI->usesCustomInsertionHook())
6335       break;
6336     if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6337           return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6338         }))
6339       break;
6340   }
6341 
6342   const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6343   const BasicBlock *LLVM_BB = BB->getBasicBlock();
6344   DebugLoc DL = MI.getDebugLoc();
6345   MachineFunction::iterator I = ++BB->getIterator();
6346 
6347   MachineBasicBlock *HeadMBB = BB;
6348   MachineFunction *F = BB->getParent();
6349   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6350   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6351 
6352   F->insert(I, IfFalseMBB);
6353   F->insert(I, TailMBB);
6354 
6355   // Set the call frame size on entry to the new basic blocks.
6356   unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6357   IfFalseMBB->setCallFrameSize(CallFrameSize);
6358   TailMBB->setCallFrameSize(CallFrameSize);
6359 
6360   // Transfer debug instructions associated with the selects to TailMBB.
6361   for (MachineInstr *DebugInstr : SelectDebugValues) {
6362     TailMBB->push_back(DebugInstr->removeFromParent());
6363   }
6364 
6365   // Move all instructions after the sequence to TailMBB.
6366   TailMBB->splice(TailMBB->end(), HeadMBB,
6367                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6368   // Update machine-CFG edges by transferring all successors of the current
6369   // block to the new block which will contain the Phi nodes for the selects.
6370   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6371   // Set the successors for HeadMBB.
6372   HeadMBB->addSuccessor(IfFalseMBB);
6373   HeadMBB->addSuccessor(TailMBB);
6374 
6375   // Insert appropriate branch.
6376   if (MI.getOperand(2).isImm())
6377     BuildMI(HeadMBB, DL, TII.get(CC))
6378         .addReg(LHS)
6379         .addImm(MI.getOperand(2).getImm())
6380         .addMBB(TailMBB);
6381   else
6382     BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6383 
6384   // IfFalseMBB just falls through to TailMBB.
6385   IfFalseMBB->addSuccessor(TailMBB);
6386 
6387   // Create PHIs for all of the select pseudo-instructions.
6388   auto SelectMBBI = MI.getIterator();
6389   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6390   auto InsertionPoint = TailMBB->begin();
6391   while (SelectMBBI != SelectEnd) {
6392     auto Next = std::next(SelectMBBI);
6393     if (isSelectPseudo(*SelectMBBI)) {
6394       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6395       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6396               TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6397           .addReg(SelectMBBI->getOperand(4).getReg())
6398           .addMBB(HeadMBB)
6399           .addReg(SelectMBBI->getOperand(5).getReg())
6400           .addMBB(IfFalseMBB);
6401       SelectMBBI->eraseFromParent();
6402     }
6403     SelectMBBI = Next;
6404   }
6405 
6406   F->getProperties().resetNoPHIs();
6407   return TailMBB;
6408 }
6409 
EmitInstrWithCustomInserter(MachineInstr & MI,MachineBasicBlock * BB) const6410 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6411     MachineInstr &MI, MachineBasicBlock *BB) const {
6412   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6413   DebugLoc DL = MI.getDebugLoc();
6414 
6415   switch (MI.getOpcode()) {
6416   default:
6417     llvm_unreachable("Unexpected instr type to insert");
6418   case LoongArch::DIV_W:
6419   case LoongArch::DIV_WU:
6420   case LoongArch::MOD_W:
6421   case LoongArch::MOD_WU:
6422   case LoongArch::DIV_D:
6423   case LoongArch::DIV_DU:
6424   case LoongArch::MOD_D:
6425   case LoongArch::MOD_DU:
6426     return insertDivByZeroTrap(MI, BB);
6427     break;
6428   case LoongArch::WRFCSR: {
6429     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6430             LoongArch::FCSR0 + MI.getOperand(0).getImm())
6431         .addReg(MI.getOperand(1).getReg());
6432     MI.eraseFromParent();
6433     return BB;
6434   }
6435   case LoongArch::RDFCSR: {
6436     MachineInstr *ReadFCSR =
6437         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6438                 MI.getOperand(0).getReg())
6439             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6440     ReadFCSR->getOperand(1).setIsUndef();
6441     MI.eraseFromParent();
6442     return BB;
6443   }
6444   case LoongArch::Select_GPR_Using_CC_GPR:
6445     return emitSelectPseudo(MI, BB, Subtarget);
6446   case LoongArch::BuildPairF64Pseudo:
6447     return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6448   case LoongArch::SplitPairF64Pseudo:
6449     return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6450   case LoongArch::PseudoVBZ:
6451   case LoongArch::PseudoVBZ_B:
6452   case LoongArch::PseudoVBZ_H:
6453   case LoongArch::PseudoVBZ_W:
6454   case LoongArch::PseudoVBZ_D:
6455   case LoongArch::PseudoVBNZ:
6456   case LoongArch::PseudoVBNZ_B:
6457   case LoongArch::PseudoVBNZ_H:
6458   case LoongArch::PseudoVBNZ_W:
6459   case LoongArch::PseudoVBNZ_D:
6460   case LoongArch::PseudoXVBZ:
6461   case LoongArch::PseudoXVBZ_B:
6462   case LoongArch::PseudoXVBZ_H:
6463   case LoongArch::PseudoXVBZ_W:
6464   case LoongArch::PseudoXVBZ_D:
6465   case LoongArch::PseudoXVBNZ:
6466   case LoongArch::PseudoXVBNZ_B:
6467   case LoongArch::PseudoXVBNZ_H:
6468   case LoongArch::PseudoXVBNZ_W:
6469   case LoongArch::PseudoXVBNZ_D:
6470     return emitVecCondBranchPseudo(MI, BB, Subtarget);
6471   case LoongArch::PseudoXVINSGR2VR_B:
6472   case LoongArch::PseudoXVINSGR2VR_H:
6473     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6474   case LoongArch::PseudoCTPOP:
6475     return emitPseudoCTPOP(MI, BB, Subtarget);
6476   case LoongArch::PseudoVMSKLTZ_B:
6477   case LoongArch::PseudoVMSKLTZ_H:
6478   case LoongArch::PseudoVMSKLTZ_W:
6479   case LoongArch::PseudoVMSKLTZ_D:
6480   case LoongArch::PseudoVMSKGEZ_B:
6481   case LoongArch::PseudoVMSKEQZ_B:
6482   case LoongArch::PseudoVMSKNEZ_B:
6483   case LoongArch::PseudoXVMSKLTZ_B:
6484   case LoongArch::PseudoXVMSKLTZ_H:
6485   case LoongArch::PseudoXVMSKLTZ_W:
6486   case LoongArch::PseudoXVMSKLTZ_D:
6487   case LoongArch::PseudoXVMSKGEZ_B:
6488   case LoongArch::PseudoXVMSKEQZ_B:
6489   case LoongArch::PseudoXVMSKNEZ_B:
6490     return emitPseudoVMSKCOND(MI, BB, Subtarget);
6491   case TargetOpcode::STATEPOINT:
6492     // STATEPOINT is a pseudo instruction which has no implicit defs/uses
6493     // while bl call instruction (where statepoint will be lowered at the
6494     // end) has implicit def. This def is early-clobber as it will be set at
6495     // the moment of the call and earlier than any use is read.
6496     // Add this implicit dead def here as a workaround.
6497     MI.addOperand(*MI.getMF(),
6498                   MachineOperand::CreateReg(
6499                       LoongArch::R1, /*isDef*/ true,
6500                       /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
6501                       /*isUndef*/ false, /*isEarlyClobber*/ true));
6502     if (!Subtarget.is64Bit())
6503       report_fatal_error("STATEPOINT is only supported on 64-bit targets");
6504     return emitPatchPoint(MI, BB);
6505   }
6506 }
6507 
allowsMisalignedMemoryAccesses(EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const6508 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
6509     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6510     unsigned *Fast) const {
6511   if (!Subtarget.hasUAL())
6512     return false;
6513 
6514   // TODO: set reasonable speed number.
6515   if (Fast)
6516     *Fast = 1;
6517   return true;
6518 }
6519 
getTargetNodeName(unsigned Opcode) const6520 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
6521   switch ((LoongArchISD::NodeType)Opcode) {
6522   case LoongArchISD::FIRST_NUMBER:
6523     break;
6524 
6525 #define NODE_NAME_CASE(node)                                                   \
6526   case LoongArchISD::node:                                                     \
6527     return "LoongArchISD::" #node;
6528 
6529     // TODO: Add more target-dependent nodes later.
6530     NODE_NAME_CASE(CALL)
6531     NODE_NAME_CASE(CALL_MEDIUM)
6532     NODE_NAME_CASE(CALL_LARGE)
6533     NODE_NAME_CASE(RET)
6534     NODE_NAME_CASE(TAIL)
6535     NODE_NAME_CASE(TAIL_MEDIUM)
6536     NODE_NAME_CASE(TAIL_LARGE)
6537     NODE_NAME_CASE(SELECT_CC)
6538     NODE_NAME_CASE(SLL_W)
6539     NODE_NAME_CASE(SRA_W)
6540     NODE_NAME_CASE(SRL_W)
6541     NODE_NAME_CASE(BSTRINS)
6542     NODE_NAME_CASE(BSTRPICK)
6543     NODE_NAME_CASE(MOVGR2FR_W_LA64)
6544     NODE_NAME_CASE(MOVFR2GR_S_LA64)
6545     NODE_NAME_CASE(FTINT)
6546     NODE_NAME_CASE(BUILD_PAIR_F64)
6547     NODE_NAME_CASE(SPLIT_PAIR_F64)
6548     NODE_NAME_CASE(REVB_2H)
6549     NODE_NAME_CASE(REVB_2W)
6550     NODE_NAME_CASE(BITREV_4B)
6551     NODE_NAME_CASE(BITREV_8B)
6552     NODE_NAME_CASE(BITREV_W)
6553     NODE_NAME_CASE(ROTR_W)
6554     NODE_NAME_CASE(ROTL_W)
6555     NODE_NAME_CASE(DIV_W)
6556     NODE_NAME_CASE(DIV_WU)
6557     NODE_NAME_CASE(MOD_W)
6558     NODE_NAME_CASE(MOD_WU)
6559     NODE_NAME_CASE(CLZ_W)
6560     NODE_NAME_CASE(CTZ_W)
6561     NODE_NAME_CASE(DBAR)
6562     NODE_NAME_CASE(IBAR)
6563     NODE_NAME_CASE(BREAK)
6564     NODE_NAME_CASE(SYSCALL)
6565     NODE_NAME_CASE(CRC_W_B_W)
6566     NODE_NAME_CASE(CRC_W_H_W)
6567     NODE_NAME_CASE(CRC_W_W_W)
6568     NODE_NAME_CASE(CRC_W_D_W)
6569     NODE_NAME_CASE(CRCC_W_B_W)
6570     NODE_NAME_CASE(CRCC_W_H_W)
6571     NODE_NAME_CASE(CRCC_W_W_W)
6572     NODE_NAME_CASE(CRCC_W_D_W)
6573     NODE_NAME_CASE(CSRRD)
6574     NODE_NAME_CASE(CSRWR)
6575     NODE_NAME_CASE(CSRXCHG)
6576     NODE_NAME_CASE(IOCSRRD_B)
6577     NODE_NAME_CASE(IOCSRRD_H)
6578     NODE_NAME_CASE(IOCSRRD_W)
6579     NODE_NAME_CASE(IOCSRRD_D)
6580     NODE_NAME_CASE(IOCSRWR_B)
6581     NODE_NAME_CASE(IOCSRWR_H)
6582     NODE_NAME_CASE(IOCSRWR_W)
6583     NODE_NAME_CASE(IOCSRWR_D)
6584     NODE_NAME_CASE(CPUCFG)
6585     NODE_NAME_CASE(MOVGR2FCSR)
6586     NODE_NAME_CASE(MOVFCSR2GR)
6587     NODE_NAME_CASE(CACOP_D)
6588     NODE_NAME_CASE(CACOP_W)
6589     NODE_NAME_CASE(VSHUF)
6590     NODE_NAME_CASE(VPICKEV)
6591     NODE_NAME_CASE(VPICKOD)
6592     NODE_NAME_CASE(VPACKEV)
6593     NODE_NAME_CASE(VPACKOD)
6594     NODE_NAME_CASE(VILVL)
6595     NODE_NAME_CASE(VILVH)
6596     NODE_NAME_CASE(VSHUF4I)
6597     NODE_NAME_CASE(VREPLVEI)
6598     NODE_NAME_CASE(VREPLGR2VR)
6599     NODE_NAME_CASE(XVPERMI)
6600     NODE_NAME_CASE(VPICK_SEXT_ELT)
6601     NODE_NAME_CASE(VPICK_ZEXT_ELT)
6602     NODE_NAME_CASE(VREPLVE)
6603     NODE_NAME_CASE(VALL_ZERO)
6604     NODE_NAME_CASE(VANY_ZERO)
6605     NODE_NAME_CASE(VALL_NONZERO)
6606     NODE_NAME_CASE(VANY_NONZERO)
6607     NODE_NAME_CASE(FRECIPE)
6608     NODE_NAME_CASE(FRSQRTE)
6609     NODE_NAME_CASE(VSLLI)
6610     NODE_NAME_CASE(VSRLI)
6611     NODE_NAME_CASE(VBSLL)
6612     NODE_NAME_CASE(VBSRL)
6613     NODE_NAME_CASE(VLDREPL)
6614     NODE_NAME_CASE(VMSKLTZ)
6615     NODE_NAME_CASE(VMSKGEZ)
6616     NODE_NAME_CASE(VMSKEQZ)
6617     NODE_NAME_CASE(VMSKNEZ)
6618     NODE_NAME_CASE(XVMSKLTZ)
6619     NODE_NAME_CASE(XVMSKGEZ)
6620     NODE_NAME_CASE(XVMSKEQZ)
6621     NODE_NAME_CASE(XVMSKNEZ)
6622   }
6623 #undef NODE_NAME_CASE
6624   return nullptr;
6625 }
6626 
6627 //===----------------------------------------------------------------------===//
6628 //                     Calling Convention Implementation
6629 //===----------------------------------------------------------------------===//
6630 
6631 // Eight general-purpose registers a0-a7 used for passing integer arguments,
6632 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
6633 // fixed-point arguments, and floating-point arguments when no FPR is available
6634 // or with soft float ABI.
6635 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
6636                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
6637                              LoongArch::R10, LoongArch::R11};
6638 // Eight floating-point registers fa0-fa7 used for passing floating-point
6639 // arguments, and fa0-fa1 are also used to return values.
6640 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
6641                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
6642                                LoongArch::F6, LoongArch::F7};
6643 // FPR32 and FPR64 alias each other.
6644 const MCPhysReg ArgFPR64s[] = {
6645     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
6646     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
6647 
6648 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
6649                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
6650                             LoongArch::VR6, LoongArch::VR7};
6651 
6652 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
6653                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
6654                             LoongArch::XR6, LoongArch::XR7};
6655 
6656 // Pass a 2*GRLen argument that has been split into two GRLen values through
6657 // registers or the stack as necessary.
CC_LoongArchAssign2GRLen(unsigned GRLen,CCState & State,CCValAssign VA1,ISD::ArgFlagsTy ArgFlags1,unsigned ValNo2,MVT ValVT2,MVT LocVT2,ISD::ArgFlagsTy ArgFlags2)6658 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
6659                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
6660                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
6661                                      ISD::ArgFlagsTy ArgFlags2) {
6662   unsigned GRLenInBytes = GRLen / 8;
6663   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6664     // At least one half can be passed via register.
6665     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
6666                                      VA1.getLocVT(), CCValAssign::Full));
6667   } else {
6668     // Both halves must be passed on the stack, with proper alignment.
6669     Align StackAlign =
6670         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
6671     State.addLoc(
6672         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
6673                             State.AllocateStack(GRLenInBytes, StackAlign),
6674                             VA1.getLocVT(), CCValAssign::Full));
6675     State.addLoc(CCValAssign::getMem(
6676         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6677         LocVT2, CCValAssign::Full));
6678     return false;
6679   }
6680   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6681     // The second half can also be passed via register.
6682     State.addLoc(
6683         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
6684   } else {
6685     // The second half is passed via the stack, without additional alignment.
6686     State.addLoc(CCValAssign::getMem(
6687         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6688         LocVT2, CCValAssign::Full));
6689   }
6690   return false;
6691 }
6692 
6693 // Implements the LoongArch calling convention. Returns true upon failure.
CC_LoongArch(const DataLayout & DL,LoongArchABI::ABI ABI,unsigned ValNo,MVT ValVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State,bool IsFixed,bool IsRet,Type * OrigTy)6694 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
6695                          unsigned ValNo, MVT ValVT,
6696                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6697                          CCState &State, bool IsFixed, bool IsRet,
6698                          Type *OrigTy) {
6699   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
6700   assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
6701   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
6702   MVT LocVT = ValVT;
6703 
6704   // Any return value split into more than two values can't be returned
6705   // directly.
6706   if (IsRet && ValNo > 1)
6707     return true;
6708 
6709   // If passing a variadic argument, or if no FPR is available.
6710   bool UseGPRForFloat = true;
6711 
6712   switch (ABI) {
6713   default:
6714     llvm_unreachable("Unexpected ABI");
6715     break;
6716   case LoongArchABI::ABI_ILP32F:
6717   case LoongArchABI::ABI_LP64F:
6718   case LoongArchABI::ABI_ILP32D:
6719   case LoongArchABI::ABI_LP64D:
6720     UseGPRForFloat = !IsFixed;
6721     break;
6722   case LoongArchABI::ABI_ILP32S:
6723   case LoongArchABI::ABI_LP64S:
6724     break;
6725   }
6726 
6727   // If this is a variadic argument, the LoongArch calling convention requires
6728   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
6729   // byte alignment. An aligned register should be used regardless of whether
6730   // the original argument was split during legalisation or not. The argument
6731   // will not be passed by registers if the original type is larger than
6732   // 2*GRLen, so the register alignment rule does not apply.
6733   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
6734   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
6735       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
6736     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
6737     // Skip 'odd' register if necessary.
6738     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
6739       State.AllocateReg(ArgGPRs);
6740   }
6741 
6742   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
6743   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
6744       State.getPendingArgFlags();
6745 
6746   assert(PendingLocs.size() == PendingArgFlags.size() &&
6747          "PendingLocs and PendingArgFlags out of sync");
6748 
6749   // FPR32 and FPR64 alias each other.
6750   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
6751     UseGPRForFloat = true;
6752 
6753   if (UseGPRForFloat && ValVT == MVT::f32) {
6754     LocVT = GRLenVT;
6755     LocInfo = CCValAssign::BCvt;
6756   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
6757     LocVT = MVT::i64;
6758     LocInfo = CCValAssign::BCvt;
6759   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
6760     // Handle passing f64 on LA32D with a soft float ABI or when floating point
6761     // registers are exhausted.
6762     assert(PendingLocs.empty() && "Can't lower f64 if it is split");
6763     // Depending on available argument GPRS, f64 may be passed in a pair of
6764     // GPRs, split between a GPR and the stack, or passed completely on the
6765     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
6766     // cases.
6767     MCRegister Reg = State.AllocateReg(ArgGPRs);
6768     if (!Reg) {
6769       int64_t StackOffset = State.AllocateStack(8, Align(8));
6770       State.addLoc(
6771           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6772       return false;
6773     }
6774     LocVT = MVT::i32;
6775     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6776     MCRegister HiReg = State.AllocateReg(ArgGPRs);
6777     if (HiReg) {
6778       State.addLoc(
6779           CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
6780     } else {
6781       int64_t StackOffset = State.AllocateStack(4, Align(4));
6782       State.addLoc(
6783           CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6784     }
6785     return false;
6786   }
6787 
6788   // Split arguments might be passed indirectly, so keep track of the pending
6789   // values.
6790   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
6791     LocVT = GRLenVT;
6792     LocInfo = CCValAssign::Indirect;
6793     PendingLocs.push_back(
6794         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
6795     PendingArgFlags.push_back(ArgFlags);
6796     if (!ArgFlags.isSplitEnd()) {
6797       return false;
6798     }
6799   }
6800 
6801   // If the split argument only had two elements, it should be passed directly
6802   // in registers or on the stack.
6803   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
6804       PendingLocs.size() <= 2) {
6805     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
6806     // Apply the normal calling convention rules to the first half of the
6807     // split argument.
6808     CCValAssign VA = PendingLocs[0];
6809     ISD::ArgFlagsTy AF = PendingArgFlags[0];
6810     PendingLocs.clear();
6811     PendingArgFlags.clear();
6812     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
6813                                     ArgFlags);
6814   }
6815 
6816   // Allocate to a register if possible, or else a stack slot.
6817   Register Reg;
6818   unsigned StoreSizeBytes = GRLen / 8;
6819   Align StackAlign = Align(GRLen / 8);
6820 
6821   if (ValVT == MVT::f32 && !UseGPRForFloat)
6822     Reg = State.AllocateReg(ArgFPR32s);
6823   else if (ValVT == MVT::f64 && !UseGPRForFloat)
6824     Reg = State.AllocateReg(ArgFPR64s);
6825   else if (ValVT.is128BitVector())
6826     Reg = State.AllocateReg(ArgVRs);
6827   else if (ValVT.is256BitVector())
6828     Reg = State.AllocateReg(ArgXRs);
6829   else
6830     Reg = State.AllocateReg(ArgGPRs);
6831 
6832   unsigned StackOffset =
6833       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
6834 
6835   // If we reach this point and PendingLocs is non-empty, we must be at the
6836   // end of a split argument that must be passed indirectly.
6837   if (!PendingLocs.empty()) {
6838     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
6839     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
6840     for (auto &It : PendingLocs) {
6841       if (Reg)
6842         It.convertToReg(Reg);
6843       else
6844         It.convertToMem(StackOffset);
6845       State.addLoc(It);
6846     }
6847     PendingLocs.clear();
6848     PendingArgFlags.clear();
6849     return false;
6850   }
6851   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
6852          "Expected an GRLenVT at this stage");
6853 
6854   if (Reg) {
6855     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6856     return false;
6857   }
6858 
6859   // When a floating-point value is passed on the stack, no bit-cast is needed.
6860   if (ValVT.isFloatingPoint()) {
6861     LocVT = ValVT;
6862     LocInfo = CCValAssign::Full;
6863   }
6864 
6865   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6866   return false;
6867 }
6868 
analyzeInputArgs(MachineFunction & MF,CCState & CCInfo,const SmallVectorImpl<ISD::InputArg> & Ins,bool IsRet,LoongArchCCAssignFn Fn) const6869 void LoongArchTargetLowering::analyzeInputArgs(
6870     MachineFunction &MF, CCState &CCInfo,
6871     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
6872     LoongArchCCAssignFn Fn) const {
6873   FunctionType *FType = MF.getFunction().getFunctionType();
6874   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6875     MVT ArgVT = Ins[i].VT;
6876     Type *ArgTy = nullptr;
6877     if (IsRet)
6878       ArgTy = FType->getReturnType();
6879     else if (Ins[i].isOrigArg())
6880       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
6881     LoongArchABI::ABI ABI =
6882         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6883     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
6884            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
6885       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
6886                         << '\n');
6887       llvm_unreachable("");
6888     }
6889   }
6890 }
6891 
analyzeOutputArgs(MachineFunction & MF,CCState & CCInfo,const SmallVectorImpl<ISD::OutputArg> & Outs,bool IsRet,CallLoweringInfo * CLI,LoongArchCCAssignFn Fn) const6892 void LoongArchTargetLowering::analyzeOutputArgs(
6893     MachineFunction &MF, CCState &CCInfo,
6894     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
6895     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
6896   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6897     MVT ArgVT = Outs[i].VT;
6898     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
6899     LoongArchABI::ABI ABI =
6900         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6901     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
6902            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
6903       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
6904                         << "\n");
6905       llvm_unreachable("");
6906     }
6907   }
6908 }
6909 
6910 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
6911 // values.
convertLocVTToValVT(SelectionDAG & DAG,SDValue Val,const CCValAssign & VA,const SDLoc & DL)6912 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
6913                                    const CCValAssign &VA, const SDLoc &DL) {
6914   switch (VA.getLocInfo()) {
6915   default:
6916     llvm_unreachable("Unexpected CCValAssign::LocInfo");
6917   case CCValAssign::Full:
6918   case CCValAssign::Indirect:
6919     break;
6920   case CCValAssign::BCvt:
6921     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6922       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
6923     else
6924       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
6925     break;
6926   }
6927   return Val;
6928 }
6929 
unpackFromRegLoc(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const SDLoc & DL,const ISD::InputArg & In,const LoongArchTargetLowering & TLI)6930 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
6931                                 const CCValAssign &VA, const SDLoc &DL,
6932                                 const ISD::InputArg &In,
6933                                 const LoongArchTargetLowering &TLI) {
6934   MachineFunction &MF = DAG.getMachineFunction();
6935   MachineRegisterInfo &RegInfo = MF.getRegInfo();
6936   EVT LocVT = VA.getLocVT();
6937   SDValue Val;
6938   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
6939   Register VReg = RegInfo.createVirtualRegister(RC);
6940   RegInfo.addLiveIn(VA.getLocReg(), VReg);
6941   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
6942 
6943   // If input is sign extended from 32 bits, note it for the OptW pass.
6944   if (In.isOrigArg()) {
6945     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
6946     if (OrigArg->getType()->isIntegerTy()) {
6947       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
6948       // An input zero extended from i31 can also be considered sign extended.
6949       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
6950           (BitWidth < 32 && In.Flags.isZExt())) {
6951         LoongArchMachineFunctionInfo *LAFI =
6952             MF.getInfo<LoongArchMachineFunctionInfo>();
6953         LAFI->addSExt32Register(VReg);
6954       }
6955     }
6956   }
6957 
6958   return convertLocVTToValVT(DAG, Val, VA, DL);
6959 }
6960 
6961 // The caller is responsible for loading the full value if the argument is
6962 // passed with CCValAssign::Indirect.
unpackFromMemLoc(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const SDLoc & DL)6963 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
6964                                 const CCValAssign &VA, const SDLoc &DL) {
6965   MachineFunction &MF = DAG.getMachineFunction();
6966   MachineFrameInfo &MFI = MF.getFrameInfo();
6967   EVT ValVT = VA.getValVT();
6968   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
6969                                  /*IsImmutable=*/true);
6970   SDValue FIN = DAG.getFrameIndex(
6971       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
6972 
6973   ISD::LoadExtType ExtType;
6974   switch (VA.getLocInfo()) {
6975   default:
6976     llvm_unreachable("Unexpected CCValAssign::LocInfo");
6977   case CCValAssign::Full:
6978   case CCValAssign::Indirect:
6979   case CCValAssign::BCvt:
6980     ExtType = ISD::NON_EXTLOAD;
6981     break;
6982   }
6983   return DAG.getExtLoad(
6984       ExtType, DL, VA.getLocVT(), Chain, FIN,
6985       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
6986 }
6987 
unpackF64OnLA32DSoftABI(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const CCValAssign & HiVA,const SDLoc & DL)6988 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
6989                                        const CCValAssign &VA,
6990                                        const CCValAssign &HiVA,
6991                                        const SDLoc &DL) {
6992   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
6993          "Unexpected VA");
6994   MachineFunction &MF = DAG.getMachineFunction();
6995   MachineFrameInfo &MFI = MF.getFrameInfo();
6996   MachineRegisterInfo &RegInfo = MF.getRegInfo();
6997 
6998   assert(VA.isRegLoc() && "Expected register VA assignment");
6999 
7000   Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7001   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7002   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7003   SDValue Hi;
7004   if (HiVA.isMemLoc()) {
7005     // Second half of f64 is passed on the stack.
7006     int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7007                                    /*IsImmutable=*/true);
7008     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7009     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7010                      MachinePointerInfo::getFixedStack(MF, FI));
7011   } else {
7012     // Second half of f64 is passed in another GPR.
7013     Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7014     RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7015     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7016   }
7017   return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7018 }
7019 
convertValVTToLocVT(SelectionDAG & DAG,SDValue Val,const CCValAssign & VA,const SDLoc & DL)7020 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7021                                    const CCValAssign &VA, const SDLoc &DL) {
7022   EVT LocVT = VA.getLocVT();
7023 
7024   switch (VA.getLocInfo()) {
7025   default:
7026     llvm_unreachable("Unexpected CCValAssign::LocInfo");
7027   case CCValAssign::Full:
7028     break;
7029   case CCValAssign::BCvt:
7030     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7031       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7032     else
7033       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7034     break;
7035   }
7036   return Val;
7037 }
7038 
CC_LoongArch_GHC(unsigned ValNo,MVT ValVT,MVT LocVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State)7039 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7040                              CCValAssign::LocInfo LocInfo,
7041                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
7042   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7043     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7044     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
7045     static const MCPhysReg GPRList[] = {
7046         LoongArch::R23, LoongArch::R24, LoongArch::R25,
7047         LoongArch::R26, LoongArch::R27, LoongArch::R28,
7048         LoongArch::R29, LoongArch::R30, LoongArch::R31};
7049     if (MCRegister Reg = State.AllocateReg(GPRList)) {
7050       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7051       return false;
7052     }
7053   }
7054 
7055   if (LocVT == MVT::f32) {
7056     // Pass in STG registers: F1, F2, F3, F4
7057     //                        fs0,fs1,fs2,fs3
7058     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7059                                           LoongArch::F26, LoongArch::F27};
7060     if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7061       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7062       return false;
7063     }
7064   }
7065 
7066   if (LocVT == MVT::f64) {
7067     // Pass in STG registers: D1, D2, D3, D4
7068     //                        fs4,fs5,fs6,fs7
7069     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7070                                           LoongArch::F30_64, LoongArch::F31_64};
7071     if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7072       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7073       return false;
7074     }
7075   }
7076 
7077   report_fatal_error("No registers left in GHC calling convention");
7078   return true;
7079 }
7080 
7081 // Transform physical registers into virtual registers.
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const7082 SDValue LoongArchTargetLowering::LowerFormalArguments(
7083     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7084     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7085     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7086 
7087   MachineFunction &MF = DAG.getMachineFunction();
7088 
7089   switch (CallConv) {
7090   default:
7091     llvm_unreachable("Unsupported calling convention");
7092   case CallingConv::C:
7093   case CallingConv::Fast:
7094     break;
7095   case CallingConv::GHC:
7096     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7097         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7098       report_fatal_error(
7099           "GHC calling convention requires the F and D extensions");
7100   }
7101 
7102   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7103   MVT GRLenVT = Subtarget.getGRLenVT();
7104   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7105   // Used with varargs to acumulate store chains.
7106   std::vector<SDValue> OutChains;
7107 
7108   // Assign locations to all of the incoming arguments.
7109   SmallVector<CCValAssign> ArgLocs;
7110   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7111 
7112   if (CallConv == CallingConv::GHC)
7113     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7114   else
7115     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7116 
7117   for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7118     CCValAssign &VA = ArgLocs[i];
7119     SDValue ArgValue;
7120     // Passing f64 on LA32D with a soft float ABI must be handled as a special
7121     // case.
7122     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7123       assert(VA.needsCustom());
7124       ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7125     } else if (VA.isRegLoc())
7126       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7127     else
7128       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7129     if (VA.getLocInfo() == CCValAssign::Indirect) {
7130       // If the original argument was split and passed by reference, we need to
7131       // load all parts of it here (using the same address).
7132       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7133                                    MachinePointerInfo()));
7134       unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7135       unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7136       assert(ArgPartOffset == 0);
7137       while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7138         CCValAssign &PartVA = ArgLocs[i + 1];
7139         unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7140         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7141         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7142         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7143                                      MachinePointerInfo()));
7144         ++i;
7145         ++InsIdx;
7146       }
7147       continue;
7148     }
7149     InVals.push_back(ArgValue);
7150   }
7151 
7152   if (IsVarArg) {
7153     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7154     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7155     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7156     MachineFrameInfo &MFI = MF.getFrameInfo();
7157     MachineRegisterInfo &RegInfo = MF.getRegInfo();
7158     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7159 
7160     // Offset of the first variable argument from stack pointer, and size of
7161     // the vararg save area. For now, the varargs save area is either zero or
7162     // large enough to hold a0-a7.
7163     int VaArgOffset, VarArgsSaveSize;
7164 
7165     // If all registers are allocated, then all varargs must be passed on the
7166     // stack and we don't need to save any argregs.
7167     if (ArgRegs.size() == Idx) {
7168       VaArgOffset = CCInfo.getStackSize();
7169       VarArgsSaveSize = 0;
7170     } else {
7171       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7172       VaArgOffset = -VarArgsSaveSize;
7173     }
7174 
7175     // Record the frame index of the first variable argument
7176     // which is a value necessary to VASTART.
7177     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7178     LoongArchFI->setVarArgsFrameIndex(FI);
7179 
7180     // If saving an odd number of registers then create an extra stack slot to
7181     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7182     // offsets to even-numbered registered remain 2*GRLen-aligned.
7183     if (Idx % 2) {
7184       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7185                             true);
7186       VarArgsSaveSize += GRLenInBytes;
7187     }
7188 
7189     // Copy the integer registers that may have been used for passing varargs
7190     // to the vararg save area.
7191     for (unsigned I = Idx; I < ArgRegs.size();
7192          ++I, VaArgOffset += GRLenInBytes) {
7193       const Register Reg = RegInfo.createVirtualRegister(RC);
7194       RegInfo.addLiveIn(ArgRegs[I], Reg);
7195       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7196       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7197       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7198       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7199                                    MachinePointerInfo::getFixedStack(MF, FI));
7200       cast<StoreSDNode>(Store.getNode())
7201           ->getMemOperand()
7202           ->setValue((Value *)nullptr);
7203       OutChains.push_back(Store);
7204     }
7205     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7206   }
7207 
7208   // All stores are grouped in one node to allow the matching between
7209   // the size of Ins and InVals. This only happens for vararg functions.
7210   if (!OutChains.empty()) {
7211     OutChains.push_back(Chain);
7212     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7213   }
7214 
7215   return Chain;
7216 }
7217 
mayBeEmittedAsTailCall(const CallInst * CI) const7218 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7219   return CI->isTailCall();
7220 }
7221 
7222 // Check if the return value is used as only a return value, as otherwise
7223 // we can't perform a tail-call.
isUsedByReturnOnly(SDNode * N,SDValue & Chain) const7224 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7225                                                  SDValue &Chain) const {
7226   if (N->getNumValues() != 1)
7227     return false;
7228   if (!N->hasNUsesOfValue(1, 0))
7229     return false;
7230 
7231   SDNode *Copy = *N->user_begin();
7232   if (Copy->getOpcode() != ISD::CopyToReg)
7233     return false;
7234 
7235   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7236   // isn't safe to perform a tail call.
7237   if (Copy->getGluedNode())
7238     return false;
7239 
7240   // The copy must be used by a LoongArchISD::RET, and nothing else.
7241   bool HasRet = false;
7242   for (SDNode *Node : Copy->users()) {
7243     if (Node->getOpcode() != LoongArchISD::RET)
7244       return false;
7245     HasRet = true;
7246   }
7247 
7248   if (!HasRet)
7249     return false;
7250 
7251   Chain = Copy->getOperand(0);
7252   return true;
7253 }
7254 
7255 // Check whether the call is eligible for tail call optimization.
isEligibleForTailCallOptimization(CCState & CCInfo,CallLoweringInfo & CLI,MachineFunction & MF,const SmallVectorImpl<CCValAssign> & ArgLocs) const7256 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7257     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7258     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7259 
7260   auto CalleeCC = CLI.CallConv;
7261   auto &Outs = CLI.Outs;
7262   auto &Caller = MF.getFunction();
7263   auto CallerCC = Caller.getCallingConv();
7264 
7265   // Do not tail call opt if the stack is used to pass parameters.
7266   if (CCInfo.getStackSize() != 0)
7267     return false;
7268 
7269   // Do not tail call opt if any parameters need to be passed indirectly.
7270   for (auto &VA : ArgLocs)
7271     if (VA.getLocInfo() == CCValAssign::Indirect)
7272       return false;
7273 
7274   // Do not tail call opt if either caller or callee uses struct return
7275   // semantics.
7276   auto IsCallerStructRet = Caller.hasStructRetAttr();
7277   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7278   if (IsCallerStructRet || IsCalleeStructRet)
7279     return false;
7280 
7281   // Do not tail call opt if either the callee or caller has a byval argument.
7282   for (auto &Arg : Outs)
7283     if (Arg.Flags.isByVal())
7284       return false;
7285 
7286   // The callee has to preserve all registers the caller needs to preserve.
7287   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7288   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7289   if (CalleeCC != CallerCC) {
7290     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7291     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7292       return false;
7293   }
7294   return true;
7295 }
7296 
getPrefTypeAlign(EVT VT,SelectionDAG & DAG)7297 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7298   return DAG.getDataLayout().getPrefTypeAlign(
7299       VT.getTypeForEVT(*DAG.getContext()));
7300 }
7301 
7302 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7303 // and output parameter nodes.
7304 SDValue
LowerCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const7305 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7306                                    SmallVectorImpl<SDValue> &InVals) const {
7307   SelectionDAG &DAG = CLI.DAG;
7308   SDLoc &DL = CLI.DL;
7309   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7310   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7311   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7312   SDValue Chain = CLI.Chain;
7313   SDValue Callee = CLI.Callee;
7314   CallingConv::ID CallConv = CLI.CallConv;
7315   bool IsVarArg = CLI.IsVarArg;
7316   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7317   MVT GRLenVT = Subtarget.getGRLenVT();
7318   bool &IsTailCall = CLI.IsTailCall;
7319 
7320   MachineFunction &MF = DAG.getMachineFunction();
7321 
7322   // Analyze the operands of the call, assigning locations to each operand.
7323   SmallVector<CCValAssign> ArgLocs;
7324   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7325 
7326   if (CallConv == CallingConv::GHC)
7327     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7328   else
7329     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7330 
7331   // Check if it's really possible to do a tail call.
7332   if (IsTailCall)
7333     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7334 
7335   if (IsTailCall)
7336     ++NumTailCalls;
7337   else if (CLI.CB && CLI.CB->isMustTailCall())
7338     report_fatal_error("failed to perform tail call elimination on a call "
7339                        "site marked musttail");
7340 
7341   // Get a count of how many bytes are to be pushed on the stack.
7342   unsigned NumBytes = ArgCCInfo.getStackSize();
7343 
7344   // Create local copies for byval args.
7345   SmallVector<SDValue> ByValArgs;
7346   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7347     ISD::ArgFlagsTy Flags = Outs[i].Flags;
7348     if (!Flags.isByVal())
7349       continue;
7350 
7351     SDValue Arg = OutVals[i];
7352     unsigned Size = Flags.getByValSize();
7353     Align Alignment = Flags.getNonZeroByValAlign();
7354 
7355     int FI =
7356         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7357     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7358     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7359 
7360     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7361                           /*IsVolatile=*/false,
7362                           /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7363                           MachinePointerInfo(), MachinePointerInfo());
7364     ByValArgs.push_back(FIPtr);
7365   }
7366 
7367   if (!IsTailCall)
7368     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7369 
7370   // Copy argument values to their designated locations.
7371   SmallVector<std::pair<Register, SDValue>> RegsToPass;
7372   SmallVector<SDValue> MemOpChains;
7373   SDValue StackPtr;
7374   for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7375        ++i, ++OutIdx) {
7376     CCValAssign &VA = ArgLocs[i];
7377     SDValue ArgValue = OutVals[OutIdx];
7378     ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7379 
7380     // Handle passing f64 on LA32D with a soft float ABI as a special case.
7381     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7382       assert(VA.isRegLoc() && "Expected register VA assignment");
7383       assert(VA.needsCustom());
7384       SDValue SplitF64 =
7385           DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7386                       DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7387       SDValue Lo = SplitF64.getValue(0);
7388       SDValue Hi = SplitF64.getValue(1);
7389 
7390       Register RegLo = VA.getLocReg();
7391       RegsToPass.push_back(std::make_pair(RegLo, Lo));
7392 
7393       // Get the CCValAssign for the Hi part.
7394       CCValAssign &HiVA = ArgLocs[++i];
7395 
7396       if (HiVA.isMemLoc()) {
7397         // Second half of f64 is passed on the stack.
7398         if (!StackPtr.getNode())
7399           StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7400         SDValue Address =
7401             DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7402                         DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7403         // Emit the store.
7404         MemOpChains.push_back(DAG.getStore(
7405             Chain, DL, Hi, Address,
7406             MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
7407       } else {
7408         // Second half of f64 is passed in another GPR.
7409         Register RegHigh = HiVA.getLocReg();
7410         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7411       }
7412       continue;
7413     }
7414 
7415     // Promote the value if needed.
7416     // For now, only handle fully promoted and indirect arguments.
7417     if (VA.getLocInfo() == CCValAssign::Indirect) {
7418       // Store the argument in a stack slot and pass its address.
7419       Align StackAlign =
7420           std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7421                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
7422       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7423       // If the original argument was split and passed by reference, we need to
7424       // store the required parts of it here (and pass just one address).
7425       unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7426       unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7427       assert(ArgPartOffset == 0);
7428       // Calculate the total size to store. We don't have access to what we're
7429       // actually storing other than performing the loop and collecting the
7430       // info.
7431       SmallVector<std::pair<SDValue, SDValue>> Parts;
7432       while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7433         SDValue PartValue = OutVals[OutIdx + 1];
7434         unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7435         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7436         EVT PartVT = PartValue.getValueType();
7437 
7438         StoredSize += PartVT.getStoreSize();
7439         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7440         Parts.push_back(std::make_pair(PartValue, Offset));
7441         ++i;
7442         ++OutIdx;
7443       }
7444       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7445       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7446       MemOpChains.push_back(
7447           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7448                        MachinePointerInfo::getFixedStack(MF, FI)));
7449       for (const auto &Part : Parts) {
7450         SDValue PartValue = Part.first;
7451         SDValue PartOffset = Part.second;
7452         SDValue Address =
7453             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7454         MemOpChains.push_back(
7455             DAG.getStore(Chain, DL, PartValue, Address,
7456                          MachinePointerInfo::getFixedStack(MF, FI)));
7457       }
7458       ArgValue = SpillSlot;
7459     } else {
7460       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7461     }
7462 
7463     // Use local copy if it is a byval arg.
7464     if (Flags.isByVal())
7465       ArgValue = ByValArgs[j++];
7466 
7467     if (VA.isRegLoc()) {
7468       // Queue up the argument copies and emit them at the end.
7469       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7470     } else {
7471       assert(VA.isMemLoc() && "Argument not register or memory");
7472       assert(!IsTailCall && "Tail call not allowed if stack is used "
7473                             "for passing parameters");
7474 
7475       // Work out the address of the stack slot.
7476       if (!StackPtr.getNode())
7477         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7478       SDValue Address =
7479           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7480                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7481 
7482       // Emit the store.
7483       MemOpChains.push_back(
7484           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7485     }
7486   }
7487 
7488   // Join the stores, which are independent of one another.
7489   if (!MemOpChains.empty())
7490     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7491 
7492   SDValue Glue;
7493 
7494   // Build a sequence of copy-to-reg nodes, chained and glued together.
7495   for (auto &Reg : RegsToPass) {
7496     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
7497     Glue = Chain.getValue(1);
7498   }
7499 
7500   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
7501   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
7502   // split it and then direct call can be matched by PseudoCALL.
7503   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
7504     const GlobalValue *GV = S->getGlobal();
7505     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
7506                            ? LoongArchII::MO_CALL
7507                            : LoongArchII::MO_CALL_PLT;
7508     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
7509   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7510     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
7511                            ? LoongArchII::MO_CALL
7512                            : LoongArchII::MO_CALL_PLT;
7513     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
7514   }
7515 
7516   // The first call operand is the chain and the second is the target address.
7517   SmallVector<SDValue> Ops;
7518   Ops.push_back(Chain);
7519   Ops.push_back(Callee);
7520 
7521   // Add argument registers to the end of the list so that they are
7522   // known live into the call.
7523   for (auto &Reg : RegsToPass)
7524     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
7525 
7526   if (!IsTailCall) {
7527     // Add a register mask operand representing the call-preserved registers.
7528     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
7529     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
7530     assert(Mask && "Missing call preserved mask for calling convention");
7531     Ops.push_back(DAG.getRegisterMask(Mask));
7532   }
7533 
7534   // Glue the call to the argument copies, if any.
7535   if (Glue.getNode())
7536     Ops.push_back(Glue);
7537 
7538   // Emit the call.
7539   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7540   unsigned Op;
7541   switch (DAG.getTarget().getCodeModel()) {
7542   default:
7543     report_fatal_error("Unsupported code model");
7544   case CodeModel::Small:
7545     Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
7546     break;
7547   case CodeModel::Medium:
7548     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
7549     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
7550     break;
7551   case CodeModel::Large:
7552     assert(Subtarget.is64Bit() && "Large code model requires LA64");
7553     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
7554     break;
7555   }
7556 
7557   if (IsTailCall) {
7558     MF.getFrameInfo().setHasTailCall();
7559     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
7560     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
7561     return Ret;
7562   }
7563 
7564   Chain = DAG.getNode(Op, DL, NodeTys, Ops);
7565   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
7566   Glue = Chain.getValue(1);
7567 
7568   // Mark the end of the call, which is glued to the call itself.
7569   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
7570   Glue = Chain.getValue(1);
7571 
7572   // Assign locations to each value returned by this call.
7573   SmallVector<CCValAssign> RVLocs;
7574   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
7575   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
7576 
7577   // Copy all of the result registers out of their specified physreg.
7578   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
7579     auto &VA = RVLocs[i];
7580     // Copy the value out.
7581     SDValue RetValue =
7582         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
7583     // Glue the RetValue to the end of the call sequence.
7584     Chain = RetValue.getValue(1);
7585     Glue = RetValue.getValue(2);
7586 
7587     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7588       assert(VA.needsCustom());
7589       SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
7590                                              MVT::i32, Glue);
7591       Chain = RetValue2.getValue(1);
7592       Glue = RetValue2.getValue(2);
7593       RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
7594                              RetValue, RetValue2);
7595     } else
7596       RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
7597 
7598     InVals.push_back(RetValue);
7599   }
7600 
7601   return Chain;
7602 }
7603 
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const7604 bool LoongArchTargetLowering::CanLowerReturn(
7605     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7606     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
7607     const Type *RetTy) const {
7608   SmallVector<CCValAssign> RVLocs;
7609   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7610 
7611   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7612     LoongArchABI::ABI ABI =
7613         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7614     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
7615                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
7616                      nullptr))
7617       return false;
7618   }
7619   return true;
7620 }
7621 
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const7622 SDValue LoongArchTargetLowering::LowerReturn(
7623     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7624     const SmallVectorImpl<ISD::OutputArg> &Outs,
7625     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
7626     SelectionDAG &DAG) const {
7627   // Stores the assignment of the return value to a location.
7628   SmallVector<CCValAssign> RVLocs;
7629 
7630   // Info about the registers and stack slot.
7631   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
7632                  *DAG.getContext());
7633 
7634   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
7635                     nullptr, CC_LoongArch);
7636   if (CallConv == CallingConv::GHC && !RVLocs.empty())
7637     report_fatal_error("GHC functions return void only");
7638   SDValue Glue;
7639   SmallVector<SDValue, 4> RetOps(1, Chain);
7640 
7641   // Copy the result values into the output registers.
7642   for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
7643     SDValue Val = OutVals[OutIdx];
7644     CCValAssign &VA = RVLocs[i];
7645     assert(VA.isRegLoc() && "Can only return in registers!");
7646 
7647     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7648       // Handle returning f64 on LA32D with a soft float ABI.
7649       assert(VA.isRegLoc() && "Expected return via registers");
7650       assert(VA.needsCustom());
7651       SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7652                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
7653       SDValue Lo = SplitF64.getValue(0);
7654       SDValue Hi = SplitF64.getValue(1);
7655       Register RegLo = VA.getLocReg();
7656       Register RegHi = RVLocs[++i].getLocReg();
7657 
7658       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
7659       Glue = Chain.getValue(1);
7660       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
7661       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
7662       Glue = Chain.getValue(1);
7663       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
7664     } else {
7665       // Handle a 'normal' return.
7666       Val = convertValVTToLocVT(DAG, Val, VA, DL);
7667       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
7668 
7669       // Guarantee that all emitted copies are stuck together.
7670       Glue = Chain.getValue(1);
7671       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7672     }
7673   }
7674 
7675   RetOps[0] = Chain; // Update chain.
7676 
7677   // Add the glue node if we have it.
7678   if (Glue.getNode())
7679     RetOps.push_back(Glue);
7680 
7681   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
7682 }
7683 
isFPImmVLDILegal(const APFloat & Imm,EVT VT) const7684 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
7685                                                EVT VT) const {
7686   if (!Subtarget.hasExtLSX())
7687     return false;
7688 
7689   if (VT == MVT::f32) {
7690     uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
7691     return (masked == 0x3e000000 || masked == 0x40000000);
7692   }
7693 
7694   if (VT == MVT::f64) {
7695     uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
7696     return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
7697   }
7698 
7699   return false;
7700 }
7701 
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const7702 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7703                                            bool ForCodeSize) const {
7704   // TODO: Maybe need more checks here after vector extension is supported.
7705   if (VT == MVT::f32 && !Subtarget.hasBasicF())
7706     return false;
7707   if (VT == MVT::f64 && !Subtarget.hasBasicD())
7708     return false;
7709   return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
7710 }
7711 
isCheapToSpeculateCttz(Type *) const7712 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
7713   return true;
7714 }
7715 
isCheapToSpeculateCtlz(Type *) const7716 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
7717   return true;
7718 }
7719 
shouldInsertFencesForAtomic(const Instruction * I) const7720 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
7721     const Instruction *I) const {
7722   if (!Subtarget.is64Bit())
7723     return isa<LoadInst>(I) || isa<StoreInst>(I);
7724 
7725   if (isa<LoadInst>(I))
7726     return true;
7727 
7728   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
7729   // require fences beacuse we can use amswap_db.[w/d].
7730   Type *Ty = I->getOperand(0)->getType();
7731   if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
7732     unsigned Size = Ty->getIntegerBitWidth();
7733     return (Size == 8 || Size == 16);
7734   }
7735 
7736   return false;
7737 }
7738 
getSetCCResultType(const DataLayout & DL,LLVMContext & Context,EVT VT) const7739 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
7740                                                 LLVMContext &Context,
7741                                                 EVT VT) const {
7742   if (!VT.isVector())
7743     return getPointerTy(DL);
7744   return VT.changeVectorElementTypeToInteger();
7745 }
7746 
hasAndNot(SDValue Y) const7747 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
7748   // TODO: Support vectors.
7749   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
7750 }
7751 
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const7752 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7753                                                  const CallInst &I,
7754                                                  MachineFunction &MF,
7755                                                  unsigned Intrinsic) const {
7756   switch (Intrinsic) {
7757   default:
7758     return false;
7759   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
7760   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
7761   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
7762   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
7763     Info.opc = ISD::INTRINSIC_W_CHAIN;
7764     Info.memVT = MVT::i32;
7765     Info.ptrVal = I.getArgOperand(0);
7766     Info.offset = 0;
7767     Info.align = Align(4);
7768     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
7769                  MachineMemOperand::MOVolatile;
7770     return true;
7771     // TODO: Add more Intrinsics later.
7772   }
7773 }
7774 
7775 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
7776 // atomicrmw and/or/xor operations with operands less than 32 bits cannot be
7777 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
7778 // regression, we need to implement it manually.
emitExpandAtomicRMW(AtomicRMWInst * AI) const7779 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
7780   AtomicRMWInst::BinOp Op = AI->getOperation();
7781 
7782   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
7783           Op == AtomicRMWInst::And) &&
7784          "Unable to expand");
7785   unsigned MinWordSize = 4;
7786 
7787   IRBuilder<> Builder(AI);
7788   LLVMContext &Ctx = Builder.getContext();
7789   const DataLayout &DL = AI->getDataLayout();
7790   Type *ValueType = AI->getType();
7791   Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
7792 
7793   Value *Addr = AI->getPointerOperand();
7794   PointerType *PtrTy = cast<PointerType>(Addr->getType());
7795   IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
7796 
7797   Value *AlignedAddr = Builder.CreateIntrinsic(
7798       Intrinsic::ptrmask, {PtrTy, IntTy},
7799       {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
7800       "AlignedAddr");
7801 
7802   Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
7803   Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
7804   Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
7805   ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
7806   Value *Mask = Builder.CreateShl(
7807       ConstantInt::get(WordType,
7808                        (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
7809       ShiftAmt, "Mask");
7810   Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
7811   Value *ValOperand_Shifted =
7812       Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
7813                         ShiftAmt, "ValOperand_Shifted");
7814   Value *NewOperand;
7815   if (Op == AtomicRMWInst::And)
7816     NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
7817   else
7818     NewOperand = ValOperand_Shifted;
7819 
7820   AtomicRMWInst *NewAI =
7821       Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
7822                               AI->getOrdering(), AI->getSyncScopeID());
7823 
7824   Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
7825   Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
7826   Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
7827   AI->replaceAllUsesWith(FinalOldResult);
7828   AI->eraseFromParent();
7829 }
7830 
7831 TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const7832 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
7833   // TODO: Add more AtomicRMWInst that needs to be extended.
7834 
7835   // Since floating-point operation requires a non-trivial set of data
7836   // operations, use CmpXChg to expand.
7837   if (AI->isFloatingPointOperation() ||
7838       AI->getOperation() == AtomicRMWInst::UIncWrap ||
7839       AI->getOperation() == AtomicRMWInst::UDecWrap ||
7840       AI->getOperation() == AtomicRMWInst::USubCond ||
7841       AI->getOperation() == AtomicRMWInst::USubSat)
7842     return AtomicExpansionKind::CmpXChg;
7843 
7844   if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
7845       (AI->getOperation() == AtomicRMWInst::Xchg ||
7846        AI->getOperation() == AtomicRMWInst::Add ||
7847        AI->getOperation() == AtomicRMWInst::Sub)) {
7848     return AtomicExpansionKind::None;
7849   }
7850 
7851   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
7852   if (Subtarget.hasLAMCAS()) {
7853     if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
7854                       AI->getOperation() == AtomicRMWInst::Or ||
7855                       AI->getOperation() == AtomicRMWInst::Xor))
7856       return AtomicExpansionKind::Expand;
7857     if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
7858       return AtomicExpansionKind::CmpXChg;
7859   }
7860 
7861   if (Size == 8 || Size == 16)
7862     return AtomicExpansionKind::MaskedIntrinsic;
7863   return AtomicExpansionKind::None;
7864 }
7865 
7866 static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,AtomicRMWInst::BinOp BinOp)7867 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
7868                                     AtomicRMWInst::BinOp BinOp) {
7869   if (GRLen == 64) {
7870     switch (BinOp) {
7871     default:
7872       llvm_unreachable("Unexpected AtomicRMW BinOp");
7873     case AtomicRMWInst::Xchg:
7874       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
7875     case AtomicRMWInst::Add:
7876       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
7877     case AtomicRMWInst::Sub:
7878       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
7879     case AtomicRMWInst::Nand:
7880       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
7881     case AtomicRMWInst::UMax:
7882       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
7883     case AtomicRMWInst::UMin:
7884       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
7885     case AtomicRMWInst::Max:
7886       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
7887     case AtomicRMWInst::Min:
7888       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
7889       // TODO: support other AtomicRMWInst.
7890     }
7891   }
7892 
7893   if (GRLen == 32) {
7894     switch (BinOp) {
7895     default:
7896       llvm_unreachable("Unexpected AtomicRMW BinOp");
7897     case AtomicRMWInst::Xchg:
7898       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
7899     case AtomicRMWInst::Add:
7900       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
7901     case AtomicRMWInst::Sub:
7902       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
7903     case AtomicRMWInst::Nand:
7904       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
7905     case AtomicRMWInst::UMax:
7906       return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
7907     case AtomicRMWInst::UMin:
7908       return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
7909     case AtomicRMWInst::Max:
7910       return Intrinsic::loongarch_masked_atomicrmw_max_i32;
7911     case AtomicRMWInst::Min:
7912       return Intrinsic::loongarch_masked_atomicrmw_min_i32;
7913       // TODO: support other AtomicRMWInst.
7914     }
7915   }
7916 
7917   llvm_unreachable("Unexpected GRLen\n");
7918 }
7919 
7920 TargetLowering::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst * CI) const7921 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
7922     AtomicCmpXchgInst *CI) const {
7923 
7924   if (Subtarget.hasLAMCAS())
7925     return AtomicExpansionKind::None;
7926 
7927   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
7928   if (Size == 8 || Size == 16)
7929     return AtomicExpansionKind::MaskedIntrinsic;
7930   return AtomicExpansionKind::None;
7931 }
7932 
emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase & Builder,AtomicCmpXchgInst * CI,Value * AlignedAddr,Value * CmpVal,Value * NewVal,Value * Mask,AtomicOrdering Ord) const7933 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
7934     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
7935     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
7936   unsigned GRLen = Subtarget.getGRLen();
7937   AtomicOrdering FailOrd = CI->getFailureOrdering();
7938   Value *FailureOrdering =
7939       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
7940   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
7941   if (GRLen == 64) {
7942     CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
7943     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
7944     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
7945     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7946   }
7947   Type *Tys[] = {AlignedAddr->getType()};
7948   Value *Result = Builder.CreateIntrinsic(
7949       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
7950   if (GRLen == 64)
7951     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
7952   return Result;
7953 }
7954 
emitMaskedAtomicRMWIntrinsic(IRBuilderBase & Builder,AtomicRMWInst * AI,Value * AlignedAddr,Value * Incr,Value * Mask,Value * ShiftAmt,AtomicOrdering Ord) const7955 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
7956     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
7957     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
7958   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
7959   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
7960   // mask, as this produces better code than the LL/SC loop emitted by
7961   // int_loongarch_masked_atomicrmw_xchg.
7962   if (AI->getOperation() == AtomicRMWInst::Xchg &&
7963       isa<ConstantInt>(AI->getValOperand())) {
7964     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
7965     if (CVal->isZero())
7966       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
7967                                      Builder.CreateNot(Mask, "Inv_Mask"),
7968                                      AI->getAlign(), Ord);
7969     if (CVal->isMinusOne())
7970       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
7971                                      AI->getAlign(), Ord);
7972   }
7973 
7974   unsigned GRLen = Subtarget.getGRLen();
7975   Value *Ordering =
7976       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
7977   Type *Tys[] = {AlignedAddr->getType()};
7978   Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
7979       AI->getModule(),
7980       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
7981 
7982   if (GRLen == 64) {
7983     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
7984     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7985     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
7986   }
7987 
7988   Value *Result;
7989 
7990   // Must pass the shift amount needed to sign extend the loaded value prior
7991   // to performing a signed comparison for min/max. ShiftAmt is the number of
7992   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
7993   // is the number of bits to left+right shift the value in order to
7994   // sign-extend.
7995   if (AI->getOperation() == AtomicRMWInst::Min ||
7996       AI->getOperation() == AtomicRMWInst::Max) {
7997     const DataLayout &DL = AI->getDataLayout();
7998     unsigned ValWidth =
7999         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8000     Value *SextShamt =
8001         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8002     Result = Builder.CreateCall(LlwOpScwLoop,
8003                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8004   } else {
8005     Result =
8006         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8007   }
8008 
8009   if (GRLen == 64)
8010     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8011   return Result;
8012 }
8013 
isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT VT) const8014 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8015     const MachineFunction &MF, EVT VT) const {
8016   VT = VT.getScalarType();
8017 
8018   if (!VT.isSimple())
8019     return false;
8020 
8021   switch (VT.getSimpleVT().SimpleTy) {
8022   case MVT::f32:
8023   case MVT::f64:
8024     return true;
8025   default:
8026     break;
8027   }
8028 
8029   return false;
8030 }
8031 
getExceptionPointerRegister(const Constant * PersonalityFn) const8032 Register LoongArchTargetLowering::getExceptionPointerRegister(
8033     const Constant *PersonalityFn) const {
8034   return LoongArch::R4;
8035 }
8036 
getExceptionSelectorRegister(const Constant * PersonalityFn) const8037 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8038     const Constant *PersonalityFn) const {
8039   return LoongArch::R5;
8040 }
8041 
8042 //===----------------------------------------------------------------------===//
8043 // Target Optimization Hooks
8044 //===----------------------------------------------------------------------===//
8045 
getEstimateRefinementSteps(EVT VT,const LoongArchSubtarget & Subtarget)8046 static int getEstimateRefinementSteps(EVT VT,
8047                                       const LoongArchSubtarget &Subtarget) {
8048   // Feature FRECIPE instrucions relative accuracy is 2^-14.
8049   // IEEE float has 23 digits and double has 52 digits.
8050   int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8051   return RefinementSteps;
8052 }
8053 
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps,bool & UseOneConstNR,bool Reciprocal) const8054 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8055                                                  SelectionDAG &DAG, int Enabled,
8056                                                  int &RefinementSteps,
8057                                                  bool &UseOneConstNR,
8058                                                  bool Reciprocal) const {
8059   if (Subtarget.hasFrecipe()) {
8060     SDLoc DL(Operand);
8061     EVT VT = Operand.getValueType();
8062 
8063     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8064         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8065         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8066         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8067         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8068 
8069       if (RefinementSteps == ReciprocalEstimate::Unspecified)
8070         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8071 
8072       SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8073       if (Reciprocal)
8074         Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8075 
8076       return Estimate;
8077     }
8078   }
8079 
8080   return SDValue();
8081 }
8082 
getRecipEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps) const8083 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8084                                                   SelectionDAG &DAG,
8085                                                   int Enabled,
8086                                                   int &RefinementSteps) const {
8087   if (Subtarget.hasFrecipe()) {
8088     SDLoc DL(Operand);
8089     EVT VT = Operand.getValueType();
8090 
8091     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8092         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8093         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8094         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8095         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8096 
8097       if (RefinementSteps == ReciprocalEstimate::Unspecified)
8098         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8099 
8100       return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8101     }
8102   }
8103 
8104   return SDValue();
8105 }
8106 
8107 //===----------------------------------------------------------------------===//
8108 //                           LoongArch Inline Assembly Support
8109 //===----------------------------------------------------------------------===//
8110 
8111 LoongArchTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const8112 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8113   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8114   //
8115   // 'f':  A floating-point register (if available).
8116   // 'k':  A memory operand whose address is formed by a base register and
8117   //       (optionally scaled) index register.
8118   // 'l':  A signed 16-bit constant.
8119   // 'm':  A memory operand whose address is formed by a base register and
8120   //       offset that is suitable for use in instructions with the same
8121   //       addressing mode as st.w and ld.w.
8122   // 'q':  A general-purpose register except for $r0 and $r1 (for the csrxchg
8123   //       instruction)
8124   // 'I':  A signed 12-bit constant (for arithmetic instructions).
8125   // 'J':  Integer zero.
8126   // 'K':  An unsigned 12-bit constant (for logic instructions).
8127   // "ZB": An address that is held in a general-purpose register. The offset is
8128   //       zero.
8129   // "ZC": A memory operand whose address is formed by a base register and
8130   //       offset that is suitable for use in instructions with the same
8131   //       addressing mode as ll.w and sc.w.
8132   if (Constraint.size() == 1) {
8133     switch (Constraint[0]) {
8134     default:
8135       break;
8136     case 'f':
8137     case 'q':
8138       return C_RegisterClass;
8139     case 'l':
8140     case 'I':
8141     case 'J':
8142     case 'K':
8143       return C_Immediate;
8144     case 'k':
8145       return C_Memory;
8146     }
8147   }
8148 
8149   if (Constraint == "ZC" || Constraint == "ZB")
8150     return C_Memory;
8151 
8152   // 'm' is handled here.
8153   return TargetLowering::getConstraintType(Constraint);
8154 }
8155 
getInlineAsmMemConstraint(StringRef ConstraintCode) const8156 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8157     StringRef ConstraintCode) const {
8158   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8159       .Case("k", InlineAsm::ConstraintCode::k)
8160       .Case("ZB", InlineAsm::ConstraintCode::ZB)
8161       .Case("ZC", InlineAsm::ConstraintCode::ZC)
8162       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8163 }
8164 
8165 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const8166 LoongArchTargetLowering::getRegForInlineAsmConstraint(
8167     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8168   // First, see if this is a constraint that directly corresponds to a LoongArch
8169   // register class.
8170   if (Constraint.size() == 1) {
8171     switch (Constraint[0]) {
8172     case 'r':
8173       // TODO: Support fixed vectors up to GRLen?
8174       if (VT.isVector())
8175         break;
8176       return std::make_pair(0U, &LoongArch::GPRRegClass);
8177     case 'q':
8178       return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8179     case 'f':
8180       if (Subtarget.hasBasicF() && VT == MVT::f32)
8181         return std::make_pair(0U, &LoongArch::FPR32RegClass);
8182       if (Subtarget.hasBasicD() && VT == MVT::f64)
8183         return std::make_pair(0U, &LoongArch::FPR64RegClass);
8184       if (Subtarget.hasExtLSX() &&
8185           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8186         return std::make_pair(0U, &LoongArch::LSX128RegClass);
8187       if (Subtarget.hasExtLASX() &&
8188           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8189         return std::make_pair(0U, &LoongArch::LASX256RegClass);
8190       break;
8191     default:
8192       break;
8193     }
8194   }
8195 
8196   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8197   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8198   // constraints while the official register name is prefixed with a '$'. So we
8199   // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8200   // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
8201   // case insensitive, so no need to convert the constraint to upper case here.
8202   //
8203   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8204   // decode the usage of register name aliases into their official names. And
8205   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8206   // official register names.
8207   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8208       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8209     bool IsFP = Constraint[2] == 'f';
8210     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8211     std::pair<unsigned, const TargetRegisterClass *> R;
8212     R = TargetLowering::getRegForInlineAsmConstraint(
8213         TRI, join_items("", Temp.first, Temp.second), VT);
8214     // Match those names to the widest floating point register type available.
8215     if (IsFP) {
8216       unsigned RegNo = R.first;
8217       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8218         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8219           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8220           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8221         }
8222       }
8223     }
8224     return R;
8225   }
8226 
8227   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8228 }
8229 
LowerAsmOperandForConstraint(SDValue Op,StringRef Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const8230 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8231     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8232     SelectionDAG &DAG) const {
8233   // Currently only support length 1 constraints.
8234   if (Constraint.size() == 1) {
8235     switch (Constraint[0]) {
8236     case 'l':
8237       // Validate & create a 16-bit signed immediate operand.
8238       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8239         uint64_t CVal = C->getSExtValue();
8240         if (isInt<16>(CVal))
8241           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8242                                                     Subtarget.getGRLenVT()));
8243       }
8244       return;
8245     case 'I':
8246       // Validate & create a 12-bit signed immediate operand.
8247       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8248         uint64_t CVal = C->getSExtValue();
8249         if (isInt<12>(CVal))
8250           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8251                                                     Subtarget.getGRLenVT()));
8252       }
8253       return;
8254     case 'J':
8255       // Validate & create an integer zero operand.
8256       if (auto *C = dyn_cast<ConstantSDNode>(Op))
8257         if (C->getZExtValue() == 0)
8258           Ops.push_back(
8259               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8260       return;
8261     case 'K':
8262       // Validate & create a 12-bit unsigned immediate operand.
8263       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8264         uint64_t CVal = C->getZExtValue();
8265         if (isUInt<12>(CVal))
8266           Ops.push_back(
8267               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8268       }
8269       return;
8270     default:
8271       break;
8272     }
8273   }
8274   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8275 }
8276 
8277 #define GET_REGISTER_MATCHER
8278 #include "LoongArchGenAsmMatcher.inc"
8279 
8280 Register
getRegisterByName(const char * RegName,LLT VT,const MachineFunction & MF) const8281 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8282                                            const MachineFunction &MF) const {
8283   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8284   std::string NewRegName = Name.second.str();
8285   Register Reg = MatchRegisterAltName(NewRegName);
8286   if (!Reg)
8287     Reg = MatchRegisterName(NewRegName);
8288   if (!Reg)
8289     return Reg;
8290   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8291   if (!ReservedRegs.test(Reg))
8292     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8293                              StringRef(RegName) + "\"."));
8294   return Reg;
8295 }
8296 
decomposeMulByConstant(LLVMContext & Context,EVT VT,SDValue C) const8297 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8298                                                      EVT VT, SDValue C) const {
8299   // TODO: Support vectors.
8300   if (!VT.isScalarInteger())
8301     return false;
8302 
8303   // Omit the optimization if the data size exceeds GRLen.
8304   if (VT.getSizeInBits() > Subtarget.getGRLen())
8305     return false;
8306 
8307   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8308     const APInt &Imm = ConstNode->getAPIntValue();
8309     // Break MUL into (SLLI + ADD/SUB) or ALSL.
8310     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8311         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8312       return true;
8313     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8314     if (ConstNode->hasOneUse() &&
8315         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8316          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8317       return true;
8318     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8319     // in which the immediate has two set bits. Or Break (MUL x, imm)
8320     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8321     // equals to (1 << s0) - (1 << s1).
8322     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8323       unsigned Shifts = Imm.countr_zero();
8324       // Reject immediates which can be composed via a single LUI.
8325       if (Shifts >= 12)
8326         return false;
8327       // Reject multiplications can be optimized to
8328       // (SLLI (ALSL x, x, 1/2/3/4), s).
8329       APInt ImmPop = Imm.ashr(Shifts);
8330       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8331         return false;
8332       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8333       // since it needs one more instruction than other 3 cases.
8334       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8335       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8336           (ImmSmall - Imm).isPowerOf2())
8337         return true;
8338     }
8339   }
8340 
8341   return false;
8342 }
8343 
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const8344 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8345                                                     const AddrMode &AM,
8346                                                     Type *Ty, unsigned AS,
8347                                                     Instruction *I) const {
8348   // LoongArch has four basic addressing modes:
8349   //  1. reg
8350   //  2. reg + 12-bit signed offset
8351   //  3. reg + 14-bit signed offset left-shifted by 2
8352   //  4. reg1 + reg2
8353   // TODO: Add more checks after support vector extension.
8354 
8355   // No global is ever allowed as a base.
8356   if (AM.BaseGV)
8357     return false;
8358 
8359   // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8360   // with `UAL` feature.
8361   if (!isInt<12>(AM.BaseOffs) &&
8362       !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8363     return false;
8364 
8365   switch (AM.Scale) {
8366   case 0:
8367     // "r+i" or just "i", depending on HasBaseReg.
8368     break;
8369   case 1:
8370     // "r+r+i" is not allowed.
8371     if (AM.HasBaseReg && AM.BaseOffs)
8372       return false;
8373     // Otherwise we have "r+r" or "r+i".
8374     break;
8375   case 2:
8376     // "2*r+r" or "2*r+i" is not allowed.
8377     if (AM.HasBaseReg || AM.BaseOffs)
8378       return false;
8379     // Allow "2*r" as "r+r".
8380     break;
8381   default:
8382     return false;
8383   }
8384 
8385   return true;
8386 }
8387 
isLegalICmpImmediate(int64_t Imm) const8388 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8389   return isInt<12>(Imm);
8390 }
8391 
isLegalAddImmediate(int64_t Imm) const8392 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8393   return isInt<12>(Imm);
8394 }
8395 
isZExtFree(SDValue Val,EVT VT2) const8396 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8397   // Zexts are free if they can be combined with a load.
8398   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8399   // poorly with type legalization of compares preferring sext.
8400   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8401     EVT MemVT = LD->getMemoryVT();
8402     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8403         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8404          LD->getExtensionType() == ISD::ZEXTLOAD))
8405       return true;
8406   }
8407 
8408   return TargetLowering::isZExtFree(Val, VT2);
8409 }
8410 
isSExtCheaperThanZExt(EVT SrcVT,EVT DstVT) const8411 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8412                                                     EVT DstVT) const {
8413   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8414 }
8415 
signExtendConstant(const ConstantInt * CI) const8416 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8417   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8418 }
8419 
hasAndNotCompare(SDValue Y) const8420 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8421   // TODO: Support vectors.
8422   if (Y.getValueType().isVector())
8423     return false;
8424 
8425   return !isa<ConstantSDNode>(Y);
8426 }
8427 
getExtendForAtomicCmpSwapArg() const8428 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8429   // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8430   return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8431 }
8432 
shouldSignExtendTypeInLibCall(Type * Ty,bool IsSigned) const8433 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8434     Type *Ty, bool IsSigned) const {
8435   if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8436     return true;
8437 
8438   return IsSigned;
8439 }
8440 
shouldExtendTypeInLibCall(EVT Type) const8441 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8442   // Return false to suppress the unnecessary extensions if the LibCall
8443   // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8444   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8445                                   Type.getSizeInBits() < Subtarget.getGRLen()))
8446     return false;
8447   return true;
8448 }
8449 
8450 // memcpy, and other memory intrinsics, typically tries to use wider load/store
8451 // if the source/dest is aligned and the copy size is large enough. We therefore
8452 // want to align such objects passed to memory intrinsics.
shouldAlignPointerArgs(CallInst * CI,unsigned & MinSize,Align & PrefAlign) const8453 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8454                                                      unsigned &MinSize,
8455                                                      Align &PrefAlign) const {
8456   if (!isa<MemIntrinsic>(CI))
8457     return false;
8458 
8459   if (Subtarget.is64Bit()) {
8460     MinSize = 8;
8461     PrefAlign = Align(8);
8462   } else {
8463     MinSize = 4;
8464     PrefAlign = Align(4);
8465   }
8466 
8467   return true;
8468 }
8469 
8470 TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const8471 LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
8472   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
8473       VT.getVectorElementType() != MVT::i1)
8474     return TypeWidenVector;
8475 
8476   return TargetLoweringBase::getPreferredVectorAction(VT);
8477 }
8478 
splitValueIntoRegisterParts(SelectionDAG & DAG,const SDLoc & DL,SDValue Val,SDValue * Parts,unsigned NumParts,MVT PartVT,std::optional<CallingConv::ID> CC) const8479 bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8480     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8481     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8482   bool IsABIRegCopy = CC.has_value();
8483   EVT ValueVT = Val.getValueType();
8484 
8485   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8486       PartVT == MVT::f32) {
8487     // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8488     // nan, and cast to f32.
8489     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
8490     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
8491     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
8492                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
8493     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
8494     Parts[0] = Val;
8495     return true;
8496   }
8497 
8498   return false;
8499 }
8500 
joinRegisterPartsIntoValue(SelectionDAG & DAG,const SDLoc & DL,const SDValue * Parts,unsigned NumParts,MVT PartVT,EVT ValueVT,std::optional<CallingConv::ID> CC) const8501 SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
8502     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
8503     MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
8504   bool IsABIRegCopy = CC.has_value();
8505 
8506   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8507       PartVT == MVT::f32) {
8508     SDValue Val = Parts[0];
8509 
8510     // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
8511     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
8512     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
8513     Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
8514     return Val;
8515   }
8516 
8517   return SDValue();
8518 }
8519 
getRegisterTypeForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const8520 MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
8521                                                            CallingConv::ID CC,
8522                                                            EVT VT) const {
8523   // Use f32 to pass f16.
8524   if (VT == MVT::f16 && Subtarget.hasBasicF())
8525     return MVT::f32;
8526 
8527   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
8528 }
8529 
getNumRegistersForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const8530 unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
8531     LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
8532   // Use f32 to pass f16.
8533   if (VT == MVT::f16 && Subtarget.hasBasicF())
8534     return 1;
8535 
8536   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
8537 }
8538 
SimplifyDemandedBitsForTargetNode(SDValue Op,const APInt & OriginalDemandedBits,const APInt & OriginalDemandedElts,KnownBits & Known,TargetLoweringOpt & TLO,unsigned Depth) const8539 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
8540     SDValue Op, const APInt &OriginalDemandedBits,
8541     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
8542     unsigned Depth) const {
8543   EVT VT = Op.getValueType();
8544   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
8545   unsigned Opc = Op.getOpcode();
8546   switch (Opc) {
8547   default:
8548     break;
8549   case LoongArchISD::VMSKLTZ:
8550   case LoongArchISD::XVMSKLTZ: {
8551     SDValue Src = Op.getOperand(0);
8552     MVT SrcVT = Src.getSimpleValueType();
8553     unsigned SrcBits = SrcVT.getScalarSizeInBits();
8554     unsigned NumElts = SrcVT.getVectorNumElements();
8555 
8556     // If we don't need the sign bits at all just return zero.
8557     if (OriginalDemandedBits.countr_zero() >= NumElts)
8558       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
8559 
8560     // Only demand the vector elements of the sign bits we need.
8561     APInt KnownUndef, KnownZero;
8562     APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
8563     if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
8564                                    TLO, Depth + 1))
8565       return true;
8566 
8567     Known.Zero = KnownZero.zext(BitWidth);
8568     Known.Zero.setHighBits(BitWidth - NumElts);
8569 
8570     // [X]VMSKLTZ only uses the MSB from each vector element.
8571     KnownBits KnownSrc;
8572     APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
8573     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
8574                              Depth + 1))
8575       return true;
8576 
8577     if (KnownSrc.One[SrcBits - 1])
8578       Known.One.setLowBits(NumElts);
8579     else if (KnownSrc.Zero[SrcBits - 1])
8580       Known.Zero.setLowBits(NumElts);
8581 
8582     // Attempt to avoid multi-use ops if we don't need anything from it.
8583     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
8584             Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
8585       return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
8586     return false;
8587   }
8588   }
8589 
8590   return TargetLowering::SimplifyDemandedBitsForTargetNode(
8591       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
8592 }
8593