1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "loongarch-isel-lowering"
38 
39 STATISTIC(NumTailCalls, "Number of tail calls");
40 
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42                                   cl::desc("Trap on integer division by zero."),
43                                   cl::init(false));
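// For example, to enable the check (assuming a typical llc invocation):
//   llc -mtriple=loongarch64 -loongarch-check-zero-division test.ll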
44 
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46                                                  const LoongArchSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   MVT GRLenVT = Subtarget.getGRLenVT();
50 
51   // Set up the register classes.
52 
53   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54   if (Subtarget.hasBasicF())
55     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56   if (Subtarget.hasBasicD())
57     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58 
59   static const MVT::SimpleValueType LSXVTs[] = {
60       MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61   static const MVT::SimpleValueType LASXVTs[] = {
62       MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63 
64   if (Subtarget.hasExtLSX())
65     for (MVT VT : LSXVTs)
66       addRegisterClass(VT, &LoongArch::LSX128RegClass);
67 
68   if (Subtarget.hasExtLASX())
69     for (MVT VT : LASXVTs)
70       addRegisterClass(VT, &LoongArch::LASX256RegClass);
71 
72   // Set operations for LA32 and LA64.
73 
74   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75                    MVT::i1, Promote);
76 
77   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81   setOperationAction(ISD::ROTL, GRLenVT, Expand);
82   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83 
84   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85                       ISD::JumpTable, ISD::GlobalTLSAddress},
86                      GRLenVT, Custom);
87 
88   setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89 
90   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92   setOperationAction(ISD::VASTART, MVT::Other, Custom);
93   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94 
95   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96   setOperationAction(ISD::TRAP, MVT::Other, Legal);
97 
98   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101 
102   // Expand bitreverse.i16 with a native-width bitrev and a shift for now,
103   // until we know which of sll and revb.2h is faster.
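  // As a rough illustration (assuming the i16 value lives in the low bits of
  // a 64-bit GR on LA64), the expansion is equivalent to:
  //   bitrev.d  $t0, $a0        # reversed halfword ends up in bits 63:48
  //   srli.d    $a0, $t0, 48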
104   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105   setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
106 
107   // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108   // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109   // and i32 could still be byte-swapped relatively cheaply.
110   setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111 
112   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117 
118   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120 
121   // Set operations for LA64 only.
122 
123   if (Subtarget.is64Bit()) {
124     setOperationAction(ISD::ADD, MVT::i32, Custom);
125     setOperationAction(ISD::SUB, MVT::i32, Custom);
126     setOperationAction(ISD::SHL, MVT::i32, Custom);
127     setOperationAction(ISD::SRA, MVT::i32, Custom);
128     setOperationAction(ISD::SRL, MVT::i32, Custom);
129     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
130     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
131     setOperationAction(ISD::ROTR, MVT::i32, Custom);
132     setOperationAction(ISD::ROTL, MVT::i32, Custom);
133     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
134     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
135     setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
136     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
137     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
138     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
139     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
140     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
141 
142     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
143     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
144     setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
145   }
146 
147   // Set operations for LA32 only.
148 
149   if (!Subtarget.is64Bit()) {
150     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
151     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
152     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
153     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
154     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
155   }
156 
157   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
158 
159   static const ISD::CondCode FPCCToExpand[] = {
160       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
161       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
162 
163   // Set operations for 'F' feature.
164 
165   if (Subtarget.hasBasicF()) {
166     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
167     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
168     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
169 
170     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
171     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
172     setOperationAction(ISD::FMA, MVT::f32, Legal);
173     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
174     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
175     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
176     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
177     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
178     setOperationAction(ISD::FSIN, MVT::f32, Expand);
179     setOperationAction(ISD::FCOS, MVT::f32, Expand);
180     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
181     setOperationAction(ISD::FPOW, MVT::f32, Expand);
182     setOperationAction(ISD::FREM, MVT::f32, Expand);
183     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
184     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
185 
186     if (Subtarget.is64Bit())
187       setOperationAction(ISD::FRINT, MVT::f32, Legal);
188 
189     if (!Subtarget.hasBasicD()) {
190       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
191       if (Subtarget.is64Bit()) {
192         setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
193         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
194       }
195     }
196   }
197 
198   // Set operations for 'D' feature.
199 
200   if (Subtarget.hasBasicD()) {
201     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
202     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
204     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
205     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
206 
207     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
208     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
209     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
210     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
211     setOperationAction(ISD::FMA, MVT::f64, Legal);
212     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
213     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
214     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
215     setOperationAction(ISD::FSIN, MVT::f64, Expand);
216     setOperationAction(ISD::FCOS, MVT::f64, Expand);
217     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
218     setOperationAction(ISD::FPOW, MVT::f64, Expand);
219     setOperationAction(ISD::FREM, MVT::f64, Expand);
220     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
221     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
222 
223     if (Subtarget.is64Bit())
224       setOperationAction(ISD::FRINT, MVT::f64, Legal);
225   }
226 
227   // Set operations for 'LSX' feature.
228 
229   if (Subtarget.hasExtLSX()) {
230     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
231       // Expand all truncating stores and extending loads.
232       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
233         setTruncStoreAction(VT, InnerVT, Expand);
234         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
235         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
236         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
237       }
238       // By default everything must be expanded. Then we will selectively turn
239       // on ones that can be effectively codegen'd.
240       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
241         setOperationAction(Op, VT, Expand);
242     }
243 
244     for (MVT VT : LSXVTs) {
245       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
246       setOperationAction(ISD::BITCAST, VT, Legal);
247       setOperationAction(ISD::UNDEF, VT, Legal);
248 
249       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
250       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
251       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
252 
253       setOperationAction(ISD::SETCC, VT, Legal);
254       setOperationAction(ISD::VSELECT, VT, Legal);
255       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
256     }
257     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
258       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
259       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
260                          Legal);
261       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
262                          VT, Legal);
263       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
264       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
265       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
266       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
267       setCondCodeAction(
268           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
269           Expand);
270     }
271     for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
272       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
273       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
274     }
275     for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
276       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
277       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
278       setOperationAction(ISD::FMA, VT, Legal);
279       setOperationAction(ISD::FSQRT, VT, Legal);
280       setOperationAction(ISD::FNEG, VT, Legal);
281       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
282                          ISD::SETUGE, ISD::SETUGT},
283                         VT, Expand);
284     }
285   }
286 
287   // Set operations for 'LASX' feature.
288 
289   if (Subtarget.hasExtLASX()) {
290     for (MVT VT : LASXVTs) {
291       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
292       setOperationAction(ISD::BITCAST, VT, Legal);
293       setOperationAction(ISD::UNDEF, VT, Legal);
294 
295       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
296       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
297       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
298       setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
299 
300       setOperationAction(ISD::SETCC, VT, Legal);
301       setOperationAction(ISD::VSELECT, VT, Legal);
302       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
303     }
304     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
305       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
306       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
307                          Legal);
308       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
309                          VT, Legal);
310       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
311       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
312       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
313       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
314       setCondCodeAction(
315           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
316           Expand);
317     }
318     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
319       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
320       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
321     }
322     for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
323       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
324       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
325       setOperationAction(ISD::FMA, VT, Legal);
326       setOperationAction(ISD::FSQRT, VT, Legal);
327       setOperationAction(ISD::FNEG, VT, Legal);
328       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
329                          ISD::SETUGE, ISD::SETUGT},
330                         VT, Expand);
331     }
332   }
333 
334   // Set DAG combine for LA32 and LA64.
335 
336   setTargetDAGCombine(ISD::AND);
337   setTargetDAGCombine(ISD::OR);
338   setTargetDAGCombine(ISD::SRL);
339   setTargetDAGCombine(ISD::SETCC);
340 
341   // Set DAG combine for 'LSX' feature.
342 
343   if (Subtarget.hasExtLSX())
344     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
345 
346   // Compute derived properties from the register classes.
347   computeRegisterProperties(Subtarget.getRegisterInfo());
348 
349   setStackPointerRegisterToSaveRestore(LoongArch::R3);
350 
351   setBooleanContents(ZeroOrOneBooleanContent);
352   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
353 
354   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
355 
356   setMinCmpXchgSizeInBits(32);
357 
358   // Function alignments.
359   setMinFunctionAlignment(Align(4));
360   // Set preferred alignments.
361   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
362   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
363   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
364 }
365 
366 bool LoongArchTargetLowering::isOffsetFoldingLegal(
367     const GlobalAddressSDNode *GA) const {
368   // In order to maximise the opportunity for common subexpression elimination,
369   // keep a separate ADD node for the global address offset instead of folding
370   // it in the global address node. Later peephole optimisations may choose to
371   // fold it back in when profitable.
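  // For example, an access to a (hypothetical) global @g at offset 8 keeps the
  // form (add (GlobalAddress @g), 8) instead of (GlobalAddress @g + 8), so the
  // GlobalAddress node can be shared with other accesses to @g.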
372   return false;
373 }
374 
375 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
376                                                 SelectionDAG &DAG) const {
377   switch (Op.getOpcode()) {
378   case ISD::ATOMIC_FENCE:
379     return lowerATOMIC_FENCE(Op, DAG);
380   case ISD::EH_DWARF_CFA:
381     return lowerEH_DWARF_CFA(Op, DAG);
382   case ISD::GlobalAddress:
383     return lowerGlobalAddress(Op, DAG);
384   case ISD::GlobalTLSAddress:
385     return lowerGlobalTLSAddress(Op, DAG);
386   case ISD::INTRINSIC_WO_CHAIN:
387     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
388   case ISD::INTRINSIC_W_CHAIN:
389     return lowerINTRINSIC_W_CHAIN(Op, DAG);
390   case ISD::INTRINSIC_VOID:
391     return lowerINTRINSIC_VOID(Op, DAG);
392   case ISD::BlockAddress:
393     return lowerBlockAddress(Op, DAG);
394   case ISD::JumpTable:
395     return lowerJumpTable(Op, DAG);
396   case ISD::SHL_PARTS:
397     return lowerShiftLeftParts(Op, DAG);
398   case ISD::SRA_PARTS:
399     return lowerShiftRightParts(Op, DAG, true);
400   case ISD::SRL_PARTS:
401     return lowerShiftRightParts(Op, DAG, false);
402   case ISD::ConstantPool:
403     return lowerConstantPool(Op, DAG);
404   case ISD::FP_TO_SINT:
405     return lowerFP_TO_SINT(Op, DAG);
406   case ISD::BITCAST:
407     return lowerBITCAST(Op, DAG);
408   case ISD::UINT_TO_FP:
409     return lowerUINT_TO_FP(Op, DAG);
410   case ISD::SINT_TO_FP:
411     return lowerSINT_TO_FP(Op, DAG);
412   case ISD::VASTART:
413     return lowerVASTART(Op, DAG);
414   case ISD::FRAMEADDR:
415     return lowerFRAMEADDR(Op, DAG);
416   case ISD::RETURNADDR:
417     return lowerRETURNADDR(Op, DAG);
418   case ISD::WRITE_REGISTER:
419     return lowerWRITE_REGISTER(Op, DAG);
420   case ISD::INSERT_VECTOR_ELT:
421     return lowerINSERT_VECTOR_ELT(Op, DAG);
422   case ISD::EXTRACT_VECTOR_ELT:
423     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
424   case ISD::BUILD_VECTOR:
425     return lowerBUILD_VECTOR(Op, DAG);
426   case ISD::VECTOR_SHUFFLE:
427     return lowerVECTOR_SHUFFLE(Op, DAG);
428   }
429   return SDValue();
430 }
431 
432 /// Determine whether a range fits a regular pattern of values.
433 /// This function accounts for the possibility of jumping over the End iterator.
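/// For example, the call used by the VPACKEV lowering below,
///   fitsRegularPattern<int>(Begin, 2, End, 0, 2),
/// inspects mask elements 0, 2, 4, ... and succeeds when each one is either
/// -1 (undef) or the next value in the sequence 0, 2, 4, ...; the elements
/// skipped by CheckStride are not checked at all.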
434 template <typename ValType>
435 static bool
436 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
437                    unsigned CheckStride,
438                    typename SmallVectorImpl<ValType>::const_iterator End,
439                    ValType ExpectedIndex, unsigned ExpectedIndexStride) {
440   auto &I = Begin;
441 
442   while (I != End) {
443     if (*I != -1 && *I != ExpectedIndex)
444       return false;
445     ExpectedIndex += ExpectedIndexStride;
446 
447     // Incrementing past End is undefined behaviour so we must increment one
448     // step at a time and check for End at each step.
449     for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
450       ; // Empty loop body.
451   }
452   return true;
453 }
454 
455 /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
456 ///
457 /// VREPLVEI performs vector broadcast based on an element specified by an
458 /// integer immediate, with its mask being similar to:
459 ///   <x, x, x, ...>
460 /// where x is any valid index.
461 ///
462 /// When undefs appear in the mask, they are treated as if they were whatever
463 /// value is necessary in order to fit the above form.
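///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to a VREPLVEI node with immediate 1, broadcasting element 1
/// of %0 to all lanes.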
464 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
465                                             MVT VT, SDValue V1, SDValue V2,
466                                             SelectionDAG &DAG) {
467   int SplatIndex = -1;
468   for (const auto &M : Mask) {
469     if (M != -1) {
470       SplatIndex = M;
471       break;
472     }
473   }
474 
475   if (SplatIndex == -1)
476     return DAG.getUNDEF(VT);
477 
478   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
479   if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
480     APInt Imm(64, SplatIndex);
481     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
482                        DAG.getConstant(Imm, DL, MVT::i64));
483   }
484 
485   return SDValue();
486 }
487 
488 /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
489 ///
490 /// VSHUF4I splits the vector into blocks of four elements, then shuffles these
491 /// elements according to a <4 x i2> constant (encoded as an integer immediate).
492 ///
493 /// It is therefore possible to lower into VSHUF4I when the mask takes the form:
494 ///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
495 /// When undefs appear, they are treated as if they were whatever value is
496 /// necessary in order to fit the above forms.
497 ///
498 /// For example:
499 ///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
500 ///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
501 ///                                 i32 7, i32 6, i32 5, i32 4>
502 /// is lowered to:
503 ///   (VSHUF4I_H $v0, $v1, 27)
504 /// where the 27 comes from:
505 ///   3 + (2 << 2) + (1 << 4) + (0 << 6)
506 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
507                                            MVT VT, SDValue V1, SDValue V2,
508                                            SelectionDAG &DAG) {
509 
510   // When the size is less than 4, lower-cost instructions may be used.
511   if (Mask.size() < 4)
512     return SDValue();
513 
514   int SubMask[4] = {-1, -1, -1, -1};
515   for (unsigned i = 0; i < 4; ++i) {
516     for (unsigned j = i; j < Mask.size(); j += 4) {
517       int Idx = Mask[j];
518 
519       // Convert from vector index to 4-element subvector index.
520       // If an index refers to an element outside of the subvector, give up.
521       if (Idx != -1) {
522         Idx -= 4 * (j / 4);
523         if (Idx < 0 || Idx >= 4)
524           return SDValue();
525       }
526 
527       // If the mask has an undef, replace it with the current index.
528       // Note that it might still be undef if the current index is also undef
529       if (SubMask[i] == -1)
530         SubMask[i] = Idx;
531       // Check that non-undef values are the same as in the mask. If they
532       // aren't then give up
533       else if (Idx != -1 && Idx != SubMask[i])
534         return SDValue();
535     }
536   }
537 
538   // Calculate the immediate. Replace any remaining undefs with zero
539   APInt Imm(64, 0);
540   for (int i = 3; i >= 0; --i) {
541     int Idx = SubMask[i];
542 
543     if (Idx == -1)
544       Idx = 0;
545 
546     Imm <<= 2;
547     Imm |= Idx & 0x3;
548   }
549 
550   return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
551                      DAG.getConstant(Imm, DL, MVT::i64));
552 }
553 
554 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
555 ///
556 /// VPACKEV interleaves the even elements from each vector.
557 ///
558 /// It is possible to lower into VPACKEV when the mask consists of two of the
559 /// following forms interleaved:
560 ///   <0, 2, 4, ...>
561 ///   <n, n+2, n+4, ...>
562 /// where n is the number of elements in the vector.
563 /// For example:
564 ///   <0, 0, 2, 2, 4, 4, ...>
565 ///   <0, n, 2, n+2, 4, n+4, ...>
566 ///
567 /// When undefs appear in the mask, they are treated as if they were whatever
568 /// value is necessary in order to fit the above forms.
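///
/// For example, with n = 8 (v8i16), the mask
///   <0, 8, 2, 10, 4, 12, 6, 14>
/// takes the even elements of the first source for the even result positions
/// and the even elements of the second source for the odd result positions,
/// and is lowered to a single VPACKEV node.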
569 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
570                                            MVT VT, SDValue V1, SDValue V2,
571                                            SelectionDAG &DAG) {
572 
573   const auto &Begin = Mask.begin();
574   const auto &End = Mask.end();
575   SDValue OriV1 = V1, OriV2 = V2;
576 
577   if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
578     V1 = OriV1;
579   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
580     V1 = OriV2;
581   else
582     return SDValue();
583 
584   if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
585     V2 = OriV1;
586   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
587     V2 = OriV2;
588   else
589     return SDValue();
590 
591   return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
592 }
593 
594 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
595 ///
596 /// VPACKOD interleaves the odd elements from each vector.
597 ///
598 /// It is possible to lower into VPACKOD when the mask consists of two of the
599 /// following forms interleaved:
600 ///   <1, 3, 5, ...>
601 ///   <n+1, n+3, n+5, ...>
602 /// where n is the number of elements in the vector.
603 /// For example:
604 ///   <1, 1, 3, 3, 5, 5, ...>
605 ///   <1, n+1, 3, n+3, 5, n+5, ...>
606 ///
607 /// When undefs appear in the mask, they are treated as if they were whatever
608 /// value is necessary in order to fit the above forms.
609 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
610                                            MVT VT, SDValue V1, SDValue V2,
611                                            SelectionDAG &DAG) {
612 
613   const auto &Begin = Mask.begin();
614   const auto &End = Mask.end();
615   SDValue OriV1 = V1, OriV2 = V2;
616 
617   if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
618     V1 = OriV1;
619   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
620     V1 = OriV2;
621   else
622     return SDValue();
623 
624   if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
625     V2 = OriV1;
626   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
627     V2 = OriV2;
628   else
629     return SDValue();
630 
631   return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
632 }
633 
634 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
635 ///
636 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
637 /// of each vector.
638 ///
639 /// It is possible to lower into VILVH when the mask consists of two of the
640 /// following forms interleaved:
641 ///   <x, x+1, x+2, ...>
642 ///   <n+x, n+x+1, n+x+2, ...>
643 /// where n is the number of elements in the vector and x is half n.
644 /// For example:
645 ///   <x, x, x+1, x+1, x+2, x+2, ...>
646 ///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
647 ///
648 /// When undefs appear in the mask, they are treated as if they were whatever
649 /// value is necessary in order to fit the above forms.
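///
/// For example, with n = 8 (v8i16) and x = 4, the mask
///   <4, 12, 5, 13, 6, 14, 7, 15>
/// interleaves the high halves of the two sources and is lowered to a single
/// VILVH node.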
650 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
651                                          MVT VT, SDValue V1, SDValue V2,
652                                          SelectionDAG &DAG) {
653 
654   const auto &Begin = Mask.begin();
655   const auto &End = Mask.end();
656   unsigned HalfSize = Mask.size() / 2;
657   SDValue OriV1 = V1, OriV2 = V2;
658 
659   if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
660     V1 = OriV1;
661   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
662     V1 = OriV2;
663   else
664     return SDValue();
665 
666   if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
667     V2 = OriV1;
668   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
669                                    1))
670     V2 = OriV2;
671   else
672     return SDValue();
673 
674   return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
675 }
676 
677 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
678 ///
679 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
680 /// of each vector.
681 ///
682 /// It is possible to lower into VILVL when the mask consists of two of the
683 /// following forms interleaved:
684 ///   <0, 1, 2, ...>
685 ///   <n, n+1, n+2, ...>
686 /// where n is the number of elements in the vector.
687 /// For example:
688 ///   <0, 0, 1, 1, 2, 2, ...>
689 ///   <0, n, 1, n+1, 2, n+2, ...>
690 ///
691 /// When undefs appear in the mask, they are treated as if they were whatever
692 /// value is necessary in order to fit the above forms.
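///
/// For example, with n = 8 (v8i16), the mask
///   <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low halves of the two sources and is lowered to a single
/// VILVL node.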
693 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
694                                          MVT VT, SDValue V1, SDValue V2,
695                                          SelectionDAG &DAG) {
696 
697   const auto &Begin = Mask.begin();
698   const auto &End = Mask.end();
699   SDValue OriV1 = V1, OriV2 = V2;
700 
701   if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
702     V1 = OriV1;
703   else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
704     V1 = OriV2;
705   else
706     return SDValue();
707 
708   if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
709     V2 = OriV1;
710   else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
711     V2 = OriV2;
712   else
713     return SDValue();
714 
715   return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
716 }
717 
718 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
719 ///
720 /// VPICKEV copies the even elements of each vector into the result vector.
721 ///
722 /// It is possible to lower into VPICKEV when the mask consists of two of the
723 /// following forms concatenated:
724 ///   <0, 2, 4, ...>
725 ///   <n, n+2, n+4, ...>
726 /// where n is the number of elements in the vector.
727 /// For example:
728 ///   <0, 2, 4, ..., 0, 2, 4, ...>
729 ///   <0, 2, 4, ..., n, n+2, n+4, ...>
730 ///
731 /// When undefs appear in the mask, they are treated as if they were whatever
732 /// value is necessary in order to fit the above forms.
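///
/// For example, with n = 8 (v8i16), the mask
///   <0, 2, 4, 6, 8, 10, 12, 14>
/// gathers the even elements of both sources and is lowered to a single
/// VPICKEV node.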
733 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
734                                            MVT VT, SDValue V1, SDValue V2,
735                                            SelectionDAG &DAG) {
736 
737   const auto &Begin = Mask.begin();
738   const auto &Mid = Mask.begin() + Mask.size() / 2;
739   const auto &End = Mask.end();
740   SDValue OriV1 = V1, OriV2 = V2;
741 
742   if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
743     V1 = OriV1;
744   else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
745     V1 = OriV2;
746   else
747     return SDValue();
748 
749   if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
750     V2 = OriV1;
751   else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
752     V2 = OriV2;
753 
754   else
755     return SDValue();
756 
757   return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
758 }
759 
760 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
761 ///
762 /// VPICKOD copies the odd elements of each vector into the result vector.
763 ///
764 /// It is possible to lower into VPICKOD when the mask consists of two of the
765 /// following forms concatenated:
766 ///   <1, 3, 5, ...>
767 ///   <n+1, n+3, n+5, ...>
768 /// where n is the number of elements in the vector.
769 /// For example:
770 ///   <1, 3, 5, ..., 1, 3, 5, ...>
771 ///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
772 ///
773 /// When undefs appear in the mask, they are treated as if they were whatever
774 /// value is necessary in order to fit the above forms.
775 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
776                                            MVT VT, SDValue V1, SDValue V2,
777                                            SelectionDAG &DAG) {
778 
779   const auto &Begin = Mask.begin();
780   const auto &Mid = Mask.begin() + Mask.size() / 2;
781   const auto &End = Mask.end();
782   SDValue OriV1 = V1, OriV2 = V2;
783 
784   if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
785     V1 = OriV1;
786   else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
787     V1 = OriV2;
788   else
789     return SDValue();
790 
791   if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
792     V2 = OriV1;
793   else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
794     V2 = OriV2;
795   else
796     return SDValue();
797 
798   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
799 }
800 
801 /// Lower VECTOR_SHUFFLE into VSHUF.
802 ///
803 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
804 /// adding it as an operand to the resulting VSHUF.
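///
/// For example, a v4i32 mask <0, 4, 1, 5> is materialized as the constant
/// vector <0, 4, 1, 5> and passed as the first operand of the VSHUF node,
/// with the two source vectors swapped as explained in the comment inside
/// this function.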
805 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
806                                          MVT VT, SDValue V1, SDValue V2,
807                                          SelectionDAG &DAG) {
808 
809   SmallVector<SDValue, 16> Ops;
810   for (auto M : Mask)
811     Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
812 
813   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
814   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
815 
816   // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
817   // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
818   // VSHUF concatenates the vectors in a bitwise fashion:
819   // <0b00, 0b01> + <0b10, 0b11> ->
820   // 0b0100       + 0b1110       -> 0b01001110
821   //                                <0b10, 0b11, 0b00, 0b01>
822   // We must therefore swap the operands to get the correct result.
823   return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
824 }
825 
826 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
827 ///
828 /// This routine breaks down the specific type of 128-bit shuffle and
829 /// dispatches to the lowering routines accordingly.
830 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
831                                   SDValue V1, SDValue V2, SelectionDAG &DAG) {
832   assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
833           VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
834           VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
835          "Vector type is unsupported for lsx!");
836   assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
837          "Two operands have different types!");
838   assert(VT.getVectorNumElements() == Mask.size() &&
839          "Unexpected mask size for shuffle!");
840   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
841 
842   SDValue Result;
843   // TODO: Add more comparison patterns.
844   if (V2.isUndef()) {
845     if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
846       return Result;
847     if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
848       return Result;
849 
850     // TODO: The commented-out line below may be enabled in the future to
851     // better match the pattern for instruction selection.
852     /* V2 = V1; */
853   }
854 
855   // It is recommended not to change the pattern comparison order for better
856   // performance.
857   if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
858     return Result;
859   if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
860     return Result;
861   if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
862     return Result;
863   if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
864     return Result;
865   if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
866     return Result;
867   if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
868     return Result;
869   if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
870     return Result;
871 
872   return SDValue();
873 }
874 
875 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
876 ///
877 /// It is an XVREPLVEI when the mask is:
878 ///   <x, x, x, ..., x+n, x+n, x+n, ...>
879 /// where x is repeated n times and n is half the length of the vector.
880 ///
881 /// When undefs appear in the mask, they are treated as if they were whatever
882 /// value is necessary in order to fit the above form.
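///
/// For example, for a v8i32 shuffle (n = 4), the mask
///   <1, 1, 1, 1, 5, 5, 5, 5>
/// is lowered to a VREPLVEI node with immediate 1; on LASX this replicates
/// element 1 within each 128-bit half of the source register.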
883 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
884                                              ArrayRef<int> Mask, MVT VT,
885                                              SDValue V1, SDValue V2,
886                                              SelectionDAG &DAG) {
887   int SplatIndex = -1;
888   for (const auto &M : Mask) {
889     if (M != -1) {
890       SplatIndex = M;
891       break;
892     }
893   }
894 
895   if (SplatIndex == -1)
896     return DAG.getUNDEF(VT);
897 
898   const auto &Begin = Mask.begin();
899   const auto &End = Mask.end();
900   unsigned HalfSize = Mask.size() / 2;
901 
902   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
903   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
904       fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
905                               0)) {
906     APInt Imm(64, SplatIndex);
907     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
908                        DAG.getConstant(Imm, DL, MVT::i64));
909   }
910 
911   return SDValue();
912 }
913 
914 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
915 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
916                                             MVT VT, SDValue V1, SDValue V2,
917                                             SelectionDAG &DAG) {
918   // When the size is less than or equal to 4, lower-cost instructions may be
919   // used.
920   if (Mask.size() <= 4)
921     return SDValue();
922   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
923 }
924 
925 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
926 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
927                                             MVT VT, SDValue V1, SDValue V2,
928                                             SelectionDAG &DAG) {
929   return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
930 }
931 
932 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
933 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
934                                             MVT VT, SDValue V1, SDValue V2,
935                                             SelectionDAG &DAG) {
936   return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
937 }
938 
939 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
940 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
941                                           MVT VT, SDValue V1, SDValue V2,
942                                           SelectionDAG &DAG) {
943 
944   const auto &Begin = Mask.begin();
945   const auto &End = Mask.end();
946   unsigned HalfSize = Mask.size() / 2;
947   unsigned LeftSize = HalfSize / 2;
948   SDValue OriV1 = V1, OriV2 = V2;
949 
950   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
951                               1) &&
952       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
953     V1 = OriV1;
954   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
955                                    Mask.size() + HalfSize - LeftSize, 1) &&
956            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
957                                    Mask.size() + HalfSize + LeftSize, 1))
958     V1 = OriV2;
959   else
960     return SDValue();
961 
962   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
963                               1) &&
964       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
965                               1))
966     V2 = OriV1;
967   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
968                                    Mask.size() + HalfSize - LeftSize, 1) &&
969            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
970                                    Mask.size() + HalfSize + LeftSize, 1))
971     V2 = OriV2;
972   else
973     return SDValue();
974 
975   return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
976 }
977 
978 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
979 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
980                                           MVT VT, SDValue V1, SDValue V2,
981                                           SelectionDAG &DAG) {
982 
983   const auto &Begin = Mask.begin();
984   const auto &End = Mask.end();
985   unsigned HalfSize = Mask.size() / 2;
986   SDValue OriV1 = V1, OriV2 = V2;
987 
988   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
989       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
990     V1 = OriV1;
991   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
992            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
993                                    Mask.size() + HalfSize, 1))
994     V1 = OriV2;
995   else
996     return SDValue();
997 
998   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
999       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1000     V2 = OriV1;
1001   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1002                                    1) &&
1003            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1004                                    Mask.size() + HalfSize, 1))
1005     V2 = OriV2;
1006   else
1007     return SDValue();
1008 
1009   return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1010 }
1011 
1012 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1013 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1014                                             MVT VT, SDValue V1, SDValue V2,
1015                                             SelectionDAG &DAG) {
1016 
1017   const auto &Begin = Mask.begin();
1018   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1019   const auto &Mid = Mask.begin() + Mask.size() / 2;
1020   const auto &RightMid = Mask.end() - Mask.size() / 4;
1021   const auto &End = Mask.end();
1022   unsigned HalfSize = Mask.size() / 2;
1023   SDValue OriV1 = V1, OriV2 = V2;
1024 
1025   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1026       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1027     V1 = OriV1;
1028   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1029            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1030     V1 = OriV2;
1031   else
1032     return SDValue();
1033 
1034   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1035       fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1036     V2 = OriV1;
1037   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1038            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1039     V2 = OriV2;
1040 
1041   else
1042     return SDValue();
1043 
1044   return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1045 }
1046 
1047 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1048 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1049                                             MVT VT, SDValue V1, SDValue V2,
1050                                             SelectionDAG &DAG) {
1051 
1052   const auto &Begin = Mask.begin();
1053   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1054   const auto &Mid = Mask.begin() + Mask.size() / 2;
1055   const auto &RightMid = Mask.end() - Mask.size() / 4;
1056   const auto &End = Mask.end();
1057   unsigned HalfSize = Mask.size() / 2;
1058   SDValue OriV1 = V1, OriV2 = V2;
1059 
1060   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1061       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1062     V1 = OriV1;
1063   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1064            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1065                                    2))
1066     V1 = OriV2;
1067   else
1068     return SDValue();
1069 
1070   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1071       fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1072     V2 = OriV1;
1073   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1074            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1075                                    2))
1076     V2 = OriV2;
1077   else
1078     return SDValue();
1079 
1080   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1081 }
1082 
1083 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1084 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1085                                           MVT VT, SDValue V1, SDValue V2,
1086                                           SelectionDAG &DAG) {
1087 
1088   int MaskSize = Mask.size();
1089   int HalfSize = Mask.size() / 2;
1090   const auto &Begin = Mask.begin();
1091   const auto &Mid = Mask.begin() + HalfSize;
1092   const auto &End = Mask.end();
1093 
1094   // VECTOR_SHUFFLE concatenates the vectors:
1095   //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1096   //  shuffling ->
1097   //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1098   //
1099   // XVSHUF concatenates the vectors:
1100   //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1101   //  shuffling ->
1102   //  <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1103   SmallVector<SDValue, 8> MaskAlloc;
1104   for (auto it = Begin; it < Mid; it++) {
1105     if (*it < 0) // UNDEF
1106       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1107     else if ((*it >= 0 && *it < HalfSize) ||
1108              (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1109       int M = *it < HalfSize ? *it : *it - HalfSize;
1110       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1111     } else
1112       return SDValue();
1113   }
1114   assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1115 
1116   for (auto it = Mid; it < End; it++) {
1117     if (*it < 0) // UNDEF
1118       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1119     else if ((*it >= HalfSize && *it < MaskSize) ||
1120              (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1121       int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1122       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1123     } else
1124       return SDValue();
1125   }
1126   assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1127 
1128   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1129   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1130   return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1131 }
1132 
1133 /// Shuffle vectors by lane to generate more optimized instructions.
1134 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1135 ///
1136 /// Therefore, except for the following four cases, other cases are regarded
1137 /// as cross-lane shuffles, where optimization is relatively limited.
1138 ///
1139 /// - Shuffle high, low lanes of the two input vectors
1140 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1141 /// - Shuffle low, high lanes of the two input vectors
1142 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1143 /// - Shuffle low, low lanes of the two input vectors
1144 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1145 /// - Shuffle high, high lanes of the two input vectors
1146 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1147 ///
1148 /// The first case is the closest to LoongArch instructions and the other
1149 /// cases need to be converted to it for processing.
1150 ///
1151 /// This function may modify V1, V2 and Mask.
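///
/// The lane reorderings below are performed with XVPERMI on v4i64. For
/// example, the immediate 0b01001110 selects the 64-bit elements <2, 3, 0, 1>
/// and therefore swaps the two 128-bit lanes of the source register.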
1152 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1153                                             MutableArrayRef<int> Mask, MVT VT,
1154                                             SDValue &V1, SDValue &V2,
1155                                             SelectionDAG &DAG) {
1156 
1157   enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1158 
1159   int MaskSize = Mask.size();
1160   int HalfSize = Mask.size() / 2;
1161 
1162   HalfMaskType preMask = None, postMask = None;
1163 
1164   if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1165         return M < 0 || (M >= 0 && M < HalfSize) ||
1166                (M >= MaskSize && M < MaskSize + HalfSize);
1167       }))
1168     preMask = HighLaneTy;
1169   else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1170              return M < 0 || (M >= HalfSize && M < MaskSize) ||
1171                     (M >= MaskSize + HalfSize && M < MaskSize * 2);
1172            }))
1173     preMask = LowLaneTy;
1174 
1175   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1176         return M < 0 || (M >= 0 && M < HalfSize) ||
1177                (M >= MaskSize && M < MaskSize + HalfSize);
1178       }))
1179     postMask = HighLaneTy;
1180   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1181              return M < 0 || (M >= HalfSize && M < MaskSize) ||
1182                     (M >= MaskSize + HalfSize && M < MaskSize * 2);
1183            }))
1184     postMask = LowLaneTy;
1185 
1186   // The first half of the mask is high-lane type and the second half is
1187   // low-lane type, which maps most directly to the LoongArch instructions.
1188   //
1189   // Note: in the LoongArch architecture, the high lane of the mask corresponds
1190   // to the lower 128 bits of the vector register, and the low lane of the mask
1191   // corresponds to the higher 128 bits.
1192   if (preMask == HighLaneTy && postMask == LowLaneTy) {
1193     return;
1194   }
1195   if (preMask == LowLaneTy && postMask == HighLaneTy) {
1196     V1 = DAG.getBitcast(MVT::v4i64, V1);
1197     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1198                      DAG.getConstant(0b01001110, DL, MVT::i64));
1199     V1 = DAG.getBitcast(VT, V1);
1200 
1201     if (!V2.isUndef()) {
1202       V2 = DAG.getBitcast(MVT::v4i64, V2);
1203       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1204                        DAG.getConstant(0b01001110, DL, MVT::i64));
1205       V2 = DAG.getBitcast(VT, V2);
1206     }
1207 
1208     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1209       *it = *it < 0 ? *it : *it - HalfSize;
1210     }
1211     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1212       *it = *it < 0 ? *it : *it + HalfSize;
1213     }
1214   } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1215     V1 = DAG.getBitcast(MVT::v4i64, V1);
1216     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1217                      DAG.getConstant(0b11101110, DL, MVT::i64));
1218     V1 = DAG.getBitcast(VT, V1);
1219 
1220     if (!V2.isUndef()) {
1221       V2 = DAG.getBitcast(MVT::v4i64, V2);
1222       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1223                        DAG.getConstant(0b11101110, DL, MVT::i64));
1224       V2 = DAG.getBitcast(VT, V2);
1225     }
1226 
1227     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1228       *it = *it < 0 ? *it : *it - HalfSize;
1229     }
1230   } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1231     V1 = DAG.getBitcast(MVT::v4i64, V1);
1232     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1233                      DAG.getConstant(0b01000100, DL, MVT::i64));
1234     V1 = DAG.getBitcast(VT, V1);
1235 
1236     if (!V2.isUndef()) {
1237       V2 = DAG.getBitcast(MVT::v4i64, V2);
1238       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1239                        DAG.getConstant(0b01000100, DL, MVT::i64));
1240       V2 = DAG.getBitcast(VT, V2);
1241     }
1242 
1243     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1244       *it = *it < 0 ? *it : *it + HalfSize;
1245     }
1246   } else { // cross-lane
1247     return;
1248   }
1249 }
1250 
1251 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1252 ///
1253 /// This routine breaks down the specific type of 256-bit shuffle and
1254 /// dispatches to the lowering routines accordingly.
1255 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1256                                   SDValue V1, SDValue V2, SelectionDAG &DAG) {
1257   assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1258           VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1259           VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1260          "Vector type is unsupported for lasx!");
1261   assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1262          "Two operands have different types!");
1263   assert(VT.getVectorNumElements() == Mask.size() &&
1264          "Unexpected mask size for shuffle!");
1265   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1266   assert(Mask.size() >= 4 && "Mask size is less than 4.");
1267 
1268   // Canonicalize non-cross-lane shuffle vectors.
1269   SmallVector<int> NewMask(Mask);
1270   canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1271 
1272   SDValue Result;
1273   // TODO: Add more comparison patterns.
1274   if (V2.isUndef()) {
1275     if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1276       return Result;
1277     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1278       return Result;
1279 
1280     // TODO: The commented-out line below may be enabled in the future to
1281     // better match the pattern for instruction selection.
1282     /* V2 = V1; */
1283   }
1284 
1285   // It is recommended not to change the pattern comparison order for better
1286   // performance.
1287   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1288     return Result;
1289   if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1290     return Result;
1291   if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1292     return Result;
1293   if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1294     return Result;
1295   if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1296     return Result;
1297   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1298     return Result;
1299   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1300     return Result;
1301 
1302   return SDValue();
1303 }
1304 
1305 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1306                                                      SelectionDAG &DAG) const {
1307   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1308   ArrayRef<int> OrigMask = SVOp->getMask();
1309   SDValue V1 = Op.getOperand(0);
1310   SDValue V2 = Op.getOperand(1);
1311   MVT VT = Op.getSimpleValueType();
1312   int NumElements = VT.getVectorNumElements();
1313   SDLoc DL(Op);
1314 
1315   bool V1IsUndef = V1.isUndef();
1316   bool V2IsUndef = V2.isUndef();
1317   if (V1IsUndef && V2IsUndef)
1318     return DAG.getUNDEF(VT);
1319 
1320   // When we create a shuffle node we put the UNDEF node as the second operand,
1321   // but in some cases the first operand may be transformed to UNDEF.
1322   // In this case we should just commute the node.
1323   if (V1IsUndef)
1324     return DAG.getCommutedVectorShuffle(*SVOp);
1325 
1326   // Check for non-undef masks pointing at an undef vector and make the masks
1327   // undef as well. This makes it easier to match the shuffle based solely on
1328   // the mask.
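  // For example, with a v4i32 shuffle mask <0, 5, 2, 7> and an undef V2, the
  // mask is canonicalized to <0, -1, 2, -1>.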
1329   if (V2IsUndef &&
1330       any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1331     SmallVector<int, 8> NewMask(OrigMask);
1332     for (int &M : NewMask)
1333       if (M >= NumElements)
1334         M = -1;
1335     return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1336   }
1337 
1338   // Check for illegal shuffle mask element index values.
1339   int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1340   (void)MaskUpperLimit;
1341   assert(llvm::all_of(OrigMask,
1342                       [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1343          "Out of bounds shuffle index");
1344 
1345   // For each vector width, delegate to a specialized lowering routine.
1346   if (VT.is128BitVector())
1347     return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1348 
1349   if (VT.is256BitVector())
1350     return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1351 
1352   return SDValue();
1353 }
1354 
1355 static bool isConstantOrUndef(const SDValue Op) {
1356   if (Op->isUndef())
1357     return true;
1358   if (isa<ConstantSDNode>(Op))
1359     return true;
1360   if (isa<ConstantFPSDNode>(Op))
1361     return true;
1362   return false;
1363 }
1364 
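// Returns true if any operand of the build_vector node is a constant or undef.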
1365 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1366   for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1367     if (isConstantOrUndef(Op->getOperand(i)))
1368       return true;
1369   return false;
1370 }
1371 
1372 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1373                                                    SelectionDAG &DAG) const {
1374   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1375   EVT ResTy = Op->getValueType(0);
1376   SDLoc DL(Op);
1377   APInt SplatValue, SplatUndef;
1378   unsigned SplatBitSize;
1379   bool HasAnyUndefs;
1380   bool Is128Vec = ResTy.is128BitVector();
1381   bool Is256Vec = ResTy.is256BitVector();
1382 
1383   if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1384       (!Subtarget.hasExtLASX() || !Is256Vec))
1385     return SDValue();
1386 
1387   if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1388                             /*MinSplatBits=*/8) &&
1389       SplatBitSize <= 64) {
1390     // We can only cope with 8, 16, 32, or 64-bit elements.
1391     if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1392         SplatBitSize != 64)
1393       return SDValue();
1394 
1395     EVT ViaVecTy;
1396 
1397     switch (SplatBitSize) {
1398     default:
1399       return SDValue();
1400     case 8:
1401       ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1402       break;
1403     case 16:
1404       ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1405       break;
1406     case 32:
1407       ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1408       break;
1409     case 64:
1410       ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1411       break;
1412     }
1413 
1414     // SelectionDAG::getConstant will promote SplatValue appropriately.
1415     SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1416 
1417     // Bitcast to the type we originally wanted.
1418     if (ViaVecTy != ResTy)
1419       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1420 
1421     return Result;
1422   }
1423 
1424   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1425     return Op;
1426 
1427   if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1428     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1429     // The resulting code is the same length as the expansion, but it doesn't
1430     // use memory operations.
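    // For example, a v4i32 build_vector (a, b, c, d) becomes a chain of four
    // INSERT_VECTOR_ELT nodes applied to an UNDEF vector, one per element
    // index.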
1431     EVT ResTy = Node->getValueType(0);
1432 
1433     assert(ResTy.isVector());
1434 
1435     unsigned NumElts = ResTy.getVectorNumElements();
1436     SDValue Vector = DAG.getUNDEF(ResTy);
1437     for (unsigned i = 0; i < NumElts; ++i) {
1438       Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1439                            Node->getOperand(i),
1440                            DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1441     }
1442     return Vector;
1443   }
1444 
1445   return SDValue();
1446 }
1447 
1448 SDValue
1449 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1450                                                  SelectionDAG &DAG) const {
1451   EVT VecTy = Op->getOperand(0)->getValueType(0);
1452   SDValue Idx = Op->getOperand(1);
1453   EVT EltTy = VecTy.getVectorElementType();
1454   unsigned NumElts = VecTy.getVectorNumElements();
1455 
1456   if (isa<ConstantSDNode>(Idx) &&
1457       (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1458        EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1459     return Op;
1460 
1461   return SDValue();
1462 }
1463 
1464 SDValue
1465 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1466                                                 SelectionDAG &DAG) const {
1467   if (isa<ConstantSDNode>(Op->getOperand(2)))
1468     return Op;
1469   return SDValue();
1470 }
1471 
1472 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1473                                                    SelectionDAG &DAG) const {
1474   SDLoc DL(Op);
1475   SyncScope::ID FenceSSID =
1476       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1477 
1478   // singlethread fences only synchronize with signal handlers on the same
1479   // thread and thus only need to preserve instruction order, not actually
1480   // enforce memory ordering.
1481   if (FenceSSID == SyncScope::SingleThread)
1482     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1483     return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1484 
1485   return Op;
1486 }
1487 
1488 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1489                                                      SelectionDAG &DAG) const {
1490 
1491   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1492     DAG.getContext()->emitError(
1493         "On LA64, only 64-bit registers can be written.");
1494     return Op.getOperand(0);
1495   }
1496 
1497   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1498     DAG.getContext()->emitError(
1499         "On LA32, only 32-bit registers can be written.");
1500     return Op.getOperand(0);
1501   }
1502 
1503   return Op;
1504 }
1505 
1506 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1507                                                 SelectionDAG &DAG) const {
1508   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1509     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1510                                 "be a constant integer");
1511     return SDValue();
1512   }
1513 
1514   MachineFunction &MF = DAG.getMachineFunction();
1515   MF.getFrameInfo().setFrameAddressIsTaken(true);
1516   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1517   EVT VT = Op.getValueType();
1518   SDLoc DL(Op);
1519   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1520   unsigned Depth = Op.getConstantOperandVal(0);
1521   int GRLenInBytes = Subtarget.getGRLen() / 8;
1522 
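  // Each iteration loads the parent frame address from the slot in which the
  // previous frame pointer was saved, 2 * GRLenInBytes below the current
  // frame pointer.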
1523   while (Depth--) {
1524     int Offset = -(GRLenInBytes * 2);
1525     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1526                               DAG.getIntPtrConstant(Offset, DL));
1527     FrameAddr =
1528         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1529   }
1530   return FrameAddr;
1531 }
1532 
1533 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1534                                                  SelectionDAG &DAG) const {
1535   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1536     return SDValue();
1537 
1538   // Currently, only the return address of the current frame can be lowered.
1539   if (Op.getConstantOperandVal(0) != 0) {
1540     DAG.getContext()->emitError(
1541         "return address can only be determined for the current frame");
1542     return SDValue();
1543   }
1544 
1545   MachineFunction &MF = DAG.getMachineFunction();
1546   MF.getFrameInfo().setReturnAddressIsTaken(true);
1547   MVT GRLenVT = Subtarget.getGRLenVT();
1548 
1549   // Return the value of the return address register, marking it an implicit
1550   // live-in.
1551   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1552                               getRegClassFor(GRLenVT));
1553   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1554 }
1555 
1556 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1557                                                    SelectionDAG &DAG) const {
1558   MachineFunction &MF = DAG.getMachineFunction();
1559   auto Size = Subtarget.getGRLen() / 8;
1560   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1561   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1562 }
1563 
1564 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1565                                               SelectionDAG &DAG) const {
1566   MachineFunction &MF = DAG.getMachineFunction();
1567   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1568 
1569   SDLoc DL(Op);
1570   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1571                                  getPointerTy(MF.getDataLayout()));
1572 
1573   // vastart just stores the address of the VarArgsFrameIndex slot into the
1574   // memory location argument.
1575   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1576   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1577                       MachinePointerInfo(SV));
1578 }
1579 
1580 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1581                                                  SelectionDAG &DAG) const {
1582   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1583          !Subtarget.hasBasicD() && "unexpected target features");
1584 
1585   SDLoc DL(Op);
1586   SDValue Op0 = Op.getOperand(0);
1587   if (Op0->getOpcode() == ISD::AND) {
1588     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1589     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1590       return Op;
1591   }
1592 
1593   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1594       Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
1595       Op0.getConstantOperandVal(2) == UINT64_C(0))
1596     return Op;
1597 
1598   if (Op0.getOpcode() == ISD::AssertZext &&
1599       cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1600     return Op;
1601 
1602   EVT OpVT = Op0.getValueType();
1603   EVT RetVT = Op.getValueType();
1604   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1605   MakeLibCallOptions CallOptions;
1606   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1607   SDValue Chain = SDValue();
1608   SDValue Result;
1609   std::tie(Result, Chain) =
1610       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1611   return Result;
1612 }
1613 
1614 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1615                                                  SelectionDAG &DAG) const {
1616   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1617          !Subtarget.hasBasicD() && "unexpected target features");
1618 
1619   SDLoc DL(Op);
1620   SDValue Op0 = Op.getOperand(0);
1621 
1622   if ((Op0.getOpcode() == ISD::AssertSext ||
1623        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1624       cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1625     return Op;
1626 
1627   EVT OpVT = Op0.getValueType();
1628   EVT RetVT = Op.getValueType();
1629   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1630   MakeLibCallOptions CallOptions;
1631   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1632   SDValue Chain = SDValue();
1633   SDValue Result;
1634   std::tie(Result, Chain) =
1635       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1636   return Result;
1637 }
1638 
1639 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1640                                               SelectionDAG &DAG) const {
1641 
1642   SDLoc DL(Op);
1643   SDValue Op0 = Op.getOperand(0);
1644 
1645   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1646       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1647     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1648     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1649   }
1650   return Op;
1651 }
1652 
1653 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1654                                                  SelectionDAG &DAG) const {
1655 
1656   SDLoc DL(Op);
1657 
1658   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1659       !Subtarget.hasBasicD()) {
1660     SDValue Dst =
1661         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1662     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1663   }
1664 
1665   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1666   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
1667   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1668 }
1669 
1670 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1671                              SelectionDAG &DAG, unsigned Flags) {
1672   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1673 }
1674 
1675 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1676                              SelectionDAG &DAG, unsigned Flags) {
1677   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1678                                    Flags);
1679 }
1680 
1681 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1682                              SelectionDAG &DAG, unsigned Flags) {
1683   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1684                                    N->getOffset(), Flags);
1685 }
1686 
1687 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1688                              SelectionDAG &DAG, unsigned Flags) {
1689   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1690 }
1691 
1692 template <class NodeTy>
1693 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1694                                          CodeModel::Model M,
1695                                          bool IsLocal) const {
1696   SDLoc DL(N);
1697   EVT Ty = getPointerTy(DAG.getDataLayout());
1698   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1699   SDValue Load;
1700 
1701   switch (M) {
1702   default:
1703     report_fatal_error("Unsupported code model");
1704 
1705   case CodeModel::Large: {
1706     assert(Subtarget.is64Bit() && "Large code model requires LA64");
1707 
1708     // This is not actually used, but is necessary for successfully matching
1709     // the PseudoLA_*_LARGE nodes.
1710     SDValue Tmp = DAG.getConstant(0, DL, Ty);
1711     if (IsLocal) {
1712       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), which
1713       // eventually becomes the desired 5-insn code sequence.
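      // (Roughly pcalau12i + addi.d + lu32i.d + lu52i.d + add.d; see
      // LoongArchExpandPseudoInsts.cpp for the exact expansion.)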
1714       Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1715                                         Tmp, Addr),
1716                      0);
1717     } else {
1718       // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), which
1719       // eventually becomes the desired 5-insn code sequence.
1720       Load = SDValue(
1721           DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1722           0);
1723     }
1724     break;
1725   }
1726 
1727   case CodeModel::Small:
1728   case CodeModel::Medium:
1729     if (IsLocal) {
1730       // This generates the pattern (PseudoLA_PCREL sym), which expands to
1731       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1732       Load = SDValue(
1733           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1734     } else {
1735       // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1736       // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1737       Load =
1738           SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1739     }
1740   }
1741 
1742   if (!IsLocal) {
1743     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1744     MachineFunction &MF = DAG.getMachineFunction();
1745     MachineMemOperand *MemOp = MF.getMachineMemOperand(
1746         MachinePointerInfo::getGOT(MF),
1747         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1748             MachineMemOperand::MOInvariant,
1749         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1750     DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1751   }
1752 
1753   return Load;
1754 }
1755 
1756 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1757                                                    SelectionDAG &DAG) const {
1758   return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1759                  DAG.getTarget().getCodeModel());
1760 }
1761 
1762 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1763                                                 SelectionDAG &DAG) const {
1764   return getAddr(cast<JumpTableSDNode>(Op), DAG,
1765                  DAG.getTarget().getCodeModel());
1766 }
1767 
1768 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1769                                                    SelectionDAG &DAG) const {
1770   return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1771                  DAG.getTarget().getCodeModel());
1772 }
1773 
1774 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1775                                                     SelectionDAG &DAG) const {
1776   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1777   assert(N->getOffset() == 0 && "unexpected offset in global node");
1778   auto CM = DAG.getTarget().getCodeModel();
1779   const GlobalValue *GV = N->getGlobal();
1780 
1781   if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1782     if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1783       CM = *GCM;
1784   }
1785 
1786   return getAddr(N, DAG, CM, GV->isDSOLocal());
1787 }
1788 
1789 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1790                                                   SelectionDAG &DAG,
1791                                                   unsigned Opc, bool UseGOT,
1792                                                   bool Large) const {
1793   SDLoc DL(N);
1794   EVT Ty = getPointerTy(DAG.getDataLayout());
1795   MVT GRLenVT = Subtarget.getGRLenVT();
1796 
1797   // This is not actually used, but is necessary for successfully matching the
1798   // PseudoLA_*_LARGE nodes.
1799   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1800   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1801   SDValue Offset = Large
1802                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1803                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1804   if (UseGOT) {
1805     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1806     MachineFunction &MF = DAG.getMachineFunction();
1807     MachineMemOperand *MemOp = MF.getMachineMemOperand(
1808         MachinePointerInfo::getGOT(MF),
1809         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1810             MachineMemOperand::MOInvariant,
1811         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1812     DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1813   }
1814 
1815   // Add the thread pointer.
1816   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1817                      DAG.getRegister(LoongArch::R2, GRLenVT));
1818 }
1819 
1820 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1821                                                    SelectionDAG &DAG,
1822                                                    unsigned Opc,
1823                                                    bool Large) const {
1824   SDLoc DL(N);
1825   EVT Ty = getPointerTy(DAG.getDataLayout());
1826   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1827 
1828   // This is not actually used, but is necessary for successfully matching the
1829   // PseudoLA_*_LARGE nodes.
1830   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1831 
1832   // Use a PC-relative addressing mode to access the dynamic GOT address.
1833   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1834   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1835                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1836 
1837   // Prepare argument list to generate call.
1838   ArgListTy Args;
1839   ArgListEntry Entry;
1840   Entry.Node = Load;
1841   Entry.Ty = CallTy;
1842   Args.push_back(Entry);
1843 
1844   // Setup call to __tls_get_addr.
1845   TargetLowering::CallLoweringInfo CLI(DAG);
1846   CLI.setDebugLoc(DL)
1847       .setChain(DAG.getEntryNode())
1848       .setLibCallee(CallingConv::C, CallTy,
1849                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1850                     std::move(Args));
1851 
1852   return LowerCallTo(CLI).first;
1853 }
1854 
1855 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1856                                                 SelectionDAG &DAG, unsigned Opc,
1857                                                 bool Large) const {
1858   SDLoc DL(N);
1859   EVT Ty = getPointerTy(DAG.getDataLayout());
1860   const GlobalValue *GV = N->getGlobal();
1861 
1862   // This is not actually used, but is necessary for successfully matching the
1863   // PseudoLA_*_LARGE nodes.
1864   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1865 
1866   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1867   // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1868   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1869   return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1870                : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1871 }
1872 
1873 SDValue
1874 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1875                                                SelectionDAG &DAG) const {
1876   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1877       CallingConv::GHC)
1878     report_fatal_error("In GHC calling convention TLS is not supported");
1879 
1880   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1881   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1882 
1883   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1884   assert(N->getOffset() == 0 && "unexpected offset in global node");
1885 
1886   if (DAG.getTarget().useEmulatedTLS())
1887     report_fatal_error("the emulated TLS is prohibited",
1888                        /*GenCrashDiag=*/false);
1889 
1890   bool IsDesc = DAG.getTarget().useTLSDESC();
1891 
1892   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1893   case TLSModel::GeneralDynamic:
1894     // In this model, application code calls the dynamic linker function
1895     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1896     // runtime.
1897     if (!IsDesc)
1898       return getDynamicTLSAddr(N, DAG,
1899                                Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1900                                      : LoongArch::PseudoLA_TLS_GD,
1901                                Large);
1902     break;
1903   case TLSModel::LocalDynamic:
1904     // Same as GeneralDynamic, except for assembly modifiers and relocation
1905     // records.
1906     if (!IsDesc)
1907       return getDynamicTLSAddr(N, DAG,
1908                                Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1909                                      : LoongArch::PseudoLA_TLS_LD,
1910                                Large);
1911     break;
1912   case TLSModel::InitialExec:
1913     // This model uses the GOT to resolve TLS offsets.
1914     return getStaticTLSAddr(N, DAG,
1915                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1916                                   : LoongArch::PseudoLA_TLS_IE,
1917                             /*UseGOT=*/true, Large);
1918   case TLSModel::LocalExec:
1919     // This model is used for static linking, as the TLS offsets are resolved
1920     // during program linking.
1921     //
1922     // This node doesn't need an extra argument for the large code model.
1923     return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1924                             /*UseGOT=*/false);
1925   }
1926 
1927   return getTLSDescAddr(N, DAG,
1928                         Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1929                               : LoongArch::PseudoLA_TLS_DESC_PC,
1930                         Large);
1931 }
1932 
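// Checks that the immediate operand at index ImmOp of the intrinsic node Op
// fits in an N-bit signed or unsigned field. If it does not, emits an error
// and returns an UNDEF value; otherwise returns an empty SDValue.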
1933 template <unsigned N>
1934 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1935                                     SelectionDAG &DAG, bool IsSigned = false) {
1936   auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1937   // Check the ImmArg.
1938   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1939       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1940     DAG.getContext()->emitError(Op->getOperationName(0) +
1941                                 ": argument out of range.");
1942     return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
1943   }
1944   return SDValue();
1945 }
1946 
1947 SDValue
1948 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1949                                                  SelectionDAG &DAG) const {
1950   SDLoc DL(Op);
1951   switch (Op.getConstantOperandVal(0)) {
1952   default:
1953     return SDValue(); // Don't custom lower most intrinsics.
1954   case Intrinsic::thread_pointer: {
1955     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1956     return DAG.getRegister(LoongArch::R2, PtrVT);
1957   }
1958   case Intrinsic::loongarch_lsx_vpickve2gr_d:
1959   case Intrinsic::loongarch_lsx_vpickve2gr_du:
1960   case Intrinsic::loongarch_lsx_vreplvei_d:
1961   case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1962     return checkIntrinsicImmArg<1>(Op, 2, DAG);
1963   case Intrinsic::loongarch_lsx_vreplvei_w:
1964   case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1965   case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1966   case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1967   case Intrinsic::loongarch_lasx_xvpickve_d:
1968   case Intrinsic::loongarch_lasx_xvpickve_d_f:
1969     return checkIntrinsicImmArg<2>(Op, 2, DAG);
1970   case Intrinsic::loongarch_lasx_xvinsve0_d:
1971     return checkIntrinsicImmArg<2>(Op, 3, DAG);
1972   case Intrinsic::loongarch_lsx_vsat_b:
1973   case Intrinsic::loongarch_lsx_vsat_bu:
1974   case Intrinsic::loongarch_lsx_vrotri_b:
1975   case Intrinsic::loongarch_lsx_vsllwil_h_b:
1976   case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1977   case Intrinsic::loongarch_lsx_vsrlri_b:
1978   case Intrinsic::loongarch_lsx_vsrari_b:
1979   case Intrinsic::loongarch_lsx_vreplvei_h:
1980   case Intrinsic::loongarch_lasx_xvsat_b:
1981   case Intrinsic::loongarch_lasx_xvsat_bu:
1982   case Intrinsic::loongarch_lasx_xvrotri_b:
1983   case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1984   case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1985   case Intrinsic::loongarch_lasx_xvsrlri_b:
1986   case Intrinsic::loongarch_lasx_xvsrari_b:
1987   case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1988   case Intrinsic::loongarch_lasx_xvpickve_w:
1989   case Intrinsic::loongarch_lasx_xvpickve_w_f:
1990     return checkIntrinsicImmArg<3>(Op, 2, DAG);
1991   case Intrinsic::loongarch_lasx_xvinsve0_w:
1992     return checkIntrinsicImmArg<3>(Op, 3, DAG);
1993   case Intrinsic::loongarch_lsx_vsat_h:
1994   case Intrinsic::loongarch_lsx_vsat_hu:
1995   case Intrinsic::loongarch_lsx_vrotri_h:
1996   case Intrinsic::loongarch_lsx_vsllwil_w_h:
1997   case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1998   case Intrinsic::loongarch_lsx_vsrlri_h:
1999   case Intrinsic::loongarch_lsx_vsrari_h:
2000   case Intrinsic::loongarch_lsx_vreplvei_b:
2001   case Intrinsic::loongarch_lasx_xvsat_h:
2002   case Intrinsic::loongarch_lasx_xvsat_hu:
2003   case Intrinsic::loongarch_lasx_xvrotri_h:
2004   case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2005   case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2006   case Intrinsic::loongarch_lasx_xvsrlri_h:
2007   case Intrinsic::loongarch_lasx_xvsrari_h:
2008   case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2009     return checkIntrinsicImmArg<4>(Op, 2, DAG);
2010   case Intrinsic::loongarch_lsx_vsrlni_b_h:
2011   case Intrinsic::loongarch_lsx_vsrani_b_h:
2012   case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2013   case Intrinsic::loongarch_lsx_vsrarni_b_h:
2014   case Intrinsic::loongarch_lsx_vssrlni_b_h:
2015   case Intrinsic::loongarch_lsx_vssrani_b_h:
2016   case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2017   case Intrinsic::loongarch_lsx_vssrani_bu_h:
2018   case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2019   case Intrinsic::loongarch_lsx_vssrarni_b_h:
2020   case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2021   case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2022   case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2023   case Intrinsic::loongarch_lasx_xvsrani_b_h:
2024   case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2025   case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2026   case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2027   case Intrinsic::loongarch_lasx_xvssrani_b_h:
2028   case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2029   case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2030   case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2031   case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2032   case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2033   case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2034     return checkIntrinsicImmArg<4>(Op, 3, DAG);
2035   case Intrinsic::loongarch_lsx_vsat_w:
2036   case Intrinsic::loongarch_lsx_vsat_wu:
2037   case Intrinsic::loongarch_lsx_vrotri_w:
2038   case Intrinsic::loongarch_lsx_vsllwil_d_w:
2039   case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2040   case Intrinsic::loongarch_lsx_vsrlri_w:
2041   case Intrinsic::loongarch_lsx_vsrari_w:
2042   case Intrinsic::loongarch_lsx_vslei_bu:
2043   case Intrinsic::loongarch_lsx_vslei_hu:
2044   case Intrinsic::loongarch_lsx_vslei_wu:
2045   case Intrinsic::loongarch_lsx_vslei_du:
2046   case Intrinsic::loongarch_lsx_vslti_bu:
2047   case Intrinsic::loongarch_lsx_vslti_hu:
2048   case Intrinsic::loongarch_lsx_vslti_wu:
2049   case Intrinsic::loongarch_lsx_vslti_du:
2050   case Intrinsic::loongarch_lsx_vbsll_v:
2051   case Intrinsic::loongarch_lsx_vbsrl_v:
2052   case Intrinsic::loongarch_lasx_xvsat_w:
2053   case Intrinsic::loongarch_lasx_xvsat_wu:
2054   case Intrinsic::loongarch_lasx_xvrotri_w:
2055   case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2056   case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2057   case Intrinsic::loongarch_lasx_xvsrlri_w:
2058   case Intrinsic::loongarch_lasx_xvsrari_w:
2059   case Intrinsic::loongarch_lasx_xvslei_bu:
2060   case Intrinsic::loongarch_lasx_xvslei_hu:
2061   case Intrinsic::loongarch_lasx_xvslei_wu:
2062   case Intrinsic::loongarch_lasx_xvslei_du:
2063   case Intrinsic::loongarch_lasx_xvslti_bu:
2064   case Intrinsic::loongarch_lasx_xvslti_hu:
2065   case Intrinsic::loongarch_lasx_xvslti_wu:
2066   case Intrinsic::loongarch_lasx_xvslti_du:
2067   case Intrinsic::loongarch_lasx_xvbsll_v:
2068   case Intrinsic::loongarch_lasx_xvbsrl_v:
2069     return checkIntrinsicImmArg<5>(Op, 2, DAG);
2070   case Intrinsic::loongarch_lsx_vseqi_b:
2071   case Intrinsic::loongarch_lsx_vseqi_h:
2072   case Intrinsic::loongarch_lsx_vseqi_w:
2073   case Intrinsic::loongarch_lsx_vseqi_d:
2074   case Intrinsic::loongarch_lsx_vslei_b:
2075   case Intrinsic::loongarch_lsx_vslei_h:
2076   case Intrinsic::loongarch_lsx_vslei_w:
2077   case Intrinsic::loongarch_lsx_vslei_d:
2078   case Intrinsic::loongarch_lsx_vslti_b:
2079   case Intrinsic::loongarch_lsx_vslti_h:
2080   case Intrinsic::loongarch_lsx_vslti_w:
2081   case Intrinsic::loongarch_lsx_vslti_d:
2082   case Intrinsic::loongarch_lasx_xvseqi_b:
2083   case Intrinsic::loongarch_lasx_xvseqi_h:
2084   case Intrinsic::loongarch_lasx_xvseqi_w:
2085   case Intrinsic::loongarch_lasx_xvseqi_d:
2086   case Intrinsic::loongarch_lasx_xvslei_b:
2087   case Intrinsic::loongarch_lasx_xvslei_h:
2088   case Intrinsic::loongarch_lasx_xvslei_w:
2089   case Intrinsic::loongarch_lasx_xvslei_d:
2090   case Intrinsic::loongarch_lasx_xvslti_b:
2091   case Intrinsic::loongarch_lasx_xvslti_h:
2092   case Intrinsic::loongarch_lasx_xvslti_w:
2093   case Intrinsic::loongarch_lasx_xvslti_d:
2094     return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2095   case Intrinsic::loongarch_lsx_vsrlni_h_w:
2096   case Intrinsic::loongarch_lsx_vsrani_h_w:
2097   case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2098   case Intrinsic::loongarch_lsx_vsrarni_h_w:
2099   case Intrinsic::loongarch_lsx_vssrlni_h_w:
2100   case Intrinsic::loongarch_lsx_vssrani_h_w:
2101   case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2102   case Intrinsic::loongarch_lsx_vssrani_hu_w:
2103   case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2104   case Intrinsic::loongarch_lsx_vssrarni_h_w:
2105   case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2106   case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2107   case Intrinsic::loongarch_lsx_vfrstpi_b:
2108   case Intrinsic::loongarch_lsx_vfrstpi_h:
2109   case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2110   case Intrinsic::loongarch_lasx_xvsrani_h_w:
2111   case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2112   case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2113   case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2114   case Intrinsic::loongarch_lasx_xvssrani_h_w:
2115   case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2116   case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2117   case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2118   case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2119   case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2120   case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2121   case Intrinsic::loongarch_lasx_xvfrstpi_b:
2122   case Intrinsic::loongarch_lasx_xvfrstpi_h:
2123     return checkIntrinsicImmArg<5>(Op, 3, DAG);
2124   case Intrinsic::loongarch_lsx_vsat_d:
2125   case Intrinsic::loongarch_lsx_vsat_du:
2126   case Intrinsic::loongarch_lsx_vrotri_d:
2127   case Intrinsic::loongarch_lsx_vsrlri_d:
2128   case Intrinsic::loongarch_lsx_vsrari_d:
2129   case Intrinsic::loongarch_lasx_xvsat_d:
2130   case Intrinsic::loongarch_lasx_xvsat_du:
2131   case Intrinsic::loongarch_lasx_xvrotri_d:
2132   case Intrinsic::loongarch_lasx_xvsrlri_d:
2133   case Intrinsic::loongarch_lasx_xvsrari_d:
2134     return checkIntrinsicImmArg<6>(Op, 2, DAG);
2135   case Intrinsic::loongarch_lsx_vsrlni_w_d:
2136   case Intrinsic::loongarch_lsx_vsrani_w_d:
2137   case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2138   case Intrinsic::loongarch_lsx_vsrarni_w_d:
2139   case Intrinsic::loongarch_lsx_vssrlni_w_d:
2140   case Intrinsic::loongarch_lsx_vssrani_w_d:
2141   case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2142   case Intrinsic::loongarch_lsx_vssrani_wu_d:
2143   case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2144   case Intrinsic::loongarch_lsx_vssrarni_w_d:
2145   case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2146   case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2147   case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2148   case Intrinsic::loongarch_lasx_xvsrani_w_d:
2149   case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2150   case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2151   case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2152   case Intrinsic::loongarch_lasx_xvssrani_w_d:
2153   case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2154   case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2155   case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2156   case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2157   case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2158   case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2159     return checkIntrinsicImmArg<6>(Op, 3, DAG);
2160   case Intrinsic::loongarch_lsx_vsrlni_d_q:
2161   case Intrinsic::loongarch_lsx_vsrani_d_q:
2162   case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2163   case Intrinsic::loongarch_lsx_vsrarni_d_q:
2164   case Intrinsic::loongarch_lsx_vssrlni_d_q:
2165   case Intrinsic::loongarch_lsx_vssrani_d_q:
2166   case Intrinsic::loongarch_lsx_vssrlni_du_q:
2167   case Intrinsic::loongarch_lsx_vssrani_du_q:
2168   case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2169   case Intrinsic::loongarch_lsx_vssrarni_d_q:
2170   case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2171   case Intrinsic::loongarch_lsx_vssrarni_du_q:
2172   case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2173   case Intrinsic::loongarch_lasx_xvsrani_d_q:
2174   case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2175   case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2176   case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2177   case Intrinsic::loongarch_lasx_xvssrani_d_q:
2178   case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2179   case Intrinsic::loongarch_lasx_xvssrani_du_q:
2180   case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2181   case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2182   case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2183   case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2184     return checkIntrinsicImmArg<7>(Op, 3, DAG);
2185   case Intrinsic::loongarch_lsx_vnori_b:
2186   case Intrinsic::loongarch_lsx_vshuf4i_b:
2187   case Intrinsic::loongarch_lsx_vshuf4i_h:
2188   case Intrinsic::loongarch_lsx_vshuf4i_w:
2189   case Intrinsic::loongarch_lasx_xvnori_b:
2190   case Intrinsic::loongarch_lasx_xvshuf4i_b:
2191   case Intrinsic::loongarch_lasx_xvshuf4i_h:
2192   case Intrinsic::loongarch_lasx_xvshuf4i_w:
2193   case Intrinsic::loongarch_lasx_xvpermi_d:
2194     return checkIntrinsicImmArg<8>(Op, 2, DAG);
2195   case Intrinsic::loongarch_lsx_vshuf4i_d:
2196   case Intrinsic::loongarch_lsx_vpermi_w:
2197   case Intrinsic::loongarch_lsx_vbitseli_b:
2198   case Intrinsic::loongarch_lsx_vextrins_b:
2199   case Intrinsic::loongarch_lsx_vextrins_h:
2200   case Intrinsic::loongarch_lsx_vextrins_w:
2201   case Intrinsic::loongarch_lsx_vextrins_d:
2202   case Intrinsic::loongarch_lasx_xvshuf4i_d:
2203   case Intrinsic::loongarch_lasx_xvpermi_w:
2204   case Intrinsic::loongarch_lasx_xvpermi_q:
2205   case Intrinsic::loongarch_lasx_xvbitseli_b:
2206   case Intrinsic::loongarch_lasx_xvextrins_b:
2207   case Intrinsic::loongarch_lasx_xvextrins_h:
2208   case Intrinsic::loongarch_lasx_xvextrins_w:
2209   case Intrinsic::loongarch_lasx_xvextrins_d:
2210     return checkIntrinsicImmArg<8>(Op, 3, DAG);
2211   case Intrinsic::loongarch_lsx_vrepli_b:
2212   case Intrinsic::loongarch_lsx_vrepli_h:
2213   case Intrinsic::loongarch_lsx_vrepli_w:
2214   case Intrinsic::loongarch_lsx_vrepli_d:
2215   case Intrinsic::loongarch_lasx_xvrepli_b:
2216   case Intrinsic::loongarch_lasx_xvrepli_h:
2217   case Intrinsic::loongarch_lasx_xvrepli_w:
2218   case Intrinsic::loongarch_lasx_xvrepli_d:
2219     return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2220   case Intrinsic::loongarch_lsx_vldi:
2221   case Intrinsic::loongarch_lasx_xvldi:
2222     return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2223   }
2224 }
2225 
2226 // Helper function that emits an error message for intrinsics with a chain
2227 // and returns the merge values of an UNDEF and the chain.
2228 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2229                                                   StringRef ErrorMsg,
2230                                                   SelectionDAG &DAG) {
2231   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2232   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2233                             SDLoc(Op));
2234 }
2235 
2236 SDValue
2237 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2238                                                 SelectionDAG &DAG) const {
2239   SDLoc DL(Op);
2240   MVT GRLenVT = Subtarget.getGRLenVT();
2241   EVT VT = Op.getValueType();
2242   SDValue Chain = Op.getOperand(0);
2243   const StringRef ErrorMsgOOR = "argument out of range";
2244   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2245   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2246 
2247   switch (Op.getConstantOperandVal(1)) {
2248   default:
2249     return Op;
2250   case Intrinsic::loongarch_crc_w_b_w:
2251   case Intrinsic::loongarch_crc_w_h_w:
2252   case Intrinsic::loongarch_crc_w_w_w:
2253   case Intrinsic::loongarch_crc_w_d_w:
2254   case Intrinsic::loongarch_crcc_w_b_w:
2255   case Intrinsic::loongarch_crcc_w_h_w:
2256   case Intrinsic::loongarch_crcc_w_w_w:
2257   case Intrinsic::loongarch_crcc_w_d_w:
2258     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2259   case Intrinsic::loongarch_csrrd_w:
2260   case Intrinsic::loongarch_csrrd_d: {
2261     unsigned Imm = Op.getConstantOperandVal(2);
2262     return !isUInt<14>(Imm)
2263                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2264                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2265                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2266   }
2267   case Intrinsic::loongarch_csrwr_w:
2268   case Intrinsic::loongarch_csrwr_d: {
2269     unsigned Imm = Op.getConstantOperandVal(3);
2270     return !isUInt<14>(Imm)
2271                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2272                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2273                              {Chain, Op.getOperand(2),
2274                               DAG.getConstant(Imm, DL, GRLenVT)});
2275   }
2276   case Intrinsic::loongarch_csrxchg_w:
2277   case Intrinsic::loongarch_csrxchg_d: {
2278     unsigned Imm = Op.getConstantOperandVal(4);
2279     return !isUInt<14>(Imm)
2280                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2281                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2282                              {Chain, Op.getOperand(2), Op.getOperand(3),
2283                               DAG.getConstant(Imm, DL, GRLenVT)});
2284   }
2285   case Intrinsic::loongarch_iocsrrd_d: {
2286     return DAG.getNode(
2287         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2288         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2289   }
2290 #define IOCSRRD_CASE(NAME, NODE)                                               \
2291   case Intrinsic::loongarch_##NAME: {                                          \
2292     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
2293                        {Chain, Op.getOperand(2)});                             \
2294   }
2295     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2296     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2297     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2298 #undef IOCSRRD_CASE
2299   case Intrinsic::loongarch_cpucfg: {
2300     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2301                        {Chain, Op.getOperand(2)});
2302   }
2303   case Intrinsic::loongarch_lddir_d: {
2304     unsigned Imm = Op.getConstantOperandVal(3);
2305     return !isUInt<8>(Imm)
2306                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2307                : Op;
2308   }
2309   case Intrinsic::loongarch_movfcsr2gr: {
2310     if (!Subtarget.hasBasicF())
2311       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2312     unsigned Imm = Op.getConstantOperandVal(2);
2313     return !isUInt<2>(Imm)
2314                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2315                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2316                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2317   }
2318   case Intrinsic::loongarch_lsx_vld:
2319   case Intrinsic::loongarch_lsx_vldrepl_b:
2320   case Intrinsic::loongarch_lasx_xvld:
2321   case Intrinsic::loongarch_lasx_xvldrepl_b:
2322     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2323                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2324                : SDValue();
2325   case Intrinsic::loongarch_lsx_vldrepl_h:
2326   case Intrinsic::loongarch_lasx_xvldrepl_h:
2327     return !isShiftedInt<11, 1>(
2328                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2329                ? emitIntrinsicWithChainErrorMessage(
2330                      Op, "argument out of range or not a multiple of 2", DAG)
2331                : SDValue();
2332   case Intrinsic::loongarch_lsx_vldrepl_w:
2333   case Intrinsic::loongarch_lasx_xvldrepl_w:
2334     return !isShiftedInt<10, 2>(
2335                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2336                ? emitIntrinsicWithChainErrorMessage(
2337                      Op, "argument out of range or not a multiple of 4", DAG)
2338                : SDValue();
2339   case Intrinsic::loongarch_lsx_vldrepl_d:
2340   case Intrinsic::loongarch_lasx_xvldrepl_d:
2341     return !isShiftedInt<9, 3>(
2342                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2343                ? emitIntrinsicWithChainErrorMessage(
2344                      Op, "argument out of range or not a multiple of 8", DAG)
2345                : SDValue();
2346   }
2347 }
2348 
2349 // Helper function that emits an error message for intrinsics with a void
2350 // return value and returns the chain.
2351 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2352                                          SelectionDAG &DAG) {
2353 
2354   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2355   return Op.getOperand(0);
2356 }
2357 
2358 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2359                                                      SelectionDAG &DAG) const {
2360   SDLoc DL(Op);
2361   MVT GRLenVT = Subtarget.getGRLenVT();
2362   SDValue Chain = Op.getOperand(0);
2363   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2364   SDValue Op2 = Op.getOperand(2);
2365   const StringRef ErrorMsgOOR = "argument out of range";
2366   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2367   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2368   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2369 
2370   switch (IntrinsicEnum) {
2371   default:
2372     // TODO: Add more Intrinsics.
2373     return SDValue();
2374   case Intrinsic::loongarch_cacop_d:
2375   case Intrinsic::loongarch_cacop_w: {
2376     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2377       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2378     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2379       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2380     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2381     unsigned Imm1 = Op2->getAsZExtVal();
2382     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2383     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2384       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2385     return Op;
2386   }
2387   case Intrinsic::loongarch_dbar: {
2388     unsigned Imm = Op2->getAsZExtVal();
2389     return !isUInt<15>(Imm)
2390                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2391                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2392                              DAG.getConstant(Imm, DL, GRLenVT));
2393   }
2394   case Intrinsic::loongarch_ibar: {
2395     unsigned Imm = Op2->getAsZExtVal();
2396     return !isUInt<15>(Imm)
2397                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2398                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2399                              DAG.getConstant(Imm, DL, GRLenVT));
2400   }
2401   case Intrinsic::loongarch_break: {
2402     unsigned Imm = Op2->getAsZExtVal();
2403     return !isUInt<15>(Imm)
2404                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2405                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2406                              DAG.getConstant(Imm, DL, GRLenVT));
2407   }
2408   case Intrinsic::loongarch_movgr2fcsr: {
2409     if (!Subtarget.hasBasicF())
2410       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2411     unsigned Imm = Op2->getAsZExtVal();
2412     return !isUInt<2>(Imm)
2413                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2414                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2415                              DAG.getConstant(Imm, DL, GRLenVT),
2416                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2417                                          Op.getOperand(3)));
2418   }
2419   case Intrinsic::loongarch_syscall: {
2420     unsigned Imm = Op2->getAsZExtVal();
2421     return !isUInt<15>(Imm)
2422                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2423                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2424                              DAG.getConstant(Imm, DL, GRLenVT));
2425   }
2426 #define IOCSRWR_CASE(NAME, NODE)                                               \
2427   case Intrinsic::loongarch_##NAME: {                                          \
2428     SDValue Op3 = Op.getOperand(3);                                            \
2429     return Subtarget.is64Bit()                                                 \
2430                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
2431                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
2432                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
2433                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
2434                              Op3);                                             \
2435   }
2436     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2437     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2438     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2439 #undef IOCSRWR_CASE
2440   case Intrinsic::loongarch_iocsrwr_d: {
2441     return !Subtarget.is64Bit()
2442                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2443                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2444                              Op2,
2445                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2446                                          Op.getOperand(3)));
2447   }
2448 #define ASRT_LE_GT_CASE(NAME)                                                  \
2449   case Intrinsic::loongarch_##NAME: {                                          \
2450     return !Subtarget.is64Bit()                                                \
2451                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
2452                : Op;                                                           \
2453   }
2454     ASRT_LE_GT_CASE(asrtle_d)
2455     ASRT_LE_GT_CASE(asrtgt_d)
2456 #undef ASRT_LE_GT_CASE
2457   case Intrinsic::loongarch_ldpte_d: {
2458     unsigned Imm = Op.getConstantOperandVal(3);
2459     return !Subtarget.is64Bit()
2460                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2461            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2462                              : Op;
2463   }
2464   case Intrinsic::loongarch_lsx_vst:
2465   case Intrinsic::loongarch_lasx_xvst:
2466     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2467                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2468                : SDValue();
2469   case Intrinsic::loongarch_lasx_xvstelm_b:
2470     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2471             !isUInt<5>(Op.getConstantOperandVal(5)))
2472                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473                : SDValue();
2474   case Intrinsic::loongarch_lsx_vstelm_b:
2475     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2476             !isUInt<4>(Op.getConstantOperandVal(5)))
2477                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2478                : SDValue();
2479   case Intrinsic::loongarch_lasx_xvstelm_h:
2480     return (!isShiftedInt<8, 1>(
2481                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2482             !isUInt<4>(Op.getConstantOperandVal(5)))
2483                ? emitIntrinsicErrorMessage(
2484                      Op, "argument out of range or not a multiple of 2", DAG)
2485                : SDValue();
2486   case Intrinsic::loongarch_lsx_vstelm_h:
2487     return (!isShiftedInt<8, 1>(
2488                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2489             !isUInt<3>(Op.getConstantOperandVal(5)))
2490                ? emitIntrinsicErrorMessage(
2491                      Op, "argument out of range or not a multiple of 2", DAG)
2492                : SDValue();
2493   case Intrinsic::loongarch_lasx_xvstelm_w:
2494     return (!isShiftedInt<8, 2>(
2495                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2496             !isUInt<3>(Op.getConstantOperandVal(5)))
2497                ? emitIntrinsicErrorMessage(
2498                      Op, "argument out of range or not a multiple of 4", DAG)
2499                : SDValue();
2500   case Intrinsic::loongarch_lsx_vstelm_w:
2501     return (!isShiftedInt<8, 2>(
2502                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2503             !isUInt<2>(Op.getConstantOperandVal(5)))
2504                ? emitIntrinsicErrorMessage(
2505                      Op, "argument out of range or not a multiple of 4", DAG)
2506                : SDValue();
2507   case Intrinsic::loongarch_lasx_xvstelm_d:
2508     return (!isShiftedInt<8, 3>(
2509                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2510             !isUInt<2>(Op.getConstantOperandVal(5)))
2511                ? emitIntrinsicErrorMessage(
2512                      Op, "argument out of range or not a multiple of 8", DAG)
2513                : SDValue();
2514   case Intrinsic::loongarch_lsx_vstelm_d:
2515     return (!isShiftedInt<8, 3>(
2516                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2517             !isUInt<1>(Op.getConstantOperandVal(5)))
2518                ? emitIntrinsicErrorMessage(
2519                      Op, "argument out of range or not a multiple of 8", DAG)
2520                : SDValue();
2521   }
2522 }
2523 
2524 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2525                                                      SelectionDAG &DAG) const {
2526   SDLoc DL(Op);
2527   SDValue Lo = Op.getOperand(0);
2528   SDValue Hi = Op.getOperand(1);
2529   SDValue Shamt = Op.getOperand(2);
2530   EVT VT = Lo.getValueType();
2531 
2532   // if Shamt-GRLen < 0: // Shamt < GRLen
2533   //   Lo = Lo << Shamt
2534   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2535   // else:
2536   //   Hi = Lo << (Shamt-GRLen)
2537   //   Lo = 0
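  // For example, on LA32 (GRLen = 32) a 64-bit shift left by 40 takes the
  // second branch: Hi = Lo << 8 and Lo = 0.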
2538 
2539   SDValue Zero = DAG.getConstant(0, DL, VT);
2540   SDValue One = DAG.getConstant(1, DL, VT);
2541   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2542   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2543   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2544   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2545 
2546   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2547   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2548   SDValue ShiftRightLo =
2549       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2550   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2551   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2552   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2553 
2554   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2555 
2556   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2557   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2558 
2559   SDValue Parts[2] = {Lo, Hi};
2560   return DAG.getMergeValues(Parts, DL);
2561 }
2562 
2563 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2564                                                       SelectionDAG &DAG,
2565                                                       bool IsSRA) const {
2566   SDLoc DL(Op);
2567   SDValue Lo = Op.getOperand(0);
2568   SDValue Hi = Op.getOperand(1);
2569   SDValue Shamt = Op.getOperand(2);
2570   EVT VT = Lo.getValueType();
2571 
2572   // SRA expansion:
2573   //   if Shamt-GRLen < 0: // Shamt < GRLen
2574   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2575   //     Hi = Hi >>s Shamt
2576   //   else:
2577   //     Lo = Hi >>s (Shamt-GRLen);
2578   //     Hi = Hi >>s (GRLen-1)
2579   //
2580   // SRL expansion:
2581   //   if Shamt-GRLen < 0: // Shamt < GRLen
2582   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2583   //     Hi = Hi >>u Shamt
2584   //   else:
2585   //     Lo = Hi >>u (Shamt-GRLen);
2586   //     Hi = 0;
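  //
  // A worked example (illustrative only), with GRLen=32 the SRL expansion
  // gives, for Shamt=4:
  //   Lo = (Lo >>u 4) | (Hi << 28)
  //   Hi = Hi >>u 4
  // and for Shamt=40:
  //   Lo = Hi >>u 8
  //   Hi = 0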
2587 
2588   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2589 
2590   SDValue Zero = DAG.getConstant(0, DL, VT);
2591   SDValue One = DAG.getConstant(1, DL, VT);
2592   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2593   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2594   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2595   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2596 
2597   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2598   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2599   SDValue ShiftLeftHi =
2600       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2601   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2602   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2603   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2604   SDValue HiFalse =
2605       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2606 
2607   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2608 
2609   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2610   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2611 
2612   SDValue Parts[2] = {Lo, Hi};
2613   return DAG.getMergeValues(Parts, DL);
2614 }
2615 
2616 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2617 // form of the given Opcode.
2618 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2619   switch (Opcode) {
2620   default:
2621     llvm_unreachable("Unexpected opcode");
2622   case ISD::UDIV:
2623     return LoongArchISD::DIV_WU;
2624   case ISD::UREM:
2625     return LoongArchISD::MOD_WU;
2626   case ISD::SHL:
2627     return LoongArchISD::SLL_W;
2628   case ISD::SRA:
2629     return LoongArchISD::SRA_W;
2630   case ISD::SRL:
2631     return LoongArchISD::SRL_W;
2632   case ISD::ROTL:
2633   case ISD::ROTR:
2634     return LoongArchISD::ROTR_W;
2635   case ISD::CTTZ:
2636     return LoongArchISD::CTZ_W;
2637   case ISD::CTLZ:
2638     return LoongArchISD::CLZ_W;
2639   }
2640 }
2641 
2642 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2643 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2644 // otherwise be promoted to i64, making it difficult to select the
2645 // SLL_W/.../*W node later, because the fact that the operation was
2646 // originally of type i8/i16/i32 is lost.
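//
// For example (illustrative only), an (i32 (shl X, Y)) node on LA64 is rebuilt
// here as:
//   (i32 (trunc (SLL_W (any_ext i64 X), (any_ext i64 Y))))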
2647 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2648                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2649   SDLoc DL(N);
2650   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2651   SDValue NewOp0, NewRes;
2652 
2653   switch (NumOp) {
2654   default:
2655     llvm_unreachable("Unexpected NumOp");
2656   case 1: {
2657     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2658     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2659     break;
2660   }
2661   case 2: {
2662     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2663     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2664     if (N->getOpcode() == ISD::ROTL) {
2665       SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2666       NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2667     }
2668     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2669     break;
2670   }
2671     // TODO: Handle more NumOp.
2672   }
2673 
2674   // ReplaceNodeResults requires we maintain the same type for the return
2675   // value.
2676   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2677 }
2678 
2679 // Converts the given 32-bit operation to an i64 operation with sign-extension
2680 // semantics, to reduce the number of sign-extension instructions.
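//
// For example (illustrative only), an (i32 (add X, Y)) node becomes:
//   (i32 (trunc (sext_inreg (add (any_ext i64 X), (any_ext i64 Y)), i32)))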
2681 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2682   SDLoc DL(N);
2683   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2684   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2685   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2686   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2687                                DAG.getValueType(MVT::i32));
2688   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2689 }
2690 
2691 // Helper function that emits an error message for intrinsics with/without a
2692 // chain and replaces the results with UNDEF (plus the chain, if present).
2693 static void emitErrorAndReplaceIntrinsicResults(
2694     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2695     StringRef ErrorMsg, bool WithChain = true) {
2696   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2697   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2698   if (!WithChain)
2699     return;
2700   Results.push_back(N->getOperand(0));
2701 }
2702 
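// Replace the result of an (x)vpickve2gr intrinsic whose result type is not
// legal: the element index must be an unsigned N-bit immediate (otherwise an
// error is emitted and the result is replaced with UNDEF); the element is
// picked on GRLenVT via ResOp (VPICK_SEXT_ELT or VPICK_ZEXT_ELT) and then
// truncated back to the original result type.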
2703 template <unsigned N>
2704 static void
2705 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2706                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2707                          unsigned ResOp) {
2708   const StringRef ErrorMsgOOR = "argument out of range";
2709   unsigned Imm = Node->getConstantOperandVal(2);
2710   if (!isUInt<N>(Imm)) {
2711     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2712                                         /*WithChain=*/false);
2713     return;
2714   }
2715   SDLoc DL(Node);
2716   SDValue Vec = Node->getOperand(1);
2717 
2718   SDValue PickElt =
2719       DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2720                   DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2721                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
2722   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2723                                 PickElt.getValue(0)));
2724 }
2725 
2726 static void replaceVecCondBranchResults(SDNode *N,
2727                                         SmallVectorImpl<SDValue> &Results,
2728                                         SelectionDAG &DAG,
2729                                         const LoongArchSubtarget &Subtarget,
2730                                         unsigned ResOp) {
2731   SDLoc DL(N);
2732   SDValue Vec = N->getOperand(1);
2733 
2734   SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2735   Results.push_back(
2736       DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2737 }
2738 
2739 static void
2740 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2741                                  SelectionDAG &DAG,
2742                                  const LoongArchSubtarget &Subtarget) {
2743   switch (N->getConstantOperandVal(0)) {
2744   default:
2745     llvm_unreachable("Unexpected Intrinsic.");
2746   case Intrinsic::loongarch_lsx_vpickve2gr_b:
2747     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2748                                 LoongArchISD::VPICK_SEXT_ELT);
2749     break;
2750   case Intrinsic::loongarch_lsx_vpickve2gr_h:
2751   case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2752     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2753                                 LoongArchISD::VPICK_SEXT_ELT);
2754     break;
2755   case Intrinsic::loongarch_lsx_vpickve2gr_w:
2756     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2757                                 LoongArchISD::VPICK_SEXT_ELT);
2758     break;
2759   case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2760     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2761                                 LoongArchISD::VPICK_ZEXT_ELT);
2762     break;
2763   case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2764   case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2765     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2766                                 LoongArchISD::VPICK_ZEXT_ELT);
2767     break;
2768   case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2769     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2770                                 LoongArchISD::VPICK_ZEXT_ELT);
2771     break;
2772   case Intrinsic::loongarch_lsx_bz_b:
2773   case Intrinsic::loongarch_lsx_bz_h:
2774   case Intrinsic::loongarch_lsx_bz_w:
2775   case Intrinsic::loongarch_lsx_bz_d:
2776   case Intrinsic::loongarch_lasx_xbz_b:
2777   case Intrinsic::loongarch_lasx_xbz_h:
2778   case Intrinsic::loongarch_lasx_xbz_w:
2779   case Intrinsic::loongarch_lasx_xbz_d:
2780     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2781                                 LoongArchISD::VALL_ZERO);
2782     break;
2783   case Intrinsic::loongarch_lsx_bz_v:
2784   case Intrinsic::loongarch_lasx_xbz_v:
2785     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2786                                 LoongArchISD::VANY_ZERO);
2787     break;
2788   case Intrinsic::loongarch_lsx_bnz_b:
2789   case Intrinsic::loongarch_lsx_bnz_h:
2790   case Intrinsic::loongarch_lsx_bnz_w:
2791   case Intrinsic::loongarch_lsx_bnz_d:
2792   case Intrinsic::loongarch_lasx_xbnz_b:
2793   case Intrinsic::loongarch_lasx_xbnz_h:
2794   case Intrinsic::loongarch_lasx_xbnz_w:
2795   case Intrinsic::loongarch_lasx_xbnz_d:
2796     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2797                                 LoongArchISD::VALL_NONZERO);
2798     break;
2799   case Intrinsic::loongarch_lsx_bnz_v:
2800   case Intrinsic::loongarch_lasx_xbnz_v:
2801     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2802                                 LoongArchISD::VANY_NONZERO);
2803     break;
2804   }
2805 }
2806 
2807 void LoongArchTargetLowering::ReplaceNodeResults(
2808     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2809   SDLoc DL(N);
2810   EVT VT = N->getValueType(0);
2811   switch (N->getOpcode()) {
2812   default:
2813     llvm_unreachable("Don't know how to legalize this operation");
2814   case ISD::ADD:
2815   case ISD::SUB:
2816     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2817            "Unexpected custom legalisation");
2818     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2819     break;
2820   case ISD::UDIV:
2821   case ISD::UREM:
2822     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2823            "Unexpected custom legalisation");
2824     Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
2825     break;
2826   case ISD::SHL:
2827   case ISD::SRA:
2828   case ISD::SRL:
2829     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2830            "Unexpected custom legalisation");
2831     if (N->getOperand(1).getOpcode() != ISD::Constant) {
2832       Results.push_back(customLegalizeToWOp(N, DAG, 2));
2833       break;
2834     }
2835     break;
2836   case ISD::ROTL:
2837   case ISD::ROTR:
2838     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2839            "Unexpected custom legalisation");
2840     Results.push_back(customLegalizeToWOp(N, DAG, 2));
2841     break;
2842   case ISD::FP_TO_SINT: {
2843     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2844            "Unexpected custom legalisation");
2845     SDValue Src = N->getOperand(0);
2846     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2847     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2848         TargetLowering::TypeSoftenFloat) {
2849       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2850       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2851       return;
2852     }
2853     // If the FP type needs to be softened, emit a library call using the 'si'
2854     // version. If we left it to default legalization we'd end up with 'di'.
2855     RTLIB::Libcall LC;
2856     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2857     MakeLibCallOptions CallOptions;
2858     EVT OpVT = Src.getValueType();
2859     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2860     SDValue Chain = SDValue();
2861     SDValue Result;
2862     std::tie(Result, Chain) =
2863         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2864     Results.push_back(Result);
2865     break;
2866   }
2867   case ISD::BITCAST: {
2868     SDValue Src = N->getOperand(0);
2869     EVT SrcVT = Src.getValueType();
2870     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2871         Subtarget.hasBasicF()) {
2872       SDValue Dst =
2873           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2874       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2875     }
2876     break;
2877   }
2878   case ISD::FP_TO_UINT: {
2879     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2880            "Unexpected custom legalisation");
2881     auto &TLI = DAG.getTargetLoweringInfo();
2882     SDValue Tmp1, Tmp2;
2883     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2884     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2885     break;
2886   }
2887   case ISD::BSWAP: {
2888     SDValue Src = N->getOperand(0);
2889     assert((VT == MVT::i16 || VT == MVT::i32) &&
2890            "Unexpected custom legalization");
2891     MVT GRLenVT = Subtarget.getGRLenVT();
2892     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2893     SDValue Tmp;
2894     switch (VT.getSizeInBits()) {
2895     default:
2896       llvm_unreachable("Unexpected operand width");
2897     case 16:
2898       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2899       break;
2900     case 32:
2901       // Only LA64 will get to here due to the size mismatch between VT and
2902       // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2903       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2904       break;
2905     }
2906     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2907     break;
2908   }
2909   case ISD::BITREVERSE: {
2910     SDValue Src = N->getOperand(0);
2911     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2912            "Unexpected custom legalization");
2913     MVT GRLenVT = Subtarget.getGRLenVT();
2914     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2915     SDValue Tmp;
2916     switch (VT.getSizeInBits()) {
2917     default:
2918       llvm_unreachable("Unexpected operand width");
2919     case 8:
2920       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2921       break;
2922     case 32:
2923       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2924       break;
2925     }
2926     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2927     break;
2928   }
2929   case ISD::CTLZ:
2930   case ISD::CTTZ: {
2931     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2932            "Unexpected custom legalisation");
2933     Results.push_back(customLegalizeToWOp(N, DAG, 1));
2934     break;
2935   }
2936   case ISD::INTRINSIC_W_CHAIN: {
2937     SDValue Chain = N->getOperand(0);
2938     SDValue Op2 = N->getOperand(2);
2939     MVT GRLenVT = Subtarget.getGRLenVT();
2940     const StringRef ErrorMsgOOR = "argument out of range";
2941     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2942     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2943 
2944     switch (N->getConstantOperandVal(1)) {
2945     default:
2946       llvm_unreachable("Unexpected Intrinsic.");
2947     case Intrinsic::loongarch_movfcsr2gr: {
2948       if (!Subtarget.hasBasicF()) {
2949         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2950         return;
2951       }
2952       unsigned Imm = Op2->getAsZExtVal();
2953       if (!isUInt<2>(Imm)) {
2954         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2955         return;
2956       }
2957       SDValue MOVFCSR2GRResults = DAG.getNode(
2958           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2959           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2960       Results.push_back(
2961           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2962       Results.push_back(MOVFCSR2GRResults.getValue(1));
2963       break;
2964     }
2965 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
2966   case Intrinsic::loongarch_##NAME: {                                          \
2967     SDValue NODE = DAG.getNode(                                                \
2968         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
2969         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
2970          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
2971     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
2972     Results.push_back(NODE.getValue(1));                                       \
2973     break;                                                                     \
2974   }
2975       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2976       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2977       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2978       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2979       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2980       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2981 #undef CRC_CASE_EXT_BINARYOP
2982 
2983 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
2984   case Intrinsic::loongarch_##NAME: {                                          \
2985     SDValue NODE = DAG.getNode(                                                \
2986         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
2987         {Chain, Op2,                                                           \
2988          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
2989     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
2990     Results.push_back(NODE.getValue(1));                                       \
2991     break;                                                                     \
2992   }
2993       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2994       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2995 #undef CRC_CASE_EXT_UNARYOP
2996 #define CSR_CASE(ID)                                                           \
2997   case Intrinsic::loongarch_##ID: {                                            \
2998     if (!Subtarget.is64Bit())                                                  \
2999       emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
3000     break;                                                                     \
3001   }
3002       CSR_CASE(csrrd_d);
3003       CSR_CASE(csrwr_d);
3004       CSR_CASE(csrxchg_d);
3005       CSR_CASE(iocsrrd_d);
3006 #undef CSR_CASE
3007     case Intrinsic::loongarch_csrrd_w: {
3008       unsigned Imm = Op2->getAsZExtVal();
3009       if (!isUInt<14>(Imm)) {
3010         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3011         return;
3012       }
3013       SDValue CSRRDResults =
3014           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3015                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3016       Results.push_back(
3017           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3018       Results.push_back(CSRRDResults.getValue(1));
3019       break;
3020     }
3021     case Intrinsic::loongarch_csrwr_w: {
3022       unsigned Imm = N->getConstantOperandVal(3);
3023       if (!isUInt<14>(Imm)) {
3024         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3025         return;
3026       }
3027       SDValue CSRWRResults =
3028           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3029                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3030                        DAG.getConstant(Imm, DL, GRLenVT)});
3031       Results.push_back(
3032           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3033       Results.push_back(CSRWRResults.getValue(1));
3034       break;
3035     }
3036     case Intrinsic::loongarch_csrxchg_w: {
3037       unsigned Imm = N->getConstantOperandVal(4);
3038       if (!isUInt<14>(Imm)) {
3039         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3040         return;
3041       }
3042       SDValue CSRXCHGResults = DAG.getNode(
3043           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3044           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3045            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3046            DAG.getConstant(Imm, DL, GRLenVT)});
3047       Results.push_back(
3048           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3049       Results.push_back(CSRXCHGResults.getValue(1));
3050       break;
3051     }
3052 #define IOCSRRD_CASE(NAME, NODE)                                               \
3053   case Intrinsic::loongarch_##NAME: {                                          \
3054     SDValue IOCSRRDResults =                                                   \
3055         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
3056                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3057     Results.push_back(                                                         \
3058         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
3059     Results.push_back(IOCSRRDResults.getValue(1));                             \
3060     break;                                                                     \
3061   }
3062       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3063       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3064       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3065 #undef IOCSRRD_CASE
3066     case Intrinsic::loongarch_cpucfg: {
3067       SDValue CPUCFGResults =
3068           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3069                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3070       Results.push_back(
3071           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3072       Results.push_back(CPUCFGResults.getValue(1));
3073       break;
3074     }
3075     case Intrinsic::loongarch_lddir_d: {
3076       if (!Subtarget.is64Bit()) {
3077         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3078         return;
3079       }
3080       break;
3081     }
3082     }
3083     break;
3084   }
3085   case ISD::READ_REGISTER: {
3086     if (Subtarget.is64Bit())
3087       DAG.getContext()->emitError(
3088           "On LA64, only 64-bit registers can be read.");
3089     else
3090       DAG.getContext()->emitError(
3091           "On LA32, only 32-bit registers can be read.");
3092     Results.push_back(DAG.getUNDEF(VT));
3093     Results.push_back(N->getOperand(0));
3094     break;
3095   }
3096   case ISD::INTRINSIC_WO_CHAIN: {
3097     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3098     break;
3099   }
3100   }
3101 }
3102 
3103 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3104                                  TargetLowering::DAGCombinerInfo &DCI,
3105                                  const LoongArchSubtarget &Subtarget) {
3106   if (DCI.isBeforeLegalizeOps())
3107     return SDValue();
3108 
3109   SDValue FirstOperand = N->getOperand(0);
3110   SDValue SecondOperand = N->getOperand(1);
3111   unsigned FirstOperandOpc = FirstOperand.getOpcode();
3112   EVT ValTy = N->getValueType(0);
3113   SDLoc DL(N);
3114   uint64_t lsb, msb;
3115   unsigned SMIdx, SMLen;
3116   ConstantSDNode *CN;
3117   SDValue NewOperand;
3118   MVT GRLenVT = Subtarget.getGRLenVT();
3119 
3120   // Op's second operand must be a shifted mask.
3121   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3122       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3123     return SDValue();
3124 
3125   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3126     // Pattern match BSTRPICK.
3127     //  $dst = and ((sra or srl) $src, lsb), (2**len - 1)
3128     //  => BSTRPICK $dst, $src, msb, lsb
3129     //  where msb = lsb + len - 1
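    //
    //  e.g. (illustrative) $dst = and (srl $src, 3), 0xff
    //       => BSTRPICK $dst, $src, 10, 3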
3130 
3131     // The second operand of the shift must be an immediate.
3132     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3133       return SDValue();
3134 
3135     lsb = CN->getZExtValue();
3136 
3137     // Return if the shifted mask does not start at bit 0 or the sum of its
3138     // length and lsb exceeds the word's size.
3139     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3140       return SDValue();
3141 
3142     NewOperand = FirstOperand.getOperand(0);
3143   } else {
3144     // Pattern match BSTRPICK.
3145     //  $dst = and $src, (2**len - 1), if len > 12
3146     //  => BSTRPICK $dst, $src, msb, lsb
3147     //  where lsb = 0 and msb = len - 1
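    //
    //  e.g. (illustrative) $dst = and $src, 0xffff
    //       => BSTRPICK $dst, $src, 15, 0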
3148 
3149     // If the mask is <= 0xfff, andi can be used instead.
3150     if (CN->getZExtValue() <= 0xfff)
3151       return SDValue();
3152 
3153     // Return if the mask's MSB exceeds the word's size.
3154     if (SMIdx + SMLen > ValTy.getSizeInBits())
3155       return SDValue();
3156 
3157     if (SMIdx > 0) {
3158       // Omit if the constant has more than 2 uses. This is a conservative
3159       // decision. Whether it is a win depends on the HW microarchitecture.
3160       // However it should always be better for 1 and 2 uses.
3161       if (CN->use_size() > 2)
3162         return SDValue();
3163       // Return if the constant can be composed by a single LU12I.W.
3164       if ((CN->getZExtValue() & 0xfff) == 0)
3165         return SDValue();
3166       // Return if the constant can be composed by a single ADDI with
3167       // the zero register.
3168       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3169         return SDValue();
3170     }
3171 
3172     lsb = SMIdx;
3173     NewOperand = FirstOperand;
3174   }
3175 
3176   msb = lsb + SMLen - 1;
3177   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3178                             DAG.getConstant(msb, DL, GRLenVT),
3179                             DAG.getConstant(lsb, DL, GRLenVT));
3180   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3181     return NR0;
3182   // Try to optimize to
3183   //   bstrpick $Rd, $Rs, msb, lsb
3184   //   slli     $Rd, $Rd, lsb
3185   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3186                      DAG.getConstant(lsb, DL, GRLenVT));
3187 }
3188 
3189 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3190                                  TargetLowering::DAGCombinerInfo &DCI,
3191                                  const LoongArchSubtarget &Subtarget) {
3192   if (DCI.isBeforeLegalizeOps())
3193     return SDValue();
3194 
3195   // $dst = srl (and $src, Mask), Shamt
3196   // =>
3197   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3198   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3199   //
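  //  e.g. (illustrative) $dst = srl (and $src, 0xff0), 4
  //       => BSTRPICK $dst, $src, 11, 4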
3200 
3201   SDValue FirstOperand = N->getOperand(0);
3202   ConstantSDNode *CN;
3203   EVT ValTy = N->getValueType(0);
3204   SDLoc DL(N);
3205   MVT GRLenVT = Subtarget.getGRLenVT();
3206   unsigned MaskIdx, MaskLen;
3207   uint64_t Shamt;
3208 
3209   // The first operand must be an AND and the second operand of the AND must be
3210   // a shifted mask.
3211   if (FirstOperand.getOpcode() != ISD::AND ||
3212       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3213       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3214     return SDValue();
3215 
3216   // The second operand (shift amount) must be an immediate.
3217   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3218     return SDValue();
3219 
3220   Shamt = CN->getZExtValue();
3221   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3222     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3223                        FirstOperand->getOperand(0),
3224                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3225                        DAG.getConstant(Shamt, DL, GRLenVT));
3226 
3227   return SDValue();
3228 }
3229 
3230 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3231                                 TargetLowering::DAGCombinerInfo &DCI,
3232                                 const LoongArchSubtarget &Subtarget) {
3233   MVT GRLenVT = Subtarget.getGRLenVT();
3234   EVT ValTy = N->getValueType(0);
3235   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3236   ConstantSDNode *CN0, *CN1;
3237   SDLoc DL(N);
3238   unsigned ValBits = ValTy.getSizeInBits();
3239   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3240   unsigned Shamt;
3241   bool SwapAndRetried = false;
3242 
3243   if (DCI.isBeforeLegalizeOps())
3244     return SDValue();
3245 
3246   if (ValBits != 32 && ValBits != 64)
3247     return SDValue();
3248 
3249 Retry:
3250   // 1st pattern to match BSTRINS:
3251   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
3252   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3253   //  =>
3254   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
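  //
  //  e.g. (illustrative, 32-bit)
  //  R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
  //  => R = BSTRINS X, Y, 15, 8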
3255   if (N0.getOpcode() == ISD::AND &&
3256       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3257       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3258       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3259       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3260       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3261       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3262       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3263       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3264       (MaskIdx0 + MaskLen0 <= ValBits)) {
3265     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3266     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3267                        N1.getOperand(0).getOperand(0),
3268                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3269                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3270   }
3271 
3272   // 2nd pattern to match BSTRINS:
3273   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
3274   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3275   //  =>
3276   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3277   if (N0.getOpcode() == ISD::AND &&
3278       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3279       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3280       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3281       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3282       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3283       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3284       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3285       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3286       (MaskIdx0 + MaskLen0 <= ValBits)) {
3287     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3288     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3289                        N1.getOperand(0).getOperand(0),
3290                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3291                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3292   }
3293 
3294   // 3rd pattern to match BSTRINS:
3295   //  R = or (and X, mask0), (and Y, mask1)
3296   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3297   //  =>
3298   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3299   //  where msb = lsb + size - 1
3300   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3301       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3302       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3303       (MaskIdx0 + MaskLen0 <= 64) &&
3304       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3305       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3306     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3307     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3308                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3309                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3310                        DAG.getConstant(ValBits == 32
3311                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3312                                            : (MaskIdx0 + MaskLen0 - 1),
3313                                        DL, GRLenVT),
3314                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3315   }
3316 
3317   // 4th pattern to match BSTRINS:
3318   //  R = or (and X, mask), (shl Y, shamt)
3319   //  where mask = (2**shamt - 1)
3320   //  =>
3321   //  R = BSTRINS X, Y, ValBits - 1, shamt
3322   //  where ValBits = 32 or 64
3323   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3324       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3325       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3326       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3327       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3328       (MaskIdx0 + MaskLen0 <= ValBits)) {
3329     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3330     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3331                        N1.getOperand(0),
3332                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
3333                        DAG.getConstant(Shamt, DL, GRLenVT));
3334   }
3335 
3336   // 5th pattern to match BSTRINS:
3337   //  R = or (and X, mask), const
3338   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
3339   //  =>
3340   //  R = BSTRINS X, (const >> lsb), msb, lsb
3341   //  where msb = lsb + size - 1
3342   if (N0.getOpcode() == ISD::AND &&
3343       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3344       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3345       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3346       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3347     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3348     return DAG.getNode(
3349         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3350         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3351         DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3352                                       : (MaskIdx0 + MaskLen0 - 1),
3353                         DL, GRLenVT),
3354         DAG.getConstant(MaskIdx0, DL, GRLenVT));
3355   }
3356 
3357   // 6th pattern.
3358   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3359   // by the incoming bits are known to be zero.
3360   // =>
3361   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3362   //
3363   // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3364   // pattern is more common than the 1st. So we put the 1st before the 6th in
3365   // order to match as many nodes as possible.
3366   ConstantSDNode *CNMask, *CNShamt;
3367   unsigned MaskIdx, MaskLen;
3368   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3369       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3370       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3371       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3372       CNShamt->getZExtValue() + MaskLen <= ValBits) {
3373     Shamt = CNShamt->getZExtValue();
3374     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3375     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3376       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3377       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3378                          N1.getOperand(0).getOperand(0),
3379                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3380                          DAG.getConstant(Shamt, DL, GRLenVT));
3381     }
3382   }
3383 
3384   // 7th pattern.
3385   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3386   // overwritten by the incoming bits are known to be zero.
3387   // =>
3388   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3389   //
3390   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3391   // before the 7th in order to match as many nodes as possible.
3392   if (N1.getOpcode() == ISD::AND &&
3393       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3394       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3395       N1.getOperand(0).getOpcode() == ISD::SHL &&
3396       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3397       CNShamt->getZExtValue() == MaskIdx) {
3398     APInt ShMask(ValBits, CNMask->getZExtValue());
3399     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3400       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3401       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3402                          N1.getOperand(0).getOperand(0),
3403                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3404                          DAG.getConstant(MaskIdx, DL, GRLenVT));
3405     }
3406   }
3407 
3408   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3409   if (!SwapAndRetried) {
3410     std::swap(N0, N1);
3411     SwapAndRetried = true;
3412     goto Retry;
3413   }
3414 
3415   SwapAndRetried = false;
3416 Retry2:
3417   // 8th pattern.
3418   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3419   // the incoming bits are known to be zero.
3420   // =>
3421   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3422   //
3423   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3424   // we put it here in order to match as many nodes as possible or generate fewer
3425   // instructions.
3426   if (N1.getOpcode() == ISD::AND &&
3427       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3429     APInt ShMask(ValBits, CNMask->getZExtValue());
3430     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3431       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3432       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3433                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3434                                      N1->getOperand(0),
3435                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
3436                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3437                          DAG.getConstant(MaskIdx, DL, GRLenVT));
3438     }
3439   }
3440   // Swap N0/N1 and retry.
3441   if (!SwapAndRetried) {
3442     std::swap(N0, N1);
3443     SwapAndRetried = true;
3444     goto Retry2;
3445   }
3446 
3447   return SDValue();
3448 }
3449 
3450 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3451   ExtType = ISD::NON_EXTLOAD;
3452 
3453   switch (V.getNode()->getOpcode()) {
3454   case ISD::LOAD: {
3455     LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3456     if ((LoadNode->getMemoryVT() == MVT::i8) ||
3457         (LoadNode->getMemoryVT() == MVT::i16)) {
3458       ExtType = LoadNode->getExtensionType();
3459       return true;
3460     }
3461     return false;
3462   }
3463   case ISD::AssertSext: {
3464     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3465     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3466       ExtType = ISD::SEXTLOAD;
3467       return true;
3468     }
3469     return false;
3470   }
3471   case ISD::AssertZext: {
3472     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3473     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3474       ExtType = ISD::ZEXTLOAD;
3475       return true;
3476     }
3477     return false;
3478   }
3479   default:
3480     return false;
3481   }
3482 
3483   return false;
3484 }
3485 
3486 // Eliminate redundant truncation and zero-extension nodes.
3487 // * Case 1:
3488 //  +------------+ +------------+ +------------+
3489 //  |   Input1   | |   Input2   | |     CC     |
3490 //  +------------+ +------------+ +------------+
3491 //         |              |              |
3492 //         V              V              +----+
3493 //  +------------+ +------------+             |
3494 //  |  TRUNCATE  | |  TRUNCATE  |             |
3495 //  +------------+ +------------+             |
3496 //         |              |                   |
3497 //         V              V                   |
3498 //  +------------+ +------------+             |
3499 //  |  ZERO_EXT  | |  ZERO_EXT  |             |
3500 //  +------------+ +------------+             |
3501 //         |              |                   |
3502 //         |              +-------------+     |
3503 //         V              V             |     |
3504 //        +----------------+            |     |
3505 //        |      AND       |            |     |
3506 //        +----------------+            |     |
3507 //                |                     |     |
3508 //                +---------------+     |     |
3509 //                                |     |     |
3510 //                                V     V     V
3511 //                               +-------------+
3512 //                               |     CMP     |
3513 //                               +-------------+
3514 // * Case 2:
3515 //  +------------+ +------------+ +-------------+ +------------+ +------------+
3516 //  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
3517 //  +------------+ +------------+ +-------------+ +------------+ +------------+
3518 //         |              |             |               |               |
3519 //         V              |             |               |               |
3520 //  +------------+        |             |               |               |
3521 //  |     XOR    |<---------------------+               |               |
3522 //  +------------+        |                             |               |
3523 //         |              |                             |               |
3524 //         V              V             +---------------+               |
3525 //  +------------+ +------------+       |                               |
3526 //  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
3527 //  +------------+ +------------+       |     |
3528 //         |              |             |     |
3529 //         V              V             |     |
3530 //  +------------+ +------------+       |     |
3531 //  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
3532 //  +------------+ +------------+       |     |
3533 //         |              |             |     |
3534 //         V              V             |     |
3535 //        +----------------+            |     |
3536 //        |      AND       |            |     |
3537 //        +----------------+            |     |
3538 //                |                     |     |
3539 //                +---------------+     |     |
3540 //                                |     |     |
3541 //                                V     V     V
3542 //                               +-------------+
3543 //                               |     CMP     |
3544 //                               +-------------+
3545 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3546                                    TargetLowering::DAGCombinerInfo &DCI,
3547                                    const LoongArchSubtarget &Subtarget) {
3548   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3549 
3550   SDNode *AndNode = N->getOperand(0).getNode();
3551   if (AndNode->getOpcode() != ISD::AND)
3552     return SDValue();
3553 
3554   SDValue AndInputValue2 = AndNode->getOperand(1);
3555   if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3556     return SDValue();
3557 
3558   SDValue CmpInputValue = N->getOperand(1);
3559   SDValue AndInputValue1 = AndNode->getOperand(0);
3560   if (AndInputValue1.getOpcode() == ISD::XOR) {
3561     if (CC != ISD::SETEQ && CC != ISD::SETNE)
3562       return SDValue();
3563     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3564     if (!CN || CN->getSExtValue() != -1)
3565       return SDValue();
3566     CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3567     if (!CN || CN->getSExtValue() != 0)
3568       return SDValue();
3569     AndInputValue1 = AndInputValue1.getOperand(0);
3570     if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3571       return SDValue();
3572   } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3573     if (AndInputValue2 != CmpInputValue)
3574       return SDValue();
3575   } else {
3576     return SDValue();
3577   }
3578 
3579   SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3580   if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3581     return SDValue();
3582 
3583   SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3584   if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3585     return SDValue();
3586 
3587   SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3588   SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3589   ISD::LoadExtType ExtType1;
3590   ISD::LoadExtType ExtType2;
3591 
3592   if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3593       !checkValueWidth(TruncInputValue2, ExtType2))
3594     return SDValue();
3595 
3596   if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3597       AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3598     return SDValue();
3599 
3600   if ((ExtType2 != ISD::ZEXTLOAD) &&
3601       ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3602     return SDValue();
3603 
3604   // These truncation and zero-extension nodes are not necessary; remove them.
3605   SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3606                                TruncInputValue1, TruncInputValue2);
3607   SDValue NewSetCC =
3608       DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3609   DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3610   return SDValue(N, 0);
3611 }
3612 
3613 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3614 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3615                                       TargetLowering::DAGCombinerInfo &DCI,
3616                                       const LoongArchSubtarget &Subtarget) {
3617   if (DCI.isBeforeLegalizeOps())
3618     return SDValue();
3619 
3620   SDValue Src = N->getOperand(0);
3621   if (Src.getOpcode() != LoongArchISD::REVB_2W)
3622     return SDValue();
3623 
3624   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3625                      Src.getOperand(0));
3626 }
3627 
3628 template <unsigned N>
3629 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3630                                        SelectionDAG &DAG,
3631                                        const LoongArchSubtarget &Subtarget,
3632                                        bool IsSigned = false) {
3633   SDLoc DL(Node);
3634   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3635   // Check the ImmArg.
3636   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3637       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3638     DAG.getContext()->emitError(Node->getOperationName(0) +
3639                                 ": argument out of range.");
3640     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3641   }
3642   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3643 }
3644 
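// Build a splat-vector constant from the immediate operand ImmOp of Node. The
// immediate must fit in N bits (signed when IsSigned); otherwise an "argument
// out of range" error is emitted and UNDEF is returned.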
3645 template <unsigned N>
3646 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3647                                    SelectionDAG &DAG, bool IsSigned = false) {
3648   SDLoc DL(Node);
3649   EVT ResTy = Node->getValueType(0);
3650   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3651 
3652   // Check the ImmArg.
3653   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3654       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3655     DAG.getContext()->emitError(Node->getOperationName(0) +
3656                                 ": argument out of range.");
3657     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3658   }
3659   return DAG.getConstant(
3660       APInt(ResTy.getScalarType().getSizeInBits(),
3661             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3662       DL, ResTy);
3663 }
3664 
3665 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3666   SDLoc DL(Node);
3667   EVT ResTy = Node->getValueType(0);
3668   SDValue Vec = Node->getOperand(2);
3669   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3670   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3671 }
3672 
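// Clear, in each element of Node's first vector operand, the bit selected by
// the corresponding element of the second operand (taken modulo the element
// width), i.e. compute Op1 & ~(1 << (Op2 & (EltBits - 1))).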
3673 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3674   SDLoc DL(Node);
3675   EVT ResTy = Node->getValueType(0);
3676   SDValue One = DAG.getConstant(1, DL, ResTy);
3677   SDValue Bit =
3678       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3679 
3680   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3681                      DAG.getNOT(DL, Bit, ResTy));
3682 }
3683 
3684 template <unsigned N>
3685 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3686   SDLoc DL(Node);
3687   EVT ResTy = Node->getValueType(0);
3688   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3689   // Check the unsigned ImmArg.
3690   if (!isUInt<N>(CImm->getZExtValue())) {
3691     DAG.getContext()->emitError(Node->getOperationName(0) +
3692                                 ": argument out of range.");
3693     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3694   }
3695 
3696   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3697   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3698 
3699   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3700 }
3701 
3702 template <unsigned N>
3703 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3704   SDLoc DL(Node);
3705   EVT ResTy = Node->getValueType(0);
3706   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3707   // Check the unsigned ImmArg.
3708   if (!isUInt<N>(CImm->getZExtValue())) {
3709     DAG.getContext()->emitError(Node->getOperationName(0) +
3710                                 ": argument out of range.");
3711     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3712   }
3713 
3714   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3715   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3716   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3717 }
3718 
3719 template <unsigned N>
3720 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3721   SDLoc DL(Node);
3722   EVT ResTy = Node->getValueType(0);
3723   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3724   // Check the unsigned ImmArg.
3725   if (!isUInt<N>(CImm->getZExtValue())) {
3726     DAG.getContext()->emitError(Node->getOperationName(0) +
3727                                 ": argument out of range.");
3728     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3729   }
3730 
3731   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3732   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3733   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3734 }
3735 
3736 static SDValue
3737 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3738                                  TargetLowering::DAGCombinerInfo &DCI,
3739                                  const LoongArchSubtarget &Subtarget) {
3740   SDLoc DL(N);
3741   switch (N->getConstantOperandVal(0)) {
3742   default:
3743     break;
3744   case Intrinsic::loongarch_lsx_vadd_b:
3745   case Intrinsic::loongarch_lsx_vadd_h:
3746   case Intrinsic::loongarch_lsx_vadd_w:
3747   case Intrinsic::loongarch_lsx_vadd_d:
3748   case Intrinsic::loongarch_lasx_xvadd_b:
3749   case Intrinsic::loongarch_lasx_xvadd_h:
3750   case Intrinsic::loongarch_lasx_xvadd_w:
3751   case Intrinsic::loongarch_lasx_xvadd_d:
3752     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3753                        N->getOperand(2));
3754   case Intrinsic::loongarch_lsx_vaddi_bu:
3755   case Intrinsic::loongarch_lsx_vaddi_hu:
3756   case Intrinsic::loongarch_lsx_vaddi_wu:
3757   case Intrinsic::loongarch_lsx_vaddi_du:
3758   case Intrinsic::loongarch_lasx_xvaddi_bu:
3759   case Intrinsic::loongarch_lasx_xvaddi_hu:
3760   case Intrinsic::loongarch_lasx_xvaddi_wu:
3761   case Intrinsic::loongarch_lasx_xvaddi_du:
3762     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3763                        lowerVectorSplatImm<5>(N, 2, DAG));
3764   case Intrinsic::loongarch_lsx_vsub_b:
3765   case Intrinsic::loongarch_lsx_vsub_h:
3766   case Intrinsic::loongarch_lsx_vsub_w:
3767   case Intrinsic::loongarch_lsx_vsub_d:
3768   case Intrinsic::loongarch_lasx_xvsub_b:
3769   case Intrinsic::loongarch_lasx_xvsub_h:
3770   case Intrinsic::loongarch_lasx_xvsub_w:
3771   case Intrinsic::loongarch_lasx_xvsub_d:
3772     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3773                        N->getOperand(2));
3774   case Intrinsic::loongarch_lsx_vsubi_bu:
3775   case Intrinsic::loongarch_lsx_vsubi_hu:
3776   case Intrinsic::loongarch_lsx_vsubi_wu:
3777   case Intrinsic::loongarch_lsx_vsubi_du:
3778   case Intrinsic::loongarch_lasx_xvsubi_bu:
3779   case Intrinsic::loongarch_lasx_xvsubi_hu:
3780   case Intrinsic::loongarch_lasx_xvsubi_wu:
3781   case Intrinsic::loongarch_lasx_xvsubi_du:
3782     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3783                        lowerVectorSplatImm<5>(N, 2, DAG));
3784   case Intrinsic::loongarch_lsx_vneg_b:
3785   case Intrinsic::loongarch_lsx_vneg_h:
3786   case Intrinsic::loongarch_lsx_vneg_w:
3787   case Intrinsic::loongarch_lsx_vneg_d:
3788   case Intrinsic::loongarch_lasx_xvneg_b:
3789   case Intrinsic::loongarch_lasx_xvneg_h:
3790   case Intrinsic::loongarch_lasx_xvneg_w:
3791   case Intrinsic::loongarch_lasx_xvneg_d:
3792     return DAG.getNode(
3793         ISD::SUB, DL, N->getValueType(0),
3794         DAG.getConstant(
3795             APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3796                   /*isSigned=*/true),
3797             SDLoc(N), N->getValueType(0)),
3798         N->getOperand(1));
3799   case Intrinsic::loongarch_lsx_vmax_b:
3800   case Intrinsic::loongarch_lsx_vmax_h:
3801   case Intrinsic::loongarch_lsx_vmax_w:
3802   case Intrinsic::loongarch_lsx_vmax_d:
3803   case Intrinsic::loongarch_lasx_xvmax_b:
3804   case Intrinsic::loongarch_lasx_xvmax_h:
3805   case Intrinsic::loongarch_lasx_xvmax_w:
3806   case Intrinsic::loongarch_lasx_xvmax_d:
3807     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3808                        N->getOperand(2));
3809   case Intrinsic::loongarch_lsx_vmax_bu:
3810   case Intrinsic::loongarch_lsx_vmax_hu:
3811   case Intrinsic::loongarch_lsx_vmax_wu:
3812   case Intrinsic::loongarch_lsx_vmax_du:
3813   case Intrinsic::loongarch_lasx_xvmax_bu:
3814   case Intrinsic::loongarch_lasx_xvmax_hu:
3815   case Intrinsic::loongarch_lasx_xvmax_wu:
3816   case Intrinsic::loongarch_lasx_xvmax_du:
3817     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3818                        N->getOperand(2));
3819   case Intrinsic::loongarch_lsx_vmaxi_b:
3820   case Intrinsic::loongarch_lsx_vmaxi_h:
3821   case Intrinsic::loongarch_lsx_vmaxi_w:
3822   case Intrinsic::loongarch_lsx_vmaxi_d:
3823   case Intrinsic::loongarch_lasx_xvmaxi_b:
3824   case Intrinsic::loongarch_lasx_xvmaxi_h:
3825   case Intrinsic::loongarch_lasx_xvmaxi_w:
3826   case Intrinsic::loongarch_lasx_xvmaxi_d:
3827     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3828                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3829   case Intrinsic::loongarch_lsx_vmaxi_bu:
3830   case Intrinsic::loongarch_lsx_vmaxi_hu:
3831   case Intrinsic::loongarch_lsx_vmaxi_wu:
3832   case Intrinsic::loongarch_lsx_vmaxi_du:
3833   case Intrinsic::loongarch_lasx_xvmaxi_bu:
3834   case Intrinsic::loongarch_lasx_xvmaxi_hu:
3835   case Intrinsic::loongarch_lasx_xvmaxi_wu:
3836   case Intrinsic::loongarch_lasx_xvmaxi_du:
3837     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3838                        lowerVectorSplatImm<5>(N, 2, DAG));
3839   case Intrinsic::loongarch_lsx_vmin_b:
3840   case Intrinsic::loongarch_lsx_vmin_h:
3841   case Intrinsic::loongarch_lsx_vmin_w:
3842   case Intrinsic::loongarch_lsx_vmin_d:
3843   case Intrinsic::loongarch_lasx_xvmin_b:
3844   case Intrinsic::loongarch_lasx_xvmin_h:
3845   case Intrinsic::loongarch_lasx_xvmin_w:
3846   case Intrinsic::loongarch_lasx_xvmin_d:
3847     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3848                        N->getOperand(2));
3849   case Intrinsic::loongarch_lsx_vmin_bu:
3850   case Intrinsic::loongarch_lsx_vmin_hu:
3851   case Intrinsic::loongarch_lsx_vmin_wu:
3852   case Intrinsic::loongarch_lsx_vmin_du:
3853   case Intrinsic::loongarch_lasx_xvmin_bu:
3854   case Intrinsic::loongarch_lasx_xvmin_hu:
3855   case Intrinsic::loongarch_lasx_xvmin_wu:
3856   case Intrinsic::loongarch_lasx_xvmin_du:
3857     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3858                        N->getOperand(2));
3859   case Intrinsic::loongarch_lsx_vmini_b:
3860   case Intrinsic::loongarch_lsx_vmini_h:
3861   case Intrinsic::loongarch_lsx_vmini_w:
3862   case Intrinsic::loongarch_lsx_vmini_d:
3863   case Intrinsic::loongarch_lasx_xvmini_b:
3864   case Intrinsic::loongarch_lasx_xvmini_h:
3865   case Intrinsic::loongarch_lasx_xvmini_w:
3866   case Intrinsic::loongarch_lasx_xvmini_d:
3867     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3868                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3869   case Intrinsic::loongarch_lsx_vmini_bu:
3870   case Intrinsic::loongarch_lsx_vmini_hu:
3871   case Intrinsic::loongarch_lsx_vmini_wu:
3872   case Intrinsic::loongarch_lsx_vmini_du:
3873   case Intrinsic::loongarch_lasx_xvmini_bu:
3874   case Intrinsic::loongarch_lasx_xvmini_hu:
3875   case Intrinsic::loongarch_lasx_xvmini_wu:
3876   case Intrinsic::loongarch_lasx_xvmini_du:
3877     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3878                        lowerVectorSplatImm<5>(N, 2, DAG));
3879   case Intrinsic::loongarch_lsx_vmul_b:
3880   case Intrinsic::loongarch_lsx_vmul_h:
3881   case Intrinsic::loongarch_lsx_vmul_w:
3882   case Intrinsic::loongarch_lsx_vmul_d:
3883   case Intrinsic::loongarch_lasx_xvmul_b:
3884   case Intrinsic::loongarch_lasx_xvmul_h:
3885   case Intrinsic::loongarch_lasx_xvmul_w:
3886   case Intrinsic::loongarch_lasx_xvmul_d:
3887     return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3888                        N->getOperand(2));
3889   case Intrinsic::loongarch_lsx_vmadd_b:
3890   case Intrinsic::loongarch_lsx_vmadd_h:
3891   case Intrinsic::loongarch_lsx_vmadd_w:
3892   case Intrinsic::loongarch_lsx_vmadd_d:
3893   case Intrinsic::loongarch_lasx_xvmadd_b:
3894   case Intrinsic::loongarch_lasx_xvmadd_h:
3895   case Intrinsic::loongarch_lasx_xvmadd_w:
3896   case Intrinsic::loongarch_lasx_xvmadd_d: {
3897     EVT ResTy = N->getValueType(0);
3898     return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3899                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3900                                    N->getOperand(3)));
3901   }
3902   case Intrinsic::loongarch_lsx_vmsub_b:
3903   case Intrinsic::loongarch_lsx_vmsub_h:
3904   case Intrinsic::loongarch_lsx_vmsub_w:
3905   case Intrinsic::loongarch_lsx_vmsub_d:
3906   case Intrinsic::loongarch_lasx_xvmsub_b:
3907   case Intrinsic::loongarch_lasx_xvmsub_h:
3908   case Intrinsic::loongarch_lasx_xvmsub_w:
3909   case Intrinsic::loongarch_lasx_xvmsub_d: {
3910     EVT ResTy = N->getValueType(0);
3911     return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3912                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3913                                    N->getOperand(3)));
3914   }
3915   case Intrinsic::loongarch_lsx_vdiv_b:
3916   case Intrinsic::loongarch_lsx_vdiv_h:
3917   case Intrinsic::loongarch_lsx_vdiv_w:
3918   case Intrinsic::loongarch_lsx_vdiv_d:
3919   case Intrinsic::loongarch_lasx_xvdiv_b:
3920   case Intrinsic::loongarch_lasx_xvdiv_h:
3921   case Intrinsic::loongarch_lasx_xvdiv_w:
3922   case Intrinsic::loongarch_lasx_xvdiv_d:
3923     return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3924                        N->getOperand(2));
3925   case Intrinsic::loongarch_lsx_vdiv_bu:
3926   case Intrinsic::loongarch_lsx_vdiv_hu:
3927   case Intrinsic::loongarch_lsx_vdiv_wu:
3928   case Intrinsic::loongarch_lsx_vdiv_du:
3929   case Intrinsic::loongarch_lasx_xvdiv_bu:
3930   case Intrinsic::loongarch_lasx_xvdiv_hu:
3931   case Intrinsic::loongarch_lasx_xvdiv_wu:
3932   case Intrinsic::loongarch_lasx_xvdiv_du:
3933     return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3934                        N->getOperand(2));
3935   case Intrinsic::loongarch_lsx_vmod_b:
3936   case Intrinsic::loongarch_lsx_vmod_h:
3937   case Intrinsic::loongarch_lsx_vmod_w:
3938   case Intrinsic::loongarch_lsx_vmod_d:
3939   case Intrinsic::loongarch_lasx_xvmod_b:
3940   case Intrinsic::loongarch_lasx_xvmod_h:
3941   case Intrinsic::loongarch_lasx_xvmod_w:
3942   case Intrinsic::loongarch_lasx_xvmod_d:
3943     return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3944                        N->getOperand(2));
3945   case Intrinsic::loongarch_lsx_vmod_bu:
3946   case Intrinsic::loongarch_lsx_vmod_hu:
3947   case Intrinsic::loongarch_lsx_vmod_wu:
3948   case Intrinsic::loongarch_lsx_vmod_du:
3949   case Intrinsic::loongarch_lasx_xvmod_bu:
3950   case Intrinsic::loongarch_lasx_xvmod_hu:
3951   case Intrinsic::loongarch_lasx_xvmod_wu:
3952   case Intrinsic::loongarch_lasx_xvmod_du:
3953     return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3954                        N->getOperand(2));
3955   case Intrinsic::loongarch_lsx_vand_v:
3956   case Intrinsic::loongarch_lasx_xvand_v:
3957     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3958                        N->getOperand(2));
3959   case Intrinsic::loongarch_lsx_vor_v:
3960   case Intrinsic::loongarch_lasx_xvor_v:
3961     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3962                        N->getOperand(2));
3963   case Intrinsic::loongarch_lsx_vxor_v:
3964   case Intrinsic::loongarch_lasx_xvxor_v:
3965     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3966                        N->getOperand(2));
3967   case Intrinsic::loongarch_lsx_vnor_v:
3968   case Intrinsic::loongarch_lasx_xvnor_v: {
3969     SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3970                               N->getOperand(2));
3971     return DAG.getNOT(DL, Res, Res->getValueType(0));
3972   }
3973   case Intrinsic::loongarch_lsx_vandi_b:
3974   case Intrinsic::loongarch_lasx_xvandi_b:
3975     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3976                        lowerVectorSplatImm<8>(N, 2, DAG));
3977   case Intrinsic::loongarch_lsx_vori_b:
3978   case Intrinsic::loongarch_lasx_xvori_b:
3979     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3980                        lowerVectorSplatImm<8>(N, 2, DAG));
3981   case Intrinsic::loongarch_lsx_vxori_b:
3982   case Intrinsic::loongarch_lasx_xvxori_b:
3983     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3984                        lowerVectorSplatImm<8>(N, 2, DAG));
3985   case Intrinsic::loongarch_lsx_vsll_b:
3986   case Intrinsic::loongarch_lsx_vsll_h:
3987   case Intrinsic::loongarch_lsx_vsll_w:
3988   case Intrinsic::loongarch_lsx_vsll_d:
3989   case Intrinsic::loongarch_lasx_xvsll_b:
3990   case Intrinsic::loongarch_lasx_xvsll_h:
3991   case Intrinsic::loongarch_lasx_xvsll_w:
3992   case Intrinsic::loongarch_lasx_xvsll_d:
3993     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3994                        truncateVecElts(N, DAG));
3995   case Intrinsic::loongarch_lsx_vslli_b:
3996   case Intrinsic::loongarch_lasx_xvslli_b:
3997     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3998                        lowerVectorSplatImm<3>(N, 2, DAG));
3999   case Intrinsic::loongarch_lsx_vslli_h:
4000   case Intrinsic::loongarch_lasx_xvslli_h:
4001     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4002                        lowerVectorSplatImm<4>(N, 2, DAG));
4003   case Intrinsic::loongarch_lsx_vslli_w:
4004   case Intrinsic::loongarch_lasx_xvslli_w:
4005     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4006                        lowerVectorSplatImm<5>(N, 2, DAG));
4007   case Intrinsic::loongarch_lsx_vslli_d:
4008   case Intrinsic::loongarch_lasx_xvslli_d:
4009     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4010                        lowerVectorSplatImm<6>(N, 2, DAG));
4011   case Intrinsic::loongarch_lsx_vsrl_b:
4012   case Intrinsic::loongarch_lsx_vsrl_h:
4013   case Intrinsic::loongarch_lsx_vsrl_w:
4014   case Intrinsic::loongarch_lsx_vsrl_d:
4015   case Intrinsic::loongarch_lasx_xvsrl_b:
4016   case Intrinsic::loongarch_lasx_xvsrl_h:
4017   case Intrinsic::loongarch_lasx_xvsrl_w:
4018   case Intrinsic::loongarch_lasx_xvsrl_d:
4019     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4020                        truncateVecElts(N, DAG));
4021   case Intrinsic::loongarch_lsx_vsrli_b:
4022   case Intrinsic::loongarch_lasx_xvsrli_b:
4023     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4024                        lowerVectorSplatImm<3>(N, 2, DAG));
4025   case Intrinsic::loongarch_lsx_vsrli_h:
4026   case Intrinsic::loongarch_lasx_xvsrli_h:
4027     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4028                        lowerVectorSplatImm<4>(N, 2, DAG));
4029   case Intrinsic::loongarch_lsx_vsrli_w:
4030   case Intrinsic::loongarch_lasx_xvsrli_w:
4031     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4032                        lowerVectorSplatImm<5>(N, 2, DAG));
4033   case Intrinsic::loongarch_lsx_vsrli_d:
4034   case Intrinsic::loongarch_lasx_xvsrli_d:
4035     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4036                        lowerVectorSplatImm<6>(N, 2, DAG));
4037   case Intrinsic::loongarch_lsx_vsra_b:
4038   case Intrinsic::loongarch_lsx_vsra_h:
4039   case Intrinsic::loongarch_lsx_vsra_w:
4040   case Intrinsic::loongarch_lsx_vsra_d:
4041   case Intrinsic::loongarch_lasx_xvsra_b:
4042   case Intrinsic::loongarch_lasx_xvsra_h:
4043   case Intrinsic::loongarch_lasx_xvsra_w:
4044   case Intrinsic::loongarch_lasx_xvsra_d:
4045     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4046                        truncateVecElts(N, DAG));
4047   case Intrinsic::loongarch_lsx_vsrai_b:
4048   case Intrinsic::loongarch_lasx_xvsrai_b:
4049     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4050                        lowerVectorSplatImm<3>(N, 2, DAG));
4051   case Intrinsic::loongarch_lsx_vsrai_h:
4052   case Intrinsic::loongarch_lasx_xvsrai_h:
4053     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4054                        lowerVectorSplatImm<4>(N, 2, DAG));
4055   case Intrinsic::loongarch_lsx_vsrai_w:
4056   case Intrinsic::loongarch_lasx_xvsrai_w:
4057     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4058                        lowerVectorSplatImm<5>(N, 2, DAG));
4059   case Intrinsic::loongarch_lsx_vsrai_d:
4060   case Intrinsic::loongarch_lasx_xvsrai_d:
4061     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4062                        lowerVectorSplatImm<6>(N, 2, DAG));
4063   case Intrinsic::loongarch_lsx_vclz_b:
4064   case Intrinsic::loongarch_lsx_vclz_h:
4065   case Intrinsic::loongarch_lsx_vclz_w:
4066   case Intrinsic::loongarch_lsx_vclz_d:
4067   case Intrinsic::loongarch_lasx_xvclz_b:
4068   case Intrinsic::loongarch_lasx_xvclz_h:
4069   case Intrinsic::loongarch_lasx_xvclz_w:
4070   case Intrinsic::loongarch_lasx_xvclz_d:
4071     return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4072   case Intrinsic::loongarch_lsx_vpcnt_b:
4073   case Intrinsic::loongarch_lsx_vpcnt_h:
4074   case Intrinsic::loongarch_lsx_vpcnt_w:
4075   case Intrinsic::loongarch_lsx_vpcnt_d:
4076   case Intrinsic::loongarch_lasx_xvpcnt_b:
4077   case Intrinsic::loongarch_lasx_xvpcnt_h:
4078   case Intrinsic::loongarch_lasx_xvpcnt_w:
4079   case Intrinsic::loongarch_lasx_xvpcnt_d:
4080     return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
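  // vbitclr/vbitset/vbitrev operate on bit (n % EltBits) of each element:
  // bitset ORs with (1 << n), bitrev XORs with (1 << n), and bitclr goes
  // through lowerVectorBitClear (and its *Imm helpers for immediate forms).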
4081   case Intrinsic::loongarch_lsx_vbitclr_b:
4082   case Intrinsic::loongarch_lsx_vbitclr_h:
4083   case Intrinsic::loongarch_lsx_vbitclr_w:
4084   case Intrinsic::loongarch_lsx_vbitclr_d:
4085   case Intrinsic::loongarch_lasx_xvbitclr_b:
4086   case Intrinsic::loongarch_lasx_xvbitclr_h:
4087   case Intrinsic::loongarch_lasx_xvbitclr_w:
4088   case Intrinsic::loongarch_lasx_xvbitclr_d:
4089     return lowerVectorBitClear(N, DAG);
4090   case Intrinsic::loongarch_lsx_vbitclri_b:
4091   case Intrinsic::loongarch_lasx_xvbitclri_b:
4092     return lowerVectorBitClearImm<3>(N, DAG);
4093   case Intrinsic::loongarch_lsx_vbitclri_h:
4094   case Intrinsic::loongarch_lasx_xvbitclri_h:
4095     return lowerVectorBitClearImm<4>(N, DAG);
4096   case Intrinsic::loongarch_lsx_vbitclri_w:
4097   case Intrinsic::loongarch_lasx_xvbitclri_w:
4098     return lowerVectorBitClearImm<5>(N, DAG);
4099   case Intrinsic::loongarch_lsx_vbitclri_d:
4100   case Intrinsic::loongarch_lasx_xvbitclri_d:
4101     return lowerVectorBitClearImm<6>(N, DAG);
4102   case Intrinsic::loongarch_lsx_vbitset_b:
4103   case Intrinsic::loongarch_lsx_vbitset_h:
4104   case Intrinsic::loongarch_lsx_vbitset_w:
4105   case Intrinsic::loongarch_lsx_vbitset_d:
4106   case Intrinsic::loongarch_lasx_xvbitset_b:
4107   case Intrinsic::loongarch_lasx_xvbitset_h:
4108   case Intrinsic::loongarch_lasx_xvbitset_w:
4109   case Intrinsic::loongarch_lasx_xvbitset_d: {
4110     EVT VecTy = N->getValueType(0);
4111     SDValue One = DAG.getConstant(1, DL, VecTy);
4112     return DAG.getNode(
4113         ISD::OR, DL, VecTy, N->getOperand(1),
4114         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4115   }
4116   case Intrinsic::loongarch_lsx_vbitseti_b:
4117   case Intrinsic::loongarch_lasx_xvbitseti_b:
4118     return lowerVectorBitSetImm<3>(N, DAG);
4119   case Intrinsic::loongarch_lsx_vbitseti_h:
4120   case Intrinsic::loongarch_lasx_xvbitseti_h:
4121     return lowerVectorBitSetImm<4>(N, DAG);
4122   case Intrinsic::loongarch_lsx_vbitseti_w:
4123   case Intrinsic::loongarch_lasx_xvbitseti_w:
4124     return lowerVectorBitSetImm<5>(N, DAG);
4125   case Intrinsic::loongarch_lsx_vbitseti_d:
4126   case Intrinsic::loongarch_lasx_xvbitseti_d:
4127     return lowerVectorBitSetImm<6>(N, DAG);
4128   case Intrinsic::loongarch_lsx_vbitrev_b:
4129   case Intrinsic::loongarch_lsx_vbitrev_h:
4130   case Intrinsic::loongarch_lsx_vbitrev_w:
4131   case Intrinsic::loongarch_lsx_vbitrev_d:
4132   case Intrinsic::loongarch_lasx_xvbitrev_b:
4133   case Intrinsic::loongarch_lasx_xvbitrev_h:
4134   case Intrinsic::loongarch_lasx_xvbitrev_w:
4135   case Intrinsic::loongarch_lasx_xvbitrev_d: {
4136     EVT VecTy = N->getValueType(0);
4137     SDValue One = DAG.getConstant(1, DL, VecTy);
4138     return DAG.getNode(
4139         ISD::XOR, DL, VecTy, N->getOperand(1),
4140         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4141   }
4142   case Intrinsic::loongarch_lsx_vbitrevi_b:
4143   case Intrinsic::loongarch_lasx_xvbitrevi_b:
4144     return lowerVectorBitRevImm<3>(N, DAG);
4145   case Intrinsic::loongarch_lsx_vbitrevi_h:
4146   case Intrinsic::loongarch_lasx_xvbitrevi_h:
4147     return lowerVectorBitRevImm<4>(N, DAG);
4148   case Intrinsic::loongarch_lsx_vbitrevi_w:
4149   case Intrinsic::loongarch_lasx_xvbitrevi_w:
4150     return lowerVectorBitRevImm<5>(N, DAG);
4151   case Intrinsic::loongarch_lsx_vbitrevi_d:
4152   case Intrinsic::loongarch_lasx_xvbitrevi_d:
4153     return lowerVectorBitRevImm<6>(N, DAG);
4154   case Intrinsic::loongarch_lsx_vfadd_s:
4155   case Intrinsic::loongarch_lsx_vfadd_d:
4156   case Intrinsic::loongarch_lasx_xvfadd_s:
4157   case Intrinsic::loongarch_lasx_xvfadd_d:
4158     return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4159                        N->getOperand(2));
4160   case Intrinsic::loongarch_lsx_vfsub_s:
4161   case Intrinsic::loongarch_lsx_vfsub_d:
4162   case Intrinsic::loongarch_lasx_xvfsub_s:
4163   case Intrinsic::loongarch_lasx_xvfsub_d:
4164     return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4165                        N->getOperand(2));
4166   case Intrinsic::loongarch_lsx_vfmul_s:
4167   case Intrinsic::loongarch_lsx_vfmul_d:
4168   case Intrinsic::loongarch_lasx_xvfmul_s:
4169   case Intrinsic::loongarch_lasx_xvfmul_d:
4170     return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4171                        N->getOperand(2));
4172   case Intrinsic::loongarch_lsx_vfdiv_s:
4173   case Intrinsic::loongarch_lsx_vfdiv_d:
4174   case Intrinsic::loongarch_lasx_xvfdiv_s:
4175   case Intrinsic::loongarch_lasx_xvfdiv_d:
4176     return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4177                        N->getOperand(2));
4178   case Intrinsic::loongarch_lsx_vfmadd_s:
4179   case Intrinsic::loongarch_lsx_vfmadd_d:
4180   case Intrinsic::loongarch_lasx_xvfmadd_s:
4181   case Intrinsic::loongarch_lasx_xvfmadd_d:
4182     return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4183                        N->getOperand(2), N->getOperand(3));
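  // vinsgr2vr becomes INSERT_VECTOR_ELT. legalizeIntrinsicImmArg<N> handles the
  // lane-index immediate, which must fit in N bits (e.g. 4 bits for the 16
  // lanes of vinsgr2vr.b, 1 bit for the 2 lanes of vinsgr2vr.d).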
4184   case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4185     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4186                        N->getOperand(1), N->getOperand(2),
4187                        legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4188   case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4189   case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4190     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4191                        N->getOperand(1), N->getOperand(2),
4192                        legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4193   case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4194   case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4195     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4196                        N->getOperand(1), N->getOperand(2),
4197                        legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4198   case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4199     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4200                        N->getOperand(1), N->getOperand(2),
4201                        legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4202   case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4203   case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4204   case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4205   case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4206   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4207   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4208   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4209   case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
4210     EVT ResTy = N->getValueType(0);
4211     SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
4212     return DAG.getBuildVector(ResTy, DL, Ops);
4213   }
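  // vreplve splats the element selected by a GPR index; the index operand is
  // any-extended to GRLenVT to match what LoongArchISD::VREPLVE expects.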
4214   case Intrinsic::loongarch_lsx_vreplve_b:
4215   case Intrinsic::loongarch_lsx_vreplve_h:
4216   case Intrinsic::loongarch_lsx_vreplve_w:
4217   case Intrinsic::loongarch_lsx_vreplve_d:
4218   case Intrinsic::loongarch_lasx_xvreplve_b:
4219   case Intrinsic::loongarch_lasx_xvreplve_h:
4220   case Intrinsic::loongarch_lasx_xvreplve_w:
4221   case Intrinsic::loongarch_lasx_xvreplve_d:
4222     return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4223                        N->getOperand(1),
4224                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4225                                    N->getOperand(2)));
4226   }
4227   return SDValue();
4228 }
4229 
4230 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4231                                                    DAGCombinerInfo &DCI) const {
4232   SelectionDAG &DAG = DCI.DAG;
4233   switch (N->getOpcode()) {
4234   default:
4235     break;
4236   case ISD::AND:
4237     return performANDCombine(N, DAG, DCI, Subtarget);
4238   case ISD::OR:
4239     return performORCombine(N, DAG, DCI, Subtarget);
4240   case ISD::SETCC:
4241     return performSETCCCombine(N, DAG, DCI, Subtarget);
4242   case ISD::SRL:
4243     return performSRLCombine(N, DAG, DCI, Subtarget);
4244   case LoongArchISD::BITREV_W:
4245     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4246   case ISD::INTRINSIC_WO_CHAIN:
4247     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4248   }
4249   return SDValue();
4250 }
4251 
4252 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4253                                               MachineBasicBlock *MBB) {
4254   if (!ZeroDivCheck)
4255     return MBB;
4256 
4257   // Build instructions:
4258   // MBB:
4259   //   div(or mod)   $dst, $dividend, $divisor
4260   //   bnez          $divisor, SinkMBB
4261   // BreakMBB:
4262   //   break         7 // BRK_DIVZERO
4263   // SinkMBB:
4264   //   fallthrough
4265   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4266   MachineFunction::iterator It = ++MBB->getIterator();
4267   MachineFunction *MF = MBB->getParent();
4268   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4269   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4270   MF->insert(It, BreakMBB);
4271   MF->insert(It, SinkMBB);
4272 
4273   // Transfer the remainder of MBB and its successor edges to SinkMBB.
4274   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4275   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4276 
4277   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4278   DebugLoc DL = MI.getDebugLoc();
4279   MachineOperand &Divisor = MI.getOperand(2);
4280   Register DivisorReg = Divisor.getReg();
4281 
4282   // MBB:
4283   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4284       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4285       .addMBB(SinkMBB);
4286   MBB->addSuccessor(BreakMBB);
4287   MBB->addSuccessor(SinkMBB);
4288 
4289   // BreakMBB:
4290   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4291   // definition of BRK_DIVZERO.
4292   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4293   BreakMBB->addSuccessor(SinkMBB);
4294 
4295   // Clear Divisor's kill flag.
4296   Divisor.setIsKill(false);
4297 
4298   return SinkMBB;
4299 }
4300 
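// Expand a PseudoVBZ/PseudoVBNZ-style vector branch pseudo. Roughly:
//   BB:      vset<cond>  $fcc, $vr      ; set the condition flag from the vector
//            bcnez       $fcc, TrueBB
//   FalseBB: addi.w      $rd1, $r0, 0
//            b           SinkBB
//   TrueBB:  addi.w      $rd2, $r0, 1
//   SinkBB:  $dst = PHI [$rd1, FalseBB], [$rd2, TrueBB]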
4301 static MachineBasicBlock *
4302 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4303                         const LoongArchSubtarget &Subtarget) {
4304   unsigned CondOpc;
4305   switch (MI.getOpcode()) {
4306   default:
4307     llvm_unreachable("Unexpected opcode");
4308   case LoongArch::PseudoVBZ:
4309     CondOpc = LoongArch::VSETEQZ_V;
4310     break;
4311   case LoongArch::PseudoVBZ_B:
4312     CondOpc = LoongArch::VSETANYEQZ_B;
4313     break;
4314   case LoongArch::PseudoVBZ_H:
4315     CondOpc = LoongArch::VSETANYEQZ_H;
4316     break;
4317   case LoongArch::PseudoVBZ_W:
4318     CondOpc = LoongArch::VSETANYEQZ_W;
4319     break;
4320   case LoongArch::PseudoVBZ_D:
4321     CondOpc = LoongArch::VSETANYEQZ_D;
4322     break;
4323   case LoongArch::PseudoVBNZ:
4324     CondOpc = LoongArch::VSETNEZ_V;
4325     break;
4326   case LoongArch::PseudoVBNZ_B:
4327     CondOpc = LoongArch::VSETALLNEZ_B;
4328     break;
4329   case LoongArch::PseudoVBNZ_H:
4330     CondOpc = LoongArch::VSETALLNEZ_H;
4331     break;
4332   case LoongArch::PseudoVBNZ_W:
4333     CondOpc = LoongArch::VSETALLNEZ_W;
4334     break;
4335   case LoongArch::PseudoVBNZ_D:
4336     CondOpc = LoongArch::VSETALLNEZ_D;
4337     break;
4338   case LoongArch::PseudoXVBZ:
4339     CondOpc = LoongArch::XVSETEQZ_V;
4340     break;
4341   case LoongArch::PseudoXVBZ_B:
4342     CondOpc = LoongArch::XVSETANYEQZ_B;
4343     break;
4344   case LoongArch::PseudoXVBZ_H:
4345     CondOpc = LoongArch::XVSETANYEQZ_H;
4346     break;
4347   case LoongArch::PseudoXVBZ_W:
4348     CondOpc = LoongArch::XVSETANYEQZ_W;
4349     break;
4350   case LoongArch::PseudoXVBZ_D:
4351     CondOpc = LoongArch::XVSETANYEQZ_D;
4352     break;
4353   case LoongArch::PseudoXVBNZ:
4354     CondOpc = LoongArch::XVSETNEZ_V;
4355     break;
4356   case LoongArch::PseudoXVBNZ_B:
4357     CondOpc = LoongArch::XVSETALLNEZ_B;
4358     break;
4359   case LoongArch::PseudoXVBNZ_H:
4360     CondOpc = LoongArch::XVSETALLNEZ_H;
4361     break;
4362   case LoongArch::PseudoXVBNZ_W:
4363     CondOpc = LoongArch::XVSETALLNEZ_W;
4364     break;
4365   case LoongArch::PseudoXVBNZ_D:
4366     CondOpc = LoongArch::XVSETALLNEZ_D;
4367     break;
4368   }
4369 
4370   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4371   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4372   DebugLoc DL = MI.getDebugLoc();
4373   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4374   MachineFunction::iterator It = ++BB->getIterator();
4375 
4376   MachineFunction *F = BB->getParent();
4377   MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4378   MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4379   MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4380 
4381   F->insert(It, FalseBB);
4382   F->insert(It, TrueBB);
4383   F->insert(It, SinkBB);
4384 
4385   // Transfer the remainder of BB and its successor edges to SinkBB.
4386   SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4387   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4388 
4389   // Insert the real instruction into BB.
4390   Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4391   BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4392 
4393   // Insert branch.
4394   BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4395   BB->addSuccessor(FalseBB);
4396   BB->addSuccessor(TrueBB);
4397 
4398   // FalseBB.
4399   Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4400   BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4401       .addReg(LoongArch::R0)
4402       .addImm(0);
4403   BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4404   FalseBB->addSuccessor(SinkBB);
4405 
4406   // TrueBB.
4407   Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4408   BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4409       .addReg(LoongArch::R0)
4410       .addImm(1);
4411   TrueBB->addSuccessor(SinkBB);
4412 
4413   // SinkBB: merge the results.
4414   BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4415           MI.getOperand(0).getReg())
4416       .addReg(RD1)
4417       .addMBB(FalseBB)
4418       .addReg(RD2)
4419       .addMBB(TrueBB);
4420 
4421   // The pseudo instruction is gone now.
4422   MI.eraseFromParent();
4423   return SinkBB;
4424 }
4425 
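// Expand PseudoXVINSGR2VR_{B,H}: insert an 8- or 16-bit element into a 256-bit
// LASX register by working on 128-bit halves. When the index addresses the high
// half, extract it with XVPERMI_Q (imm 1), perform a 128-bit VINSGR2VR insert,
// then merge the updated half back with XVPERMI_Q (imm 2).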
4426 static MachineBasicBlock *
4427 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4428                      const LoongArchSubtarget &Subtarget) {
4429   unsigned InsOp;
4430   unsigned HalfSize;
4431   switch (MI.getOpcode()) {
4432   default:
4433     llvm_unreachable("Unexpected opcode");
4434   case LoongArch::PseudoXVINSGR2VR_B:
4435     HalfSize = 16;
4436     InsOp = LoongArch::VINSGR2VR_B;
4437     break;
4438   case LoongArch::PseudoXVINSGR2VR_H:
4439     HalfSize = 8;
4440     InsOp = LoongArch::VINSGR2VR_H;
4441     break;
4442   }
4443   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4444   const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4445   const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4446   DebugLoc DL = MI.getDebugLoc();
4447   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4448   // XDst = vector_insert XSrc, Elt, Idx
4449   Register XDst = MI.getOperand(0).getReg();
4450   Register XSrc = MI.getOperand(1).getReg();
4451   Register Elt = MI.getOperand(2).getReg();
4452   unsigned Idx = MI.getOperand(3).getImm();
4453 
4454   Register ScratchReg1 = XSrc;
4455   if (Idx >= HalfSize) {
4456     ScratchReg1 = MRI.createVirtualRegister(RC);
4457     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4458         .addReg(XSrc)
4459         .addReg(XSrc)
4460         .addImm(1);
4461   }
4462 
4463   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4464   Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4465   BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4466       .addReg(ScratchReg1, 0, LoongArch::sub_128);
4467   BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4468       .addReg(ScratchSubReg1)
4469       .addReg(Elt)
4470       .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4471 
4472   Register ScratchReg2 = XDst;
4473   if (Idx >= HalfSize)
4474     ScratchReg2 = MRI.createVirtualRegister(RC);
4475 
4476   BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4477       .addImm(0)
4478       .addReg(ScratchSubReg2)
4479       .addImm(LoongArch::sub_128);
4480 
4481   if (Idx >= HalfSize)
4482     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4483         .addReg(XSrc)
4484         .addReg(ScratchReg2)
4485         .addImm(2);
4486 
4487   MI.eraseFromParent();
4488   return BB;
4489 }
4490 
4491 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4492     MachineInstr &MI, MachineBasicBlock *BB) const {
4493   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4494   DebugLoc DL = MI.getDebugLoc();
4495 
4496   switch (MI.getOpcode()) {
4497   default:
4498     llvm_unreachable("Unexpected instr type to insert");
4499   case LoongArch::DIV_W:
4500   case LoongArch::DIV_WU:
4501   case LoongArch::MOD_W:
4502   case LoongArch::MOD_WU:
4503   case LoongArch::DIV_D:
4504   case LoongArch::DIV_DU:
4505   case LoongArch::MOD_D:
4506   case LoongArch::MOD_DU:
4507     return insertDivByZeroTrap(MI, BB);
4509   case LoongArch::WRFCSR: {
4510     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4511             LoongArch::FCSR0 + MI.getOperand(0).getImm())
4512         .addReg(MI.getOperand(1).getReg());
4513     MI.eraseFromParent();
4514     return BB;
4515   }
4516   case LoongArch::RDFCSR: {
4517     MachineInstr *ReadFCSR =
4518         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4519                 MI.getOperand(0).getReg())
4520             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4521     ReadFCSR->getOperand(1).setIsUndef();
4522     MI.eraseFromParent();
4523     return BB;
4524   }
4525   case LoongArch::PseudoVBZ:
4526   case LoongArch::PseudoVBZ_B:
4527   case LoongArch::PseudoVBZ_H:
4528   case LoongArch::PseudoVBZ_W:
4529   case LoongArch::PseudoVBZ_D:
4530   case LoongArch::PseudoVBNZ:
4531   case LoongArch::PseudoVBNZ_B:
4532   case LoongArch::PseudoVBNZ_H:
4533   case LoongArch::PseudoVBNZ_W:
4534   case LoongArch::PseudoVBNZ_D:
4535   case LoongArch::PseudoXVBZ:
4536   case LoongArch::PseudoXVBZ_B:
4537   case LoongArch::PseudoXVBZ_H:
4538   case LoongArch::PseudoXVBZ_W:
4539   case LoongArch::PseudoXVBZ_D:
4540   case LoongArch::PseudoXVBNZ:
4541   case LoongArch::PseudoXVBNZ_B:
4542   case LoongArch::PseudoXVBNZ_H:
4543   case LoongArch::PseudoXVBNZ_W:
4544   case LoongArch::PseudoXVBNZ_D:
4545     return emitVecCondBranchPseudo(MI, BB, Subtarget);
4546   case LoongArch::PseudoXVINSGR2VR_B:
4547   case LoongArch::PseudoXVINSGR2VR_H:
4548     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4549   }
4550 }
4551 
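// Misaligned accesses are only permitted when the subtarget implements them
// (the UAL feature); the speed hint reported through Fast is a placeholder.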
4552 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4553     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4554     unsigned *Fast) const {
4555   if (!Subtarget.hasUAL())
4556     return false;
4557 
4558   // TODO: Set a reasonable speed number.
4559   if (Fast)
4560     *Fast = 1;
4561   return true;
4562 }
4563 
4564 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4565   switch ((LoongArchISD::NodeType)Opcode) {
4566   case LoongArchISD::FIRST_NUMBER:
4567     break;
4568 
4569 #define NODE_NAME_CASE(node)                                                   \
4570   case LoongArchISD::node:                                                     \
4571     return "LoongArchISD::" #node;
4572 
4573     // TODO: Add more target-dependent nodes later.
4574     NODE_NAME_CASE(CALL)
4575     NODE_NAME_CASE(CALL_MEDIUM)
4576     NODE_NAME_CASE(CALL_LARGE)
4577     NODE_NAME_CASE(RET)
4578     NODE_NAME_CASE(TAIL)
4579     NODE_NAME_CASE(TAIL_MEDIUM)
4580     NODE_NAME_CASE(TAIL_LARGE)
4581     NODE_NAME_CASE(SLL_W)
4582     NODE_NAME_CASE(SRA_W)
4583     NODE_NAME_CASE(SRL_W)
4584     NODE_NAME_CASE(BSTRINS)
4585     NODE_NAME_CASE(BSTRPICK)
4586     NODE_NAME_CASE(MOVGR2FR_W_LA64)
4587     NODE_NAME_CASE(MOVFR2GR_S_LA64)
4588     NODE_NAME_CASE(FTINT)
4589     NODE_NAME_CASE(REVB_2H)
4590     NODE_NAME_CASE(REVB_2W)
4591     NODE_NAME_CASE(BITREV_4B)
4592     NODE_NAME_CASE(BITREV_W)
4593     NODE_NAME_CASE(ROTR_W)
4594     NODE_NAME_CASE(ROTL_W)
4595     NODE_NAME_CASE(DIV_WU)
4596     NODE_NAME_CASE(MOD_WU)
4597     NODE_NAME_CASE(CLZ_W)
4598     NODE_NAME_CASE(CTZ_W)
4599     NODE_NAME_CASE(DBAR)
4600     NODE_NAME_CASE(IBAR)
4601     NODE_NAME_CASE(BREAK)
4602     NODE_NAME_CASE(SYSCALL)
4603     NODE_NAME_CASE(CRC_W_B_W)
4604     NODE_NAME_CASE(CRC_W_H_W)
4605     NODE_NAME_CASE(CRC_W_W_W)
4606     NODE_NAME_CASE(CRC_W_D_W)
4607     NODE_NAME_CASE(CRCC_W_B_W)
4608     NODE_NAME_CASE(CRCC_W_H_W)
4609     NODE_NAME_CASE(CRCC_W_W_W)
4610     NODE_NAME_CASE(CRCC_W_D_W)
4611     NODE_NAME_CASE(CSRRD)
4612     NODE_NAME_CASE(CSRWR)
4613     NODE_NAME_CASE(CSRXCHG)
4614     NODE_NAME_CASE(IOCSRRD_B)
4615     NODE_NAME_CASE(IOCSRRD_H)
4616     NODE_NAME_CASE(IOCSRRD_W)
4617     NODE_NAME_CASE(IOCSRRD_D)
4618     NODE_NAME_CASE(IOCSRWR_B)
4619     NODE_NAME_CASE(IOCSRWR_H)
4620     NODE_NAME_CASE(IOCSRWR_W)
4621     NODE_NAME_CASE(IOCSRWR_D)
4622     NODE_NAME_CASE(CPUCFG)
4623     NODE_NAME_CASE(MOVGR2FCSR)
4624     NODE_NAME_CASE(MOVFCSR2GR)
4625     NODE_NAME_CASE(CACOP_D)
4626     NODE_NAME_CASE(CACOP_W)
4627     NODE_NAME_CASE(VSHUF)
4628     NODE_NAME_CASE(VPICKEV)
4629     NODE_NAME_CASE(VPICKOD)
4630     NODE_NAME_CASE(VPACKEV)
4631     NODE_NAME_CASE(VPACKOD)
4632     NODE_NAME_CASE(VILVL)
4633     NODE_NAME_CASE(VILVH)
4634     NODE_NAME_CASE(VSHUF4I)
4635     NODE_NAME_CASE(VREPLVEI)
4636     NODE_NAME_CASE(XVPERMI)
4637     NODE_NAME_CASE(VPICK_SEXT_ELT)
4638     NODE_NAME_CASE(VPICK_ZEXT_ELT)
4639     NODE_NAME_CASE(VREPLVE)
4640     NODE_NAME_CASE(VALL_ZERO)
4641     NODE_NAME_CASE(VANY_ZERO)
4642     NODE_NAME_CASE(VALL_NONZERO)
4643     NODE_NAME_CASE(VANY_NONZERO)
4644   }
4645 #undef NODE_NAME_CASE
4646   return nullptr;
4647 }
4648 
4649 //===----------------------------------------------------------------------===//
4650 //                     Calling Convention Implementation
4651 //===----------------------------------------------------------------------===//
4652 
4653 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
4654 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
4655 // fixed-point arguments, and floating-point arguments when no FPR is available
4656 // or with the soft-float ABI.
4657 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
4658                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
4659                              LoongArch::R10, LoongArch::R11};
4660 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4661 // arguments, and fa0-fa1 are also used to return values.
4662 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4663                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
4664                                LoongArch::F6, LoongArch::F7};
4665 // FPR32 and FPR64 alias each other.
4666 const MCPhysReg ArgFPR64s[] = {
4667     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4668     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4669 
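// LSX registers vr0-vr7 and LASX registers xr0-xr7 used for passing vector
// arguments.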
4670 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4671                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4672                             LoongArch::VR6, LoongArch::VR7};
4673 
4674 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4675                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4676                             LoongArch::XR6, LoongArch::XR7};
4677 
4678 // Pass a 2*GRLen argument that has been split into two GRLen values through
4679 // registers or the stack as necessary.
4680 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4681                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4682                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4683                                      ISD::ArgFlagsTy ArgFlags2) {
4684   unsigned GRLenInBytes = GRLen / 8;
4685   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4686     // At least one half can be passed via register.
4687     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4688                                      VA1.getLocVT(), CCValAssign::Full));
4689   } else {
4690     // Both halves must be passed on the stack, with proper alignment.
4691     Align StackAlign =
4692         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4693     State.addLoc(
4694         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4695                             State.AllocateStack(GRLenInBytes, StackAlign),
4696                             VA1.getLocVT(), CCValAssign::Full));
4697     State.addLoc(CCValAssign::getMem(
4698         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4699         LocVT2, CCValAssign::Full));
4700     return false;
4701   }
4702   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4703     // The second half can also be passed via register.
4704     State.addLoc(
4705         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4706   } else {
4707     // The second half is passed via the stack, without additional alignment.
4708     State.addLoc(CCValAssign::getMem(
4709         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4710         LocVT2, CCValAssign::Full));
4711   }
4712   return false;
4713 }
4714 
4715 // Implements the LoongArch calling convention. Returns true upon failure.
4716 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4717                          unsigned ValNo, MVT ValVT,
4718                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4719                          CCState &State, bool IsFixed, bool IsRet,
4720                          Type *OrigTy) {
4721   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4722   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4723   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4724   MVT LocVT = ValVT;
4725 
4726   // Any return value split into more than two values can't be returned
4727   // directly.
4728   if (IsRet && ValNo > 1)
4729     return true;
4730 
4731   // Use GPRs for floats when the argument is variadic, no FPR is free, or the ABI is soft-float.
4732   bool UseGPRForFloat = true;
4733 
4734   switch (ABI) {
4735   default:
4736     llvm_unreachable("Unexpected ABI");
4737     break;
4738   case LoongArchABI::ABI_ILP32F:
4739   case LoongArchABI::ABI_LP64F:
4740   case LoongArchABI::ABI_ILP32D:
4741   case LoongArchABI::ABI_LP64D:
4742     UseGPRForFloat = !IsFixed;
4743     break;
4744   case LoongArchABI::ABI_ILP32S:
4745   case LoongArchABI::ABI_LP64S:
4746     break;
4747   }
4748 
4749   // FPR32 and FPR64 alias each other.
4750   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4751     UseGPRForFloat = true;
4752 
4753   if (UseGPRForFloat && ValVT == MVT::f32) {
4754     LocVT = GRLenVT;
4755     LocInfo = CCValAssign::BCvt;
4756   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4757     LocVT = MVT::i64;
4758     LocInfo = CCValAssign::BCvt;
4759   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4760     // TODO: Handle passing f64 on LA32 with D feature.
4761     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4762   }
4763 
4764   // If this is a variadic argument, the LoongArch calling convention requires
4765   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4766   // byte alignment. An aligned register should be used regardless of whether
4767   // the original argument was split during legalisation or not. The argument
4768   // will not be passed by registers if the original type is larger than
4769   // 2*GRLen, so the register alignment rule does not apply.
4770   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4771   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4772       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4773     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4774     // Skip 'odd' register if necessary.
4775     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4776       State.AllocateReg(ArgGPRs);
4777   }
4778 
4779   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4780   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4781       State.getPendingArgFlags();
4782 
4783   assert(PendingLocs.size() == PendingArgFlags.size() &&
4784          "PendingLocs and PendingArgFlags out of sync");
4785 
4786   // Split arguments might be passed indirectly, so keep track of the pending
4787   // values.
4788   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4789     LocVT = GRLenVT;
4790     LocInfo = CCValAssign::Indirect;
4791     PendingLocs.push_back(
4792         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4793     PendingArgFlags.push_back(ArgFlags);
4794     if (!ArgFlags.isSplitEnd()) {
4795       return false;
4796     }
4797   }
4798 
4799   // If the split argument only had two elements, it should be passed directly
4800   // in registers or on the stack.
4801   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4802       PendingLocs.size() <= 2) {
4803     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4804     // Apply the normal calling convention rules to the first half of the
4805     // split argument.
4806     CCValAssign VA = PendingLocs[0];
4807     ISD::ArgFlagsTy AF = PendingArgFlags[0];
4808     PendingLocs.clear();
4809     PendingArgFlags.clear();
4810     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4811                                     ArgFlags);
4812   }
4813 
4814   // Allocate to a register if possible, or else a stack slot.
4815   Register Reg;
4816   unsigned StoreSizeBytes = GRLen / 8;
4817   Align StackAlign = Align(GRLen / 8);
4818 
4819   if (ValVT == MVT::f32 && !UseGPRForFloat)
4820     Reg = State.AllocateReg(ArgFPR32s);
4821   else if (ValVT == MVT::f64 && !UseGPRForFloat)
4822     Reg = State.AllocateReg(ArgFPR64s);
4823   else if (ValVT.is128BitVector())
4824     Reg = State.AllocateReg(ArgVRs);
4825   else if (ValVT.is256BitVector())
4826     Reg = State.AllocateReg(ArgXRs);
4827   else
4828     Reg = State.AllocateReg(ArgGPRs);
4829 
4830   unsigned StackOffset =
4831       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4832 
4833   // If we reach this point and PendingLocs is non-empty, we must be at the
4834   // end of a split argument that must be passed indirectly.
4835   if (!PendingLocs.empty()) {
4836     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4837     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4838     for (auto &It : PendingLocs) {
4839       if (Reg)
4840         It.convertToReg(Reg);
4841       else
4842         It.convertToMem(StackOffset);
4843       State.addLoc(It);
4844     }
4845     PendingLocs.clear();
4846     PendingArgFlags.clear();
4847     return false;
4848   }
4849   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4850          "Expected a GRLenVT at this stage");
4851 
4852   if (Reg) {
4853     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4854     return false;
4855   }
4856 
4857   // When a floating-point value is passed on the stack, no bit-cast is needed.
4858   if (ValVT.isFloatingPoint()) {
4859     LocVT = ValVT;
4860     LocInfo = CCValAssign::Full;
4861   }
4862 
4863   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4864   return false;
4865 }
4866 
4867 void LoongArchTargetLowering::analyzeInputArgs(
4868     MachineFunction &MF, CCState &CCInfo,
4869     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4870     LoongArchCCAssignFn Fn) const {
4871   FunctionType *FType = MF.getFunction().getFunctionType();
4872   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4873     MVT ArgVT = Ins[i].VT;
4874     Type *ArgTy = nullptr;
4875     if (IsRet)
4876       ArgTy = FType->getReturnType();
4877     else if (Ins[i].isOrigArg())
4878       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4879     LoongArchABI::ABI ABI =
4880         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4881     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4882            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4883       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4884                         << '\n');
4885       llvm_unreachable("");
4886     }
4887   }
4888 }
4889 
4890 void LoongArchTargetLowering::analyzeOutputArgs(
4891     MachineFunction &MF, CCState &CCInfo,
4892     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4893     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4894   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4895     MVT ArgVT = Outs[i].VT;
4896     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4897     LoongArchABI::ABI ABI =
4898         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4899     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4900            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4901       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4902                         << "\n");
4903       llvm_unreachable("");
4904     }
4905   }
4906 }
4907 
4908 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4909 // values.
4910 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4911                                    const CCValAssign &VA, const SDLoc &DL) {
4912   switch (VA.getLocInfo()) {
4913   default:
4914     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4915   case CCValAssign::Full:
4916   case CCValAssign::Indirect:
4917     break;
4918   case CCValAssign::BCvt:
4919     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4920       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
4921     else
4922       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4923     break;
4924   }
4925   return Val;
4926 }
4927 
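// Materialize an argument assigned to a register: create a live-in virtual
// register for the physical argument register, copy the incoming value out of
// it, and convert it from the location type back to the value type.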
4928 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4929                                 const CCValAssign &VA, const SDLoc &DL,
4930                                 const ISD::InputArg &In,
4931                                 const LoongArchTargetLowering &TLI) {
4932   MachineFunction &MF = DAG.getMachineFunction();
4933   MachineRegisterInfo &RegInfo = MF.getRegInfo();
4934   EVT LocVT = VA.getLocVT();
4935   SDValue Val;
4936   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4937   Register VReg = RegInfo.createVirtualRegister(RC);
4938   RegInfo.addLiveIn(VA.getLocReg(), VReg);
4939   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4940 
4941   // If input is sign extended from 32 bits, note it for the OptW pass.
4942   if (In.isOrigArg()) {
4943     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4944     if (OrigArg->getType()->isIntegerTy()) {
4945       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4946       // An input zero extended from i31 can also be considered sign extended.
4947       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4948           (BitWidth < 32 && In.Flags.isZExt())) {
4949         LoongArchMachineFunctionInfo *LAFI =
4950             MF.getInfo<LoongArchMachineFunctionInfo>();
4951         LAFI->addSExt32Register(VReg);
4952       }
4953     }
4954   }
4955 
4956   return convertLocVTToValVT(DAG, Val, VA, DL);
4957 }
4958 
4959 // The caller is responsible for loading the full value if the argument is
4960 // passed with CCValAssign::Indirect.
4961 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4962                                 const CCValAssign &VA, const SDLoc &DL) {
4963   MachineFunction &MF = DAG.getMachineFunction();
4964   MachineFrameInfo &MFI = MF.getFrameInfo();
4965   EVT ValVT = VA.getValVT();
4966   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4967                                  /*IsImmutable=*/true);
4968   SDValue FIN = DAG.getFrameIndex(
4969       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
4970 
4971   ISD::LoadExtType ExtType;
4972   switch (VA.getLocInfo()) {
4973   default:
4974     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4975   case CCValAssign::Full:
4976   case CCValAssign::Indirect:
4977   case CCValAssign::BCvt:
4978     ExtType = ISD::NON_EXTLOAD;
4979     break;
4980   }
4981   return DAG.getExtLoad(
4982       ExtType, DL, VA.getLocVT(), Chain, FIN,
4983       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
4984 }
4985 
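// Convert Val from its value type to its assigned location type; the inverse
// of convertLocVTToValVT, e.g. moving an f32 into the i64 GPR it is passed in
// on LA64.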
4986 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4987                                    const CCValAssign &VA, const SDLoc &DL) {
4988   EVT LocVT = VA.getLocVT();
4989 
4990   switch (VA.getLocInfo()) {
4991   default:
4992     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4993   case CCValAssign::Full:
4994     break;
4995   case CCValAssign::BCvt:
4996     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4997       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4998     else
4999       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5000     break;
5001   }
5002   return Val;
5003 }
5004 
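// Calling convention used for GHC (Glasgow Haskell Compiler) calls: STG
// machine registers are pinned to fixed callee-saved GPRs/FPRs and nothing is
// ever passed on the stack, so running out of registers is a fatal error.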
5005 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5006                              CCValAssign::LocInfo LocInfo,
5007                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
5008   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5009     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5010     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
5011     static const MCPhysReg GPRList[] = {
5012         LoongArch::R23, LoongArch::R24, LoongArch::R25,
5013         LoongArch::R26, LoongArch::R27, LoongArch::R28,
5014         LoongArch::R29, LoongArch::R30, LoongArch::R31};
5015     if (unsigned Reg = State.AllocateReg(GPRList)) {
5016       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5017       return false;
5018     }
5019   }
5020 
5021   if (LocVT == MVT::f32) {
5022     // Pass in STG registers: F1, F2, F3, F4
5023     //                        fs0,fs1,fs2,fs3
5024     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5025                                           LoongArch::F26, LoongArch::F27};
5026     if (unsigned Reg = State.AllocateReg(FPR32List)) {
5027       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5028       return false;
5029     }
5030   }
5031 
5032   if (LocVT == MVT::f64) {
5033     // Pass in STG registers: D1, D2, D3, D4
5034     //                        fs4,fs5,fs6,fs7
5035     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5036                                           LoongArch::F30_64, LoongArch::F31_64};
5037     if (unsigned Reg = State.AllocateReg(FPR64List)) {
5038       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5039       return false;
5040     }
5041   }
5042 
5043   report_fatal_error("No registers left in GHC calling convention");
5044   return true;
5045 }
5046 
5047 // Transform physical registers into virtual registers.
5048 SDValue LoongArchTargetLowering::LowerFormalArguments(
5049     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5050     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5051     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5052 
5053   MachineFunction &MF = DAG.getMachineFunction();
5054 
5055   switch (CallConv) {
5056   default:
5057     llvm_unreachable("Unsupported calling convention");
5058   case CallingConv::C:
5059   case CallingConv::Fast:
5060     break;
5061   case CallingConv::GHC:
5062     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5063         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5064       report_fatal_error(
5065           "GHC calling convention requires the F and D extensions");
5066   }
5067 
5068   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5069   MVT GRLenVT = Subtarget.getGRLenVT();
5070   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5071   // Used with varargs to accumulate store chains.
5072   std::vector<SDValue> OutChains;
5073 
5074   // Assign locations to all of the incoming arguments.
5075   SmallVector<CCValAssign> ArgLocs;
5076   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5077 
5078   if (CallConv == CallingConv::GHC)
5079     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5080   else
5081     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5082 
5083   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5084     CCValAssign &VA = ArgLocs[i];
5085     SDValue ArgValue;
5086     if (VA.isRegLoc())
5087       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5088     else
5089       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5090     if (VA.getLocInfo() == CCValAssign::Indirect) {
5091       // If the original argument was split and passed by reference, we need to
5092       // load all parts of it here (using the same address).
5093       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5094                                    MachinePointerInfo()));
5095       unsigned ArgIndex = Ins[i].OrigArgIndex;
5096       unsigned ArgPartOffset = Ins[i].PartOffset;
5097       assert(ArgPartOffset == 0);
5098       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5099         CCValAssign &PartVA = ArgLocs[i + 1];
5100         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5101         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5102         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5103         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5104                                      MachinePointerInfo()));
5105         ++i;
5106       }
5107       continue;
5108     }
5109     InVals.push_back(ArgValue);
5110   }
5111 
5112   if (IsVarArg) {
5113     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5114     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5115     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5116     MachineFrameInfo &MFI = MF.getFrameInfo();
5117     MachineRegisterInfo &RegInfo = MF.getRegInfo();
5118     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5119 
5120     // Offset of the first variable argument from stack pointer, and size of
5121     // the vararg save area. For now, the varargs save area is either zero or
5122     // large enough to hold a0-a7.
5123     int VaArgOffset, VarArgsSaveSize;
5124 
5125     // If all registers are allocated, then all varargs must be passed on the
5126     // stack and we don't need to save any argregs.
5127     if (ArgRegs.size() == Idx) {
5128       VaArgOffset = CCInfo.getStackSize();
5129       VarArgsSaveSize = 0;
5130     } else {
5131       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5132       VaArgOffset = -VarArgsSaveSize;
5133     }
5134 
5135     // Record the frame index of the first variable argument
5136     // which is a value necessary to VASTART.
5137     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5138     LoongArchFI->setVarArgsFrameIndex(FI);
5139 
5140     // If saving an odd number of registers then create an extra stack slot to
5141     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5142     // offsets to even-numbered registers remain 2*GRLen-aligned.
5143     if (Idx % 2) {
5144       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5145                             true);
5146       VarArgsSaveSize += GRLenInBytes;
5147     }
5148 
5149     // Copy the integer registers that may have been used for passing varargs
5150     // to the vararg save area.
5151     for (unsigned I = Idx; I < ArgRegs.size();
5152          ++I, VaArgOffset += GRLenInBytes) {
5153       const Register Reg = RegInfo.createVirtualRegister(RC);
5154       RegInfo.addLiveIn(ArgRegs[I], Reg);
5155       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5156       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5157       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5158       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5159                                    MachinePointerInfo::getFixedStack(MF, FI));
5160       cast<StoreSDNode>(Store.getNode())
5161           ->getMemOperand()
5162           ->setValue((Value *)nullptr);
5163       OutChains.push_back(Store);
5164     }
5165     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5166   }
5167 
5168   // All stores are grouped in one node to allow the matching between
5169   // the size of Ins and InVals. This only happens for vararg functions.
5170   if (!OutChains.empty()) {
5171     OutChains.push_back(Chain);
5172     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5173   }
5174 
5175   return Chain;
5176 }
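// Worked example of the vararg save area above (editorial sketch, assuming
// LA64, i.e. GRLenInBytes == 8, and the usual a0-a7 argument GPRs): for a
// variadic callee whose fixed arguments consume $a0 and $a1, Idx == 2, so
// VarArgsSaveSize == 8 * (8 - 2) == 48 and VaArgOffset == -48. Idx is even,
// so no padding slot is created, and the copy loop spills $a2-$a7 into six
// GRLen-sized fixed stack objects at offsets -48, -40, ..., -8, with the
// frame index of the first slot recorded for VASTART.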
5177 
5178 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5179   return CI->isTailCall();
5180 }
5181 
5182 // Check if the return value is used as only a return value, as otherwise
5183 // we can't perform a tail-call.
5184 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5185                                                  SDValue &Chain) const {
5186   if (N->getNumValues() != 1)
5187     return false;
5188   if (!N->hasNUsesOfValue(1, 0))
5189     return false;
5190 
5191   SDNode *Copy = *N->use_begin();
5192   if (Copy->getOpcode() != ISD::CopyToReg)
5193     return false;
5194 
5195   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5196   // isn't safe to perform a tail call.
5197   if (Copy->getGluedNode())
5198     return false;
5199 
5200   // The copy must be used by a LoongArchISD::RET, and nothing else.
5201   bool HasRet = false;
5202   for (SDNode *Node : Copy->uses()) {
5203     if (Node->getOpcode() != LoongArchISD::RET)
5204       return false;
5205     HasRet = true;
5206   }
5207 
5208   if (!HasRet)
5209     return false;
5210 
5211   Chain = Copy->getOperand(0);
5212   return true;
5213 }
5214 
5215 // Check whether the call is eligible for tail call optimization.
5216 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5217     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5218     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5219 
5220   auto CalleeCC = CLI.CallConv;
5221   auto &Outs = CLI.Outs;
5222   auto &Caller = MF.getFunction();
5223   auto CallerCC = Caller.getCallingConv();
5224 
5225   // Do not tail call opt if the stack is used to pass parameters.
5226   if (CCInfo.getStackSize() != 0)
5227     return false;
5228 
5229   // Do not tail call opt if any parameters need to be passed indirectly.
5230   for (auto &VA : ArgLocs)
5231     if (VA.getLocInfo() == CCValAssign::Indirect)
5232       return false;
5233 
5234   // Do not tail call opt if either caller or callee uses struct return
5235   // semantics.
5236   auto IsCallerStructRet = Caller.hasStructRetAttr();
5237   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5238   if (IsCallerStructRet || IsCalleeStructRet)
5239     return false;
5240 
5241   // Do not tail call opt if either the callee or caller has a byval argument.
5242   for (auto &Arg : Outs)
5243     if (Arg.Flags.isByVal())
5244       return false;
5245 
5246   // The callee has to preserve all registers the caller needs to preserve.
5247   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5248   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5249   if (CalleeCC != CallerCC) {
5250     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5251     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5252       return false;
5253   }
5254   return true;
5255 }
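// Illustrative consequences of the checks above (editorial sketch,
// hypothetical situations rather than tests from this repository): a call
// passing nine GRLen-sized integer arguments needs a stack slot once $a0-$a7
// are used up, so CCInfo.getStackSize() != 0 and the call is not tail-called.
// Likewise, a caller or callee that returns via sret, a byval argument, or
// any indirectly passed (split) argument disables the optimization.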
5256 
5257 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5258   return DAG.getDataLayout().getPrefTypeAlign(
5259       VT.getTypeForEVT(*DAG.getContext()));
5260 }
5261 
5262 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5263 // and output parameter nodes.
5264 SDValue
5265 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5266                                    SmallVectorImpl<SDValue> &InVals) const {
5267   SelectionDAG &DAG = CLI.DAG;
5268   SDLoc &DL = CLI.DL;
5269   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5270   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5271   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5272   SDValue Chain = CLI.Chain;
5273   SDValue Callee = CLI.Callee;
5274   CallingConv::ID CallConv = CLI.CallConv;
5275   bool IsVarArg = CLI.IsVarArg;
5276   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5277   MVT GRLenVT = Subtarget.getGRLenVT();
5278   bool &IsTailCall = CLI.IsTailCall;
5279 
5280   MachineFunction &MF = DAG.getMachineFunction();
5281 
5282   // Analyze the operands of the call, assigning locations to each operand.
5283   SmallVector<CCValAssign> ArgLocs;
5284   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5285 
5286   if (CallConv == CallingConv::GHC)
5287     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5288   else
5289     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5290 
5291   // Check if it's really possible to do a tail call.
5292   if (IsTailCall)
5293     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5294 
5295   if (IsTailCall)
5296     ++NumTailCalls;
5297   else if (CLI.CB && CLI.CB->isMustTailCall())
5298     report_fatal_error("failed to perform tail call elimination on a call "
5299                        "site marked musttail");
5300 
5301   // Get a count of how many bytes are to be pushed on the stack.
5302   unsigned NumBytes = ArgCCInfo.getStackSize();
5303 
5304   // Create local copies for byval args.
5305   SmallVector<SDValue> ByValArgs;
5306   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5307     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5308     if (!Flags.isByVal())
5309       continue;
5310 
5311     SDValue Arg = OutVals[i];
5312     unsigned Size = Flags.getByValSize();
5313     Align Alignment = Flags.getNonZeroByValAlign();
5314 
5315     int FI =
5316         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5317     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5318     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5319 
5320     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5321                           /*IsVolatile=*/false,
5322                           /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5323                           MachinePointerInfo(), MachinePointerInfo());
5324     ByValArgs.push_back(FIPtr);
5325   }
5326 
5327   if (!IsTailCall)
5328     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5329 
5330   // Copy argument values to their designated locations.
5331   SmallVector<std::pair<Register, SDValue>> RegsToPass;
5332   SmallVector<SDValue> MemOpChains;
5333   SDValue StackPtr;
5334   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5335     CCValAssign &VA = ArgLocs[i];
5336     SDValue ArgValue = OutVals[i];
5337     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5338 
5339     // Promote the value if needed.
5340     // For now, only handle fully promoted and indirect arguments.
5341     if (VA.getLocInfo() == CCValAssign::Indirect) {
5342       // Store the argument in a stack slot and pass its address.
5343       Align StackAlign =
5344           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5345                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
5346       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5347       // If the original argument was split and passed by reference, we need to
5348       // store the required parts of it here (and pass just one address).
5349       unsigned ArgIndex = Outs[i].OrigArgIndex;
5350       unsigned ArgPartOffset = Outs[i].PartOffset;
5351       assert(ArgPartOffset == 0);
5352       // Calculate the total size to store. We don't have access to what we're
5353       // actually storing other than performing the loop and collecting the
5354       // info.
5355       SmallVector<std::pair<SDValue, SDValue>> Parts;
5356       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5357         SDValue PartValue = OutVals[i + 1];
5358         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5359         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5360         EVT PartVT = PartValue.getValueType();
5361 
5362         StoredSize += PartVT.getStoreSize();
5363         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5364         Parts.push_back(std::make_pair(PartValue, Offset));
5365         ++i;
5366       }
5367       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5368       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5369       MemOpChains.push_back(
5370           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5371                        MachinePointerInfo::getFixedStack(MF, FI)));
5372       for (const auto &Part : Parts) {
5373         SDValue PartValue = Part.first;
5374         SDValue PartOffset = Part.second;
5375         SDValue Address =
5376             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5377         MemOpChains.push_back(
5378             DAG.getStore(Chain, DL, PartValue, Address,
5379                          MachinePointerInfo::getFixedStack(MF, FI)));
5380       }
5381       ArgValue = SpillSlot;
5382     } else {
5383       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5384     }
5385 
5386     // Use local copy if it is a byval arg.
5387     if (Flags.isByVal())
5388       ArgValue = ByValArgs[j++];
5389 
5390     if (VA.isRegLoc()) {
5391       // Queue up the argument copies and emit them at the end.
5392       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5393     } else {
5394       assert(VA.isMemLoc() && "Argument not register or memory");
5395       assert(!IsTailCall && "Tail call not allowed if stack is used "
5396                             "for passing parameters");
5397 
5398       // Work out the address of the stack slot.
5399       if (!StackPtr.getNode())
5400         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5401       SDValue Address =
5402           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5403                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5404 
5405       // Emit the store.
5406       MemOpChains.push_back(
5407           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5408     }
5409   }
5410 
5411   // Join the stores, which are independent of one another.
5412   if (!MemOpChains.empty())
5413     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5414 
5415   SDValue Glue;
5416 
5417   // Build a sequence of copy-to-reg nodes, chained and glued together.
5418   for (auto &Reg : RegsToPass) {
5419     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5420     Glue = Chain.getValue(1);
5421   }
5422 
5423   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5424   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5425   // split it and then direct call can be matched by PseudoCALL.
5426   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5427     const GlobalValue *GV = S->getGlobal();
5428     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5429                            ? LoongArchII::MO_CALL
5430                            : LoongArchII::MO_CALL_PLT;
5431     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5432   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5433     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5434                            ? LoongArchII::MO_CALL
5435                            : LoongArchII::MO_CALL_PLT;
5436     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5437   }
5438 
5439   // The first call operand is the chain and the second is the target address.
5440   SmallVector<SDValue> Ops;
5441   Ops.push_back(Chain);
5442   Ops.push_back(Callee);
5443 
5444   // Add argument registers to the end of the list so that they are
5445   // known live into the call.
5446   for (auto &Reg : RegsToPass)
5447     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5448 
5449   if (!IsTailCall) {
5450     // Add a register mask operand representing the call-preserved registers.
5451     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5452     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5453     assert(Mask && "Missing call preserved mask for calling convention");
5454     Ops.push_back(DAG.getRegisterMask(Mask));
5455   }
5456 
5457   // Glue the call to the argument copies, if any.
5458   if (Glue.getNode())
5459     Ops.push_back(Glue);
5460 
5461   // Emit the call.
5462   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5463   unsigned Op;
5464   switch (DAG.getTarget().getCodeModel()) {
5465   default:
5466     report_fatal_error("Unsupported code model");
5467   case CodeModel::Small:
5468     Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5469     break;
5470   case CodeModel::Medium:
5471     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5472     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5473     break;
5474   case CodeModel::Large:
5475     assert(Subtarget.is64Bit() && "Large code model requires LA64");
5476     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5477     break;
5478   }
5479 
5480   if (IsTailCall) {
5481     MF.getFrameInfo().setHasTailCall();
5482     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5483     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5484     return Ret;
5485   }
5486 
5487   Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5488   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5489   Glue = Chain.getValue(1);
5490 
5491   // Mark the end of the call, which is glued to the call itself.
5492   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5493   Glue = Chain.getValue(1);
5494 
5495   // Assign locations to each value returned by this call.
5496   SmallVector<CCValAssign> RVLocs;
5497   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5498   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5499 
5500   // Copy all of the result registers out of their specified physreg.
5501   for (auto &VA : RVLocs) {
5502     // Copy the value out.
5503     SDValue RetValue =
5504         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5505     // Glue the RetValue to the end of the call sequence.
5506     Chain = RetValue.getValue(1);
5507     Glue = RetValue.getValue(2);
5508 
5509     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5510 
5511     InVals.push_back(RetValue);
5512   }
5513 
5514   return Chain;
5515 }
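// Shape of the DAG built above for a non-tail call (editorial sketch):
//   memcpy nodes for byval copies (chained before the call sequence)
//   -> callseq_start
//   -> stores for indirect and stack-passed arguments
//   -> glued CopyToReg nodes for register arguments
//   -> LoongArchISD::CALL (or CALL_MEDIUM / CALL_LARGE, per code model)
//   -> callseq_end
//   -> CopyFromReg nodes for the returned values.
// Tail calls emit LoongArchISD::TAIL{,_MEDIUM,_LARGE} instead and skip the
// call sequence markers and the return-value copies.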
5516 
5517 bool LoongArchTargetLowering::CanLowerReturn(
5518     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5519     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5520   SmallVector<CCValAssign> RVLocs;
5521   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5522 
5523   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5524     LoongArchABI::ABI ABI =
5525         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5526     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5527                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5528                      nullptr))
5529       return false;
5530   }
5531   return true;
5532 }
5533 
5534 SDValue LoongArchTargetLowering::LowerReturn(
5535     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5536     const SmallVectorImpl<ISD::OutputArg> &Outs,
5537     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5538     SelectionDAG &DAG) const {
5539   // Stores the assignment of the return value to a location.
5540   SmallVector<CCValAssign> RVLocs;
5541 
5542   // Info about the registers and stack slot.
5543   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5544                  *DAG.getContext());
5545 
5546   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5547                     nullptr, CC_LoongArch);
5548   if (CallConv == CallingConv::GHC && !RVLocs.empty())
5549     report_fatal_error("GHC functions return void only");
5550   SDValue Glue;
5551   SmallVector<SDValue, 4> RetOps(1, Chain);
5552 
5553   // Copy the result values into the output registers.
5554   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5555     CCValAssign &VA = RVLocs[i];
5556     assert(VA.isRegLoc() && "Can only return in registers!");
5557 
5558     // Handle a 'normal' return.
5559     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5560     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5561 
5562     // Guarantee that all emitted copies are stuck together.
5563     Glue = Chain.getValue(1);
5564     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5565   }
5566 
5567   RetOps[0] = Chain; // Update chain.
5568 
5569   // Add the glue node if we have it.
5570   if (Glue.getNode())
5571     RetOps.push_back(Glue);
5572 
5573   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5574 }
5575 
5576 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5577                                            bool ForCodeSize) const {
5578   // TODO: Maybe need more checks here after vector extension is supported.
5579   if (VT == MVT::f32 && !Subtarget.hasBasicF())
5580     return false;
5581   if (VT == MVT::f64 && !Subtarget.hasBasicD())
5582     return false;
5583   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
5584 }
5585 
5586 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5587   return true;
5588 }
5589 
5590 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5591   return true;
5592 }
5593 
5594 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5595     const Instruction *I) const {
5596   if (!Subtarget.is64Bit())
5597     return isa<LoadInst>(I) || isa<StoreInst>(I);
5598 
5599   if (isa<LoadInst>(I))
5600     return true;
5601 
5602   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5603   // require fences because we can use amswap_db.[w/d].
5604   Type *Ty = I->getOperand(0)->getType();
5605   if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5606     unsigned Size = Ty->getIntegerBitWidth();
5607     return (Size == 8 || Size == 16);
5608   }
5609 
5610   return false;
5611 }
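// Example of the LA64 rule above (editorial sketch): an atomic i32 or i64
// store can be selected as amswap_db.w / amswap_db.d, which already carries
// the required barrier, so no explicit fences are inserted around it; an
// atomic i8 or i16 store still gets fences, as does every atomic load.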
5612 
5613 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5614                                                 LLVMContext &Context,
5615                                                 EVT VT) const {
5616   if (!VT.isVector())
5617     return getPointerTy(DL);
5618   return VT.changeVectorElementTypeToInteger();
5619 }
5620 
5621 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5622   // TODO: Support vectors.
5623   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5624 }
5625 
5626 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5627                                                  const CallInst &I,
5628                                                  MachineFunction &MF,
5629                                                  unsigned Intrinsic) const {
5630   switch (Intrinsic) {
5631   default:
5632     return false;
5633   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5634   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5635   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5636   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5637     Info.opc = ISD::INTRINSIC_W_CHAIN;
5638     Info.memVT = MVT::i32;
5639     Info.ptrVal = I.getArgOperand(0);
5640     Info.offset = 0;
5641     Info.align = Align(4);
5642     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5643                  MachineMemOperand::MOVolatile;
5644     return true;
5645     // TODO: Add more Intrinsics later.
5646   }
5647 }
5648 
5649 TargetLowering::AtomicExpansionKind
5650 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5651   // TODO: Add more AtomicRMWInst that needs to be extended.
5652 
5653   // Since floating-point operation requires a non-trivial set of data
5654   // operations, use CmpXChg to expand.
5655   if (AI->isFloatingPointOperation() ||
5656       AI->getOperation() == AtomicRMWInst::UIncWrap ||
5657       AI->getOperation() == AtomicRMWInst::UDecWrap)
5658     return AtomicExpansionKind::CmpXChg;
5659 
5660   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5661   if (Size == 8 || Size == 16)
5662     return AtomicExpansionKind::MaskedIntrinsic;
5663   return AtomicExpansionKind::None;
5664 }
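// Examples of the expansion policy above (editorial sketch, hypothetical IR):
//   atomicrmw add  ptr %p, i8 %v seq_cst     ; i8/i16  -> MaskedIntrinsic
//   atomicrmw fadd ptr %p, float %v seq_cst  ; FP      -> CmpXChg expansion
//   atomicrmw add  ptr %p, i64 %v seq_cst    ; i32/i64 -> None (native)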
5665 
5666 static Intrinsic::ID
5667 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5668                                     AtomicRMWInst::BinOp BinOp) {
5669   if (GRLen == 64) {
5670     switch (BinOp) {
5671     default:
5672       llvm_unreachable("Unexpected AtomicRMW BinOp");
5673     case AtomicRMWInst::Xchg:
5674       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5675     case AtomicRMWInst::Add:
5676       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5677     case AtomicRMWInst::Sub:
5678       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5679     case AtomicRMWInst::Nand:
5680       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5681     case AtomicRMWInst::UMax:
5682       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5683     case AtomicRMWInst::UMin:
5684       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5685     case AtomicRMWInst::Max:
5686       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5687     case AtomicRMWInst::Min:
5688       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5689       // TODO: support other AtomicRMWInst.
5690     }
5691   }
5692 
5693   if (GRLen == 32) {
5694     switch (BinOp) {
5695     default:
5696       llvm_unreachable("Unexpected AtomicRMW BinOp");
5697     case AtomicRMWInst::Xchg:
5698       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5699     case AtomicRMWInst::Add:
5700       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5701     case AtomicRMWInst::Sub:
5702       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5703     case AtomicRMWInst::Nand:
5704       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5705       // TODO: support other AtomicRMWInst.
5706     }
5707   }
5708 
5709   llvm_unreachable("Unexpected GRLen\n");
5710 }
5711 
5712 TargetLowering::AtomicExpansionKind
5713 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5714     AtomicCmpXchgInst *CI) const {
5715   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5716   if (Size == 8 || Size == 16)
5717     return AtomicExpansionKind::MaskedIntrinsic;
5718   return AtomicExpansionKind::None;
5719 }
5720 
5721 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5722     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5723     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5724   AtomicOrdering FailOrd = CI->getFailureOrdering();
5725   Value *FailureOrdering =
5726       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5727 
5728   // TODO: Support cmpxchg on LA32.
5729   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5730   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5731   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5732   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5733   Type *Tys[] = {AlignedAddr->getType()};
5734   Function *MaskedCmpXchg =
5735       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5736   Value *Result = Builder.CreateCall(
5737       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5738   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5739   return Result;
5740 }
5741 
5742 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5743     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5744     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5745   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5746   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5747   // mask, as this produces better code than the LL/SC loop emitted by
5748   // int_loongarch_masked_atomicrmw_xchg.
5749   if (AI->getOperation() == AtomicRMWInst::Xchg &&
5750       isa<ConstantInt>(AI->getValOperand())) {
5751     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5752     if (CVal->isZero())
5753       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
5754                                      Builder.CreateNot(Mask, "Inv_Mask"),
5755                                      AI->getAlign(), Ord);
5756     if (CVal->isMinusOne())
5757       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
5758                                      AI->getAlign(), Ord);
5759   }
5760 
5761   unsigned GRLen = Subtarget.getGRLen();
5762   Value *Ordering =
5763       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
5764   Type *Tys[] = {AlignedAddr->getType()};
5765   Function *LlwOpScwLoop = Intrinsic::getDeclaration(
5766       AI->getModule(),
5767       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
5768 
5769   if (GRLen == 64) {
5770     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5771     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5772     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5773   }
5774 
5775   Value *Result;
5776 
5777   // Must pass the shift amount needed to sign extend the loaded value prior
5778   // to performing a signed comparison for min/max. ShiftAmt is the number of
5779   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5780   // is the number of bits to left+right shift the value in order to
5781   // sign-extend.
5782   if (AI->getOperation() == AtomicRMWInst::Min ||
5783       AI->getOperation() == AtomicRMWInst::Max) {
5784     const DataLayout &DL = AI->getDataLayout();
5785     unsigned ValWidth =
5786         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5787     Value *SextShamt =
5788         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
5789     Result = Builder.CreateCall(LlwOpScwLoop,
5790                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5791   } else {
5792     Result =
5793         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5794   }
5795 
5796   if (GRLen == 64)
5797     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5798   return Result;
5799 }
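// Rough shape of the expansion above (editorial sketch; intrinsic-name
// mangling and the surrounding address/mask computation are elided): a
// masked i8/i16 "atomicrmw add" on LA64 becomes a call resembling
//   %res = call i64 @llvm.loongarch.masked.atomicrmw.add.i64(
//              ptr %aligned.addr, i64 %incr, i64 %mask, i64 %ordering)
// followed by a trunc of %res back to i32; the min/max operations pass the
// extra sign-extension shift amount computed above.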
5800 
5801 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
5802     const MachineFunction &MF, EVT VT) const {
5803   VT = VT.getScalarType();
5804 
5805   if (!VT.isSimple())
5806     return false;
5807 
5808   switch (VT.getSimpleVT().SimpleTy) {
5809   case MVT::f32:
5810   case MVT::f64:
5811     return true;
5812   default:
5813     break;
5814   }
5815 
5816   return false;
5817 }
5818 
5819 Register LoongArchTargetLowering::getExceptionPointerRegister(
5820     const Constant *PersonalityFn) const {
5821   return LoongArch::R4;
5822 }
5823 
5824 Register LoongArchTargetLowering::getExceptionSelectorRegister(
5825     const Constant *PersonalityFn) const {
5826   return LoongArch::R5;
5827 }
5828 
5829 //===----------------------------------------------------------------------===//
5830 //                           LoongArch Inline Assembly Support
5831 //===----------------------------------------------------------------------===//
5832 
5833 LoongArchTargetLowering::ConstraintType
5834 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
5835   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
5836   //
5837   // 'f':  A floating-point register (if available).
5838   // 'k':  A memory operand whose address is formed by a base register and
5839   //       (optionally scaled) index register.
5840   // 'l':  A signed 16-bit constant.
5841   // 'm':  A memory operand whose address is formed by a base register and
5842   //       offset that is suitable for use in instructions with the same
5843   //       addressing mode as st.w and ld.w.
5844   // 'I':  A signed 12-bit constant (for arithmetic instructions).
5845   // 'J':  Integer zero.
5846   // 'K':  An unsigned 12-bit constant (for logic instructions).
5847   // "ZB": An address that is held in a general-purpose register. The offset is
5848   //       zero.
5849   // "ZC": A memory operand whose address is formed by a base register and
5850   //       offset that is suitable for use in instructions with the same
5851   //       addressing mode as ll.w and sc.w.
5852   if (Constraint.size() == 1) {
5853     switch (Constraint[0]) {
5854     default:
5855       break;
5856     case 'f':
5857       return C_RegisterClass;
5858     case 'l':
5859     case 'I':
5860     case 'J':
5861     case 'K':
5862       return C_Immediate;
5863     case 'k':
5864       return C_Memory;
5865     }
5866   }
5867 
5868   if (Constraint == "ZC" || Constraint == "ZB")
5869     return C_Memory;
5870 
5871   // 'm' is handled here.
5872   return TargetLowering::getConstraintType(Constraint);
5873 }
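// Usage sketch for the immediate constraints above (editorial, hypothetical
// inline asm rather than code from this repository):
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(src), "I"(-2048));  // simm12
//   asm("andi   %0, %1, %2" : "=r"(res) : "r"(src), "K"(0xfff));  // uimm12
// 'k', "ZB" and "ZC" describe memory operands and are mapped to constraint
// codes by getInlineAsmMemConstraint below; 'm' takes the generic path.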
5874 
5875 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
5876     StringRef ConstraintCode) const {
5877   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
5878       .Case("k", InlineAsm::ConstraintCode::k)
5879       .Case("ZB", InlineAsm::ConstraintCode::ZB)
5880       .Case("ZC", InlineAsm::ConstraintCode::ZC)
5881       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
5882 }
5883 
5884 std::pair<unsigned, const TargetRegisterClass *>
5885 LoongArchTargetLowering::getRegForInlineAsmConstraint(
5886     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5887   // First, see if this is a constraint that directly corresponds to a LoongArch
5888   // register class.
5889   if (Constraint.size() == 1) {
5890     switch (Constraint[0]) {
5891     case 'r':
5892       // TODO: Support fixed vectors up to GRLen?
5893       if (VT.isVector())
5894         break;
5895       return std::make_pair(0U, &LoongArch::GPRRegClass);
5896     case 'f':
5897       if (Subtarget.hasBasicF() && VT == MVT::f32)
5898         return std::make_pair(0U, &LoongArch::FPR32RegClass);
5899       if (Subtarget.hasBasicD() && VT == MVT::f64)
5900         return std::make_pair(0U, &LoongArch::FPR64RegClass);
5901       if (Subtarget.hasExtLSX() &&
5902           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
5903         return std::make_pair(0U, &LoongArch::LSX128RegClass);
5904       if (Subtarget.hasExtLASX() &&
5905           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
5906         return std::make_pair(0U, &LoongArch::LASX256RegClass);
5907       break;
5908     default:
5909       break;
5910     }
5911   }
5912 
5913   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
5914   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
5915   // constraints while the official register name is prefixed with a '$'. So we
5916   // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
5917   // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
5918   // case insensitive, so no need to convert the constraint to upper case here.
5919   //
5920   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
5921   // decode the usage of register name aliases into their official names. And
5922   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
5923   // official register names.
5924   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
5925       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
5926     bool IsFP = Constraint[2] == 'f';
5927     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
5928     std::pair<unsigned, const TargetRegisterClass *> R;
5929     R = TargetLowering::getRegForInlineAsmConstraint(
5930         TRI, join_items("", Temp.first, Temp.second), VT);
5931     // Match those names to the widest floating point register type available.
5932     if (IsFP) {
5933       unsigned RegNo = R.first;
5934       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
5935         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
5936           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
5937           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
5938         }
5939       }
5940     }
5941     return R;
5942   }
5943 
5944   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5945 }
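// Example of the explicit-register handling above (editorial sketch,
// hypothetical constraint string): "={$f0},{$f1},{$f2}" is clipped to
// "{f0}..." before the generic matcher runs, and on a subtarget with BasicD
// the FPR32 registers it returns are remapped to the corresponding FPR64
// registers (F0_64, ...) when the operand type is f64 or unspecified.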
5946 
5947 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
5948     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
5949     SelectionDAG &DAG) const {
5950   // Currently only support length 1 constraints.
5951   if (Constraint.size() == 1) {
5952     switch (Constraint[0]) {
5953     case 'l':
5954       // Validate & create a 16-bit signed immediate operand.
5955       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5956         uint64_t CVal = C->getSExtValue();
5957         if (isInt<16>(CVal))
5958           Ops.push_back(
5959               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5960       }
5961       return;
5962     case 'I':
5963       // Validate & create a 12-bit signed immediate operand.
5964       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5965         uint64_t CVal = C->getSExtValue();
5966         if (isInt<12>(CVal))
5967           Ops.push_back(
5968               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5969       }
5970       return;
5971     case 'J':
5972       // Validate & create an integer zero operand.
5973       if (auto *C = dyn_cast<ConstantSDNode>(Op))
5974         if (C->getZExtValue() == 0)
5975           Ops.push_back(
5976               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
5977       return;
5978     case 'K':
5979       // Validate & create a 12-bit unsigned immediate operand.
5980       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5981         uint64_t CVal = C->getZExtValue();
5982         if (isUInt<12>(CVal))
5983           Ops.push_back(
5984               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5985       }
5986       return;
5987     default:
5988       break;
5989     }
5990   }
5991   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5992 }
5993 
5994 #define GET_REGISTER_MATCHER
5995 #include "LoongArchGenAsmMatcher.inc"
5996 
5997 Register
5998 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
5999                                            const MachineFunction &MF) const {
6000   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6001   std::string NewRegName = Name.second.str();
6002   Register Reg = MatchRegisterAltName(NewRegName);
6003   if (Reg == LoongArch::NoRegister)
6004     Reg = MatchRegisterName(NewRegName);
6005   if (Reg == LoongArch::NoRegister)
6006     report_fatal_error(
6007         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6008   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6009   if (!ReservedRegs.test(Reg))
6010     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6011                              StringRef(RegName) + "\"."));
6012   return Reg;
6013 }
6014 
6015 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6016                                                      EVT VT, SDValue C) const {
6017   // TODO: Support vectors.
6018   if (!VT.isScalarInteger())
6019     return false;
6020 
6021   // Omit the optimization if the data size exceeds GRLen.
6022   if (VT.getSizeInBits() > Subtarget.getGRLen())
6023     return false;
6024 
6025   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6026     const APInt &Imm = ConstNode->getAPIntValue();
6027     // Break MUL into (SLLI + ADD/SUB) or ALSL.
6028     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6029         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6030       return true;
6031     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6032     if (ConstNode->hasOneUse() &&
6033         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6034          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6035       return true;
6036     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6037     // in which the immediate has two set bits. Or break (MUL x, imm)
6038     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6039     // equals (1 << s0) - (1 << s1).
6040     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6041       unsigned Shifts = Imm.countr_zero();
6042       // Reject immediates which can be composed via a single LUI.
6043       if (Shifts >= 12)
6044         return false;
6045       // Reject multiplications which can be optimized to
6046       // (SLLI (ALSL x, x, 1/2/3/4), s).
6047       APInt ImmPop = Imm.ashr(Shifts);
6048       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6049         return false;
6050       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6051       // since it needs one more instruction than the other 3 cases.
6052       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6053       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6054           (ImmSmall - Imm).isPowerOf2())
6055         return true;
6056     }
6057   }
6058 
6059   return false;
6060 }
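// Worked examples of the decompositions above (editorial sketch; the exact
// instruction sequences are what later selection is expected to produce):
//   x * 17   -> one shift-and-add, e.g. alsl.d $rd, $rj, $rj, 4
//   x * 6    -> SLLI then ALSL, via the (Imm - 2).isPowerOf2() case
//   x * 4112 -> (x << 12) + (x << 4), the two-set-bits case (single use)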
6061 
6062 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6063                                                     const AddrMode &AM,
6064                                                     Type *Ty, unsigned AS,
6065                                                     Instruction *I) const {
6066   // LoongArch has four basic addressing modes:
6067   //  1. reg
6068   //  2. reg + 12-bit signed offset
6069   //  3. reg + 14-bit signed offset left-shifted by 2
6070   //  4. reg1 + reg2
6071   // TODO: Add more checks after the vector extensions are supported.
6072 
6073   // No global is ever allowed as a base.
6074   if (AM.BaseGV)
6075     return false;
6076 
6077   // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6078   // with `UAL` feature.
6079   if (!isInt<12>(AM.BaseOffs) &&
6080       !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6081     return false;
6082 
6083   switch (AM.Scale) {
6084   case 0:
6085     // "r+i" or just "i", depending on HasBaseReg.
6086     break;
6087   case 1:
6088     // "r+r+i" is not allowed.
6089     if (AM.HasBaseReg && AM.BaseOffs)
6090       return false;
6091     // Otherwise we have "r+r" or "r+i".
6092     break;
6093   case 2:
6094     // "2*r+r" or "2*r+i" is not allowed.
6095     if (AM.HasBaseReg || AM.BaseOffs)
6096       return false;
6097     // Allow "2*r" as "r+r".
6098     break;
6099   default:
6100     return false;
6101   }
6102 
6103   return true;
6104 }
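// The four modes above correspond to load/store forms such as (editorial
// sketch):
//   ld.d    $rd, $rj, 0         ; reg
//   ld.d    $rd, $rj, -8        ; reg + simm12
//   ldptr.d $rd, $rj, 2040      ; reg + simm14 << 2 (accepted only with UAL)
//   ldx.d   $rd, $rj, $rk       ; reg1 + reg2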
6105 
6106 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6107   return isInt<12>(Imm);
6108 }
6109 
6110 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6111   return isInt<12>(Imm);
6112 }
6113 
6114 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6115   // Zexts are free if they can be combined with a load.
6116   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6117   // poorly with type legalization of compares preferring sext.
6118   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6119     EVT MemVT = LD->getMemoryVT();
6120     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6121         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6122          LD->getExtensionType() == ISD::ZEXTLOAD))
6123       return true;
6124   }
6125 
6126   return TargetLowering::isZExtFree(Val, VT2);
6127 }
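// Example of the rule above (editorial sketch): zero-extending the result of
// an i8 or i16 load is free because the load can be selected as ld.bu/ld.hu,
// whereas an i32 -> i64 zext of a load is deliberately not reported as free
// so that comparisons keep preferring sign extension on LA64.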
6128 
6129 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6130                                                     EVT DstVT) const {
6131   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6132 }
6133 
6134 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6135   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6136 }
6137 
6138 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
6139   // TODO: Support vectors.
6140   if (Y.getValueType().isVector())
6141     return false;
6142 
6143   return !isa<ConstantSDNode>(Y);
6144 }
6145 
6146 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6147   // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
6148   return ISD::SIGN_EXTEND;
6149 }
6150 
6151 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6152     EVT Type, bool IsSigned) const {
6153   if (Subtarget.is64Bit() && Type == MVT::i32)
6154     return true;
6155 
6156   return IsSigned;
6157 }
6158 
6159 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6160   // Return false to suppress the unnecessary extensions if the LibCall
6161   // arguments or return value is a float narrower than GRLen on a soft FP ABI.
6162   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6163                                   Type.getSizeInBits() < Subtarget.getGRLen()))
6164     return false;
6165   return true;
6166 }
6167