xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcalls.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "loongarch-isel-lowering"
38 
39 STATISTIC(NumTailCalls, "Number of tail calls");
40 
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42                                   cl::desc("Trap on integer division by zero."),
43                                   cl::init(false));
44 
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46                                                  const LoongArchSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   MVT GRLenVT = Subtarget.getGRLenVT();
50 
51   // Set up the register classes.
52 
53   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54   if (Subtarget.hasBasicF())
55     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56   if (Subtarget.hasBasicD())
57     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58 
59   static const MVT::SimpleValueType LSXVTs[] = {
60       MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61   static const MVT::SimpleValueType LASXVTs[] = {
62       MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63 
64   if (Subtarget.hasExtLSX())
65     for (MVT VT : LSXVTs)
66       addRegisterClass(VT, &LoongArch::LSX128RegClass);
67 
68   if (Subtarget.hasExtLASX())
69     for (MVT VT : LASXVTs)
70       addRegisterClass(VT, &LoongArch::LASX256RegClass);
71 
72   // Set operations for LA32 and LA64.
73 
74   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75                    MVT::i1, Promote);
76 
77   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81   setOperationAction(ISD::ROTL, GRLenVT, Expand);
82   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83 
84   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85                       ISD::JumpTable, ISD::GlobalTLSAddress},
86                      GRLenVT, Custom);
87 
88   setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89 
90   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92   setOperationAction(ISD::VASTART, MVT::Other, Custom);
93   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94 
95   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96   setOperationAction(ISD::TRAP, MVT::Other, Legal);
97 
98   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101 
102   // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103   // we know which of sll and revb.2h is faster.
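  // As a rough sketch of that expansion (assuming the usual promotion to the
  // native width first), bitreverse.i16 on LA32 would end up as something
  // like:
  //   bitrev.w  $a0, $a0      # reverse all 32 bits
  //   srli.w    $a0, $a0, 16  # keep the reversed low halfword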
104   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105   setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
106 
107   // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108   // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109   // and i32 could still be byte-swapped relatively cheaply.
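  // Illustrative only: an i32 byte swap on LA32 can be assembled from those
  // pieces roughly as
  //   revb.2h  $a0, $a0       # swap bytes within each halfword
  //   rotri.w  $a0, $a0, 16   # then swap the two halfwords
  // with the exact sequence left to instruction selection.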
110   setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111 
112   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117 
118   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120 
121   // Set operations for LA64 only.
122 
123   if (Subtarget.is64Bit()) {
124     setOperationAction(ISD::SHL, MVT::i32, Custom);
125     setOperationAction(ISD::SRA, MVT::i32, Custom);
126     setOperationAction(ISD::SRL, MVT::i32, Custom);
127     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
128     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
129     setOperationAction(ISD::ROTR, MVT::i32, Custom);
130     setOperationAction(ISD::ROTL, MVT::i32, Custom);
131     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
132     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
133     setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
134     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
135     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
136     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
137     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
138     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
139 
140     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
141     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
142   }
143 
144   // Set operations for LA32 only.
145 
146   if (!Subtarget.is64Bit()) {
147     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
148     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
149     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
150     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
151     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
152 
153     // Set libcalls.
154     setLibcallName(RTLIB::MUL_I128, nullptr);
155     // The MULO libcall is not part of libgcc, only compiler-rt.
156     setLibcallName(RTLIB::MULO_I64, nullptr);
157   }
158 
159   // The MULO libcall is not part of libgcc, only compiler-rt.
160   setLibcallName(RTLIB::MULO_I128, nullptr);
161 
162   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
163 
164   static const ISD::CondCode FPCCToExpand[] = {
165       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
166       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
167 
168   // Set operations for 'F' feature.
169 
170   if (Subtarget.hasBasicF()) {
171     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172 
173     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
174     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
175     setOperationAction(ISD::FMA, MVT::f32, Legal);
176     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
177     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
178     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
179     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
180     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
181     setOperationAction(ISD::FSIN, MVT::f32, Expand);
182     setOperationAction(ISD::FCOS, MVT::f32, Expand);
183     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
184     setOperationAction(ISD::FPOW, MVT::f32, Expand);
185     setOperationAction(ISD::FREM, MVT::f32, Expand);
186 
187     if (Subtarget.is64Bit())
188       setOperationAction(ISD::FRINT, MVT::f32, Legal);
189 
190     if (!Subtarget.hasBasicD()) {
191       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
192       if (Subtarget.is64Bit()) {
193         setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
194         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
195       }
196     }
197   }
198 
199   // Set operations for 'D' feature.
200 
201   if (Subtarget.hasBasicD()) {
202     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205 
206     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
207     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
208     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
209     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
210     setOperationAction(ISD::FMA, MVT::f64, Legal);
211     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
212     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
213     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
214     setOperationAction(ISD::FSIN, MVT::f64, Expand);
215     setOperationAction(ISD::FCOS, MVT::f64, Expand);
216     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
217     setOperationAction(ISD::FPOW, MVT::f64, Expand);
218     setOperationAction(ISD::FREM, MVT::f64, Expand);
219 
220     if (Subtarget.is64Bit())
221       setOperationAction(ISD::FRINT, MVT::f64, Legal);
222   }
223 
224   // Set operations for 'LSX' feature.
225 
226   if (Subtarget.hasExtLSX()) {
227     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
228       // Expand all truncating stores and extending loads.
229       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230         setTruncStoreAction(VT, InnerVT, Expand);
231         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
232         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
233         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234       }
235       // By default everything must be expanded. Then we will selectively turn
236       // on ones that can be effectively codegen'd.
237       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
238         setOperationAction(Op, VT, Expand);
239     }
240 
241     for (MVT VT : LSXVTs) {
242       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
243       setOperationAction(ISD::BITCAST, VT, Legal);
244       setOperationAction(ISD::UNDEF, VT, Legal);
245 
246       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
247       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
248       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
249 
250       setOperationAction(ISD::SETCC, VT, Legal);
251       setOperationAction(ISD::VSELECT, VT, Legal);
252     }
253     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
254       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
255       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
256       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
257                          Legal);
258       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
259                          VT, Legal);
260       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
261       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
262       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
263       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
264       setCondCodeAction(
265           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
266           Expand);
267     }
268     for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
269       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
270       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
271       setOperationAction(ISD::FMA, VT, Legal);
272       setOperationAction(ISD::FSQRT, VT, Legal);
273       setOperationAction(ISD::FNEG, VT, Legal);
274       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
275                          ISD::SETUGE, ISD::SETUGT},
276                         VT, Expand);
277     }
278   }
279 
280   // Set operations for 'LASX' feature.
281 
282   if (Subtarget.hasExtLASX()) {
283     for (MVT VT : LASXVTs) {
284       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
285       setOperationAction(ISD::BITCAST, VT, Legal);
286       setOperationAction(ISD::UNDEF, VT, Legal);
287 
288       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
289       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
290       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
291 
292       setOperationAction(ISD::SETCC, VT, Legal);
293       setOperationAction(ISD::VSELECT, VT, Legal);
294     }
295     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
296       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
297       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
298       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
299                          Legal);
300       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
301                          VT, Legal);
302       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
303       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
304       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
305       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
306       setCondCodeAction(
307           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
308           Expand);
309     }
310     for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
311       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
312       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
313       setOperationAction(ISD::FMA, VT, Legal);
314       setOperationAction(ISD::FSQRT, VT, Legal);
315       setOperationAction(ISD::FNEG, VT, Legal);
316       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
317                          ISD::SETUGE, ISD::SETUGT},
318                         VT, Expand);
319     }
320   }
321 
322   // Set DAG combine for LA32 and LA64.
323 
324   setTargetDAGCombine(ISD::AND);
325   setTargetDAGCombine(ISD::OR);
326   setTargetDAGCombine(ISD::SRL);
327 
328   // Set DAG combine for 'LSX' feature.
329 
330   if (Subtarget.hasExtLSX())
331     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
332 
333   // Compute derived properties from the register classes.
334   computeRegisterProperties(Subtarget.getRegisterInfo());
335 
336   setStackPointerRegisterToSaveRestore(LoongArch::R3);
337 
338   setBooleanContents(ZeroOrOneBooleanContent);
339   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
340 
341   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
342 
343   setMinCmpXchgSizeInBits(32);
344 
345   // Function alignments.
346   setMinFunctionAlignment(Align(4));
347   // Set preferred alignments.
348   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
349   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
350   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
351 }
352 
353 bool LoongArchTargetLowering::isOffsetFoldingLegal(
354     const GlobalAddressSDNode *GA) const {
355   // In order to maximise the opportunity for common subexpression elimination,
356   // keep a separate ADD node for the global address offset instead of folding
357   // it in the global address node. Later peephole optimisations may choose to
358   // fold it back in when profitable.
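  // E.g. accesses to @g+4 and @g+8 then share a single materialisation of @g,
  //   (add (GlobalAddress @g), 4) and (add (GlobalAddress @g), 8),
  // instead of two distinct GlobalAddress nodes with folded offsets.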
359   return false;
360 }
361 
362 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
363                                                 SelectionDAG &DAG) const {
364   switch (Op.getOpcode()) {
365   case ISD::ATOMIC_FENCE:
366     return lowerATOMIC_FENCE(Op, DAG);
367   case ISD::EH_DWARF_CFA:
368     return lowerEH_DWARF_CFA(Op, DAG);
369   case ISD::GlobalAddress:
370     return lowerGlobalAddress(Op, DAG);
371   case ISD::GlobalTLSAddress:
372     return lowerGlobalTLSAddress(Op, DAG);
373   case ISD::INTRINSIC_WO_CHAIN:
374     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
375   case ISD::INTRINSIC_W_CHAIN:
376     return lowerINTRINSIC_W_CHAIN(Op, DAG);
377   case ISD::INTRINSIC_VOID:
378     return lowerINTRINSIC_VOID(Op, DAG);
379   case ISD::BlockAddress:
380     return lowerBlockAddress(Op, DAG);
381   case ISD::JumpTable:
382     return lowerJumpTable(Op, DAG);
383   case ISD::SHL_PARTS:
384     return lowerShiftLeftParts(Op, DAG);
385   case ISD::SRA_PARTS:
386     return lowerShiftRightParts(Op, DAG, true);
387   case ISD::SRL_PARTS:
388     return lowerShiftRightParts(Op, DAG, false);
389   case ISD::ConstantPool:
390     return lowerConstantPool(Op, DAG);
391   case ISD::FP_TO_SINT:
392     return lowerFP_TO_SINT(Op, DAG);
393   case ISD::BITCAST:
394     return lowerBITCAST(Op, DAG);
395   case ISD::UINT_TO_FP:
396     return lowerUINT_TO_FP(Op, DAG);
397   case ISD::SINT_TO_FP:
398     return lowerSINT_TO_FP(Op, DAG);
399   case ISD::VASTART:
400     return lowerVASTART(Op, DAG);
401   case ISD::FRAMEADDR:
402     return lowerFRAMEADDR(Op, DAG);
403   case ISD::RETURNADDR:
404     return lowerRETURNADDR(Op, DAG);
405   case ISD::WRITE_REGISTER:
406     return lowerWRITE_REGISTER(Op, DAG);
407   case ISD::INSERT_VECTOR_ELT:
408     return lowerINSERT_VECTOR_ELT(Op, DAG);
409   case ISD::EXTRACT_VECTOR_ELT:
410     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
411   case ISD::BUILD_VECTOR:
412     return lowerBUILD_VECTOR(Op, DAG);
413   case ISD::VECTOR_SHUFFLE:
414     return lowerVECTOR_SHUFFLE(Op, DAG);
415   }
416   return SDValue();
417 }
418 
419 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
420                                                      SelectionDAG &DAG) const {
421   // TODO: custom shuffle.
422   return SDValue();
423 }
424 
425 static bool isConstantOrUndef(const SDValue Op) {
426   if (Op->isUndef())
427     return true;
428   if (isa<ConstantSDNode>(Op))
429     return true;
430   if (isa<ConstantFPSDNode>(Op))
431     return true;
432   return false;
433 }
434 
435 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
436   for (unsigned i = 0; i < Op->getNumOperands(); ++i)
437     if (isConstantOrUndef(Op->getOperand(i)))
438       return true;
439   return false;
440 }
441 
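// Lowering strategy for BUILD_VECTOR on LSX/LASX: constant splats are
// materialised as a vector constant of the matching integer type and bitcast
// back to the requested type; other splats are left as-is; and vectors whose
// elements are all non-constant are assembled with a chain of
// INSERT_VECTOR_ELT nodes instead of being expanded through the stack.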
442 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
443                                                    SelectionDAG &DAG) const {
444   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
445   EVT ResTy = Op->getValueType(0);
446   SDLoc DL(Op);
447   APInt SplatValue, SplatUndef;
448   unsigned SplatBitSize;
449   bool HasAnyUndefs;
450   bool Is128Vec = ResTy.is128BitVector();
451   bool Is256Vec = ResTy.is256BitVector();
452 
453   if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
454       (!Subtarget.hasExtLASX() || !Is256Vec))
455     return SDValue();
456 
457   if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
458                             /*MinSplatBits=*/8) &&
459       SplatBitSize <= 64) {
460     // We can only cope with 8, 16, 32, or 64-bit elements.
461     if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
462         SplatBitSize != 64)
463       return SDValue();
464 
465     EVT ViaVecTy;
466 
467     switch (SplatBitSize) {
468     default:
469       return SDValue();
470     case 8:
471       ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
472       break;
473     case 16:
474       ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
475       break;
476     case 32:
477       ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
478       break;
479     case 64:
480       ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
481       break;
482     }
483 
484     // SelectionDAG::getConstant will promote SplatValue appropriately.
485     SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
486 
487     // Bitcast to the type we originally wanted.
488     if (ViaVecTy != ResTy)
489       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
490 
491     return Result;
492   }
493 
494   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
495     return Op;
496 
497   if (!isConstantOrUndefBUILD_VECTOR(Node)) {
498     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
499     // The resulting code is the same length as the expansion, but it doesn't
500     // use memory operations.
501     EVT ResTy = Node->getValueType(0);
502 
503     assert(ResTy.isVector());
504 
505     unsigned NumElts = ResTy.getVectorNumElements();
506     SDValue Vector = DAG.getUNDEF(ResTy);
507     for (unsigned i = 0; i < NumElts; ++i) {
508       Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
509                            Node->getOperand(i),
510                            DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
511     }
512     return Vector;
513   }
514 
515   return SDValue();
516 }
517 
518 SDValue
519 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
520                                                  SelectionDAG &DAG) const {
521   EVT VecTy = Op->getOperand(0)->getValueType(0);
522   SDValue Idx = Op->getOperand(1);
523   EVT EltTy = VecTy.getVectorElementType();
524   unsigned NumElts = VecTy.getVectorNumElements();
525 
526   if (isa<ConstantSDNode>(Idx) &&
527       (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
528        EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
529     return Op;
530 
531   return SDValue();
532 }
533 
534 SDValue
535 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
536                                                 SelectionDAG &DAG) const {
537   if (isa<ConstantSDNode>(Op->getOperand(2)))
538     return Op;
539   return SDValue();
540 }
541 
542 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
543                                                    SelectionDAG &DAG) const {
544   SDLoc DL(Op);
545   SyncScope::ID FenceSSID =
546       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
547 
548   // singlethread fences only synchronize with signal handlers on the same
549   // thread and thus only need to preserve instruction order, not actually
550   // enforce memory ordering.
551   if (FenceSSID == SyncScope::SingleThread)
552     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
553     return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
554 
555   return Op;
556 }
557 
558 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
559                                                      SelectionDAG &DAG) const {
560 
561   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
562     DAG.getContext()->emitError(
563         "On LA64, only 64-bit registers can be written.");
564     return Op.getOperand(0);
565   }
566 
567   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
568     DAG.getContext()->emitError(
569         "On LA32, only 32-bit registers can be written.");
570     return Op.getOperand(0);
571   }
572 
573   return Op;
574 }
575 
576 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
577                                                 SelectionDAG &DAG) const {
578   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
579     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
580                                 "be a constant integer");
581     return SDValue();
582   }
583 
584   MachineFunction &MF = DAG.getMachineFunction();
585   MF.getFrameInfo().setFrameAddressIsTaken(true);
586   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
587   EVT VT = Op.getValueType();
588   SDLoc DL(Op);
589   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
590   unsigned Depth = Op.getConstantOperandVal(0);
591   int GRLenInBytes = Subtarget.getGRLen() / 8;
592 
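  // Walk up the frame chain. This assumes the backend's usual prologue
  // layout, where the caller's frame pointer is spilled at fp - 2*GRLen bytes
  // (the return address sits at fp - GRLen), so each iteration reloads the
  // previous frame pointer from that slot.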
593   while (Depth--) {
594     int Offset = -(GRLenInBytes * 2);
595     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
596                               DAG.getIntPtrConstant(Offset, DL));
597     FrameAddr =
598         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
599   }
600   return FrameAddr;
601 }
602 
603 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
604                                                  SelectionDAG &DAG) const {
605   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
606     return SDValue();
607 
608   // Currently, only the return address of the current frame is supported.
609   if (Op.getConstantOperandVal(0) != 0) {
610     DAG.getContext()->emitError(
611         "return address can only be determined for the current frame");
612     return SDValue();
613   }
614 
615   MachineFunction &MF = DAG.getMachineFunction();
616   MF.getFrameInfo().setReturnAddressIsTaken(true);
617   MVT GRLenVT = Subtarget.getGRLenVT();
618 
619   // Return the value of the return address register, marking it an implicit
620   // live-in.
621   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
622                               getRegClassFor(GRLenVT));
623   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
624 }
625 
626 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
627                                                    SelectionDAG &DAG) const {
628   MachineFunction &MF = DAG.getMachineFunction();
629   auto Size = Subtarget.getGRLen() / 8;
630   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
631   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
632 }
633 
634 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
635                                               SelectionDAG &DAG) const {
636   MachineFunction &MF = DAG.getMachineFunction();
637   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
638 
639   SDLoc DL(Op);
640   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
641                                  getPointerTy(MF.getDataLayout()));
642 
643   // vastart just stores the address of the VarArgsFrameIndex slot into the
644   // memory location argument.
645   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
646   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
647                       MachinePointerInfo(SV));
648 }
649 
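// Custom lowering of i64 UINT_TO_FP when only the 32-bit FPU is available
// ('F' without 'D'): if the operand is provably narrow enough (a masking AND,
// a BSTRPICK of the low bits, or an AssertZext from a type narrower than
// i32), the node is kept so it can be selected directly; otherwise it is
// softened into the corresponding RTLIB conversion libcall.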
650 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
651                                                  SelectionDAG &DAG) const {
652   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
653          !Subtarget.hasBasicD() && "unexpected target features");
654 
655   SDLoc DL(Op);
656   SDValue Op0 = Op.getOperand(0);
657   if (Op0->getOpcode() == ISD::AND) {
658     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
659     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
660       return Op;
661   }
662 
663   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
664       Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
665       Op0.getConstantOperandVal(2) == UINT64_C(0))
666     return Op;
667 
668   if (Op0.getOpcode() == ISD::AssertZext &&
669       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
670     return Op;
671 
672   EVT OpVT = Op0.getValueType();
673   EVT RetVT = Op.getValueType();
674   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
675   MakeLibCallOptions CallOptions;
676   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
677   SDValue Chain = SDValue();
678   SDValue Result;
679   std::tie(Result, Chain) =
680       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
681   return Result;
682 }
683 
684 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
685                                                  SelectionDAG &DAG) const {
686   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
687          !Subtarget.hasBasicD() && "unexpected target features");
688 
689   SDLoc DL(Op);
690   SDValue Op0 = Op.getOperand(0);
691 
692   if ((Op0.getOpcode() == ISD::AssertSext ||
693        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
694       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
695     return Op;
696 
697   EVT OpVT = Op0.getValueType();
698   EVT RetVT = Op.getValueType();
699   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
700   MakeLibCallOptions CallOptions;
701   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
702   SDValue Chain = SDValue();
703   SDValue Result;
704   std::tie(Result, Chain) =
705       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
706   return Result;
707 }
708 
709 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
710                                               SelectionDAG &DAG) const {
711 
712   SDLoc DL(Op);
713   SDValue Op0 = Op.getOperand(0);
714 
715   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
716       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
717     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
718     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
719   }
720   return Op;
721 }
722 
723 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
724                                                  SelectionDAG &DAG) const {
725 
726   SDLoc DL(Op);
727 
728   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
729       !Subtarget.hasBasicD()) {
730     SDValue Dst =
731         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
732     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
733   }
734 
735   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
736   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
737   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
738 }
739 
740 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
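// getTargetNode - rewrap a generic address node as its target-specific
// counterpart (TargetGlobalAddress, TargetBlockAddress, TargetConstantPool or
// TargetJumpTable) so that getAddr() below can emit it as a machine operand.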
741                              SelectionDAG &DAG, unsigned Flags) {
742   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
743 }
744 
745 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
746                              SelectionDAG &DAG, unsigned Flags) {
747   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
748                                    Flags);
749 }
750 
751 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
752                              SelectionDAG &DAG, unsigned Flags) {
753   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
754                                    N->getOffset(), Flags);
755 }
756 
757 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
758                              SelectionDAG &DAG, unsigned Flags) {
759   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
760 }
761 
762 template <class NodeTy>
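// Common address-lowering helper for global addresses, block addresses,
// constant pools and jump tables: depending on the code model and on whether
// the symbol is known to be local, this selects a PC-relative
// (PseudoLA_PCREL*) or GOT-based (PseudoLA_GOT*) pseudo instruction.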
763 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
764                                          CodeModel::Model M,
765                                          bool IsLocal) const {
766   SDLoc DL(N);
767   EVT Ty = getPointerTy(DAG.getDataLayout());
768   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
769 
770   switch (M) {
771   default:
772     report_fatal_error("Unsupported code model");
773 
774   case CodeModel::Large: {
775     assert(Subtarget.is64Bit() && "Large code model requires LA64");
776 
777     // This is not actually used, but is necessary for successfully matching
778     // the PseudoLA_*_LARGE nodes.
779     SDValue Tmp = DAG.getConstant(0, DL, Ty);
780     if (IsLocal)
781       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
782       // eventually becomes the desired 5-insn code sequence.
783       return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
784                                         Tmp, Addr),
785                      0);
786 
787     // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
788     // becomes the desired 5-insn code sequence.
789     return SDValue(
790         DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
791         0);
792   }
793 
794   case CodeModel::Small:
795   case CodeModel::Medium:
796     if (IsLocal)
797       // This generates the pattern (PseudoLA_PCREL sym), which expands to
798       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
799       return SDValue(
800           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
801 
802     // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
803     // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
804     return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
805                    0);
806   }
807 }
808 
809 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
810                                                    SelectionDAG &DAG) const {
811   return getAddr(cast<BlockAddressSDNode>(Op), DAG,
812                  DAG.getTarget().getCodeModel());
813 }
814 
815 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
816                                                 SelectionDAG &DAG) const {
817   return getAddr(cast<JumpTableSDNode>(Op), DAG,
818                  DAG.getTarget().getCodeModel());
819 }
820 
821 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
822                                                    SelectionDAG &DAG) const {
823   return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
824                  DAG.getTarget().getCodeModel());
825 }
826 
827 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
828                                                     SelectionDAG &DAG) const {
829   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
830   assert(N->getOffset() == 0 && "unexpected offset in global node");
831   auto CM = DAG.getTarget().getCodeModel();
832   const GlobalValue *GV = N->getGlobal();
833 
834   if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
835     if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
836       CM = *GCM;
837   }
838 
839   return getAddr(N, DAG, CM, GV->isDSOLocal());
840 }
841 
842 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
843                                                   SelectionDAG &DAG,
844                                                   unsigned Opc,
845                                                   bool Large) const {
846   SDLoc DL(N);
847   EVT Ty = getPointerTy(DAG.getDataLayout());
848   MVT GRLenVT = Subtarget.getGRLenVT();
849 
850   // This is not actually used, but is necessary for successfully matching the
851   // PseudoLA_*_LARGE nodes.
852   SDValue Tmp = DAG.getConstant(0, DL, Ty);
853   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
854   SDValue Offset = Large
855                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
856                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
857 
858   // Add the thread pointer.
859   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
860                      DAG.getRegister(LoongArch::R2, GRLenVT));
861 }
862 
863 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
864                                                    SelectionDAG &DAG,
865                                                    unsigned Opc,
866                                                    bool Large) const {
867   SDLoc DL(N);
868   EVT Ty = getPointerTy(DAG.getDataLayout());
869   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
870 
871   // This is not actually used, but is necessary for successfully matching the
872   // PseudoLA_*_LARGE nodes.
873   SDValue Tmp = DAG.getConstant(0, DL, Ty);
874 
875   // Use a PC-relative addressing mode to access the dynamic GOT address.
876   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
877   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
878                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
879 
880   // Prepare argument list to generate call.
881   ArgListTy Args;
882   ArgListEntry Entry;
883   Entry.Node = Load;
884   Entry.Ty = CallTy;
885   Args.push_back(Entry);
886 
887   // Setup call to __tls_get_addr.
888   TargetLowering::CallLoweringInfo CLI(DAG);
889   CLI.setDebugLoc(DL)
890       .setChain(DAG.getEntryNode())
891       .setLibCallee(CallingConv::C, CallTy,
892                     DAG.getExternalSymbol("__tls_get_addr", Ty),
893                     std::move(Args));
894 
895   return LowerCallTo(CLI).first;
896 }
897 
898 SDValue
899 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
900                                                SelectionDAG &DAG) const {
901   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
902       CallingConv::GHC)
903     report_fatal_error("In GHC calling convention TLS is not supported");
904 
905   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
906   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
907 
908   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
909   assert(N->getOffset() == 0 && "unexpected offset in global node");
910 
911   SDValue Addr;
912   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
913   case TLSModel::GeneralDynamic:
914     // In this model, application code calls the dynamic linker function
915     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
916     // runtime.
917     Addr = getDynamicTLSAddr(N, DAG,
918                              Large ? LoongArch::PseudoLA_TLS_GD_LARGE
919                                    : LoongArch::PseudoLA_TLS_GD,
920                              Large);
921     break;
922   case TLSModel::LocalDynamic:
923     // Same as GeneralDynamic, except for assembly modifiers and relocation
924     // records.
925     Addr = getDynamicTLSAddr(N, DAG,
926                              Large ? LoongArch::PseudoLA_TLS_LD_LARGE
927                                    : LoongArch::PseudoLA_TLS_LD,
928                              Large);
929     break;
930   case TLSModel::InitialExec:
931     // This model uses the GOT to resolve TLS offsets.
932     Addr = getStaticTLSAddr(N, DAG,
933                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
934                                   : LoongArch::PseudoLA_TLS_IE,
935                             Large);
936     break;
937   case TLSModel::LocalExec:
938     // This model is used when statically linking, as the TLS offsets are
939     // resolved during program linking.
940     //
941     // This node doesn't need an extra argument for the large code model.
942     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
943     break;
944   }
945 
946   return Addr;
947 }
948 
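// Check that operand ImmOp of Op is an N-bit immediate (signed when IsSigned).
// If it is out of range, emit a diagnostic and return an UNDEF of the result
// type so lowering can continue; otherwise return an empty SDValue.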
949 template <unsigned N>
950 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
951                                     SelectionDAG &DAG, bool IsSigned = false) {
952   auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
953   // Check the ImmArg.
954   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
955       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
956     DAG.getContext()->emitError(Op->getOperationName(0) +
957                                 ": argument out of range.");
958     return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
959   }
960   return SDValue();
961 }
962 
963 SDValue
964 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
965                                                  SelectionDAG &DAG) const {
966   SDLoc DL(Op);
967   switch (Op.getConstantOperandVal(0)) {
968   default:
969     return SDValue(); // Don't custom lower most intrinsics.
970   case Intrinsic::thread_pointer: {
971     EVT PtrVT = getPointerTy(DAG.getDataLayout());
972     return DAG.getRegister(LoongArch::R2, PtrVT);
973   }
974   case Intrinsic::loongarch_lsx_vpickve2gr_d:
975   case Intrinsic::loongarch_lsx_vpickve2gr_du:
976   case Intrinsic::loongarch_lsx_vreplvei_d:
977   case Intrinsic::loongarch_lasx_xvrepl128vei_d:
978     return checkIntrinsicImmArg<1>(Op, 2, DAG);
979   case Intrinsic::loongarch_lsx_vreplvei_w:
980   case Intrinsic::loongarch_lasx_xvrepl128vei_w:
981   case Intrinsic::loongarch_lasx_xvpickve2gr_d:
982   case Intrinsic::loongarch_lasx_xvpickve2gr_du:
983   case Intrinsic::loongarch_lasx_xvpickve_d:
984   case Intrinsic::loongarch_lasx_xvpickve_d_f:
985     return checkIntrinsicImmArg<2>(Op, 2, DAG);
986   case Intrinsic::loongarch_lasx_xvinsve0_d:
987     return checkIntrinsicImmArg<2>(Op, 3, DAG);
988   case Intrinsic::loongarch_lsx_vsat_b:
989   case Intrinsic::loongarch_lsx_vsat_bu:
990   case Intrinsic::loongarch_lsx_vrotri_b:
991   case Intrinsic::loongarch_lsx_vsllwil_h_b:
992   case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
993   case Intrinsic::loongarch_lsx_vsrlri_b:
994   case Intrinsic::loongarch_lsx_vsrari_b:
995   case Intrinsic::loongarch_lsx_vreplvei_h:
996   case Intrinsic::loongarch_lasx_xvsat_b:
997   case Intrinsic::loongarch_lasx_xvsat_bu:
998   case Intrinsic::loongarch_lasx_xvrotri_b:
999   case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1000   case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1001   case Intrinsic::loongarch_lasx_xvsrlri_b:
1002   case Intrinsic::loongarch_lasx_xvsrari_b:
1003   case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1004   case Intrinsic::loongarch_lasx_xvpickve_w:
1005   case Intrinsic::loongarch_lasx_xvpickve_w_f:
1006     return checkIntrinsicImmArg<3>(Op, 2, DAG);
1007   case Intrinsic::loongarch_lasx_xvinsve0_w:
1008     return checkIntrinsicImmArg<3>(Op, 3, DAG);
1009   case Intrinsic::loongarch_lsx_vsat_h:
1010   case Intrinsic::loongarch_lsx_vsat_hu:
1011   case Intrinsic::loongarch_lsx_vrotri_h:
1012   case Intrinsic::loongarch_lsx_vsllwil_w_h:
1013   case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1014   case Intrinsic::loongarch_lsx_vsrlri_h:
1015   case Intrinsic::loongarch_lsx_vsrari_h:
1016   case Intrinsic::loongarch_lsx_vreplvei_b:
1017   case Intrinsic::loongarch_lasx_xvsat_h:
1018   case Intrinsic::loongarch_lasx_xvsat_hu:
1019   case Intrinsic::loongarch_lasx_xvrotri_h:
1020   case Intrinsic::loongarch_lasx_xvsllwil_w_h:
1021   case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
1022   case Intrinsic::loongarch_lasx_xvsrlri_h:
1023   case Intrinsic::loongarch_lasx_xvsrari_h:
1024   case Intrinsic::loongarch_lasx_xvrepl128vei_b:
1025     return checkIntrinsicImmArg<4>(Op, 2, DAG);
1026   case Intrinsic::loongarch_lsx_vsrlni_b_h:
1027   case Intrinsic::loongarch_lsx_vsrani_b_h:
1028   case Intrinsic::loongarch_lsx_vsrlrni_b_h:
1029   case Intrinsic::loongarch_lsx_vsrarni_b_h:
1030   case Intrinsic::loongarch_lsx_vssrlni_b_h:
1031   case Intrinsic::loongarch_lsx_vssrani_b_h:
1032   case Intrinsic::loongarch_lsx_vssrlni_bu_h:
1033   case Intrinsic::loongarch_lsx_vssrani_bu_h:
1034   case Intrinsic::loongarch_lsx_vssrlrni_b_h:
1035   case Intrinsic::loongarch_lsx_vssrarni_b_h:
1036   case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
1037   case Intrinsic::loongarch_lsx_vssrarni_bu_h:
1038   case Intrinsic::loongarch_lasx_xvsrlni_b_h:
1039   case Intrinsic::loongarch_lasx_xvsrani_b_h:
1040   case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
1041   case Intrinsic::loongarch_lasx_xvsrarni_b_h:
1042   case Intrinsic::loongarch_lasx_xvssrlni_b_h:
1043   case Intrinsic::loongarch_lasx_xvssrani_b_h:
1044   case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
1045   case Intrinsic::loongarch_lasx_xvssrani_bu_h:
1046   case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
1047   case Intrinsic::loongarch_lasx_xvssrarni_b_h:
1048   case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
1049   case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
1050     return checkIntrinsicImmArg<4>(Op, 3, DAG);
1051   case Intrinsic::loongarch_lsx_vsat_w:
1052   case Intrinsic::loongarch_lsx_vsat_wu:
1053   case Intrinsic::loongarch_lsx_vrotri_w:
1054   case Intrinsic::loongarch_lsx_vsllwil_d_w:
1055   case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1056   case Intrinsic::loongarch_lsx_vsrlri_w:
1057   case Intrinsic::loongarch_lsx_vsrari_w:
1058   case Intrinsic::loongarch_lsx_vslei_bu:
1059   case Intrinsic::loongarch_lsx_vslei_hu:
1060   case Intrinsic::loongarch_lsx_vslei_wu:
1061   case Intrinsic::loongarch_lsx_vslei_du:
1062   case Intrinsic::loongarch_lsx_vslti_bu:
1063   case Intrinsic::loongarch_lsx_vslti_hu:
1064   case Intrinsic::loongarch_lsx_vslti_wu:
1065   case Intrinsic::loongarch_lsx_vslti_du:
1066   case Intrinsic::loongarch_lsx_vbsll_v:
1067   case Intrinsic::loongarch_lsx_vbsrl_v:
1068   case Intrinsic::loongarch_lasx_xvsat_w:
1069   case Intrinsic::loongarch_lasx_xvsat_wu:
1070   case Intrinsic::loongarch_lasx_xvrotri_w:
1071   case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1072   case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1073   case Intrinsic::loongarch_lasx_xvsrlri_w:
1074   case Intrinsic::loongarch_lasx_xvsrari_w:
1075   case Intrinsic::loongarch_lasx_xvslei_bu:
1076   case Intrinsic::loongarch_lasx_xvslei_hu:
1077   case Intrinsic::loongarch_lasx_xvslei_wu:
1078   case Intrinsic::loongarch_lasx_xvslei_du:
1079   case Intrinsic::loongarch_lasx_xvslti_bu:
1080   case Intrinsic::loongarch_lasx_xvslti_hu:
1081   case Intrinsic::loongarch_lasx_xvslti_wu:
1082   case Intrinsic::loongarch_lasx_xvslti_du:
1083   case Intrinsic::loongarch_lasx_xvbsll_v:
1084   case Intrinsic::loongarch_lasx_xvbsrl_v:
1085     return checkIntrinsicImmArg<5>(Op, 2, DAG);
1086   case Intrinsic::loongarch_lsx_vseqi_b:
1087   case Intrinsic::loongarch_lsx_vseqi_h:
1088   case Intrinsic::loongarch_lsx_vseqi_w:
1089   case Intrinsic::loongarch_lsx_vseqi_d:
1090   case Intrinsic::loongarch_lsx_vslei_b:
1091   case Intrinsic::loongarch_lsx_vslei_h:
1092   case Intrinsic::loongarch_lsx_vslei_w:
1093   case Intrinsic::loongarch_lsx_vslei_d:
1094   case Intrinsic::loongarch_lsx_vslti_b:
1095   case Intrinsic::loongarch_lsx_vslti_h:
1096   case Intrinsic::loongarch_lsx_vslti_w:
1097   case Intrinsic::loongarch_lsx_vslti_d:
1098   case Intrinsic::loongarch_lasx_xvseqi_b:
1099   case Intrinsic::loongarch_lasx_xvseqi_h:
1100   case Intrinsic::loongarch_lasx_xvseqi_w:
1101   case Intrinsic::loongarch_lasx_xvseqi_d:
1102   case Intrinsic::loongarch_lasx_xvslei_b:
1103   case Intrinsic::loongarch_lasx_xvslei_h:
1104   case Intrinsic::loongarch_lasx_xvslei_w:
1105   case Intrinsic::loongarch_lasx_xvslei_d:
1106   case Intrinsic::loongarch_lasx_xvslti_b:
1107   case Intrinsic::loongarch_lasx_xvslti_h:
1108   case Intrinsic::loongarch_lasx_xvslti_w:
1109   case Intrinsic::loongarch_lasx_xvslti_d:
1110     return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1111   case Intrinsic::loongarch_lsx_vsrlni_h_w:
1112   case Intrinsic::loongarch_lsx_vsrani_h_w:
1113   case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1114   case Intrinsic::loongarch_lsx_vsrarni_h_w:
1115   case Intrinsic::loongarch_lsx_vssrlni_h_w:
1116   case Intrinsic::loongarch_lsx_vssrani_h_w:
1117   case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1118   case Intrinsic::loongarch_lsx_vssrani_hu_w:
1119   case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1120   case Intrinsic::loongarch_lsx_vssrarni_h_w:
1121   case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1122   case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1123   case Intrinsic::loongarch_lsx_vfrstpi_b:
1124   case Intrinsic::loongarch_lsx_vfrstpi_h:
1125   case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1126   case Intrinsic::loongarch_lasx_xvsrani_h_w:
1127   case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1128   case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1129   case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1130   case Intrinsic::loongarch_lasx_xvssrani_h_w:
1131   case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1132   case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1133   case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1134   case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1135   case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1136   case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1137   case Intrinsic::loongarch_lasx_xvfrstpi_b:
1138   case Intrinsic::loongarch_lasx_xvfrstpi_h:
1139     return checkIntrinsicImmArg<5>(Op, 3, DAG);
1140   case Intrinsic::loongarch_lsx_vsat_d:
1141   case Intrinsic::loongarch_lsx_vsat_du:
1142   case Intrinsic::loongarch_lsx_vrotri_d:
1143   case Intrinsic::loongarch_lsx_vsrlri_d:
1144   case Intrinsic::loongarch_lsx_vsrari_d:
1145   case Intrinsic::loongarch_lasx_xvsat_d:
1146   case Intrinsic::loongarch_lasx_xvsat_du:
1147   case Intrinsic::loongarch_lasx_xvrotri_d:
1148   case Intrinsic::loongarch_lasx_xvsrlri_d:
1149   case Intrinsic::loongarch_lasx_xvsrari_d:
1150     return checkIntrinsicImmArg<6>(Op, 2, DAG);
1151   case Intrinsic::loongarch_lsx_vsrlni_w_d:
1152   case Intrinsic::loongarch_lsx_vsrani_w_d:
1153   case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1154   case Intrinsic::loongarch_lsx_vsrarni_w_d:
1155   case Intrinsic::loongarch_lsx_vssrlni_w_d:
1156   case Intrinsic::loongarch_lsx_vssrani_w_d:
1157   case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1158   case Intrinsic::loongarch_lsx_vssrani_wu_d:
1159   case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1160   case Intrinsic::loongarch_lsx_vssrarni_w_d:
1161   case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1162   case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1163   case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1164   case Intrinsic::loongarch_lasx_xvsrani_w_d:
1165   case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1166   case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1167   case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1168   case Intrinsic::loongarch_lasx_xvssrani_w_d:
1169   case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1170   case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1171   case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1172   case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1173   case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1174   case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1175     return checkIntrinsicImmArg<6>(Op, 3, DAG);
1176   case Intrinsic::loongarch_lsx_vsrlni_d_q:
1177   case Intrinsic::loongarch_lsx_vsrani_d_q:
1178   case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1179   case Intrinsic::loongarch_lsx_vsrarni_d_q:
1180   case Intrinsic::loongarch_lsx_vssrlni_d_q:
1181   case Intrinsic::loongarch_lsx_vssrani_d_q:
1182   case Intrinsic::loongarch_lsx_vssrlni_du_q:
1183   case Intrinsic::loongarch_lsx_vssrani_du_q:
1184   case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1185   case Intrinsic::loongarch_lsx_vssrarni_d_q:
1186   case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1187   case Intrinsic::loongarch_lsx_vssrarni_du_q:
1188   case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1189   case Intrinsic::loongarch_lasx_xvsrani_d_q:
1190   case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1191   case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1192   case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1193   case Intrinsic::loongarch_lasx_xvssrani_d_q:
1194   case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1195   case Intrinsic::loongarch_lasx_xvssrani_du_q:
1196   case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1197   case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1198   case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1199   case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1200     return checkIntrinsicImmArg<7>(Op, 3, DAG);
1201   case Intrinsic::loongarch_lsx_vnori_b:
1202   case Intrinsic::loongarch_lsx_vshuf4i_b:
1203   case Intrinsic::loongarch_lsx_vshuf4i_h:
1204   case Intrinsic::loongarch_lsx_vshuf4i_w:
1205   case Intrinsic::loongarch_lasx_xvnori_b:
1206   case Intrinsic::loongarch_lasx_xvshuf4i_b:
1207   case Intrinsic::loongarch_lasx_xvshuf4i_h:
1208   case Intrinsic::loongarch_lasx_xvshuf4i_w:
1209   case Intrinsic::loongarch_lasx_xvpermi_d:
1210     return checkIntrinsicImmArg<8>(Op, 2, DAG);
1211   case Intrinsic::loongarch_lsx_vshuf4i_d:
1212   case Intrinsic::loongarch_lsx_vpermi_w:
1213   case Intrinsic::loongarch_lsx_vbitseli_b:
1214   case Intrinsic::loongarch_lsx_vextrins_b:
1215   case Intrinsic::loongarch_lsx_vextrins_h:
1216   case Intrinsic::loongarch_lsx_vextrins_w:
1217   case Intrinsic::loongarch_lsx_vextrins_d:
1218   case Intrinsic::loongarch_lasx_xvshuf4i_d:
1219   case Intrinsic::loongarch_lasx_xvpermi_w:
1220   case Intrinsic::loongarch_lasx_xvpermi_q:
1221   case Intrinsic::loongarch_lasx_xvbitseli_b:
1222   case Intrinsic::loongarch_lasx_xvextrins_b:
1223   case Intrinsic::loongarch_lasx_xvextrins_h:
1224   case Intrinsic::loongarch_lasx_xvextrins_w:
1225   case Intrinsic::loongarch_lasx_xvextrins_d:
1226     return checkIntrinsicImmArg<8>(Op, 3, DAG);
1227   case Intrinsic::loongarch_lsx_vrepli_b:
1228   case Intrinsic::loongarch_lsx_vrepli_h:
1229   case Intrinsic::loongarch_lsx_vrepli_w:
1230   case Intrinsic::loongarch_lsx_vrepli_d:
1231   case Intrinsic::loongarch_lasx_xvrepli_b:
1232   case Intrinsic::loongarch_lasx_xvrepli_h:
1233   case Intrinsic::loongarch_lasx_xvrepli_w:
1234   case Intrinsic::loongarch_lasx_xvrepli_d:
1235     return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1236   case Intrinsic::loongarch_lsx_vldi:
1237   case Intrinsic::loongarch_lasx_xvldi:
1238     return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1239   }
1240 }
1241 
1242 // Helper function that emits an error message for intrinsics with a chain and
1243 // returns the merge values of an UNDEF and the chain.
1244 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1245                                                   StringRef ErrorMsg,
1246                                                   SelectionDAG &DAG) {
1247   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1248   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1249                             SDLoc(Op));
1250 }
1251 
1252 SDValue
1253 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1254                                                 SelectionDAG &DAG) const {
1255   SDLoc DL(Op);
1256   MVT GRLenVT = Subtarget.getGRLenVT();
1257   EVT VT = Op.getValueType();
1258   SDValue Chain = Op.getOperand(0);
1259   const StringRef ErrorMsgOOR = "argument out of range";
1260   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1261   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1262 
1263   switch (Op.getConstantOperandVal(1)) {
1264   default:
1265     return Op;
1266   case Intrinsic::loongarch_crc_w_b_w:
1267   case Intrinsic::loongarch_crc_w_h_w:
1268   case Intrinsic::loongarch_crc_w_w_w:
1269   case Intrinsic::loongarch_crc_w_d_w:
1270   case Intrinsic::loongarch_crcc_w_b_w:
1271   case Intrinsic::loongarch_crcc_w_h_w:
1272   case Intrinsic::loongarch_crcc_w_w_w:
1273   case Intrinsic::loongarch_crcc_w_d_w:
1274     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1275   case Intrinsic::loongarch_csrrd_w:
1276   case Intrinsic::loongarch_csrrd_d: {
1277     unsigned Imm = Op.getConstantOperandVal(2);
1278     return !isUInt<14>(Imm)
1279                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1280                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1281                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1282   }
1283   case Intrinsic::loongarch_csrwr_w:
1284   case Intrinsic::loongarch_csrwr_d: {
1285     unsigned Imm = Op.getConstantOperandVal(3);
1286     return !isUInt<14>(Imm)
1287                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1288                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1289                              {Chain, Op.getOperand(2),
1290                               DAG.getConstant(Imm, DL, GRLenVT)});
1291   }
1292   case Intrinsic::loongarch_csrxchg_w:
1293   case Intrinsic::loongarch_csrxchg_d: {
1294     unsigned Imm = Op.getConstantOperandVal(4);
1295     return !isUInt<14>(Imm)
1296                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1297                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1298                              {Chain, Op.getOperand(2), Op.getOperand(3),
1299                               DAG.getConstant(Imm, DL, GRLenVT)});
1300   }
1301   case Intrinsic::loongarch_iocsrrd_d: {
1302     return DAG.getNode(
1303         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1304         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1305   }
1306 #define IOCSRRD_CASE(NAME, NODE)                                               \
1307   case Intrinsic::loongarch_##NAME: {                                          \
1308     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
1309                        {Chain, Op.getOperand(2)});                             \
1310   }
1311     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1312     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1313     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1314 #undef IOCSRRD_CASE
1315   case Intrinsic::loongarch_cpucfg: {
1316     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1317                        {Chain, Op.getOperand(2)});
1318   }
1319   case Intrinsic::loongarch_lddir_d: {
1320     unsigned Imm = Op.getConstantOperandVal(3);
1321     return !isUInt<8>(Imm)
1322                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1323                : Op;
1324   }
1325   case Intrinsic::loongarch_movfcsr2gr: {
1326     if (!Subtarget.hasBasicF())
1327       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1328     unsigned Imm = Op.getConstantOperandVal(2);
1329     return !isUInt<2>(Imm)
1330                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1331                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1332                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1333   }
1334   case Intrinsic::loongarch_lsx_vld:
1335   case Intrinsic::loongarch_lsx_vldrepl_b:
1336   case Intrinsic::loongarch_lasx_xvld:
1337   case Intrinsic::loongarch_lasx_xvldrepl_b:
1338     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1339                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1340                : SDValue();
1341   case Intrinsic::loongarch_lsx_vldrepl_h:
1342   case Intrinsic::loongarch_lasx_xvldrepl_h:
1343     return !isShiftedInt<11, 1>(
1344                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1345                ? emitIntrinsicWithChainErrorMessage(
1346                      Op, "argument out of range or not a multiple of 2", DAG)
1347                : SDValue();
1348   case Intrinsic::loongarch_lsx_vldrepl_w:
1349   case Intrinsic::loongarch_lasx_xvldrepl_w:
1350     return !isShiftedInt<10, 2>(
1351                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1352                ? emitIntrinsicWithChainErrorMessage(
1353                      Op, "argument out of range or not a multiple of 4", DAG)
1354                : SDValue();
1355   case Intrinsic::loongarch_lsx_vldrepl_d:
1356   case Intrinsic::loongarch_lasx_xvldrepl_d:
1357     return !isShiftedInt<9, 3>(
1358                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1359                ? emitIntrinsicWithChainErrorMessage(
1360                      Op, "argument out of range or not a multiple of 8", DAG)
1361                : SDValue();
1362   }
1363 }
1364 
1365 // Helper function that emits an error message for intrinsics with a void return
1366 // value and returns the chain.
1367 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1368                                          SelectionDAG &DAG) {
1369 
1370   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1371   return Op.getOperand(0);
1372 }
1373 
1374 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1375                                                      SelectionDAG &DAG) const {
1376   SDLoc DL(Op);
1377   MVT GRLenVT = Subtarget.getGRLenVT();
1378   SDValue Chain = Op.getOperand(0);
1379   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1380   SDValue Op2 = Op.getOperand(2);
1381   const StringRef ErrorMsgOOR = "argument out of range";
1382   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1383   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1384   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1385 
1386   switch (IntrinsicEnum) {
1387   default:
1388     // TODO: Add more Intrinsics.
1389     return SDValue();
1390   case Intrinsic::loongarch_cacop_d:
1391   case Intrinsic::loongarch_cacop_w: {
1392     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1393       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1394     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1395       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1396     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1397     unsigned Imm1 = Op2->getAsZExtVal();
1398     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1399     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1400       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1401     return Op;
1402   }
1403   case Intrinsic::loongarch_dbar: {
1404     unsigned Imm = Op2->getAsZExtVal();
1405     return !isUInt<15>(Imm)
1406                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1407                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1408                              DAG.getConstant(Imm, DL, GRLenVT));
1409   }
1410   case Intrinsic::loongarch_ibar: {
1411     unsigned Imm = Op2->getAsZExtVal();
1412     return !isUInt<15>(Imm)
1413                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1414                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1415                              DAG.getConstant(Imm, DL, GRLenVT));
1416   }
1417   case Intrinsic::loongarch_break: {
1418     unsigned Imm = Op2->getAsZExtVal();
1419     return !isUInt<15>(Imm)
1420                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1421                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1422                              DAG.getConstant(Imm, DL, GRLenVT));
1423   }
1424   case Intrinsic::loongarch_movgr2fcsr: {
1425     if (!Subtarget.hasBasicF())
1426       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1427     unsigned Imm = Op2->getAsZExtVal();
1428     return !isUInt<2>(Imm)
1429                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1430                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1431                              DAG.getConstant(Imm, DL, GRLenVT),
1432                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1433                                          Op.getOperand(3)));
1434   }
1435   case Intrinsic::loongarch_syscall: {
1436     unsigned Imm = Op2->getAsZExtVal();
1437     return !isUInt<15>(Imm)
1438                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1439                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1440                              DAG.getConstant(Imm, DL, GRLenVT));
1441   }
1442 #define IOCSRWR_CASE(NAME, NODE)                                               \
1443   case Intrinsic::loongarch_##NAME: {                                          \
1444     SDValue Op3 = Op.getOperand(3);                                            \
1445     return Subtarget.is64Bit()                                                 \
1446                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
1447                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
1448                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
1449                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
1450                              Op3);                                             \
1451   }
1452     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1453     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1454     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1455 #undef IOCSRWR_CASE
1456   case Intrinsic::loongarch_iocsrwr_d: {
1457     return !Subtarget.is64Bit()
1458                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1459                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1460                              Op2,
1461                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1462                                          Op.getOperand(3)));
1463   }
1464 #define ASRT_LE_GT_CASE(NAME)                                                  \
1465   case Intrinsic::loongarch_##NAME: {                                          \
1466     return !Subtarget.is64Bit()                                                \
1467                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
1468                : Op;                                                           \
1469   }
1470     ASRT_LE_GT_CASE(asrtle_d)
1471     ASRT_LE_GT_CASE(asrtgt_d)
1472 #undef ASRT_LE_GT_CASE
1473   case Intrinsic::loongarch_ldpte_d: {
1474     unsigned Imm = Op.getConstantOperandVal(3);
1475     return !Subtarget.is64Bit()
1476                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1477            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1478                              : Op;
1479   }
1480   case Intrinsic::loongarch_lsx_vst:
1481   case Intrinsic::loongarch_lasx_xvst:
1482     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1483                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1484                : SDValue();
1485   case Intrinsic::loongarch_lasx_xvstelm_b:
1486     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1487             !isUInt<5>(Op.getConstantOperandVal(5)))
1488                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1489                : SDValue();
1490   case Intrinsic::loongarch_lsx_vstelm_b:
1491     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1492             !isUInt<4>(Op.getConstantOperandVal(5)))
1493                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1494                : SDValue();
1495   case Intrinsic::loongarch_lasx_xvstelm_h:
1496     return (!isShiftedInt<8, 1>(
1497                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1498             !isUInt<4>(Op.getConstantOperandVal(5)))
1499                ? emitIntrinsicErrorMessage(
1500                      Op, "argument out of range or not a multiple of 2", DAG)
1501                : SDValue();
1502   case Intrinsic::loongarch_lsx_vstelm_h:
1503     return (!isShiftedInt<8, 1>(
1504                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1505             !isUInt<3>(Op.getConstantOperandVal(5)))
1506                ? emitIntrinsicErrorMessage(
1507                      Op, "argument out of range or not a multiple of 2", DAG)
1508                : SDValue();
1509   case Intrinsic::loongarch_lasx_xvstelm_w:
1510     return (!isShiftedInt<8, 2>(
1511                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1512             !isUInt<3>(Op.getConstantOperandVal(5)))
1513                ? emitIntrinsicErrorMessage(
1514                      Op, "argument out of range or not a multiple of 4", DAG)
1515                : SDValue();
1516   case Intrinsic::loongarch_lsx_vstelm_w:
1517     return (!isShiftedInt<8, 2>(
1518                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1519             !isUInt<2>(Op.getConstantOperandVal(5)))
1520                ? emitIntrinsicErrorMessage(
1521                      Op, "argument out of range or not a multiple of 4", DAG)
1522                : SDValue();
1523   case Intrinsic::loongarch_lasx_xvstelm_d:
1524     return (!isShiftedInt<8, 3>(
1525                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1526             !isUInt<2>(Op.getConstantOperandVal(5)))
1527                ? emitIntrinsicErrorMessage(
1528                      Op, "argument out of range or not a multiple of 8", DAG)
1529                : SDValue();
1530   case Intrinsic::loongarch_lsx_vstelm_d:
1531     return (!isShiftedInt<8, 3>(
1532                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1533             !isUInt<1>(Op.getConstantOperandVal(5)))
1534                ? emitIntrinsicErrorMessage(
1535                      Op, "argument out of range or not a multiple of 8", DAG)
1536                : SDValue();
1537   }
1538 }
1539 
1540 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1541                                                      SelectionDAG &DAG) const {
1542   SDLoc DL(Op);
1543   SDValue Lo = Op.getOperand(0);
1544   SDValue Hi = Op.getOperand(1);
1545   SDValue Shamt = Op.getOperand(2);
1546   EVT VT = Lo.getValueType();
1547 
1548   // if Shamt-GRLen < 0: // Shamt < GRLen
1549   //   Lo = Lo << Shamt
1550   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1551   // else:
1552   //   Lo = 0
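  //
  // For example (illustrative), with GRLen == 32:
  //   Shamt == 8:  Lo = Lo << 8,  Hi = (Hi << 8) | (Lo >>u 24)
  //   Shamt == 40: Lo = 0,        Hi = (original) Lo << 8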
1553   //   Hi = Lo << (Shamt-GRLen)
1554 
1555   SDValue Zero = DAG.getConstant(0, DL, VT);
1556   SDValue One = DAG.getConstant(1, DL, VT);
1557   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1558   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1559   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1560   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1561 
1562   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1563   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1564   SDValue ShiftRightLo =
1565       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1566   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1567   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1568   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1569 
1570   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1571 
1572   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1573   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1574 
1575   SDValue Parts[2] = {Lo, Hi};
1576   return DAG.getMergeValues(Parts, DL);
1577 }
1578 
1579 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1580                                                       SelectionDAG &DAG,
1581                                                       bool IsSRA) const {
1582   SDLoc DL(Op);
1583   SDValue Lo = Op.getOperand(0);
1584   SDValue Hi = Op.getOperand(1);
1585   SDValue Shamt = Op.getOperand(2);
1586   EVT VT = Lo.getValueType();
1587 
1588   // SRA expansion:
1589   //   if Shamt-GRLen < 0: // Shamt < GRLen
1590   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1591   //     Hi = Hi >>s Shamt
1592   //   else:
1593   //     Lo = Hi >>s (Shamt-GRLen);
1594   //     Hi = Hi >>s (GRLen-1)
1595   //
1596   // SRL expansion:
1597   //   if Shamt-GRLen < 0: // Shamt < GRLen
1598   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1599   //     Hi = Hi >>u Shamt
1600   //   else:
1601   //     Lo = Hi >>u (Shamt-GRLen);
1602   //     Hi = 0;
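  //
  // For example (illustrative), the SRA expansion with GRLen == 32:
  //   Shamt == 8:  Lo = (Lo >>u 8) | (Hi << 24),  Hi = Hi >>s 8
  //   Shamt == 40: Lo = Hi >>s 8,                 Hi = Hi >>s 31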
1603 
1604   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1605 
1606   SDValue Zero = DAG.getConstant(0, DL, VT);
1607   SDValue One = DAG.getConstant(1, DL, VT);
1608   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1609   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1610   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1611   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1612 
1613   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1614   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1615   SDValue ShiftLeftHi =
1616       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1617   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1618   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1619   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1620   SDValue HiFalse =
1621       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1622 
1623   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1624 
1625   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1626   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1627 
1628   SDValue Parts[2] = {Lo, Hi};
1629   return DAG.getMergeValues(Parts, DL);
1630 }
1631 
1632 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1633 // form of the given Opcode.
1634 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1635   switch (Opcode) {
1636   default:
1637     llvm_unreachable("Unexpected opcode");
1638   case ISD::SHL:
1639     return LoongArchISD::SLL_W;
1640   case ISD::SRA:
1641     return LoongArchISD::SRA_W;
1642   case ISD::SRL:
1643     return LoongArchISD::SRL_W;
1644   case ISD::ROTR:
1645     return LoongArchISD::ROTR_W;
1646   case ISD::ROTL:
1647     return LoongArchISD::ROTL_W;
1648   case ISD::CTTZ:
1649     return LoongArchISD::CTZ_W;
1650   case ISD::CTLZ:
1651     return LoongArchISD::CLZ_W;
1652   }
1653 }
1654 
1655 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1656 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1657 // otherwise be promoted to i64, making it difficult to later select the
1658 // SLL_W/.../*_W nodes because the fact that the operation was originally of
1659 // type i8/i16/i32 is lost.
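// For example (illustrative), an i32 ISD::SHL on LA64 is legalized here as:
//   (truncate i32 (LoongArchISD::SLL_W i64 (any_extend i64 a),
//                                          (any_extend i64 b)))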
1660 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1661                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1662   SDLoc DL(N);
1663   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1664   SDValue NewOp0, NewRes;
1665 
1666   switch (NumOp) {
1667   default:
1668     llvm_unreachable("Unexpected NumOp");
1669   case 1: {
1670     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1671     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1672     break;
1673   }
1674   case 2: {
1675     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1676     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1677     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1678     break;
1679   }
1680     // TODO: Handle more NumOp.
1681   }
1682 
1683   // ReplaceNodeResults requires we maintain the same type for the return
1684   // value.
1685   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1686 }
1687 
1688 // Helper function that emits an error message for intrinsics with or without
1689 // a chain, and returns a UNDEF (and the chain, if present) as the results.
1690 static void emitErrorAndReplaceIntrinsicResults(
1691     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1692     StringRef ErrorMsg, bool WithChain = true) {
1693   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1694   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1695   if (!WithChain)
1696     return;
1697   Results.push_back(N->getOperand(0));
1698 }
1699 
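// Replace an LSX/LASX element-to-GPR pick intrinsic with a VPICK_SEXT_ELT or
// VPICK_ZEXT_ELT node, after checking that the lane index immediate fits in N
// bits.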
1700 template <unsigned N>
1701 static void
1702 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1703                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1704                          unsigned ResOp) {
1705   const StringRef ErrorMsgOOR = "argument out of range";
1706   unsigned Imm = Node->getConstantOperandVal(2);
1707   if (!isUInt<N>(Imm)) {
1708     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1709                                         /*WithChain=*/false);
1710     return;
1711   }
1712   SDLoc DL(Node);
1713   SDValue Vec = Node->getOperand(1);
1714 
1715   SDValue PickElt =
1716       DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1717                   DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1718                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
1719   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1720                                 PickElt.getValue(0)));
1721 }
1722 
1723 static void replaceVecCondBranchResults(SDNode *N,
1724                                         SmallVectorImpl<SDValue> &Results,
1725                                         SelectionDAG &DAG,
1726                                         const LoongArchSubtarget &Subtarget,
1727                                         unsigned ResOp) {
1728   SDLoc DL(N);
1729   SDValue Vec = N->getOperand(1);
1730 
1731   SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1732   Results.push_back(
1733       DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1734 }
1735 
1736 static void
1737 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1738                                  SelectionDAG &DAG,
1739                                  const LoongArchSubtarget &Subtarget) {
1740   switch (N->getConstantOperandVal(0)) {
1741   default:
1742     llvm_unreachable("Unexpected Intrinsic.");
1743   case Intrinsic::loongarch_lsx_vpickve2gr_b:
1744     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1745                                 LoongArchISD::VPICK_SEXT_ELT);
1746     break;
1747   case Intrinsic::loongarch_lsx_vpickve2gr_h:
1748   case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1749     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1750                                 LoongArchISD::VPICK_SEXT_ELT);
1751     break;
1752   case Intrinsic::loongarch_lsx_vpickve2gr_w:
1753     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1754                                 LoongArchISD::VPICK_SEXT_ELT);
1755     break;
1756   case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1757     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1758                                 LoongArchISD::VPICK_ZEXT_ELT);
1759     break;
1760   case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1761   case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1762     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1763                                 LoongArchISD::VPICK_ZEXT_ELT);
1764     break;
1765   case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1766     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1767                                 LoongArchISD::VPICK_ZEXT_ELT);
1768     break;
1769   case Intrinsic::loongarch_lsx_bz_b:
1770   case Intrinsic::loongarch_lsx_bz_h:
1771   case Intrinsic::loongarch_lsx_bz_w:
1772   case Intrinsic::loongarch_lsx_bz_d:
1773   case Intrinsic::loongarch_lasx_xbz_b:
1774   case Intrinsic::loongarch_lasx_xbz_h:
1775   case Intrinsic::loongarch_lasx_xbz_w:
1776   case Intrinsic::loongarch_lasx_xbz_d:
1777     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1778                                 LoongArchISD::VALL_ZERO);
1779     break;
1780   case Intrinsic::loongarch_lsx_bz_v:
1781   case Intrinsic::loongarch_lasx_xbz_v:
1782     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1783                                 LoongArchISD::VANY_ZERO);
1784     break;
1785   case Intrinsic::loongarch_lsx_bnz_b:
1786   case Intrinsic::loongarch_lsx_bnz_h:
1787   case Intrinsic::loongarch_lsx_bnz_w:
1788   case Intrinsic::loongarch_lsx_bnz_d:
1789   case Intrinsic::loongarch_lasx_xbnz_b:
1790   case Intrinsic::loongarch_lasx_xbnz_h:
1791   case Intrinsic::loongarch_lasx_xbnz_w:
1792   case Intrinsic::loongarch_lasx_xbnz_d:
1793     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1794                                 LoongArchISD::VALL_NONZERO);
1795     break;
1796   case Intrinsic::loongarch_lsx_bnz_v:
1797   case Intrinsic::loongarch_lasx_xbnz_v:
1798     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1799                                 LoongArchISD::VANY_NONZERO);
1800     break;
1801   }
1802 }
1803 
1804 void LoongArchTargetLowering::ReplaceNodeResults(
1805     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1806   SDLoc DL(N);
1807   EVT VT = N->getValueType(0);
1808   switch (N->getOpcode()) {
1809   default:
1810     llvm_unreachable("Don't know how to legalize this operation");
1811   case ISD::SHL:
1812   case ISD::SRA:
1813   case ISD::SRL:
1814   case ISD::ROTR:
1815     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1816            "Unexpected custom legalisation");
1817     if (N->getOperand(1).getOpcode() != ISD::Constant) {
1818       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1819       break;
1820     }
1821     break;
1822   case ISD::ROTL:
1823     ConstantSDNode *CN;
1824     if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1825       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1826       break;
1827     }
1828     break;
1829   case ISD::FP_TO_SINT: {
1830     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1831            "Unexpected custom legalisation");
1832     SDValue Src = N->getOperand(0);
1833     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1834     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1835         TargetLowering::TypeSoftenFloat) {
1836       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1837       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1838       return;
1839     }
1840     // If the FP type needs to be softened, emit a library call using the 'si'
1841     // version. If we left it to default legalization we'd end up with 'di'.
1842     RTLIB::Libcall LC;
1843     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1844     MakeLibCallOptions CallOptions;
1845     EVT OpVT = Src.getValueType();
1846     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1847     SDValue Chain = SDValue();
1848     SDValue Result;
1849     std::tie(Result, Chain) =
1850         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1851     Results.push_back(Result);
1852     break;
1853   }
1854   case ISD::BITCAST: {
1855     SDValue Src = N->getOperand(0);
1856     EVT SrcVT = Src.getValueType();
1857     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1858         Subtarget.hasBasicF()) {
1859       SDValue Dst =
1860           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1861       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1862     }
1863     break;
1864   }
1865   case ISD::FP_TO_UINT: {
1866     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1867            "Unexpected custom legalisation");
1868     auto &TLI = DAG.getTargetLoweringInfo();
1869     SDValue Tmp1, Tmp2;
1870     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1871     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1872     break;
1873   }
1874   case ISD::BSWAP: {
1875     SDValue Src = N->getOperand(0);
1876     assert((VT == MVT::i16 || VT == MVT::i32) &&
1877            "Unexpected custom legalization");
1878     MVT GRLenVT = Subtarget.getGRLenVT();
1879     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1880     SDValue Tmp;
1881     switch (VT.getSizeInBits()) {
1882     default:
1883       llvm_unreachable("Unexpected operand width");
1884     case 16:
1885       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1886       break;
1887     case 32:
1888       // Only LA64 will get here, due to the size mismatch between VT and
1889       // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
1890       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1891       break;
1892     }
1893     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1894     break;
1895   }
1896   case ISD::BITREVERSE: {
1897     SDValue Src = N->getOperand(0);
1898     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1899            "Unexpected custom legalization");
1900     MVT GRLenVT = Subtarget.getGRLenVT();
1901     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1902     SDValue Tmp;
1903     switch (VT.getSizeInBits()) {
1904     default:
1905       llvm_unreachable("Unexpected operand width");
1906     case 8:
1907       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1908       break;
1909     case 32:
1910       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1911       break;
1912     }
1913     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1914     break;
1915   }
1916   case ISD::CTLZ:
1917   case ISD::CTTZ: {
1918     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1919            "Unexpected custom legalisation");
1920     Results.push_back(customLegalizeToWOp(N, DAG, 1));
1921     break;
1922   }
1923   case ISD::INTRINSIC_W_CHAIN: {
1924     SDValue Chain = N->getOperand(0);
1925     SDValue Op2 = N->getOperand(2);
1926     MVT GRLenVT = Subtarget.getGRLenVT();
1927     const StringRef ErrorMsgOOR = "argument out of range";
1928     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1929     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1930 
1931     switch (N->getConstantOperandVal(1)) {
1932     default:
1933       llvm_unreachable("Unexpected Intrinsic.");
1934     case Intrinsic::loongarch_movfcsr2gr: {
1935       if (!Subtarget.hasBasicF()) {
1936         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1937         return;
1938       }
1939       unsigned Imm = Op2->getAsZExtVal();
1940       if (!isUInt<2>(Imm)) {
1941         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1942         return;
1943       }
1944       SDValue MOVFCSR2GRResults = DAG.getNode(
1945           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1946           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1947       Results.push_back(
1948           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1949       Results.push_back(MOVFCSR2GRResults.getValue(1));
1950       break;
1951     }
1952 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
1953   case Intrinsic::loongarch_##NAME: {                                          \
1954     SDValue NODE = DAG.getNode(                                                \
1955         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1956         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
1957          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1958     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1959     Results.push_back(NODE.getValue(1));                                       \
1960     break;                                                                     \
1961   }
1962       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1963       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1964       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1965       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1966       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1967       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1968 #undef CRC_CASE_EXT_BINARYOP
1969 
1970 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
1971   case Intrinsic::loongarch_##NAME: {                                          \
1972     SDValue NODE = DAG.getNode(                                                \
1973         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1974         {Chain, Op2,                                                           \
1975          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1976     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1977     Results.push_back(NODE.getValue(1));                                       \
1978     break;                                                                     \
1979   }
1980       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1981       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1982 #undef CRC_CASE_EXT_UNARYOP
1983 #define CSR_CASE(ID)                                                           \
1984   case Intrinsic::loongarch_##ID: {                                            \
1985     if (!Subtarget.is64Bit())                                                  \
1986       emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
1987     break;                                                                     \
1988   }
1989       CSR_CASE(csrrd_d);
1990       CSR_CASE(csrwr_d);
1991       CSR_CASE(csrxchg_d);
1992       CSR_CASE(iocsrrd_d);
1993 #undef CSR_CASE
1994     case Intrinsic::loongarch_csrrd_w: {
1995       unsigned Imm = Op2->getAsZExtVal();
1996       if (!isUInt<14>(Imm)) {
1997         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1998         return;
1999       }
2000       SDValue CSRRDResults =
2001           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2002                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2003       Results.push_back(
2004           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2005       Results.push_back(CSRRDResults.getValue(1));
2006       break;
2007     }
2008     case Intrinsic::loongarch_csrwr_w: {
2009       unsigned Imm = N->getConstantOperandVal(3);
2010       if (!isUInt<14>(Imm)) {
2011         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2012         return;
2013       }
2014       SDValue CSRWRResults =
2015           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2016                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2017                        DAG.getConstant(Imm, DL, GRLenVT)});
2018       Results.push_back(
2019           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2020       Results.push_back(CSRWRResults.getValue(1));
2021       break;
2022     }
2023     case Intrinsic::loongarch_csrxchg_w: {
2024       unsigned Imm = N->getConstantOperandVal(4);
2025       if (!isUInt<14>(Imm)) {
2026         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2027         return;
2028       }
2029       SDValue CSRXCHGResults = DAG.getNode(
2030           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2031           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2032            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2033            DAG.getConstant(Imm, DL, GRLenVT)});
2034       Results.push_back(
2035           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2036       Results.push_back(CSRXCHGResults.getValue(1));
2037       break;
2038     }
2039 #define IOCSRRD_CASE(NAME, NODE)                                               \
2040   case Intrinsic::loongarch_##NAME: {                                          \
2041     SDValue IOCSRRDResults =                                                   \
2042         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
2043                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2044     Results.push_back(                                                         \
2045         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
2046     Results.push_back(IOCSRRDResults.getValue(1));                             \
2047     break;                                                                     \
2048   }
2049       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2050       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2051       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2052 #undef IOCSRRD_CASE
2053     case Intrinsic::loongarch_cpucfg: {
2054       SDValue CPUCFGResults =
2055           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2056                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2057       Results.push_back(
2058           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2059       Results.push_back(CPUCFGResults.getValue(1));
2060       break;
2061     }
2062     case Intrinsic::loongarch_lddir_d: {
2063       if (!Subtarget.is64Bit()) {
2064         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2065         return;
2066       }
2067       break;
2068     }
2069     }
2070     break;
2071   }
2072   case ISD::READ_REGISTER: {
2073     if (Subtarget.is64Bit())
2074       DAG.getContext()->emitError(
2075           "On LA64, only 64-bit registers can be read.");
2076     else
2077       DAG.getContext()->emitError(
2078           "On LA32, only 32-bit registers can be read.");
2079     Results.push_back(DAG.getUNDEF(VT));
2080     Results.push_back(N->getOperand(0));
2081     break;
2082   }
2083   case ISD::INTRINSIC_WO_CHAIN: {
2084     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2085     break;
2086   }
2087   }
2088 }
2089 
2090 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2091                                  TargetLowering::DAGCombinerInfo &DCI,
2092                                  const LoongArchSubtarget &Subtarget) {
2093   if (DCI.isBeforeLegalizeOps())
2094     return SDValue();
2095 
2096   SDValue FirstOperand = N->getOperand(0);
2097   SDValue SecondOperand = N->getOperand(1);
2098   unsigned FirstOperandOpc = FirstOperand.getOpcode();
2099   EVT ValTy = N->getValueType(0);
2100   SDLoc DL(N);
2101   uint64_t lsb, msb;
2102   unsigned SMIdx, SMLen;
2103   ConstantSDNode *CN;
2104   SDValue NewOperand;
2105   MVT GRLenVT = Subtarget.getGRLenVT();
2106 
2107   // Op's second operand must be a shifted mask.
2108   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2109       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2110     return SDValue();
2111 
2112   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2113     // Pattern match BSTRPICK.
2114     //  $dst = and ((sra or srl) $src, lsb), (2**len - 1)
2115     //  => BSTRPICK $dst, $src, msb, lsb
2116     //  where msb = lsb + len - 1
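    //
    // For example (illustrative):
    //  $dst = and (srl $src, 4), 0xff  =>  BSTRPICK $dst, $src, 11, 4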
2117 
2118     // The second operand of the shift must be an immediate.
2119     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2120       return SDValue();
2121 
2122     lsb = CN->getZExtValue();
2123 
2124     // Return if the shifted mask does not start at bit 0 or the sum of its
2125     // length and lsb exceeds the word's size.
2126     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2127       return SDValue();
2128 
2129     NewOperand = FirstOperand.getOperand(0);
2130   } else {
2131     // Pattern match BSTRPICK.
2132     //  $dst = and $src, (2**len - 1), if len > 12
2133     //  => BSTRPICK $dst, $src, msb, lsb
2134     //  where lsb = 0 and msb = len - 1
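    //
    // For example (illustrative):
    //  $dst = and $src, 0xffff  =>  BSTRPICK $dst, $src, 15, 0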
2135 
2136     // If the mask is <= 0xfff, andi can be used instead.
2137     if (CN->getZExtValue() <= 0xfff)
2138       return SDValue();
2139 
2140     // Return if the shifted mask extends beyond the word's size.
2141     if (SMIdx + SMLen > ValTy.getSizeInBits())
2142       return SDValue();
2143 
2144     if (SMIdx > 0) {
2145       // Omit if the constant has more than 2 uses. This is a conservative
2146       // decision. Whether it is a win depends on the HW microarchitecture.
2147       // However it should always be better for 1 and 2 uses.
2148       if (CN->use_size() > 2)
2149         return SDValue();
2150       // Return if the constant can be composed by a single LU12I.W.
2151       if ((CN->getZExtValue() & 0xfff) == 0)
2152         return SDValue();
2153       // Return if the constant can be composed by a single ADDI with
2154       // the zero register.
2155       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2156         return SDValue();
2157     }
2158 
2159     lsb = SMIdx;
2160     NewOperand = FirstOperand;
2161   }
2162 
2163   msb = lsb + SMLen - 1;
2164   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2165                             DAG.getConstant(msb, DL, GRLenVT),
2166                             DAG.getConstant(lsb, DL, GRLenVT));
2167   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2168     return NR0;
2169   // Try to optimize to
2170   //   bstrpick $Rd, $Rs, msb, lsb
2171   //   slli     $Rd, $Rd, lsb
2172   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2173                      DAG.getConstant(lsb, DL, GRLenVT));
2174 }
2175 
2176 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2177                                  TargetLowering::DAGCombinerInfo &DCI,
2178                                  const LoongArchSubtarget &Subtarget) {
2179   if (DCI.isBeforeLegalizeOps())
2180     return SDValue();
2181 
2182   // $dst = srl (and $src, Mask), Shamt
2183   // =>
2184   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2185   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2186   //
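  // For example (illustrative):
  //   $dst = srl (and $src, 0xff0), 4  =>  BSTRPICK $dst, $src, 11, 4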
2187 
2188   SDValue FirstOperand = N->getOperand(0);
2189   ConstantSDNode *CN;
2190   EVT ValTy = N->getValueType(0);
2191   SDLoc DL(N);
2192   MVT GRLenVT = Subtarget.getGRLenVT();
2193   unsigned MaskIdx, MaskLen;
2194   uint64_t Shamt;
2195 
2196   // The first operand must be an AND and the second operand of the AND must be
2197   // a shifted mask.
2198   if (FirstOperand.getOpcode() != ISD::AND ||
2199       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2200       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2201     return SDValue();
2202 
2203   // The second operand (shift amount) must be an immediate.
2204   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2205     return SDValue();
2206 
2207   Shamt = CN->getZExtValue();
2208   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2209     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2210                        FirstOperand->getOperand(0),
2211                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2212                        DAG.getConstant(Shamt, DL, GRLenVT));
2213 
2214   return SDValue();
2215 }
2216 
2217 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2218                                 TargetLowering::DAGCombinerInfo &DCI,
2219                                 const LoongArchSubtarget &Subtarget) {
2220   MVT GRLenVT = Subtarget.getGRLenVT();
2221   EVT ValTy = N->getValueType(0);
2222   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2223   ConstantSDNode *CN0, *CN1;
2224   SDLoc DL(N);
2225   unsigned ValBits = ValTy.getSizeInBits();
2226   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2227   unsigned Shamt;
2228   bool SwapAndRetried = false;
2229 
2230   if (DCI.isBeforeLegalizeOps())
2231     return SDValue();
2232 
2233   if (ValBits != 32 && ValBits != 64)
2234     return SDValue();
2235 
2236 Retry:
2237   // 1st pattern to match BSTRINS:
2238   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
2239   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2240   //  =>
2241   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
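  //
  //  For example (illustrative), with a 32-bit value, size == 8 and lsb == 8:
  //   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
  //   =>  R = BSTRINS X, Y, 15, 8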
2242   if (N0.getOpcode() == ISD::AND &&
2243       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2244       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2245       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2246       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2247       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2248       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2249       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2250       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2251       (MaskIdx0 + MaskLen0 <= ValBits)) {
2252     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2253     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2254                        N1.getOperand(0).getOperand(0),
2255                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2256                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2257   }
2258 
2259   // 2nd pattern to match BSTRINS:
2260   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
2261   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2262   //  =>
2263   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
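  //
  //  For example (illustrative), with a 32-bit value, size == 8 and lsb == 8:
  //   R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
  //   =>  R = BSTRINS X, Y, 15, 8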
2264   if (N0.getOpcode() == ISD::AND &&
2265       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2266       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2267       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2268       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2269       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2270       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2271       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2272       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2273       (MaskIdx0 + MaskLen0 <= ValBits)) {
2274     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2275     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2276                        N1.getOperand(0).getOperand(0),
2277                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2278                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2279   }
2280 
2281   // 3rd pattern to match BSTRINS:
2282   //  R = or (and X, mask0), (and Y, mask1)
2283   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2284   //  =>
2285   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2286   //  where msb = lsb + size - 1
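  //
  //  For example (illustrative), with mask0 == 0xffff00ff and mask1 == 0xff00:
  //   R = or (and X, 0xffff00ff), (and Y, 0xff00)
  //   =>  R = BSTRINS X, (srl (and Y, 0xff00), 8), 15, 8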
2287   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2288       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2289       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2290       (MaskIdx0 + MaskLen0 <= 64) &&
2291       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2292       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2293     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2294     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2295                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2296                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2297                        DAG.getConstant(ValBits == 32
2298                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2299                                            : (MaskIdx0 + MaskLen0 - 1),
2300                                        DL, GRLenVT),
2301                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2302   }
2303 
2304   // 4th pattern to match BSTRINS:
2305   //  R = or (and X, mask), (shl Y, shamt)
2306   //  where mask = (2**shamt - 1)
2307   //  =>
2308   //  R = BSTRINS X, Y, ValBits - 1, shamt
2309   //  where ValBits = 32 or 64
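  //
  //  For example (illustrative), with a 32-bit value and shamt == 12:
  //   R = or (and X, 0xfff), (shl Y, 12)  =>  R = BSTRINS X, Y, 31, 12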
2310   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2311       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2312       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2313       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2314       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2315       (MaskIdx0 + MaskLen0 <= ValBits)) {
2316     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2317     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2318                        N1.getOperand(0),
2319                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
2320                        DAG.getConstant(Shamt, DL, GRLenVT));
2321   }
2322 
2323   // 5th pattern to match BSTRINS:
2324   //  R = or (and X, mask), const
2325   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
2326   //  =>
2327   //  R = BSTRINS X, (const >> lsb), msb, lsb
2328   //  where msb = lsb + size - 1
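  //
  //  For example (illustrative), with mask == 0xffff00ff and const == 0x1200:
  //   R = or (and X, 0xffff00ff), 0x1200  =>  R = BSTRINS X, 0x12, 15, 8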
2329   if (N0.getOpcode() == ISD::AND &&
2330       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2331       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2332       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2333       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2334     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2335     return DAG.getNode(
2336         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2337         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2338         DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2339         DAG.getConstant(MaskIdx0, DL, GRLenVT));
2340   }
2341 
2342   // 6th pattern.
2343   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2344   // by the incoming bits are known to be zero.
2345   // =>
2346   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2347   //
2348   // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2349   // pattern is more general than the 1st. So we put the 1st before the 6th in
2350   // order to match as many nodes as possible.
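  //
  // For example (illustrative), with mask == 0xff and shamt == 16, provided
  // bits 23:16 of b are known to be zero:
  //   a = b | ((c & 0xff) << 16)  =>  a = BSTRINS b, c, 23, 16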
2351   ConstantSDNode *CNMask, *CNShamt;
2352   unsigned MaskIdx, MaskLen;
2353   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2354       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2355       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2356       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2357       CNShamt->getZExtValue() + MaskLen <= ValBits) {
2358     Shamt = CNShamt->getZExtValue();
2359     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2360     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2361       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2362       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2363                          N1.getOperand(0).getOperand(0),
2364                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2365                          DAG.getConstant(Shamt, DL, GRLenVT));
2366     }
2367   }
2368 
2369   // 7th pattern.
2370   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2371   // overwritten by the incoming bits are known to be zero.
2372   // =>
2373   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2374   //
2375   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2376   // before the 7th in order to match as many nodes as possible.
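  //
  // For example (illustrative), with shifted_mask == 0xff0000 and shamt == 16,
  // provided bits 23:16 of b are known to be zero:
  //   a = b | ((c << 16) & 0xff0000)  =>  a = BSTRINS b, c, 23, 16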
2377   if (N1.getOpcode() == ISD::AND &&
2378       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2379       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2380       N1.getOperand(0).getOpcode() == ISD::SHL &&
2381       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2382       CNShamt->getZExtValue() == MaskIdx) {
2383     APInt ShMask(ValBits, CNMask->getZExtValue());
2384     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2385       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2386       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2387                          N1.getOperand(0).getOperand(0),
2388                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2389                          DAG.getConstant(MaskIdx, DL, GRLenVT));
2390     }
2391   }
2392 
2393   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2394   if (!SwapAndRetried) {
2395     std::swap(N0, N1);
2396     SwapAndRetried = true;
2397     goto Retry;
2398   }
2399 
2400   SwapAndRetried = false;
2401 Retry2:
2402   // 8th pattern.
2403   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2404   // the incoming bits are known to be zero.
2405   // =>
2406   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2407   //
2408   // Similarly, the 8th pattern is more common than the 4th and 5th patterns, so
2409   // we put it here in order to match as many nodes as possible or generate fewer
2410   // instructions.
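  //
  // For example (illustrative), with shifted_mask == 0xff00, provided bits
  // 15:8 of b are known to be zero:
  //   a = b | (c & 0xff00)  =>  a = BSTRINS b, (c >> 8), 15, 8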
2411   if (N1.getOpcode() == ISD::AND &&
2412       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2413       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2414     APInt ShMask(ValBits, CNMask->getZExtValue());
2415     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2416       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2417       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2418                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2419                                      N1->getOperand(0),
2420                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
2421                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2422                          DAG.getConstant(MaskIdx, DL, GRLenVT));
2423     }
2424   }
2425   // Swap N0/N1 and retry.
2426   if (!SwapAndRetried) {
2427     std::swap(N0, N1);
2428     SwapAndRetried = true;
2429     goto Retry2;
2430   }
2431 
2432   return SDValue();
2433 }
2434 
2435 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
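// Reversing the byte order within each word and then reversing all bits of the
// word is equivalent to reversing the bits within each byte of the original
// value, which is exactly what BITREV_4B computes.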
2436 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2437                                       TargetLowering::DAGCombinerInfo &DCI,
2438                                       const LoongArchSubtarget &Subtarget) {
2439   if (DCI.isBeforeLegalizeOps())
2440     return SDValue();
2441 
2442   SDValue Src = N->getOperand(0);
2443   if (Src.getOpcode() != LoongArchISD::REVB_2W)
2444     return SDValue();
2445 
2446   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2447                      Src.getOperand(0));
2448 }
2449 
2450 template <unsigned N>
2451 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2452                                        SelectionDAG &DAG,
2453                                        const LoongArchSubtarget &Subtarget,
2454                                        bool IsSigned = false) {
2455   SDLoc DL(Node);
2456   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2457   // Check the ImmArg.
2458   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2459       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2460     DAG.getContext()->emitError(Node->getOperationName(0) +
2461                                 ": argument out of range.");
2462     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2463   }
2464   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2465 }
2466 
2467 template <unsigned N>
2468 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2469                                    SelectionDAG &DAG, bool IsSigned = false) {
2470   SDLoc DL(Node);
2471   EVT ResTy = Node->getValueType(0);
2472   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2473 
2474   // Check the ImmArg.
2475   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2476       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2477     DAG.getContext()->emitError(Node->getOperationName(0) +
2478                                 ": argument out of range.");
2479     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2480   }
2481   return DAG.getConstant(
2482       APInt(ResTy.getScalarType().getSizeInBits(),
2483             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2484       DL, ResTy);
2485 }
2486 
2487 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2488   SDLoc DL(Node);
2489   EVT ResTy = Node->getValueType(0);
2490   SDValue Vec = Node->getOperand(2);
2491   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2492   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2493 }
2494 
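// Lower a vector bit-clear operation: in each element of the first vector
// operand, clear the bit selected by the corresponding element of the second
// operand (taken modulo the element width), i.e. and(v, ~(1 << (b % EltBits))).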
2495 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2496   SDLoc DL(Node);
2497   EVT ResTy = Node->getValueType(0);
2498   SDValue One = DAG.getConstant(1, DL, ResTy);
2499   SDValue Bit =
2500       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2501 
2502   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2503                      DAG.getNOT(DL, Bit, ResTy));
2504 }
2505 
2506 template <unsigned N>
2507 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2508   SDLoc DL(Node);
2509   EVT ResTy = Node->getValueType(0);
2510   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2511   // Check the unsigned ImmArg.
2512   if (!isUInt<N>(CImm->getZExtValue())) {
2513     DAG.getContext()->emitError(Node->getOperationName(0) +
2514                                 ": argument out of range.");
2515     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2516   }
2517 
2518   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2519   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2520 
2521   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2522 }
2523 
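// Set bit CImm in every element of operand 1 (vbitseti), with the same
// immediate range check.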
2524 template <unsigned N>
2525 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2526   SDLoc DL(Node);
2527   EVT ResTy = Node->getValueType(0);
2528   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2529   // Check the unsigned ImmArg.
2530   if (!isUInt<N>(CImm->getZExtValue())) {
2531     DAG.getContext()->emitError(Node->getOperationName(0) +
2532                                 ": argument out of range.");
2533     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2534   }
2535 
2536   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2537   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2538   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2539 }
2540 
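// Toggle bit CImm in every element of operand 1 (vbitrevi), with the same
// immediate range check.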
2541 template <unsigned N>
2542 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2543   SDLoc DL(Node);
2544   EVT ResTy = Node->getValueType(0);
2545   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2546   // Check the unsigned ImmArg.
2547   if (!isUInt<N>(CImm->getZExtValue())) {
2548     DAG.getContext()->emitError(Node->getOperationName(0) +
2549                                 ": argument out of range.");
2550     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2551   }
2552 
2553   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2554   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2555   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2556 }
2557 
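// Replace element-wise LSX/LASX intrinsics with the equivalent generic ISD
// nodes so that later combines and instruction selection can optimize them.
// For example (illustrative), a call to @llvm.loongarch.lsx.vadd.w becomes a
// plain ISD::ADD on v4i32, and the immediate variants are rebuilt with a
// splatted constant operand.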
2558 static SDValue
2559 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2560                                  TargetLowering::DAGCombinerInfo &DCI,
2561                                  const LoongArchSubtarget &Subtarget) {
2562   SDLoc DL(N);
2563   switch (N->getConstantOperandVal(0)) {
2564   default:
2565     break;
2566   case Intrinsic::loongarch_lsx_vadd_b:
2567   case Intrinsic::loongarch_lsx_vadd_h:
2568   case Intrinsic::loongarch_lsx_vadd_w:
2569   case Intrinsic::loongarch_lsx_vadd_d:
2570   case Intrinsic::loongarch_lasx_xvadd_b:
2571   case Intrinsic::loongarch_lasx_xvadd_h:
2572   case Intrinsic::loongarch_lasx_xvadd_w:
2573   case Intrinsic::loongarch_lasx_xvadd_d:
2574     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2575                        N->getOperand(2));
2576   case Intrinsic::loongarch_lsx_vaddi_bu:
2577   case Intrinsic::loongarch_lsx_vaddi_hu:
2578   case Intrinsic::loongarch_lsx_vaddi_wu:
2579   case Intrinsic::loongarch_lsx_vaddi_du:
2580   case Intrinsic::loongarch_lasx_xvaddi_bu:
2581   case Intrinsic::loongarch_lasx_xvaddi_hu:
2582   case Intrinsic::loongarch_lasx_xvaddi_wu:
2583   case Intrinsic::loongarch_lasx_xvaddi_du:
2584     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2585                        lowerVectorSplatImm<5>(N, 2, DAG));
2586   case Intrinsic::loongarch_lsx_vsub_b:
2587   case Intrinsic::loongarch_lsx_vsub_h:
2588   case Intrinsic::loongarch_lsx_vsub_w:
2589   case Intrinsic::loongarch_lsx_vsub_d:
2590   case Intrinsic::loongarch_lasx_xvsub_b:
2591   case Intrinsic::loongarch_lasx_xvsub_h:
2592   case Intrinsic::loongarch_lasx_xvsub_w:
2593   case Intrinsic::loongarch_lasx_xvsub_d:
2594     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2595                        N->getOperand(2));
2596   case Intrinsic::loongarch_lsx_vsubi_bu:
2597   case Intrinsic::loongarch_lsx_vsubi_hu:
2598   case Intrinsic::loongarch_lsx_vsubi_wu:
2599   case Intrinsic::loongarch_lsx_vsubi_du:
2600   case Intrinsic::loongarch_lasx_xvsubi_bu:
2601   case Intrinsic::loongarch_lasx_xvsubi_hu:
2602   case Intrinsic::loongarch_lasx_xvsubi_wu:
2603   case Intrinsic::loongarch_lasx_xvsubi_du:
2604     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2605                        lowerVectorSplatImm<5>(N, 2, DAG));
2606   case Intrinsic::loongarch_lsx_vneg_b:
2607   case Intrinsic::loongarch_lsx_vneg_h:
2608   case Intrinsic::loongarch_lsx_vneg_w:
2609   case Intrinsic::loongarch_lsx_vneg_d:
2610   case Intrinsic::loongarch_lasx_xvneg_b:
2611   case Intrinsic::loongarch_lasx_xvneg_h:
2612   case Intrinsic::loongarch_lasx_xvneg_w:
2613   case Intrinsic::loongarch_lasx_xvneg_d:
2614     return DAG.getNode(
2615         ISD::SUB, DL, N->getValueType(0),
2616         DAG.getConstant(
2617             APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2618                   /*isSigned=*/true),
2619             SDLoc(N), N->getValueType(0)),
2620         N->getOperand(1));
2621   case Intrinsic::loongarch_lsx_vmax_b:
2622   case Intrinsic::loongarch_lsx_vmax_h:
2623   case Intrinsic::loongarch_lsx_vmax_w:
2624   case Intrinsic::loongarch_lsx_vmax_d:
2625   case Intrinsic::loongarch_lasx_xvmax_b:
2626   case Intrinsic::loongarch_lasx_xvmax_h:
2627   case Intrinsic::loongarch_lasx_xvmax_w:
2628   case Intrinsic::loongarch_lasx_xvmax_d:
2629     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2630                        N->getOperand(2));
2631   case Intrinsic::loongarch_lsx_vmax_bu:
2632   case Intrinsic::loongarch_lsx_vmax_hu:
2633   case Intrinsic::loongarch_lsx_vmax_wu:
2634   case Intrinsic::loongarch_lsx_vmax_du:
2635   case Intrinsic::loongarch_lasx_xvmax_bu:
2636   case Intrinsic::loongarch_lasx_xvmax_hu:
2637   case Intrinsic::loongarch_lasx_xvmax_wu:
2638   case Intrinsic::loongarch_lasx_xvmax_du:
2639     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2640                        N->getOperand(2));
2641   case Intrinsic::loongarch_lsx_vmaxi_b:
2642   case Intrinsic::loongarch_lsx_vmaxi_h:
2643   case Intrinsic::loongarch_lsx_vmaxi_w:
2644   case Intrinsic::loongarch_lsx_vmaxi_d:
2645   case Intrinsic::loongarch_lasx_xvmaxi_b:
2646   case Intrinsic::loongarch_lasx_xvmaxi_h:
2647   case Intrinsic::loongarch_lasx_xvmaxi_w:
2648   case Intrinsic::loongarch_lasx_xvmaxi_d:
2649     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2650                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2651   case Intrinsic::loongarch_lsx_vmaxi_bu:
2652   case Intrinsic::loongarch_lsx_vmaxi_hu:
2653   case Intrinsic::loongarch_lsx_vmaxi_wu:
2654   case Intrinsic::loongarch_lsx_vmaxi_du:
2655   case Intrinsic::loongarch_lasx_xvmaxi_bu:
2656   case Intrinsic::loongarch_lasx_xvmaxi_hu:
2657   case Intrinsic::loongarch_lasx_xvmaxi_wu:
2658   case Intrinsic::loongarch_lasx_xvmaxi_du:
2659     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2660                        lowerVectorSplatImm<5>(N, 2, DAG));
2661   case Intrinsic::loongarch_lsx_vmin_b:
2662   case Intrinsic::loongarch_lsx_vmin_h:
2663   case Intrinsic::loongarch_lsx_vmin_w:
2664   case Intrinsic::loongarch_lsx_vmin_d:
2665   case Intrinsic::loongarch_lasx_xvmin_b:
2666   case Intrinsic::loongarch_lasx_xvmin_h:
2667   case Intrinsic::loongarch_lasx_xvmin_w:
2668   case Intrinsic::loongarch_lasx_xvmin_d:
2669     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2670                        N->getOperand(2));
2671   case Intrinsic::loongarch_lsx_vmin_bu:
2672   case Intrinsic::loongarch_lsx_vmin_hu:
2673   case Intrinsic::loongarch_lsx_vmin_wu:
2674   case Intrinsic::loongarch_lsx_vmin_du:
2675   case Intrinsic::loongarch_lasx_xvmin_bu:
2676   case Intrinsic::loongarch_lasx_xvmin_hu:
2677   case Intrinsic::loongarch_lasx_xvmin_wu:
2678   case Intrinsic::loongarch_lasx_xvmin_du:
2679     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2680                        N->getOperand(2));
2681   case Intrinsic::loongarch_lsx_vmini_b:
2682   case Intrinsic::loongarch_lsx_vmini_h:
2683   case Intrinsic::loongarch_lsx_vmini_w:
2684   case Intrinsic::loongarch_lsx_vmini_d:
2685   case Intrinsic::loongarch_lasx_xvmini_b:
2686   case Intrinsic::loongarch_lasx_xvmini_h:
2687   case Intrinsic::loongarch_lasx_xvmini_w:
2688   case Intrinsic::loongarch_lasx_xvmini_d:
2689     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2690                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2691   case Intrinsic::loongarch_lsx_vmini_bu:
2692   case Intrinsic::loongarch_lsx_vmini_hu:
2693   case Intrinsic::loongarch_lsx_vmini_wu:
2694   case Intrinsic::loongarch_lsx_vmini_du:
2695   case Intrinsic::loongarch_lasx_xvmini_bu:
2696   case Intrinsic::loongarch_lasx_xvmini_hu:
2697   case Intrinsic::loongarch_lasx_xvmini_wu:
2698   case Intrinsic::loongarch_lasx_xvmini_du:
2699     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2700                        lowerVectorSplatImm<5>(N, 2, DAG));
2701   case Intrinsic::loongarch_lsx_vmul_b:
2702   case Intrinsic::loongarch_lsx_vmul_h:
2703   case Intrinsic::loongarch_lsx_vmul_w:
2704   case Intrinsic::loongarch_lsx_vmul_d:
2705   case Intrinsic::loongarch_lasx_xvmul_b:
2706   case Intrinsic::loongarch_lasx_xvmul_h:
2707   case Intrinsic::loongarch_lasx_xvmul_w:
2708   case Intrinsic::loongarch_lasx_xvmul_d:
2709     return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2710                        N->getOperand(2));
2711   case Intrinsic::loongarch_lsx_vmadd_b:
2712   case Intrinsic::loongarch_lsx_vmadd_h:
2713   case Intrinsic::loongarch_lsx_vmadd_w:
2714   case Intrinsic::loongarch_lsx_vmadd_d:
2715   case Intrinsic::loongarch_lasx_xvmadd_b:
2716   case Intrinsic::loongarch_lasx_xvmadd_h:
2717   case Intrinsic::loongarch_lasx_xvmadd_w:
2718   case Intrinsic::loongarch_lasx_xvmadd_d: {
2719     EVT ResTy = N->getValueType(0);
2720     return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2721                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2722                                    N->getOperand(3)));
2723   }
2724   case Intrinsic::loongarch_lsx_vmsub_b:
2725   case Intrinsic::loongarch_lsx_vmsub_h:
2726   case Intrinsic::loongarch_lsx_vmsub_w:
2727   case Intrinsic::loongarch_lsx_vmsub_d:
2728   case Intrinsic::loongarch_lasx_xvmsub_b:
2729   case Intrinsic::loongarch_lasx_xvmsub_h:
2730   case Intrinsic::loongarch_lasx_xvmsub_w:
2731   case Intrinsic::loongarch_lasx_xvmsub_d: {
2732     EVT ResTy = N->getValueType(0);
2733     return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2734                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2735                                    N->getOperand(3)));
2736   }
2737   case Intrinsic::loongarch_lsx_vdiv_b:
2738   case Intrinsic::loongarch_lsx_vdiv_h:
2739   case Intrinsic::loongarch_lsx_vdiv_w:
2740   case Intrinsic::loongarch_lsx_vdiv_d:
2741   case Intrinsic::loongarch_lasx_xvdiv_b:
2742   case Intrinsic::loongarch_lasx_xvdiv_h:
2743   case Intrinsic::loongarch_lasx_xvdiv_w:
2744   case Intrinsic::loongarch_lasx_xvdiv_d:
2745     return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2746                        N->getOperand(2));
2747   case Intrinsic::loongarch_lsx_vdiv_bu:
2748   case Intrinsic::loongarch_lsx_vdiv_hu:
2749   case Intrinsic::loongarch_lsx_vdiv_wu:
2750   case Intrinsic::loongarch_lsx_vdiv_du:
2751   case Intrinsic::loongarch_lasx_xvdiv_bu:
2752   case Intrinsic::loongarch_lasx_xvdiv_hu:
2753   case Intrinsic::loongarch_lasx_xvdiv_wu:
2754   case Intrinsic::loongarch_lasx_xvdiv_du:
2755     return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2756                        N->getOperand(2));
2757   case Intrinsic::loongarch_lsx_vmod_b:
2758   case Intrinsic::loongarch_lsx_vmod_h:
2759   case Intrinsic::loongarch_lsx_vmod_w:
2760   case Intrinsic::loongarch_lsx_vmod_d:
2761   case Intrinsic::loongarch_lasx_xvmod_b:
2762   case Intrinsic::loongarch_lasx_xvmod_h:
2763   case Intrinsic::loongarch_lasx_xvmod_w:
2764   case Intrinsic::loongarch_lasx_xvmod_d:
2765     return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2766                        N->getOperand(2));
2767   case Intrinsic::loongarch_lsx_vmod_bu:
2768   case Intrinsic::loongarch_lsx_vmod_hu:
2769   case Intrinsic::loongarch_lsx_vmod_wu:
2770   case Intrinsic::loongarch_lsx_vmod_du:
2771   case Intrinsic::loongarch_lasx_xvmod_bu:
2772   case Intrinsic::loongarch_lasx_xvmod_hu:
2773   case Intrinsic::loongarch_lasx_xvmod_wu:
2774   case Intrinsic::loongarch_lasx_xvmod_du:
2775     return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2776                        N->getOperand(2));
2777   case Intrinsic::loongarch_lsx_vand_v:
2778   case Intrinsic::loongarch_lasx_xvand_v:
2779     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2780                        N->getOperand(2));
2781   case Intrinsic::loongarch_lsx_vor_v:
2782   case Intrinsic::loongarch_lasx_xvor_v:
2783     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2784                        N->getOperand(2));
2785   case Intrinsic::loongarch_lsx_vxor_v:
2786   case Intrinsic::loongarch_lasx_xvxor_v:
2787     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2788                        N->getOperand(2));
2789   case Intrinsic::loongarch_lsx_vnor_v:
2790   case Intrinsic::loongarch_lasx_xvnor_v: {
2791     SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2792                               N->getOperand(2));
2793     return DAG.getNOT(DL, Res, Res->getValueType(0));
2794   }
2795   case Intrinsic::loongarch_lsx_vandi_b:
2796   case Intrinsic::loongarch_lasx_xvandi_b:
2797     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2798                        lowerVectorSplatImm<8>(N, 2, DAG));
2799   case Intrinsic::loongarch_lsx_vori_b:
2800   case Intrinsic::loongarch_lasx_xvori_b:
2801     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2802                        lowerVectorSplatImm<8>(N, 2, DAG));
2803   case Intrinsic::loongarch_lsx_vxori_b:
2804   case Intrinsic::loongarch_lasx_xvxori_b:
2805     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2806                        lowerVectorSplatImm<8>(N, 2, DAG));
2807   case Intrinsic::loongarch_lsx_vsll_b:
2808   case Intrinsic::loongarch_lsx_vsll_h:
2809   case Intrinsic::loongarch_lsx_vsll_w:
2810   case Intrinsic::loongarch_lsx_vsll_d:
2811   case Intrinsic::loongarch_lasx_xvsll_b:
2812   case Intrinsic::loongarch_lasx_xvsll_h:
2813   case Intrinsic::loongarch_lasx_xvsll_w:
2814   case Intrinsic::loongarch_lasx_xvsll_d:
2815     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2816                        truncateVecElts(N, DAG));
2817   case Intrinsic::loongarch_lsx_vslli_b:
2818   case Intrinsic::loongarch_lasx_xvslli_b:
2819     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2820                        lowerVectorSplatImm<3>(N, 2, DAG));
2821   case Intrinsic::loongarch_lsx_vslli_h:
2822   case Intrinsic::loongarch_lasx_xvslli_h:
2823     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2824                        lowerVectorSplatImm<4>(N, 2, DAG));
2825   case Intrinsic::loongarch_lsx_vslli_w:
2826   case Intrinsic::loongarch_lasx_xvslli_w:
2827     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2828                        lowerVectorSplatImm<5>(N, 2, DAG));
2829   case Intrinsic::loongarch_lsx_vslli_d:
2830   case Intrinsic::loongarch_lasx_xvslli_d:
2831     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2832                        lowerVectorSplatImm<6>(N, 2, DAG));
2833   case Intrinsic::loongarch_lsx_vsrl_b:
2834   case Intrinsic::loongarch_lsx_vsrl_h:
2835   case Intrinsic::loongarch_lsx_vsrl_w:
2836   case Intrinsic::loongarch_lsx_vsrl_d:
2837   case Intrinsic::loongarch_lasx_xvsrl_b:
2838   case Intrinsic::loongarch_lasx_xvsrl_h:
2839   case Intrinsic::loongarch_lasx_xvsrl_w:
2840   case Intrinsic::loongarch_lasx_xvsrl_d:
2841     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2842                        truncateVecElts(N, DAG));
2843   case Intrinsic::loongarch_lsx_vsrli_b:
2844   case Intrinsic::loongarch_lasx_xvsrli_b:
2845     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2846                        lowerVectorSplatImm<3>(N, 2, DAG));
2847   case Intrinsic::loongarch_lsx_vsrli_h:
2848   case Intrinsic::loongarch_lasx_xvsrli_h:
2849     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2850                        lowerVectorSplatImm<4>(N, 2, DAG));
2851   case Intrinsic::loongarch_lsx_vsrli_w:
2852   case Intrinsic::loongarch_lasx_xvsrli_w:
2853     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2854                        lowerVectorSplatImm<5>(N, 2, DAG));
2855   case Intrinsic::loongarch_lsx_vsrli_d:
2856   case Intrinsic::loongarch_lasx_xvsrli_d:
2857     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2858                        lowerVectorSplatImm<6>(N, 2, DAG));
2859   case Intrinsic::loongarch_lsx_vsra_b:
2860   case Intrinsic::loongarch_lsx_vsra_h:
2861   case Intrinsic::loongarch_lsx_vsra_w:
2862   case Intrinsic::loongarch_lsx_vsra_d:
2863   case Intrinsic::loongarch_lasx_xvsra_b:
2864   case Intrinsic::loongarch_lasx_xvsra_h:
2865   case Intrinsic::loongarch_lasx_xvsra_w:
2866   case Intrinsic::loongarch_lasx_xvsra_d:
2867     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2868                        truncateVecElts(N, DAG));
2869   case Intrinsic::loongarch_lsx_vsrai_b:
2870   case Intrinsic::loongarch_lasx_xvsrai_b:
2871     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2872                        lowerVectorSplatImm<3>(N, 2, DAG));
2873   case Intrinsic::loongarch_lsx_vsrai_h:
2874   case Intrinsic::loongarch_lasx_xvsrai_h:
2875     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2876                        lowerVectorSplatImm<4>(N, 2, DAG));
2877   case Intrinsic::loongarch_lsx_vsrai_w:
2878   case Intrinsic::loongarch_lasx_xvsrai_w:
2879     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2880                        lowerVectorSplatImm<5>(N, 2, DAG));
2881   case Intrinsic::loongarch_lsx_vsrai_d:
2882   case Intrinsic::loongarch_lasx_xvsrai_d:
2883     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2884                        lowerVectorSplatImm<6>(N, 2, DAG));
2885   case Intrinsic::loongarch_lsx_vclz_b:
2886   case Intrinsic::loongarch_lsx_vclz_h:
2887   case Intrinsic::loongarch_lsx_vclz_w:
2888   case Intrinsic::loongarch_lsx_vclz_d:
2889   case Intrinsic::loongarch_lasx_xvclz_b:
2890   case Intrinsic::loongarch_lasx_xvclz_h:
2891   case Intrinsic::loongarch_lasx_xvclz_w:
2892   case Intrinsic::loongarch_lasx_xvclz_d:
2893     return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2894   case Intrinsic::loongarch_lsx_vpcnt_b:
2895   case Intrinsic::loongarch_lsx_vpcnt_h:
2896   case Intrinsic::loongarch_lsx_vpcnt_w:
2897   case Intrinsic::loongarch_lsx_vpcnt_d:
2898   case Intrinsic::loongarch_lasx_xvpcnt_b:
2899   case Intrinsic::loongarch_lasx_xvpcnt_h:
2900   case Intrinsic::loongarch_lasx_xvpcnt_w:
2901   case Intrinsic::loongarch_lasx_xvpcnt_d:
2902     return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2903   case Intrinsic::loongarch_lsx_vbitclr_b:
2904   case Intrinsic::loongarch_lsx_vbitclr_h:
2905   case Intrinsic::loongarch_lsx_vbitclr_w:
2906   case Intrinsic::loongarch_lsx_vbitclr_d:
2907   case Intrinsic::loongarch_lasx_xvbitclr_b:
2908   case Intrinsic::loongarch_lasx_xvbitclr_h:
2909   case Intrinsic::loongarch_lasx_xvbitclr_w:
2910   case Intrinsic::loongarch_lasx_xvbitclr_d:
2911     return lowerVectorBitClear(N, DAG);
2912   case Intrinsic::loongarch_lsx_vbitclri_b:
2913   case Intrinsic::loongarch_lasx_xvbitclri_b:
2914     return lowerVectorBitClearImm<3>(N, DAG);
2915   case Intrinsic::loongarch_lsx_vbitclri_h:
2916   case Intrinsic::loongarch_lasx_xvbitclri_h:
2917     return lowerVectorBitClearImm<4>(N, DAG);
2918   case Intrinsic::loongarch_lsx_vbitclri_w:
2919   case Intrinsic::loongarch_lasx_xvbitclri_w:
2920     return lowerVectorBitClearImm<5>(N, DAG);
2921   case Intrinsic::loongarch_lsx_vbitclri_d:
2922   case Intrinsic::loongarch_lasx_xvbitclri_d:
2923     return lowerVectorBitClearImm<6>(N, DAG);
2924   case Intrinsic::loongarch_lsx_vbitset_b:
2925   case Intrinsic::loongarch_lsx_vbitset_h:
2926   case Intrinsic::loongarch_lsx_vbitset_w:
2927   case Intrinsic::loongarch_lsx_vbitset_d:
2928   case Intrinsic::loongarch_lasx_xvbitset_b:
2929   case Intrinsic::loongarch_lasx_xvbitset_h:
2930   case Intrinsic::loongarch_lasx_xvbitset_w:
2931   case Intrinsic::loongarch_lasx_xvbitset_d: {
2932     EVT VecTy = N->getValueType(0);
2933     SDValue One = DAG.getConstant(1, DL, VecTy);
2934     return DAG.getNode(
2935         ISD::OR, DL, VecTy, N->getOperand(1),
2936         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2937   }
2938   case Intrinsic::loongarch_lsx_vbitseti_b:
2939   case Intrinsic::loongarch_lasx_xvbitseti_b:
2940     return lowerVectorBitSetImm<3>(N, DAG);
2941   case Intrinsic::loongarch_lsx_vbitseti_h:
2942   case Intrinsic::loongarch_lasx_xvbitseti_h:
2943     return lowerVectorBitSetImm<4>(N, DAG);
2944   case Intrinsic::loongarch_lsx_vbitseti_w:
2945   case Intrinsic::loongarch_lasx_xvbitseti_w:
2946     return lowerVectorBitSetImm<5>(N, DAG);
2947   case Intrinsic::loongarch_lsx_vbitseti_d:
2948   case Intrinsic::loongarch_lasx_xvbitseti_d:
2949     return lowerVectorBitSetImm<6>(N, DAG);
2950   case Intrinsic::loongarch_lsx_vbitrev_b:
2951   case Intrinsic::loongarch_lsx_vbitrev_h:
2952   case Intrinsic::loongarch_lsx_vbitrev_w:
2953   case Intrinsic::loongarch_lsx_vbitrev_d:
2954   case Intrinsic::loongarch_lasx_xvbitrev_b:
2955   case Intrinsic::loongarch_lasx_xvbitrev_h:
2956   case Intrinsic::loongarch_lasx_xvbitrev_w:
2957   case Intrinsic::loongarch_lasx_xvbitrev_d: {
2958     EVT VecTy = N->getValueType(0);
2959     SDValue One = DAG.getConstant(1, DL, VecTy);
2960     return DAG.getNode(
2961         ISD::XOR, DL, VecTy, N->getOperand(1),
2962         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2963   }
2964   case Intrinsic::loongarch_lsx_vbitrevi_b:
2965   case Intrinsic::loongarch_lasx_xvbitrevi_b:
2966     return lowerVectorBitRevImm<3>(N, DAG);
2967   case Intrinsic::loongarch_lsx_vbitrevi_h:
2968   case Intrinsic::loongarch_lasx_xvbitrevi_h:
2969     return lowerVectorBitRevImm<4>(N, DAG);
2970   case Intrinsic::loongarch_lsx_vbitrevi_w:
2971   case Intrinsic::loongarch_lasx_xvbitrevi_w:
2972     return lowerVectorBitRevImm<5>(N, DAG);
2973   case Intrinsic::loongarch_lsx_vbitrevi_d:
2974   case Intrinsic::loongarch_lasx_xvbitrevi_d:
2975     return lowerVectorBitRevImm<6>(N, DAG);
2976   case Intrinsic::loongarch_lsx_vfadd_s:
2977   case Intrinsic::loongarch_lsx_vfadd_d:
2978   case Intrinsic::loongarch_lasx_xvfadd_s:
2979   case Intrinsic::loongarch_lasx_xvfadd_d:
2980     return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2981                        N->getOperand(2));
2982   case Intrinsic::loongarch_lsx_vfsub_s:
2983   case Intrinsic::loongarch_lsx_vfsub_d:
2984   case Intrinsic::loongarch_lasx_xvfsub_s:
2985   case Intrinsic::loongarch_lasx_xvfsub_d:
2986     return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2987                        N->getOperand(2));
2988   case Intrinsic::loongarch_lsx_vfmul_s:
2989   case Intrinsic::loongarch_lsx_vfmul_d:
2990   case Intrinsic::loongarch_lasx_xvfmul_s:
2991   case Intrinsic::loongarch_lasx_xvfmul_d:
2992     return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
2993                        N->getOperand(2));
2994   case Intrinsic::loongarch_lsx_vfdiv_s:
2995   case Intrinsic::loongarch_lsx_vfdiv_d:
2996   case Intrinsic::loongarch_lasx_xvfdiv_s:
2997   case Intrinsic::loongarch_lasx_xvfdiv_d:
2998     return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
2999                        N->getOperand(2));
3000   case Intrinsic::loongarch_lsx_vfmadd_s:
3001   case Intrinsic::loongarch_lsx_vfmadd_d:
3002   case Intrinsic::loongarch_lasx_xvfmadd_s:
3003   case Intrinsic::loongarch_lasx_xvfmadd_d:
3004     return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3005                        N->getOperand(2), N->getOperand(3));
3006   case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3007     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3008                        N->getOperand(1), N->getOperand(2),
3009                        legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3010   case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3011   case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3012     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3013                        N->getOperand(1), N->getOperand(2),
3014                        legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3015   case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3016   case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3017     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3018                        N->getOperand(1), N->getOperand(2),
3019                        legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3020   case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3021     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3022                        N->getOperand(1), N->getOperand(2),
3023                        legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3024   case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3025   case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3026   case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3027   case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3028   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3029   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3030   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3031   case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3032     EVT ResTy = N->getValueType(0);
3033     SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3034     return DAG.getBuildVector(ResTy, DL, Ops);
3035   }
3036   case Intrinsic::loongarch_lsx_vreplve_b:
3037   case Intrinsic::loongarch_lsx_vreplve_h:
3038   case Intrinsic::loongarch_lsx_vreplve_w:
3039   case Intrinsic::loongarch_lsx_vreplve_d:
3040   case Intrinsic::loongarch_lasx_xvreplve_b:
3041   case Intrinsic::loongarch_lasx_xvreplve_h:
3042   case Intrinsic::loongarch_lasx_xvreplve_w:
3043   case Intrinsic::loongarch_lasx_xvreplve_d:
3044     return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3045                        N->getOperand(1),
3046                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3047                                    N->getOperand(2)));
3048   }
3049   return SDValue();
3050 }
3051 
3052 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3053                                                    DAGCombinerInfo &DCI) const {
3054   SelectionDAG &DAG = DCI.DAG;
3055   switch (N->getOpcode()) {
3056   default:
3057     break;
3058   case ISD::AND:
3059     return performANDCombine(N, DAG, DCI, Subtarget);
3060   case ISD::OR:
3061     return performORCombine(N, DAG, DCI, Subtarget);
3062   case ISD::SRL:
3063     return performSRLCombine(N, DAG, DCI, Subtarget);
3064   case LoongArchISD::BITREV_W:
3065     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3066   case ISD::INTRINSIC_WO_CHAIN:
3067     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3068   }
3069   return SDValue();
3070 }
3071 
3072 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3073                                               MachineBasicBlock *MBB) {
3074   if (!ZeroDivCheck)
3075     return MBB;
3076 
3077   // Build instructions:
3078   // MBB:
3079   //   div(or mod)   $dst, $dividend, $divisor
3080   //   bnez          $divisor, SinkMBB
3081   // BreakMBB:
3082   //   break         7 // BRK_DIVZERO
3083   // SinkMBB:
3084   //   fallthrough
3085   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3086   MachineFunction::iterator It = ++MBB->getIterator();
3087   MachineFunction *MF = MBB->getParent();
3088   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3089   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3090   MF->insert(It, BreakMBB);
3091   MF->insert(It, SinkMBB);
3092 
3093   // Transfer the remainder of MBB and its successor edges to SinkMBB.
3094   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3095   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3096 
3097   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3098   DebugLoc DL = MI.getDebugLoc();
3099   MachineOperand &Divisor = MI.getOperand(2);
3100   Register DivisorReg = Divisor.getReg();
3101 
3102   // MBB:
3103   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3104       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3105       .addMBB(SinkMBB);
3106   MBB->addSuccessor(BreakMBB);
3107   MBB->addSuccessor(SinkMBB);
3108 
3109   // BreakMBB:
3110   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3111   // definition of BRK_DIVZERO.
3112   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3113   BreakMBB->addSuccessor(SinkMBB);
3114 
3115   // Clear Divisor's kill flag.
3116   Divisor.setIsKill(false);
3117 
3118   return SinkMBB;
3119 }
3120 
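// Expand a PseudoVBZ/PseudoVBNZ-style vector branch pseudo into real code:
// BB:
//   vset<cond>  $fcc, $vr
//   bcnez       $fcc, TrueBB
// FalseBB:
//   $rd1 = 0; branch to SinkBB
// TrueBB:
//   $rd2 = 1
// SinkBB:
//   $dst = PHI [$rd1, FalseBB], [$rd2, TrueBB]
// The GPR result is then usable by an ordinary conditional branch.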
3121 static MachineBasicBlock *
3122 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3123                         const LoongArchSubtarget &Subtarget) {
3124   unsigned CondOpc;
3125   switch (MI.getOpcode()) {
3126   default:
3127     llvm_unreachable("Unexpected opcode");
3128   case LoongArch::PseudoVBZ:
3129     CondOpc = LoongArch::VSETEQZ_V;
3130     break;
3131   case LoongArch::PseudoVBZ_B:
3132     CondOpc = LoongArch::VSETANYEQZ_B;
3133     break;
3134   case LoongArch::PseudoVBZ_H:
3135     CondOpc = LoongArch::VSETANYEQZ_H;
3136     break;
3137   case LoongArch::PseudoVBZ_W:
3138     CondOpc = LoongArch::VSETANYEQZ_W;
3139     break;
3140   case LoongArch::PseudoVBZ_D:
3141     CondOpc = LoongArch::VSETANYEQZ_D;
3142     break;
3143   case LoongArch::PseudoVBNZ:
3144     CondOpc = LoongArch::VSETNEZ_V;
3145     break;
3146   case LoongArch::PseudoVBNZ_B:
3147     CondOpc = LoongArch::VSETALLNEZ_B;
3148     break;
3149   case LoongArch::PseudoVBNZ_H:
3150     CondOpc = LoongArch::VSETALLNEZ_H;
3151     break;
3152   case LoongArch::PseudoVBNZ_W:
3153     CondOpc = LoongArch::VSETALLNEZ_W;
3154     break;
3155   case LoongArch::PseudoVBNZ_D:
3156     CondOpc = LoongArch::VSETALLNEZ_D;
3157     break;
3158   case LoongArch::PseudoXVBZ:
3159     CondOpc = LoongArch::XVSETEQZ_V;
3160     break;
3161   case LoongArch::PseudoXVBZ_B:
3162     CondOpc = LoongArch::XVSETANYEQZ_B;
3163     break;
3164   case LoongArch::PseudoXVBZ_H:
3165     CondOpc = LoongArch::XVSETANYEQZ_H;
3166     break;
3167   case LoongArch::PseudoXVBZ_W:
3168     CondOpc = LoongArch::XVSETANYEQZ_W;
3169     break;
3170   case LoongArch::PseudoXVBZ_D:
3171     CondOpc = LoongArch::XVSETANYEQZ_D;
3172     break;
3173   case LoongArch::PseudoXVBNZ:
3174     CondOpc = LoongArch::XVSETNEZ_V;
3175     break;
3176   case LoongArch::PseudoXVBNZ_B:
3177     CondOpc = LoongArch::XVSETALLNEZ_B;
3178     break;
3179   case LoongArch::PseudoXVBNZ_H:
3180     CondOpc = LoongArch::XVSETALLNEZ_H;
3181     break;
3182   case LoongArch::PseudoXVBNZ_W:
3183     CondOpc = LoongArch::XVSETALLNEZ_W;
3184     break;
3185   case LoongArch::PseudoXVBNZ_D:
3186     CondOpc = LoongArch::XVSETALLNEZ_D;
3187     break;
3188   }
3189 
3190   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3191   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3192   DebugLoc DL = MI.getDebugLoc();
3193   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3194   MachineFunction::iterator It = ++BB->getIterator();
3195 
3196   MachineFunction *F = BB->getParent();
3197   MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3198   MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3199   MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3200 
3201   F->insert(It, FalseBB);
3202   F->insert(It, TrueBB);
3203   F->insert(It, SinkBB);
3204 
3205   // Transfer the remainder of BB and its successor edges to SinkBB.
3206   SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3207   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3208 
3209   // Insert the vector condition-set instruction into BB.
3210   Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3211   BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3212 
3213   // Insert branch.
3214   BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3215   BB->addSuccessor(FalseBB);
3216   BB->addSuccessor(TrueBB);
3217 
3218   // FalseBB.
3219   Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3220   BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3221       .addReg(LoongArch::R0)
3222       .addImm(0);
3223   BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3224   FalseBB->addSuccessor(SinkBB);
3225 
3226   // TrueBB.
3227   Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3228   BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3229       .addReg(LoongArch::R0)
3230       .addImm(1);
3231   TrueBB->addSuccessor(SinkBB);
3232 
3233   // SinkBB: merge the results.
3234   BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3235           MI.getOperand(0).getReg())
3236       .addReg(RD1)
3237       .addMBB(FalseBB)
3238       .addReg(RD2)
3239       .addMBB(TrueBB);
3240 
3241   // The pseudo instruction is gone now.
3242   MI.eraseFromParent();
3243   return SinkBB;
3244 }
3245 
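// Expand PseudoXVINSGR2VR_{B,H}: LASX only provides xvinsgr2vr.{w,d}, so a
// byte/halfword element is inserted into the relevant 128-bit half with
// vinsgr2vr.{b,h}; when the index addresses the high half, xvpermi.q moves
// that half down first and merges it back afterwards.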
3246 static MachineBasicBlock *
3247 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3248                      const LoongArchSubtarget &Subtarget) {
3249   unsigned InsOp;
3250   unsigned HalfSize;
3251   switch (MI.getOpcode()) {
3252   default:
3253     llvm_unreachable("Unexpected opcode");
3254   case LoongArch::PseudoXVINSGR2VR_B:
3255     HalfSize = 16;
3256     InsOp = LoongArch::VINSGR2VR_B;
3257     break;
3258   case LoongArch::PseudoXVINSGR2VR_H:
3259     HalfSize = 8;
3260     InsOp = LoongArch::VINSGR2VR_H;
3261     break;
3262   }
3263   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3264   const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3265   const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3266   DebugLoc DL = MI.getDebugLoc();
3267   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3268   // XDst = vector_insert XSrc, Elt, Idx
3269   Register XDst = MI.getOperand(0).getReg();
3270   Register XSrc = MI.getOperand(1).getReg();
3271   Register Elt = MI.getOperand(2).getReg();
3272   unsigned Idx = MI.getOperand(3).getImm();
3273 
3274   Register ScratchReg1 = XSrc;
3275   if (Idx >= HalfSize) {
3276     ScratchReg1 = MRI.createVirtualRegister(RC);
3277     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3278         .addReg(XSrc)
3279         .addReg(XSrc)
3280         .addImm(1);
3281   }
3282 
3283   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3284   Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3285   BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3286       .addReg(ScratchReg1, 0, LoongArch::sub_128);
3287   BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3288       .addReg(ScratchSubReg1)
3289       .addReg(Elt)
3290       .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3291 
3292   Register ScratchReg2 = XDst;
3293   if (Idx >= HalfSize)
3294     ScratchReg2 = MRI.createVirtualRegister(RC);
3295 
3296   BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3297       .addImm(0)
3298       .addReg(ScratchSubReg2)
3299       .addImm(LoongArch::sub_128);
3300 
3301   if (Idx >= HalfSize)
3302     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3303         .addReg(XSrc)
3304         .addReg(ScratchReg2)
3305         .addImm(2);
3306 
3307   MI.eraseFromParent();
3308   return BB;
3309 }
3310 
3311 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3312     MachineInstr &MI, MachineBasicBlock *BB) const {
3313   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3314   DebugLoc DL = MI.getDebugLoc();
3315 
3316   switch (MI.getOpcode()) {
3317   default:
3318     llvm_unreachable("Unexpected instr type to insert");
3319   case LoongArch::DIV_W:
3320   case LoongArch::DIV_WU:
3321   case LoongArch::MOD_W:
3322   case LoongArch::MOD_WU:
3323   case LoongArch::DIV_D:
3324   case LoongArch::DIV_DU:
3325   case LoongArch::MOD_D:
3326   case LoongArch::MOD_DU:
3327     return insertDivByZeroTrap(MI, BB);
3329   case LoongArch::WRFCSR: {
3330     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3331             LoongArch::FCSR0 + MI.getOperand(0).getImm())
3332         .addReg(MI.getOperand(1).getReg());
3333     MI.eraseFromParent();
3334     return BB;
3335   }
3336   case LoongArch::RDFCSR: {
3337     MachineInstr *ReadFCSR =
3338         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3339                 MI.getOperand(0).getReg())
3340             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3341     ReadFCSR->getOperand(1).setIsUndef();
3342     MI.eraseFromParent();
3343     return BB;
3344   }
3345   case LoongArch::PseudoVBZ:
3346   case LoongArch::PseudoVBZ_B:
3347   case LoongArch::PseudoVBZ_H:
3348   case LoongArch::PseudoVBZ_W:
3349   case LoongArch::PseudoVBZ_D:
3350   case LoongArch::PseudoVBNZ:
3351   case LoongArch::PseudoVBNZ_B:
3352   case LoongArch::PseudoVBNZ_H:
3353   case LoongArch::PseudoVBNZ_W:
3354   case LoongArch::PseudoVBNZ_D:
3355   case LoongArch::PseudoXVBZ:
3356   case LoongArch::PseudoXVBZ_B:
3357   case LoongArch::PseudoXVBZ_H:
3358   case LoongArch::PseudoXVBZ_W:
3359   case LoongArch::PseudoXVBZ_D:
3360   case LoongArch::PseudoXVBNZ:
3361   case LoongArch::PseudoXVBNZ_B:
3362   case LoongArch::PseudoXVBNZ_H:
3363   case LoongArch::PseudoXVBNZ_W:
3364   case LoongArch::PseudoXVBNZ_D:
3365     return emitVecCondBranchPseudo(MI, BB, Subtarget);
3366   case LoongArch::PseudoXVINSGR2VR_B:
3367   case LoongArch::PseudoXVINSGR2VR_H:
3368     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3369   }
3370 }
3371 
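// Misaligned accesses are permitted (and reported as fast) only when the
// subtarget implements the unaligned-access (UAL) feature.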
3372 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3373     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3374     unsigned *Fast) const {
3375   if (!Subtarget.hasUAL())
3376     return false;
3377 
3378   // TODO: Set a reasonable speed value for *Fast.
3379   if (Fast)
3380     *Fast = 1;
3381   return true;
3382 }
3383 
3384 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3385   switch ((LoongArchISD::NodeType)Opcode) {
3386   case LoongArchISD::FIRST_NUMBER:
3387     break;
3388 
3389 #define NODE_NAME_CASE(node)                                                   \
3390   case LoongArchISD::node:                                                     \
3391     return "LoongArchISD::" #node;
3392 
3393     // TODO: Add more target-dependent nodes later.
3394     NODE_NAME_CASE(CALL)
3395     NODE_NAME_CASE(CALL_MEDIUM)
3396     NODE_NAME_CASE(CALL_LARGE)
3397     NODE_NAME_CASE(RET)
3398     NODE_NAME_CASE(TAIL)
3399     NODE_NAME_CASE(TAIL_MEDIUM)
3400     NODE_NAME_CASE(TAIL_LARGE)
3401     NODE_NAME_CASE(SLL_W)
3402     NODE_NAME_CASE(SRA_W)
3403     NODE_NAME_CASE(SRL_W)
3404     NODE_NAME_CASE(BSTRINS)
3405     NODE_NAME_CASE(BSTRPICK)
3406     NODE_NAME_CASE(MOVGR2FR_W_LA64)
3407     NODE_NAME_CASE(MOVFR2GR_S_LA64)
3408     NODE_NAME_CASE(FTINT)
3409     NODE_NAME_CASE(REVB_2H)
3410     NODE_NAME_CASE(REVB_2W)
3411     NODE_NAME_CASE(BITREV_4B)
3412     NODE_NAME_CASE(BITREV_W)
3413     NODE_NAME_CASE(ROTR_W)
3414     NODE_NAME_CASE(ROTL_W)
3415     NODE_NAME_CASE(CLZ_W)
3416     NODE_NAME_CASE(CTZ_W)
3417     NODE_NAME_CASE(DBAR)
3418     NODE_NAME_CASE(IBAR)
3419     NODE_NAME_CASE(BREAK)
3420     NODE_NAME_CASE(SYSCALL)
3421     NODE_NAME_CASE(CRC_W_B_W)
3422     NODE_NAME_CASE(CRC_W_H_W)
3423     NODE_NAME_CASE(CRC_W_W_W)
3424     NODE_NAME_CASE(CRC_W_D_W)
3425     NODE_NAME_CASE(CRCC_W_B_W)
3426     NODE_NAME_CASE(CRCC_W_H_W)
3427     NODE_NAME_CASE(CRCC_W_W_W)
3428     NODE_NAME_CASE(CRCC_W_D_W)
3429     NODE_NAME_CASE(CSRRD)
3430     NODE_NAME_CASE(CSRWR)
3431     NODE_NAME_CASE(CSRXCHG)
3432     NODE_NAME_CASE(IOCSRRD_B)
3433     NODE_NAME_CASE(IOCSRRD_H)
3434     NODE_NAME_CASE(IOCSRRD_W)
3435     NODE_NAME_CASE(IOCSRRD_D)
3436     NODE_NAME_CASE(IOCSRWR_B)
3437     NODE_NAME_CASE(IOCSRWR_H)
3438     NODE_NAME_CASE(IOCSRWR_W)
3439     NODE_NAME_CASE(IOCSRWR_D)
3440     NODE_NAME_CASE(CPUCFG)
3441     NODE_NAME_CASE(MOVGR2FCSR)
3442     NODE_NAME_CASE(MOVFCSR2GR)
3443     NODE_NAME_CASE(CACOP_D)
3444     NODE_NAME_CASE(CACOP_W)
3445     NODE_NAME_CASE(VPICK_SEXT_ELT)
3446     NODE_NAME_CASE(VPICK_ZEXT_ELT)
3447     NODE_NAME_CASE(VREPLVE)
3448     NODE_NAME_CASE(VALL_ZERO)
3449     NODE_NAME_CASE(VANY_ZERO)
3450     NODE_NAME_CASE(VALL_NONZERO)
3451     NODE_NAME_CASE(VANY_NONZERO)
3452   }
3453 #undef NODE_NAME_CASE
3454   return nullptr;
3455 }
3456 
3457 //===----------------------------------------------------------------------===//
3458 //                     Calling Convention Implementation
3459 //===----------------------------------------------------------------------===//
3460 
3461 // Eight general-purpose registers a0-a7 are used for passing integer
3462 // arguments, with a0-a1 reused for return values. In general, the GPRs are
3463 // used to pass fixed-point arguments, and also floating-point arguments when
3464 // no FPR is available or when the soft-float ABI is in use.
3465 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
3466                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
3467                              LoongArch::R10, LoongArch::R11};
3468 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3469 // arguments, and fa0-fa1 are also used for return values.
3470 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3471                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
3472                                LoongArch::F6, LoongArch::F7};
3473 // FPR32 and FPR64 alias each other.
3474 const MCPhysReg ArgFPR64s[] = {
3475     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3476     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3477 
3478 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3479                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3480                             LoongArch::VR6, LoongArch::VR7};
3481 
3482 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3483                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3484                             LoongArch::XR6, LoongArch::XR7};
3485 
3486 // Pass a 2*GRLen argument that has been split into two GRLen values through
3487 // registers or the stack as necessary.
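// For example (illustrative), an i128 argument under LP64 is split into two
// i64 halves: the first half may still land in the last free GPR while the
// second half falls through to the stack.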
3488 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3489                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3490                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3491                                      ISD::ArgFlagsTy ArgFlags2) {
3492   unsigned GRLenInBytes = GRLen / 8;
3493   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3494     // At least one half can be passed via register.
3495     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3496                                      VA1.getLocVT(), CCValAssign::Full));
3497   } else {
3498     // Both halves must be passed on the stack, with proper alignment.
3499     Align StackAlign =
3500         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3501     State.addLoc(
3502         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3503                             State.AllocateStack(GRLenInBytes, StackAlign),
3504                             VA1.getLocVT(), CCValAssign::Full));
3505     State.addLoc(CCValAssign::getMem(
3506         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3507         LocVT2, CCValAssign::Full));
3508     return false;
3509   }
3510   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3511     // The second half can also be passed via register.
3512     State.addLoc(
3513         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3514   } else {
3515     // The second half is passed via the stack, without additional alignment.
3516     State.addLoc(CCValAssign::getMem(
3517         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3518         LocVT2, CCValAssign::Full));
3519   }
3520   return false;
3521 }
3522 
3523 // Implements the LoongArch calling convention. Returns true upon failure.
3524 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3525                          unsigned ValNo, MVT ValVT,
3526                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3527                          CCState &State, bool IsFixed, bool IsRet,
3528                          Type *OrigTy) {
3529   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3530   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3531   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3532   MVT LocVT = ValVT;
3533 
3534   // Any return value split into more than two values can't be returned
3535   // directly.
3536   if (IsRet && ValNo > 1)
3537     return true;
3538 
3539   // Pass a floating-point value in a GPR if it is variadic or no FPR is left.
3540   bool UseGPRForFloat = true;
3541 
3542   switch (ABI) {
3543   default:
3544     llvm_unreachable("Unexpected ABI");
3545   case LoongArchABI::ABI_ILP32S:
3546   case LoongArchABI::ABI_ILP32F:
3547   case LoongArchABI::ABI_LP64F:
3548     report_fatal_error("Unimplemented ABI");
3549     break;
3550   case LoongArchABI::ABI_ILP32D:
3551   case LoongArchABI::ABI_LP64D:
3552     UseGPRForFloat = !IsFixed;
3553     break;
3554   case LoongArchABI::ABI_LP64S:
3555     break;
3556   }
3557 
3558   // FPR32 and FPR64 alias each other.
3559   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3560     UseGPRForFloat = true;
3561 
3562   if (UseGPRForFloat && ValVT == MVT::f32) {
3563     LocVT = GRLenVT;
3564     LocInfo = CCValAssign::BCvt;
3565   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3566     LocVT = MVT::i64;
3567     LocInfo = CCValAssign::BCvt;
3568   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3569     // TODO: Handle passing f64 on LA32 with D feature.
3570     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3571   }
3572 
3573   // If this is a variadic argument, the LoongArch calling convention requires
3574   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3575   // byte alignment. An aligned register should be used regardless of whether
3576   // the original argument was split during legalisation or not. The argument
3577   // will not be passed by registers if the original type is larger than
3578   // 2*GRLen, so the register alignment rule does not apply.
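  // For example (illustrative), a variadic i128 under LP64 whose next free GPR
  // would be a1 skips a1 and is passed in the aligned pair a2/a3.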
3579   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3580   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3581       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3582     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3583     // Skip 'odd' register if necessary.
3584     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3585       State.AllocateReg(ArgGPRs);
3586   }
3587 
3588   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3589   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3590       State.getPendingArgFlags();
3591 
3592   assert(PendingLocs.size() == PendingArgFlags.size() &&
3593          "PendingLocs and PendingArgFlags out of sync");
3594 
3595   // Split arguments might be passed indirectly, so keep track of the pending
3596   // values.
3597   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3598     LocVT = GRLenVT;
3599     LocInfo = CCValAssign::Indirect;
3600     PendingLocs.push_back(
3601         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3602     PendingArgFlags.push_back(ArgFlags);
3603     if (!ArgFlags.isSplitEnd()) {
3604       return false;
3605     }
3606   }
3607 
3608   // If the split argument only had two elements, it should be passed directly
3609   // in registers or on the stack.
3610   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3611       PendingLocs.size() <= 2) {
3612     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3613     // Apply the normal calling convention rules to the first half of the
3614     // split argument.
3615     CCValAssign VA = PendingLocs[0];
3616     ISD::ArgFlagsTy AF = PendingArgFlags[0];
3617     PendingLocs.clear();
3618     PendingArgFlags.clear();
3619     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3620                                     ArgFlags);
3621   }
3622 
3623   // Allocate to a register if possible, or else a stack slot.
3624   Register Reg;
3625   unsigned StoreSizeBytes = GRLen / 8;
3626   Align StackAlign = Align(GRLen / 8);
3627 
3628   if (ValVT == MVT::f32 && !UseGPRForFloat)
3629     Reg = State.AllocateReg(ArgFPR32s);
3630   else if (ValVT == MVT::f64 && !UseGPRForFloat)
3631     Reg = State.AllocateReg(ArgFPR64s);
3632   else if (ValVT.is128BitVector())
3633     Reg = State.AllocateReg(ArgVRs);
3634   else if (ValVT.is256BitVector())
3635     Reg = State.AllocateReg(ArgXRs);
3636   else
3637     Reg = State.AllocateReg(ArgGPRs);
3638 
3639   unsigned StackOffset =
3640       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3641 
3642   // If we reach this point and PendingLocs is non-empty, we must be at the
3643   // end of a split argument that must be passed indirectly.
3644   if (!PendingLocs.empty()) {
3645     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3646     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3647     for (auto &It : PendingLocs) {
3648       if (Reg)
3649         It.convertToReg(Reg);
3650       else
3651         It.convertToMem(StackOffset);
3652       State.addLoc(It);
3653     }
3654     PendingLocs.clear();
3655     PendingArgFlags.clear();
3656     return false;
3657   }
3658   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3659          "Expected a GRLenVT at this stage");
3660 
3661   if (Reg) {
3662     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3663     return false;
3664   }
3665 
3666   // When a floating-point value is passed on the stack, no bit-cast is needed.
3667   if (ValVT.isFloatingPoint()) {
3668     LocVT = ValVT;
3669     LocInfo = CCValAssign::Full;
3670   }
3671 
3672   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3673   return false;
3674 }
3675 
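// Run the given calling-convention assignment function over each incoming
// argument (or over each return value when IsRet is true) and record the
// resulting locations in CCInfo.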
3676 void LoongArchTargetLowering::analyzeInputArgs(
3677     MachineFunction &MF, CCState &CCInfo,
3678     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3679     LoongArchCCAssignFn Fn) const {
3680   FunctionType *FType = MF.getFunction().getFunctionType();
3681   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3682     MVT ArgVT = Ins[i].VT;
3683     Type *ArgTy = nullptr;
3684     if (IsRet)
3685       ArgTy = FType->getReturnType();
3686     else if (Ins[i].isOrigArg())
3687       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3688     LoongArchABI::ABI ABI =
3689         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3690     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3691            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3692       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3693                         << '\n');
3694       llvm_unreachable("");
3695     }
3696   }
3697 }
3698 
3699 void LoongArchTargetLowering::analyzeOutputArgs(
3700     MachineFunction &MF, CCState &CCInfo,
3701     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3702     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3703   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3704     MVT ArgVT = Outs[i].VT;
3705     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3706     LoongArchABI::ABI ABI =
3707         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3708     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3709            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3710       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3711                         << "\n");
3712       llvm_unreachable("");
3713     }
3714   }
3715 }
3716 
3717 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3718 // values.
3719 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3720                                    const CCValAssign &VA, const SDLoc &DL) {
3721   switch (VA.getLocInfo()) {
3722   default:
3723     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3724   case CCValAssign::Full:
3725   case CCValAssign::Indirect:
3726     break;
3727   case CCValAssign::BCvt:
3728     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3729       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3730     else
3731       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3732     break;
3733   }
3734   return Val;
3735 }
3736 
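// Copy an argument out of the physical register chosen by the calling
// convention: create a live-in virtual register for it and convert the value
// from its location type back to its value type.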
3737 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3738                                 const CCValAssign &VA, const SDLoc &DL,
3739                                 const LoongArchTargetLowering &TLI) {
3740   MachineFunction &MF = DAG.getMachineFunction();
3741   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3742   EVT LocVT = VA.getLocVT();
3743   SDValue Val;
3744   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3745   Register VReg = RegInfo.createVirtualRegister(RC);
3746   RegInfo.addLiveIn(VA.getLocReg(), VReg);
3747   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3748 
3749   return convertLocVTToValVT(DAG, Val, VA, DL);
3750 }
3751 
3752 // The caller is responsible for loading the full value if the argument is
3753 // passed with CCValAssign::Indirect.
3754 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3755                                 const CCValAssign &VA, const SDLoc &DL) {
3756   MachineFunction &MF = DAG.getMachineFunction();
3757   MachineFrameInfo &MFI = MF.getFrameInfo();
3758   EVT ValVT = VA.getValVT();
3759   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3760                                  /*IsImmutable=*/true);
3761   SDValue FIN = DAG.getFrameIndex(
3762       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3763 
3764   ISD::LoadExtType ExtType;
3765   switch (VA.getLocInfo()) {
3766   default:
3767     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3768   case CCValAssign::Full:
3769   case CCValAssign::Indirect:
3770   case CCValAssign::BCvt:
3771     ExtType = ISD::NON_EXTLOAD;
3772     break;
3773   }
3774   return DAG.getExtLoad(
3775       ExtType, DL, VA.getLocVT(), Chain, FIN,
3776       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3777 }
3778 
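// Inverse of convertLocVTToValVT: convert a value to the type it occupies in
// its assigned location, e.g. moving an f32 into a 64-bit GPR on LA64.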
3779 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3780                                    const CCValAssign &VA, const SDLoc &DL) {
3781   EVT LocVT = VA.getLocVT();
3782 
3783   switch (VA.getLocInfo()) {
3784   default:
3785     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3786   case CCValAssign::Full:
3787     break;
3788   case CCValAssign::BCvt:
3789     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3790       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3791     else
3792       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3793     break;
3794   }
3795   return Val;
3796 }
3797 
3798 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3799                              CCValAssign::LocInfo LocInfo,
3800                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
3801   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3802     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3803     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
3804     static const MCPhysReg GPRList[] = {
3805         LoongArch::R23, LoongArch::R24, LoongArch::R25,
3806         LoongArch::R26, LoongArch::R27, LoongArch::R28,
3807         LoongArch::R29, LoongArch::R30, LoongArch::R31};
3808     if (unsigned Reg = State.AllocateReg(GPRList)) {
3809       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3810       return false;
3811     }
3812   }
3813 
3814   if (LocVT == MVT::f32) {
3815     // Pass in STG registers: F1, F2, F3, F4
3816     //                        fs0,fs1,fs2,fs3
3817     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3818                                           LoongArch::F26, LoongArch::F27};
3819     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3820       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3821       return false;
3822     }
3823   }
3824 
3825   if (LocVT == MVT::f64) {
3826     // Pass in STG registers: D1, D2, D3, D4
3827     //                        fs4,fs5,fs6,fs7
3828     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3829                                           LoongArch::F30_64, LoongArch::F31_64};
3830     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3831       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3832       return false;
3833     }
3834   }
3835 
3836   report_fatal_error("No registers left in GHC calling convention");
3837   return true;
3838 }
3839 
3840 // Transform physical registers into virtual registers.
3841 SDValue LoongArchTargetLowering::LowerFormalArguments(
3842     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3843     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3844     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3845 
3846   MachineFunction &MF = DAG.getMachineFunction();
3847 
3848   switch (CallConv) {
3849   default:
3850     llvm_unreachable("Unsupported calling convention");
3851   case CallingConv::C:
3852   case CallingConv::Fast:
3853     break;
3854   case CallingConv::GHC:
3855     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3856         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3857       report_fatal_error(
3858           "GHC calling convention requires the F and D extensions");
3859   }
3860 
3861   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3862   MVT GRLenVT = Subtarget.getGRLenVT();
3863   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3864   // Used with varargs to accumulate store chains.
3865   std::vector<SDValue> OutChains;
3866 
3867   // Assign locations to all of the incoming arguments.
3868   SmallVector<CCValAssign> ArgLocs;
3869   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3870 
3871   if (CallConv == CallingConv::GHC)
3872     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3873   else
3874     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3875 
3876   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3877     CCValAssign &VA = ArgLocs[i];
3878     SDValue ArgValue;
3879     if (VA.isRegLoc())
3880       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3881     else
3882       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3883     if (VA.getLocInfo() == CCValAssign::Indirect) {
3884       // If the original argument was split and passed by reference, we need to
3885       // load all parts of it here (using the same address).
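      // For example, a scalar wider than 2*GRLen is passed indirectly: the
      // caller spills the whole value and passes a single pointer, and the
      // loop below loads each remaining part at its PartOffset from that
      // pointer.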
3886       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3887                                    MachinePointerInfo()));
3888       unsigned ArgIndex = Ins[i].OrigArgIndex;
3889       unsigned ArgPartOffset = Ins[i].PartOffset;
3890       assert(ArgPartOffset == 0);
3891       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3892         CCValAssign &PartVA = ArgLocs[i + 1];
3893         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3894         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3895         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3896         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3897                                      MachinePointerInfo()));
3898         ++i;
3899       }
3900       continue;
3901     }
3902     InVals.push_back(ArgValue);
3903   }
3904 
3905   if (IsVarArg) {
3906     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3907     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3908     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3909     MachineFrameInfo &MFI = MF.getFrameInfo();
3910     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3911     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3912 
3913     // Offset of the first variable argument from stack pointer, and size of
3914     // the vararg save area. For now, the varargs save area is either zero or
3915     // large enough to hold a0-a7.
3916     int VaArgOffset, VarArgsSaveSize;
3917 
3918     // If all registers are allocated, then all varargs must be passed on the
3919     // stack and we don't need to save any argregs.
3920     if (ArgRegs.size() == Idx) {
3921       VaArgOffset = CCInfo.getStackSize();
3922       VarArgsSaveSize = 0;
3923     } else {
3924       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3925       VaArgOffset = -VarArgsSaveSize;
3926     }
3927 
3928     // Record the frame index of the first variable argument,
3929     // which is needed by VASTART.
3930     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3931     LoongArchFI->setVarArgsFrameIndex(FI);
3932 
3933     // If saving an odd number of registers then create an extra stack slot to
3934     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3935     // offsets to even-numbered registers remain 2*GRLen-aligned.
3936     if (Idx % 2) {
3937       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3938                             true);
3939       VarArgsSaveSize += GRLenInBytes;
3940     }
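    // Worked example: on LA64 with a0-a2 taken by fixed arguments (Idx == 3),
    // a3-a7 are saved at offsets -40..-8 below the incoming stack pointer, an
    // extra padding slot is created at -48 for 2*GRLen alignment, and
    // VarArgsSaveSize ends up as 48.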
3941 
3942     // Copy the integer registers that may have been used for passing varargs
3943     // to the vararg save area.
3944     for (unsigned I = Idx; I < ArgRegs.size();
3945          ++I, VaArgOffset += GRLenInBytes) {
3946       const Register Reg = RegInfo.createVirtualRegister(RC);
3947       RegInfo.addLiveIn(ArgRegs[I], Reg);
3948       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3949       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3950       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3951       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3952                                    MachinePointerInfo::getFixedStack(MF, FI));
3953       cast<StoreSDNode>(Store.getNode())
3954           ->getMemOperand()
3955           ->setValue((Value *)nullptr);
3956       OutChains.push_back(Store);
3957     }
3958     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3959   }
3960 
3961   // All stores are grouped in one node to allow the matching between
3962   // the size of Ins and InVals. This only happens for vararg functions.
3963   if (!OutChains.empty()) {
3964     OutChains.push_back(Chain);
3965     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3966   }
3967 
3968   return Chain;
3969 }
3970 
3971 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3972   return CI->isTailCall();
3973 }
3974 
3975 // Check if the return value is used as only a return value, as otherwise
3976 // we can't perform a tail-call.
3977 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3978                                                  SDValue &Chain) const {
3979   if (N->getNumValues() != 1)
3980     return false;
3981   if (!N->hasNUsesOfValue(1, 0))
3982     return false;
3983 
3984   SDNode *Copy = *N->use_begin();
3985   if (Copy->getOpcode() != ISD::CopyToReg)
3986     return false;
3987 
3988   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3989   // isn't safe to perform a tail call.
3990   if (Copy->getGluedNode())
3991     return false;
3992 
3993   // The copy must be used by a LoongArchISD::RET, and nothing else.
3994   bool HasRet = false;
3995   for (SDNode *Node : Copy->uses()) {
3996     if (Node->getOpcode() != LoongArchISD::RET)
3997       return false;
3998     HasRet = true;
3999   }
4000 
4001   if (!HasRet)
4002     return false;
4003 
4004   Chain = Copy->getOperand(0);
4005   return true;
4006 }
4007 
4008 // Check whether the call is eligible for tail call optimization.
4009 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4010     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4011     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4012 
4013   auto CalleeCC = CLI.CallConv;
4014   auto &Outs = CLI.Outs;
4015   auto &Caller = MF.getFunction();
4016   auto CallerCC = Caller.getCallingConv();
4017 
4018   // Do not tail call opt if the stack is used to pass parameters.
4019   if (CCInfo.getStackSize() != 0)
4020     return false;
4021 
4022   // Do not tail call opt if any parameters need to be passed indirectly.
4023   for (auto &VA : ArgLocs)
4024     if (VA.getLocInfo() == CCValAssign::Indirect)
4025       return false;
4026 
4027   // Do not tail call opt if either caller or callee uses struct return
4028   // semantics.
4029   auto IsCallerStructRet = Caller.hasStructRetAttr();
4030   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4031   if (IsCallerStructRet || IsCalleeStructRet)
4032     return false;
4033 
4034   // Do not tail call opt if either the callee or caller has a byval argument.
4035   for (auto &Arg : Outs)
4036     if (Arg.Flags.isByVal())
4037       return false;
4038 
4039   // The callee has to preserve all registers the caller needs to preserve.
4040   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4041   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4042   if (CalleeCC != CallerCC) {
4043     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4044     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4045       return false;
4046   }
4047   return true;
4048 }
4049 
4050 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4051   return DAG.getDataLayout().getPrefTypeAlign(
4052       VT.getTypeForEVT(*DAG.getContext()));
4053 }
4054 
4055 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4056 // and output parameter nodes.
4057 SDValue
4058 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4059                                    SmallVectorImpl<SDValue> &InVals) const {
4060   SelectionDAG &DAG = CLI.DAG;
4061   SDLoc &DL = CLI.DL;
4062   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4063   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4064   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4065   SDValue Chain = CLI.Chain;
4066   SDValue Callee = CLI.Callee;
4067   CallingConv::ID CallConv = CLI.CallConv;
4068   bool IsVarArg = CLI.IsVarArg;
4069   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4070   MVT GRLenVT = Subtarget.getGRLenVT();
4071   bool &IsTailCall = CLI.IsTailCall;
4072 
4073   MachineFunction &MF = DAG.getMachineFunction();
4074 
4075   // Analyze the operands of the call, assigning locations to each operand.
4076   SmallVector<CCValAssign> ArgLocs;
4077   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4078 
4079   if (CallConv == CallingConv::GHC)
4080     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4081   else
4082     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4083 
4084   // Check if it's really possible to do a tail call.
4085   if (IsTailCall)
4086     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4087 
4088   if (IsTailCall)
4089     ++NumTailCalls;
4090   else if (CLI.CB && CLI.CB->isMustTailCall())
4091     report_fatal_error("failed to perform tail call elimination on a call "
4092                        "site marked musttail");
4093 
4094   // Get a count of how many bytes are to be pushed on the stack.
4095   unsigned NumBytes = ArgCCInfo.getStackSize();
4096 
4097   // Create local copies for byval args.
4098   SmallVector<SDValue> ByValArgs;
4099   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4100     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4101     if (!Flags.isByVal())
4102       continue;
4103 
4104     SDValue Arg = OutVals[i];
4105     unsigned Size = Flags.getByValSize();
4106     Align Alignment = Flags.getNonZeroByValAlign();
4107 
4108     int FI =
4109         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4110     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4111     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4112 
4113     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4114                           /*IsVolatile=*/false,
4115                           /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4116                           MachinePointerInfo(), MachinePointerInfo());
4117     ByValArgs.push_back(FIPtr);
4118   }
4119 
4120   if (!IsTailCall)
4121     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4122 
4123   // Copy argument values to their designated locations.
4124   SmallVector<std::pair<Register, SDValue>> RegsToPass;
4125   SmallVector<SDValue> MemOpChains;
4126   SDValue StackPtr;
4127   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4128     CCValAssign &VA = ArgLocs[i];
4129     SDValue ArgValue = OutVals[i];
4130     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4131 
4132     // Promote the value if needed.
4133     // For now, only handle fully promoted and indirect arguments.
4134     if (VA.getLocInfo() == CCValAssign::Indirect) {
4135       // Store the argument in a stack slot and pass its address.
4136       Align StackAlign =
4137           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4138                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
4139       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4140       // If the original argument was split and passed by reference, we need to
4141       // store the required parts of it here (and pass just one address).
4142       unsigned ArgIndex = Outs[i].OrigArgIndex;
4143       unsigned ArgPartOffset = Outs[i].PartOffset;
4144       assert(ArgPartOffset == 0);
4145       // Calculate the total size to store. We don't have access to what we're
4146       // actually storing other than performing the loop and collecting the
4147       // info.
4148       SmallVector<std::pair<SDValue, SDValue>> Parts;
4149       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4150         SDValue PartValue = OutVals[i + 1];
4151         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4152         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4153         EVT PartVT = PartValue.getValueType();
4154 
4155         StoredSize += PartVT.getStoreSize();
4156         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4157         Parts.push_back(std::make_pair(PartValue, Offset));
4158         ++i;
4159       }
4160       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4161       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4162       MemOpChains.push_back(
4163           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4164                        MachinePointerInfo::getFixedStack(MF, FI)));
4165       for (const auto &Part : Parts) {
4166         SDValue PartValue = Part.first;
4167         SDValue PartOffset = Part.second;
4168         SDValue Address =
4169             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4170         MemOpChains.push_back(
4171             DAG.getStore(Chain, DL, PartValue, Address,
4172                          MachinePointerInfo::getFixedStack(MF, FI)));
4173       }
4174       ArgValue = SpillSlot;
4175     } else {
4176       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4177     }
4178 
4179     // Use local copy if it is a byval arg.
4180     if (Flags.isByVal())
4181       ArgValue = ByValArgs[j++];
4182 
4183     if (VA.isRegLoc()) {
4184       // Queue up the argument copies and emit them at the end.
4185       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4186     } else {
4187       assert(VA.isMemLoc() && "Argument not register or memory");
4188       assert(!IsTailCall && "Tail call not allowed if stack is used "
4189                             "for passing parameters");
4190 
4191       // Work out the address of the stack slot.
4192       if (!StackPtr.getNode())
4193         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4194       SDValue Address =
4195           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4196                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4197 
4198       // Emit the store.
4199       MemOpChains.push_back(
4200           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4201     }
4202   }
4203 
4204   // Join the stores, which are independent of one another.
4205   if (!MemOpChains.empty())
4206     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4207 
4208   SDValue Glue;
4209 
4210   // Build a sequence of copy-to-reg nodes, chained and glued together.
4211   for (auto &Reg : RegsToPass) {
4212     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4213     Glue = Chain.getValue(1);
4214   }
4215 
4216   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4217   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4218   // split it and then direct call can be matched by PseudoCALL.
4219   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4220     const GlobalValue *GV = S->getGlobal();
4221     unsigned OpFlags =
4222         getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
4223             ? LoongArchII::MO_CALL
4224             : LoongArchII::MO_CALL_PLT;
4225     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4226   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4227     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
4228                            *MF.getFunction().getParent(), nullptr)
4229                            ? LoongArchII::MO_CALL
4230                            : LoongArchII::MO_CALL_PLT;
4231     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4232   }
4233 
4234   // The first call operand is the chain and the second is the target address.
4235   SmallVector<SDValue> Ops;
4236   Ops.push_back(Chain);
4237   Ops.push_back(Callee);
4238 
4239   // Add argument registers to the end of the list so that they are
4240   // known live into the call.
4241   for (auto &Reg : RegsToPass)
4242     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4243 
4244   if (!IsTailCall) {
4245     // Add a register mask operand representing the call-preserved registers.
4246     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4247     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4248     assert(Mask && "Missing call preserved mask for calling convention");
4249     Ops.push_back(DAG.getRegisterMask(Mask));
4250   }
4251 
4252   // Glue the call to the argument copies, if any.
4253   if (Glue.getNode())
4254     Ops.push_back(Glue);
4255 
4256   // Emit the call.
4257   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4258   unsigned Op;
4259   switch (DAG.getTarget().getCodeModel()) {
4260   default:
4261     report_fatal_error("Unsupported code model");
4262   case CodeModel::Small:
4263     Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4264     break;
4265   case CodeModel::Medium:
4266     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4267     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4268     break;
4269   case CodeModel::Large:
4270     assert(Subtarget.is64Bit() && "Large code model requires LA64");
4271     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4272     break;
4273   }
4274 
4275   if (IsTailCall) {
4276     MF.getFrameInfo().setHasTailCall();
4277     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4278     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4279     return Ret;
4280   }
4281 
4282   Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4283   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4284   Glue = Chain.getValue(1);
4285 
4286   // Mark the end of the call, which is glued to the call itself.
4287   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4288   Glue = Chain.getValue(1);
4289 
4290   // Assign locations to each value returned by this call.
4291   SmallVector<CCValAssign> RVLocs;
4292   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4293   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4294 
4295   // Copy all of the result registers out of their specified physreg.
4296   for (auto &VA : RVLocs) {
4297     // Copy the value out.
4298     SDValue RetValue =
4299         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4300     // Glue the RetValue to the end of the call sequence.
4301     Chain = RetValue.getValue(1);
4302     Glue = RetValue.getValue(2);
4303 
4304     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4305 
4306     InVals.push_back(RetValue);
4307   }
4308 
4309   return Chain;
4310 }
4311 
4312 bool LoongArchTargetLowering::CanLowerReturn(
4313     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4314     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4315   SmallVector<CCValAssign> RVLocs;
4316   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4317 
4318   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4319     LoongArchABI::ABI ABI =
4320         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4321     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4322                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4323                      nullptr))
4324       return false;
4325   }
4326   return true;
4327 }
4328 
4329 SDValue LoongArchTargetLowering::LowerReturn(
4330     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4331     const SmallVectorImpl<ISD::OutputArg> &Outs,
4332     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4333     SelectionDAG &DAG) const {
4334   // Stores the assignment of the return value to a location.
4335   SmallVector<CCValAssign> RVLocs;
4336 
4337   // Info about the registers and stack slot.
4338   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4339                  *DAG.getContext());
4340 
4341   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4342                     nullptr, CC_LoongArch);
4343   if (CallConv == CallingConv::GHC && !RVLocs.empty())
4344     report_fatal_error("GHC functions return void only");
4345   SDValue Glue;
4346   SmallVector<SDValue, 4> RetOps(1, Chain);
4347 
4348   // Copy the result values into the output registers.
4349   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4350     CCValAssign &VA = RVLocs[i];
4351     assert(VA.isRegLoc() && "Can only return in registers!");
4352 
4353     // Handle a 'normal' return.
4354     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4355     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4356 
4357     // Guarantee that all emitted copies are stuck together.
4358     Glue = Chain.getValue(1);
4359     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4360   }
4361 
4362   RetOps[0] = Chain; // Update chain.
4363 
4364   // Add the glue node if we have it.
4365   if (Glue.getNode())
4366     RetOps.push_back(Glue);
4367 
4368   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4369 }
4370 
4371 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4372                                            bool ForCodeSize) const {
4373   // TODO: Maybe need more checks here after vector extension is supported.
4374   if (VT == MVT::f32 && !Subtarget.hasBasicF())
4375     return false;
4376   if (VT == MVT::f64 && !Subtarget.hasBasicD())
4377     return false;
4378   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4379 }
4380 
4381 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4382   return true;
4383 }
4384 
4385 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4386   return true;
4387 }
4388 
4389 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4390     const Instruction *I) const {
4391   if (!Subtarget.is64Bit())
4392     return isa<LoadInst>(I) || isa<StoreInst>(I);
4393 
4394   if (isa<LoadInst>(I))
4395     return true;
4396 
4397   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4398   // require fences because we can use amswap_db.[w/d].
4399   if (isa<StoreInst>(I)) {
4400     unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4401     return (Size == 8 || Size == 16);
4402   }
4403 
4404   return false;
4405 }
4406 
4407 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4408                                                 LLVMContext &Context,
4409                                                 EVT VT) const {
4410   if (!VT.isVector())
4411     return getPointerTy(DL);
4412   return VT.changeVectorElementTypeToInteger();
4413 }
4414 
4415 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4416   // TODO: Support vectors.
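  // LoongArch has andn/orn, so (and x, (not y)) is a single instruction when y
  // is not a constant; if y is a constant, (not y) folds to another constant
  // and a plain AND with an immediate is preferable.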
4417   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4418 }
4419 
4420 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4421                                                  const CallInst &I,
4422                                                  MachineFunction &MF,
4423                                                  unsigned Intrinsic) const {
4424   switch (Intrinsic) {
4425   default:
4426     return false;
4427   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4428   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4429   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4430   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4431     Info.opc = ISD::INTRINSIC_W_CHAIN;
4432     Info.memVT = MVT::i32;
4433     Info.ptrVal = I.getArgOperand(0);
4434     Info.offset = 0;
4435     Info.align = Align(4);
4436     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4437                  MachineMemOperand::MOVolatile;
4438     return true;
4439     // TODO: Add more Intrinsics later.
4440   }
4441 }
4442 
4443 TargetLowering::AtomicExpansionKind
4444 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4445   // TODO: Add more AtomicRMWInst that needs to be extended.
4446 
4447   // Since floating-point operation requires a non-trivial set of data
4448   // operations, use CmpXChg to expand.
4449   if (AI->isFloatingPointOperation() ||
4450       AI->getOperation() == AtomicRMWInst::UIncWrap ||
4451       AI->getOperation() == AtomicRMWInst::UDecWrap)
4452     return AtomicExpansionKind::CmpXChg;
4453 
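  // i8/i16 operations are expanded by AtomicExpand into an LL/SC loop over the
  // containing aligned 32-bit word, driven by the masked intrinsics selected
  // in getIntrinsicForMaskedAtomicRMWBinOp below.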
4454   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4455   if (Size == 8 || Size == 16)
4456     return AtomicExpansionKind::MaskedIntrinsic;
4457   return AtomicExpansionKind::None;
4458 }
4459 
4460 static Intrinsic::ID
4461 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4462                                     AtomicRMWInst::BinOp BinOp) {
4463   if (GRLen == 64) {
4464     switch (BinOp) {
4465     default:
4466       llvm_unreachable("Unexpected AtomicRMW BinOp");
4467     case AtomicRMWInst::Xchg:
4468       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4469     case AtomicRMWInst::Add:
4470       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4471     case AtomicRMWInst::Sub:
4472       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4473     case AtomicRMWInst::Nand:
4474       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4475     case AtomicRMWInst::UMax:
4476       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4477     case AtomicRMWInst::UMin:
4478       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4479     case AtomicRMWInst::Max:
4480       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4481     case AtomicRMWInst::Min:
4482       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4483       // TODO: support other AtomicRMWInst.
4484     }
4485   }
4486 
4487   if (GRLen == 32) {
4488     switch (BinOp) {
4489     default:
4490       llvm_unreachable("Unexpected AtomicRMW BinOp");
4491     case AtomicRMWInst::Xchg:
4492       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4493     case AtomicRMWInst::Add:
4494       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4495     case AtomicRMWInst::Sub:
4496       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4497     case AtomicRMWInst::Nand:
4498       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4499       // TODO: support other AtomicRMWInst.
4500     }
4501   }
4502 
4503   llvm_unreachable("Unexpected GRLen\n");
4504 }
4505 
4506 TargetLowering::AtomicExpansionKind
4507 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4508     AtomicCmpXchgInst *CI) const {
4509   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4510   if (Size == 8 || Size == 16)
4511     return AtomicExpansionKind::MaskedIntrinsic;
4512   return AtomicExpansionKind::None;
4513 }
4514 
4515 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4516     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4517     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4518   AtomicOrdering FailOrd = CI->getFailureOrdering();
4519   Value *FailureOrdering =
4520       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4521 
4522   // TODO: Support cmpxchg on LA32.
4523   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4524   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4525   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4526   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4527   Type *Tys[] = {AlignedAddr->getType()};
4528   Function *MaskedCmpXchg =
4529       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4530   Value *Result = Builder.CreateCall(
4531       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4532   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4533   return Result;
4534 }
4535 
4536 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4537     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4538     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4539   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4540   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4541   // mask, as this produces better code than the LL/SC loop emitted by
4542   // int_loongarch_masked_atomicrmw_xchg.
4543   if (AI->getOperation() == AtomicRMWInst::Xchg &&
4544       isa<ConstantInt>(AI->getValOperand())) {
4545     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4546     if (CVal->isZero())
4547       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4548                                      Builder.CreateNot(Mask, "Inv_Mask"),
4549                                      AI->getAlign(), Ord);
4550     if (CVal->isMinusOne())
4551       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4552                                      AI->getAlign(), Ord);
4553   }
4554 
4555   unsigned GRLen = Subtarget.getGRLen();
4556   Value *Ordering =
4557       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4558   Type *Tys[] = {AlignedAddr->getType()};
4559   Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4560       AI->getModule(),
4561       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4562 
4563   if (GRLen == 64) {
4564     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4565     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4566     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4567   }
4568 
4569   Value *Result;
4570 
4571   // Must pass the shift amount needed to sign extend the loaded value prior
4572   // to performing a signed comparison for min/max. ShiftAmt is the number of
4573   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4574   // is the number of bits to left+right shift the value in order to
4575   // sign-extend.
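  // For example, an i8 value located at byte offset 1 of its aligned word on
  // LA64 has ShiftAmt == 8 and ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48.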
4576   if (AI->getOperation() == AtomicRMWInst::Min ||
4577       AI->getOperation() == AtomicRMWInst::Max) {
4578     const DataLayout &DL = AI->getModule()->getDataLayout();
4579     unsigned ValWidth =
4580         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4581     Value *SextShamt =
4582         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4583     Result = Builder.CreateCall(LlwOpScwLoop,
4584                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4585   } else {
4586     Result =
4587         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4588   }
4589 
4590   if (GRLen == 64)
4591     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4592   return Result;
4593 }
4594 
4595 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4596     const MachineFunction &MF, EVT VT) const {
4597   VT = VT.getScalarType();
4598 
4599   if (!VT.isSimple())
4600     return false;
4601 
4602   switch (VT.getSimpleVT().SimpleTy) {
4603   case MVT::f32:
4604   case MVT::f64:
4605     return true;
4606   default:
4607     break;
4608   }
4609 
4610   return false;
4611 }
4612 
4613 Register LoongArchTargetLowering::getExceptionPointerRegister(
4614     const Constant *PersonalityFn) const {
4615   return LoongArch::R4;
4616 }
4617 
4618 Register LoongArchTargetLowering::getExceptionSelectorRegister(
4619     const Constant *PersonalityFn) const {
4620   return LoongArch::R5;
4621 }
4622 
4623 //===----------------------------------------------------------------------===//
4624 //                           LoongArch Inline Assembly Support
4625 //===----------------------------------------------------------------------===//
4626 
4627 LoongArchTargetLowering::ConstraintType
4628 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4629   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4630   //
4631   // 'f':  A floating-point register (if available).
4632   // 'k':  A memory operand whose address is formed by a base register and
4633   //       (optionally scaled) index register.
4634   // 'l':  A signed 16-bit constant.
4635   // 'm':  A memory operand whose address is formed by a base register and
4636   //       offset that is suitable for use in instructions with the same
4637   //       addressing mode as st.w and ld.w.
4638   // 'I':  A signed 12-bit constant (for arithmetic instructions).
4639   // 'J':  Integer zero.
4640   // 'K':  An unsigned 12-bit constant (for logic instructions).
4641   // "ZB": An address that is held in a general-purpose register. The offset is
4642   //       zero.
4643   // "ZC": A memory operand whose address is formed by a base register and
4644   //       offset that is suitable for use in instructions with the same
4645   //       addressing mode as ll.w and sc.w.
4646   if (Constraint.size() == 1) {
4647     switch (Constraint[0]) {
4648     default:
4649       break;
4650     case 'f':
4651       return C_RegisterClass;
4652     case 'l':
4653     case 'I':
4654     case 'J':
4655     case 'K':
4656       return C_Immediate;
4657     case 'k':
4658       return C_Memory;
4659     }
4660   }
4661 
4662   if (Constraint == "ZC" || Constraint == "ZB")
4663     return C_Memory;
4664 
4665   // 'm' is handled here.
4666   return TargetLowering::getConstraintType(Constraint);
4667 }
4668 
4669 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4670     StringRef ConstraintCode) const {
4671   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4672       .Case("k", InlineAsm::ConstraintCode::k)
4673       .Case("ZB", InlineAsm::ConstraintCode::ZB)
4674       .Case("ZC", InlineAsm::ConstraintCode::ZC)
4675       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4676 }
4677 
4678 std::pair<unsigned, const TargetRegisterClass *>
4679 LoongArchTargetLowering::getRegForInlineAsmConstraint(
4680     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4681   // First, see if this is a constraint that directly corresponds to a LoongArch
4682   // register class.
4683   if (Constraint.size() == 1) {
4684     switch (Constraint[0]) {
4685     case 'r':
4686       // TODO: Support fixed vectors up to GRLen?
4687       if (VT.isVector())
4688         break;
4689       return std::make_pair(0U, &LoongArch::GPRRegClass);
4690     case 'f':
4691       if (Subtarget.hasBasicF() && VT == MVT::f32)
4692         return std::make_pair(0U, &LoongArch::FPR32RegClass);
4693       if (Subtarget.hasBasicD() && VT == MVT::f64)
4694         return std::make_pair(0U, &LoongArch::FPR64RegClass);
4695       if (Subtarget.hasExtLSX() &&
4696           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4697         return std::make_pair(0U, &LoongArch::LSX128RegClass);
4698       if (Subtarget.hasExtLASX() &&
4699           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4700         return std::make_pair(0U, &LoongArch::LASX256RegClass);
4701       break;
4702     default:
4703       break;
4704     }
4705   }
4706 
4707   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4708   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4709   // constraints while the official register name is prefixed with a '$'. So we
4710   // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
4711   // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4712   // case insensitive, so no need to convert the constraint to upper case here.
4713   //
4714   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4715   // decode the usage of register name aliases into their official names. And
4716   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4717   // official register names.
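  // For example, the constraint "{$f3}" is rewritten to "{f3}" before parsing;
  // when the D extension is available and the operand type is f64 (or
  // unspecified), it is then widened to F3_64 in FPR64RegClass, per the
  // comment below about matching the widest floating point register class.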
4718   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4719       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4720     bool IsFP = Constraint[2] == 'f';
4721     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4722     std::pair<unsigned, const TargetRegisterClass *> R;
4723     R = TargetLowering::getRegForInlineAsmConstraint(
4724         TRI, join_items("", Temp.first, Temp.second), VT);
4725     // Match those names to the widest floating point register type available.
4726     if (IsFP) {
4727       unsigned RegNo = R.first;
4728       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4729         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4730           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4731           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4732         }
4733       }
4734     }
4735     return R;
4736   }
4737 
4738   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4739 }
4740 
4741 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4742     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4743     SelectionDAG &DAG) const {
4744   // Currently only support length 1 constraints.
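  // For example, given `asm("addi.w %0, %1, %2" : "=r"(d) : "r"(s), "I"(imm))`,
  // the 'I' operand is only materialized when imm fits in a signed 12-bit
  // field; otherwise no operand is produced here and the inline asm operand is
  // rejected later.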
4745   if (Constraint.size() == 1) {
4746     switch (Constraint[0]) {
4747     case 'l':
4748       // Validate & create a 16-bit signed immediate operand.
4749       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4750         uint64_t CVal = C->getSExtValue();
4751         if (isInt<16>(CVal))
4752           Ops.push_back(
4753               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4754       }
4755       return;
4756     case 'I':
4757       // Validate & create a 12-bit signed immediate operand.
4758       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4759         uint64_t CVal = C->getSExtValue();
4760         if (isInt<12>(CVal))
4761           Ops.push_back(
4762               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4763       }
4764       return;
4765     case 'J':
4766       // Validate & create an integer zero operand.
4767       if (auto *C = dyn_cast<ConstantSDNode>(Op))
4768         if (C->getZExtValue() == 0)
4769           Ops.push_back(
4770               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4771       return;
4772     case 'K':
4773       // Validate & create a 12-bit unsigned immediate operand.
4774       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4775         uint64_t CVal = C->getZExtValue();
4776         if (isUInt<12>(CVal))
4777           Ops.push_back(
4778               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4779       }
4780       return;
4781     default:
4782       break;
4783     }
4784   }
4785   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4786 }
4787 
4788 #define GET_REGISTER_MATCHER
4789 #include "LoongArchGenAsmMatcher.inc"
4790 
4791 Register
4792 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4793                                            const MachineFunction &MF) const {
4794   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4795   std::string NewRegName = Name.second.str();
4796   Register Reg = MatchRegisterAltName(NewRegName);
4797   if (Reg == LoongArch::NoRegister)
4798     Reg = MatchRegisterName(NewRegName);
4799   if (Reg == LoongArch::NoRegister)
4800     report_fatal_error(
4801         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4802   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4803   if (!ReservedRegs.test(Reg))
4804     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4805                              StringRef(RegName) + "\"."));
4806   return Reg;
4807 }
4808 
4809 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4810                                                      EVT VT, SDValue C) const {
4811   // TODO: Support vectors.
4812   if (!VT.isScalarInteger())
4813     return false;
4814 
4815   // Omit the optimization if the data size exceeds GRLen.
4816   if (VT.getSizeInBits() > Subtarget.getGRLen())
4817     return false;
4818 
4819   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4820     const APInt &Imm = ConstNode->getAPIntValue();
4821     // Break MUL into (SLLI + ADD/SUB) or ALSL.
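    // For example, x * 9 == (x << 3) + x (a single alsl) and
    // x * 7 == (x << 3) - x (slli + sub).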
4822     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4823         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4824       return true;
4825     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
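    // For example, x * 10: t = x << 1, then result = (x << 3) + t.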
4826     if (ConstNode->hasOneUse() &&
4827         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4828          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4829       return true;
4830     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4831     // in which the immediate has two set bits. Or break (MUL x, imm)
4832     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4833     // equals (1 << s0) - (1 << s1).
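    // For example, x * 4160 == (x << 12) + (x << 6) and
    // x * 8064 == (x << 13) - (x << 7).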
4834     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4835       unsigned Shifts = Imm.countr_zero();
4836       // Reject immediates which can be composed via a single LU12I.W.
4837       if (Shifts >= 12)
4838         return false;
4839       // Reject multiplications that can be optimized to
4840       // (SLLI (ALSL x, x, 1/2/3/4), s).
4841       APInt ImmPop = Imm.ashr(Shifts);
4842       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4843         return false;
4844       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4845       // since it needs one more instruction than the other 3 cases.
4846       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4847       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4848           (ImmSmall - Imm).isPowerOf2())
4849         return true;
4850     }
4851   }
4852 
4853   return false;
4854 }
4855 
4856 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4857                                                     const AddrMode &AM,
4858                                                     Type *Ty, unsigned AS,
4859                                                     Instruction *I) const {
4860   // LoongArch has four basic addressing modes:
4861   //  1. reg
4862   //  2. reg + 12-bit signed offset
4863   //  3. reg + 14-bit signed offset left-shifted by 2
4864   //  4. reg1 + reg2
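  // For example (assuming the usual LoongArch memory instructions), mode 2
  // corresponds to `ld.w rd, rj, si12`, mode 3 to `ldptr.w rd, rj, si14 << 2`,
  // and mode 4 to `ldx.w rd, rj, rk`.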
4865   // TODO: Add more checks after supporting the vector extension.
4866 
4867   // No global is ever allowed as a base.
4868   if (AM.BaseGV)
4869     return false;
4870 
4871   // Require a 12-bit signed offset or a 14-bit offset left-shifted by 2.
4872   if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
4873     return false;
4874 
4875   switch (AM.Scale) {
4876   case 0:
4877     // "i" is not allowed.
4878     if (!AM.HasBaseReg)
4879       return false;
4880     // Otherwise we have "r+i".
4881     break;
4882   case 1:
4883     // "r+r+i" is not allowed.
4884     if (AM.HasBaseReg && AM.BaseOffs != 0)
4885       return false;
4886     // Otherwise we have "r+r" or "r+i".
4887     break;
4888   case 2:
4889     // "2*r+r" or "2*r+i" is not allowed.
4890     if (AM.HasBaseReg || AM.BaseOffs)
4891       return false;
4892     // Otherwise we have "r+r".
4893     break;
4894   default:
4895     return false;
4896   }
4897 
4898   return true;
4899 }
4900 
4901 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4902   return isInt<12>(Imm);
4903 }
4904 
4905 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4906   return isInt<12>(Imm);
4907 }
4908 
4909 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4910   // Zexts are free if they can be combined with a load.
4911   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4912   // poorly with type legalization of compares preferring sext.
4913   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4914     EVT MemVT = LD->getMemoryVT();
4915     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4916         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4917          LD->getExtensionType() == ISD::ZEXTLOAD))
4918       return true;
4919   }
4920 
4921   return TargetLowering::isZExtFree(Val, VT2);
4922 }
4923 
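// On LA64, 32-bit results are normally kept sign-extended in 64-bit registers
// (the *.w instructions sign-extend), so an i32 -> i64 sext is free, whereas a
// zext generally costs an extra instruction such as bstrpick.d.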
4924 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
4925   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4926 }
4927 
4928 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4929   // TODO: Support vectors.
4930   if (Y.getValueType().isVector())
4931     return false;
4932 
4933   return !isa<ConstantSDNode>(Y);
4934 }
4935