xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/ISDOpcodes.h"
24 #include "llvm/CodeGen/RuntimeLibcalls.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/IntrinsicsLoongArch.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/KnownBits.h"
29 #include "llvm/Support/MathExtras.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "loongarch-isel-lowering"
34 
35 STATISTIC(NumTailCalls, "Number of tail calls");
36 
37 static cl::opt<bool> ZeroDivCheck(
38     "loongarch-check-zero-division", cl::Hidden,
39     cl::desc("Trap on integer division by zero."),
40     cl::init(false));
41 
42 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
43                                                  const LoongArchSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   MVT GRLenVT = Subtarget.getGRLenVT();
47   // Set up the register classes.
48   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
49   if (Subtarget.hasBasicF())
50     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
51   if (Subtarget.hasBasicD())
52     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
53 
54   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
55                    MVT::i1, Promote);
56 
57   // TODO: add necessary setOperationAction calls later.
58   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
59   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
60   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
61   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
62   setOperationAction(ISD::ROTL, GRLenVT, Expand);
63   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
64   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
65   setOperationAction(ISD::TRAP, MVT::Other, Legal);
66   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
67   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
68 
69   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
70                       ISD::JumpTable},
71                      GRLenVT, Custom);
72 
73   setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
74 
75   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 
77   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
78   if (Subtarget.is64Bit())
79     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
80 
81   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
82   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
83   setOperationAction(ISD::VASTART, MVT::Other, Custom);
84   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
85 
86   if (Subtarget.is64Bit()) {
87     setOperationAction(ISD::SHL, MVT::i32, Custom);
88     setOperationAction(ISD::SRA, MVT::i32, Custom);
89     setOperationAction(ISD::SRL, MVT::i32, Custom);
90     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
91     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
92     setOperationAction(ISD::ROTR, MVT::i32, Custom);
93     setOperationAction(ISD::ROTL, MVT::i32, Custom);
94     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
95     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
96     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
97     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
98     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
99     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
100     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
101     if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
102       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
103     if (Subtarget.hasBasicF())
104       setOperationAction(ISD::FRINT, MVT::f32, Legal);
105     if (Subtarget.hasBasicD())
106       setOperationAction(ISD::FRINT, MVT::f64, Legal);
107   }
108 
109   // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
110   // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
111   // and i32 could still be byte-swapped relatively cheaply.
112   setOperationAction(ISD::BSWAP, MVT::i16, Custom);
113   if (Subtarget.is64Bit()) {
114     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
115   }
116 
117   // Expand bitreverse.i16 with native-width bitrev and shift for now, before
118   // we get to know which of sll and revb.2h is faster.
119   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
120   if (Subtarget.is64Bit()) {
121     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
122     setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
123   } else {
124     setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
125     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
126     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
127     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
128     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
129     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
130   }
131 
132   static const ISD::CondCode FPCCToExpand[] = {
133       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
134       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
135 
136   if (Subtarget.hasBasicF()) {
137     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
138     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
139     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
140     setOperationAction(ISD::FMA, MVT::f32, Legal);
141     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
142     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
143     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
144     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
145     setOperationAction(ISD::FSIN, MVT::f32, Expand);
146     setOperationAction(ISD::FCOS, MVT::f32, Expand);
147     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
148     setOperationAction(ISD::FPOW, MVT::f32, Expand);
149     setOperationAction(ISD::FREM, MVT::f32, Expand);
150   }
151   if (Subtarget.hasBasicD()) {
152     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
153     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
154     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
155     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
156     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
157     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
158     setOperationAction(ISD::FMA, MVT::f64, Legal);
159     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
160     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
161     setOperationAction(ISD::FSIN, MVT::f64, Expand);
162     setOperationAction(ISD::FCOS, MVT::f64, Expand);
163     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
164     setOperationAction(ISD::FPOW, MVT::f64, Expand);
165     setOperationAction(ISD::FREM, MVT::f64, Expand);
166     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
167   }
168 
169   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
170 
171   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
172   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
173   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
174   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
175   if (!Subtarget.is64Bit())
176     setLibcallName(RTLIB::MUL_I128, nullptr);
177 
178   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
179   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
180   if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
181        !Subtarget.hasBasicD())) {
182     setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
183     setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
184   }
185 
186   // Compute derived properties from the register classes.
187   computeRegisterProperties(STI.getRegisterInfo());
188 
189   setStackPointerRegisterToSaveRestore(LoongArch::R3);
190 
191   setBooleanContents(ZeroOrOneBooleanContent);
192 
193   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
194 
195   setMinCmpXchgSizeInBits(32);
196 
197   // Function alignments.
198   const Align FunctionAlignment(4);
199   setMinFunctionAlignment(FunctionAlignment);
200 
201   setTargetDAGCombine(ISD::AND);
202   setTargetDAGCombine(ISD::OR);
203   setTargetDAGCombine(ISD::SRL);
204 }
205 
206 bool LoongArchTargetLowering::isOffsetFoldingLegal(
207     const GlobalAddressSDNode *GA) const {
208   // In order to maximise the opportunity for common subexpression elimination,
209   // keep a separate ADD node for the global address offset instead of folding
210   // it in the global address node. Later peephole optimisations may choose to
211   // fold it back in when profitable.
212   return false;
213 }
214 
215 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
216                                                 SelectionDAG &DAG) const {
217   switch (Op.getOpcode()) {
218   case ISD::EH_DWARF_CFA:
219     return lowerEH_DWARF_CFA(Op, DAG);
220   case ISD::GlobalAddress:
221     return lowerGlobalAddress(Op, DAG);
222   case ISD::GlobalTLSAddress:
223     return lowerGlobalTLSAddress(Op, DAG);
224   case ISD::INTRINSIC_WO_CHAIN:
225     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
226   case ISD::INTRINSIC_W_CHAIN:
227     return lowerINTRINSIC_W_CHAIN(Op, DAG);
228   case ISD::INTRINSIC_VOID:
229     return lowerINTRINSIC_VOID(Op, DAG);
230   case ISD::BlockAddress:
231     return lowerBlockAddress(Op, DAG);
232   case ISD::JumpTable:
233     return lowerJumpTable(Op, DAG);
234   case ISD::SHL_PARTS:
235     return lowerShiftLeftParts(Op, DAG);
236   case ISD::SRA_PARTS:
237     return lowerShiftRightParts(Op, DAG, true);
238   case ISD::SRL_PARTS:
239     return lowerShiftRightParts(Op, DAG, false);
240   case ISD::ConstantPool:
241     return lowerConstantPool(Op, DAG);
242   case ISD::FP_TO_SINT:
243     return lowerFP_TO_SINT(Op, DAG);
244   case ISD::BITCAST:
245     return lowerBITCAST(Op, DAG);
246   case ISD::UINT_TO_FP:
247     return lowerUINT_TO_FP(Op, DAG);
248   case ISD::SINT_TO_FP:
249     return lowerSINT_TO_FP(Op, DAG);
250   case ISD::VASTART:
251     return lowerVASTART(Op, DAG);
252   case ISD::FRAMEADDR:
253     return lowerFRAMEADDR(Op, DAG);
254   case ISD::RETURNADDR:
255     return lowerRETURNADDR(Op, DAG);
256   case ISD::WRITE_REGISTER:
257     return lowerWRITE_REGISTER(Op, DAG);
258   }
259   return SDValue();
260 }
261 
262 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
263                                                      SelectionDAG &DAG) const {
264 
265   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
266     DAG.getContext()->emitError(
267         "On LA64, only 64-bit registers can be written.");
268     return Op.getOperand(0);
269   }
270 
271   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
272     DAG.getContext()->emitError(
273         "On LA32, only 32-bit registers can be written.");
274     return Op.getOperand(0);
275   }
276 
277   return Op;
278 }
279 
280 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
281                                                 SelectionDAG &DAG) const {
282   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
283     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
284                                 "be a constant integer");
285     return SDValue();
286   }
287 
288   MachineFunction &MF = DAG.getMachineFunction();
289   MF.getFrameInfo().setFrameAddressIsTaken(true);
290   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
291   EVT VT = Op.getValueType();
292   SDLoc DL(Op);
293   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
294   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
295   int GRLenInBytes = Subtarget.getGRLen() / 8;
296 
297   while (Depth--) {
298     int Offset = -(GRLenInBytes * 2);
299     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
300                               DAG.getIntPtrConstant(Offset, DL));
301     FrameAddr =
302         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
303   }
304   return FrameAddr;
305 }
306 
307 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
308                                                  SelectionDAG &DAG) const {
309   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
310     return SDValue();
311 
312   // Currently only support lowering return address for current frame.
313   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
314     DAG.getContext()->emitError(
315         "return address can only be determined for the current frame");
316     return SDValue();
317   }
318 
319   MachineFunction &MF = DAG.getMachineFunction();
320   MF.getFrameInfo().setReturnAddressIsTaken(true);
321   MVT GRLenVT = Subtarget.getGRLenVT();
322 
323   // Return the value of the return address register, marking it an implicit
324   // live-in.
325   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
326                               getRegClassFor(GRLenVT));
327   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
328 }
329 
330 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
331                                                    SelectionDAG &DAG) const {
332   MachineFunction &MF = DAG.getMachineFunction();
333   auto Size = Subtarget.getGRLen() / 8;
334   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
335   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
336 }
337 
338 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
339                                               SelectionDAG &DAG) const {
340   MachineFunction &MF = DAG.getMachineFunction();
341   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
342 
343   SDLoc DL(Op);
344   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
345                                  getPointerTy(MF.getDataLayout()));
346 
347   // vastart just stores the address of the VarArgsFrameIndex slot into the
348   // memory location argument.
349   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
350   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
351                       MachinePointerInfo(SV));
352 }
353 
354 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
355                                                  SelectionDAG &DAG) const {
356   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
357          !Subtarget.hasBasicD() && "unexpected target features");
358 
359   SDLoc DL(Op);
360   SDValue Op0 = Op.getOperand(0);
361   if (Op0->getOpcode() == ISD::AND) {
362     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
363     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
364       return Op;
365   }
366 
367   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
368       Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
369       Op0.getConstantOperandVal(2) == UINT64_C(0))
370     return Op;
371 
372   if (Op0.getOpcode() == ISD::AssertZext &&
373       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
374     return Op;
375 
376   EVT OpVT = Op0.getValueType();
377   EVT RetVT = Op.getValueType();
378   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
379   MakeLibCallOptions CallOptions;
380   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
381   SDValue Chain = SDValue();
382   SDValue Result;
383   std::tie(Result, Chain) =
384       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
385   return Result;
386 }
387 
388 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
389                                                  SelectionDAG &DAG) const {
390   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
391          !Subtarget.hasBasicD() && "unexpected target features");
392 
393   SDLoc DL(Op);
394   SDValue Op0 = Op.getOperand(0);
395 
396   if ((Op0.getOpcode() == ISD::AssertSext ||
397        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
398       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
399     return Op;
400 
401   EVT OpVT = Op0.getValueType();
402   EVT RetVT = Op.getValueType();
403   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
404   MakeLibCallOptions CallOptions;
405   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
406   SDValue Chain = SDValue();
407   SDValue Result;
408   std::tie(Result, Chain) =
409       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
410   return Result;
411 }
412 
413 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
414                                               SelectionDAG &DAG) const {
415 
416   SDLoc DL(Op);
417   SDValue Op0 = Op.getOperand(0);
418 
419   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
420       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
421     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
422     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
423   }
424   return Op;
425 }
426 
427 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
428                                                  SelectionDAG &DAG) const {
429 
430   SDLoc DL(Op);
431 
432   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
433       !Subtarget.hasBasicD()) {
434     SDValue Dst =
435         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
436     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
437   }
438 
439   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
440   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
441   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
442 }
443 
444 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
445                              SelectionDAG &DAG, unsigned Flags) {
446   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
447 }
448 
449 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
450                              SelectionDAG &DAG, unsigned Flags) {
451   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
452                                    Flags);
453 }
454 
455 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
456                              SelectionDAG &DAG, unsigned Flags) {
457   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
458                                    N->getOffset(), Flags);
459 }
460 
461 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
462                              SelectionDAG &DAG, unsigned Flags) {
463   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
464 }
465 
466 template <class NodeTy>
467 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
468                                          bool IsLocal) const {
469   SDLoc DL(N);
470   EVT Ty = getPointerTy(DAG.getDataLayout());
471   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
472   // TODO: Check CodeModel.
473   if (IsLocal)
474     // This generates the pattern (PseudoLA_PCREL sym), which expands to
475     // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
476     return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
477                    0);
478 
479   // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
480   // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
481   return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
482 }
483 
484 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
485                                                    SelectionDAG &DAG) const {
486   return getAddr(cast<BlockAddressSDNode>(Op), DAG);
487 }
488 
489 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
490                                                 SelectionDAG &DAG) const {
491   return getAddr(cast<JumpTableSDNode>(Op), DAG);
492 }
493 
494 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
495                                                    SelectionDAG &DAG) const {
496   return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
497 }
498 
499 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
500                                                     SelectionDAG &DAG) const {
501   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
502   assert(N->getOffset() == 0 && "unexpected offset in global node");
503   return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
504 }
505 
506 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
507                                                   SelectionDAG &DAG,
508                                                   unsigned Opc) const {
509   SDLoc DL(N);
510   EVT Ty = getPointerTy(DAG.getDataLayout());
511   MVT GRLenVT = Subtarget.getGRLenVT();
512 
513   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
514   SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
515 
516   // Add the thread pointer.
517   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
518                      DAG.getRegister(LoongArch::R2, GRLenVT));
519 }
520 
521 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
522                                                    SelectionDAG &DAG,
523                                                    unsigned Opc) const {
524   SDLoc DL(N);
525   EVT Ty = getPointerTy(DAG.getDataLayout());
526   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
527 
528   // Use a PC-relative addressing mode to access the dynamic GOT address.
529   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
530   SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
531 
532   // Prepare argument list to generate call.
533   ArgListTy Args;
534   ArgListEntry Entry;
535   Entry.Node = Load;
536   Entry.Ty = CallTy;
537   Args.push_back(Entry);
538 
539   // Setup call to __tls_get_addr.
540   TargetLowering::CallLoweringInfo CLI(DAG);
541   CLI.setDebugLoc(DL)
542       .setChain(DAG.getEntryNode())
543       .setLibCallee(CallingConv::C, CallTy,
544                     DAG.getExternalSymbol("__tls_get_addr", Ty),
545                     std::move(Args));
546 
547   return LowerCallTo(CLI).first;
548 }
549 
550 SDValue
551 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
552                                                SelectionDAG &DAG) const {
553   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
554       CallingConv::GHC)
555     report_fatal_error("In GHC calling convention TLS is not supported");
556 
557   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
558   assert(N->getOffset() == 0 && "unexpected offset in global node");
559 
560   SDValue Addr;
561   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
562   case TLSModel::GeneralDynamic:
563     // In this model, application code calls the dynamic linker function
564     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
565     // runtime.
566     Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
567     break;
568   case TLSModel::LocalDynamic:
569     // Same as GeneralDynamic, except for assembly modifiers and relocation
570     // records.
571     Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
572     break;
573   case TLSModel::InitialExec:
574     // This model uses the GOT to resolve TLS offsets.
575     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
576     break;
577   case TLSModel::LocalExec:
578     // This model is used when static linking as the TLS offsets are resolved
579     // during program linking.
580     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
581     break;
582   }
583 
584   return Addr;
585 }
586 
587 SDValue
588 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
589                                                  SelectionDAG &DAG) const {
590   switch (Op.getConstantOperandVal(0)) {
591   default:
592     return SDValue(); // Don't custom lower most intrinsics.
593   case Intrinsic::thread_pointer: {
594     EVT PtrVT = getPointerTy(DAG.getDataLayout());
595     return DAG.getRegister(LoongArch::R2, PtrVT);
596   }
597   }
598 }
599 
600 // Helper function that emits error message for intrinsics with chain.
601 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
602                                                   StringRef ErrorMsg,
603                                                   SelectionDAG &DAG) {
604 
605   DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
606                               ErrorMsg);
607   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
608                             SDLoc(Op));
609 }
610 
611 SDValue
612 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
613                                                 SelectionDAG &DAG) const {
614   SDLoc DL(Op);
615   MVT GRLenVT = Subtarget.getGRLenVT();
616   SDValue Op0 = Op.getOperand(0);
617   std::string Name = Op->getOperationName(0);
618   const StringRef ErrorMsgOOR = "out of range";
619 
620   switch (Op.getConstantOperandVal(1)) {
621   default:
622     return Op;
623   case Intrinsic::loongarch_crc_w_b_w:
624   case Intrinsic::loongarch_crc_w_h_w:
625   case Intrinsic::loongarch_crc_w_w_w:
626   case Intrinsic::loongarch_crc_w_d_w:
627   case Intrinsic::loongarch_crcc_w_b_w:
628   case Intrinsic::loongarch_crcc_w_h_w:
629   case Intrinsic::loongarch_crcc_w_w_w:
630   case Intrinsic::loongarch_crcc_w_d_w: {
631     std::string Name = Op->getOperationName(0);
632     DAG.getContext()->emitError(Name + " requires target: loongarch64");
633     return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
634   }
635   case Intrinsic::loongarch_csrrd_w:
636   case Intrinsic::loongarch_csrrd_d: {
637     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
638     if (!isUInt<14>(Imm))
639       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
640     return DAG.getMergeValues(
641         {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
642                      DAG.getConstant(Imm, DL, GRLenVT)),
643          Op0},
644         DL);
645   }
646   case Intrinsic::loongarch_csrwr_w:
647   case Intrinsic::loongarch_csrwr_d: {
648     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
649     if (!isUInt<14>(Imm))
650       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
651     return DAG.getMergeValues(
652         {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
653                      DAG.getConstant(Imm, DL, GRLenVT)),
654          Op0},
655         DL);
656   }
657   case Intrinsic::loongarch_csrxchg_w:
658   case Intrinsic::loongarch_csrxchg_d: {
659     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
660     if (!isUInt<14>(Imm))
661       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
662     return DAG.getMergeValues(
663         {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
664                      Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
665          Op0},
666         DL);
667   }
668   case Intrinsic::loongarch_iocsrrd_d: {
669     if (Subtarget.is64Bit())
670       return DAG.getMergeValues(
671           {DAG.getNode(
672                LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
673                DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
674            Op0},
675           DL);
676     else {
677       DAG.getContext()->emitError(
678           "llvm.loongarch.crc.w.d.w requires target: loongarch64");
679       return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
680     }
681   }
682 #define IOCSRRD_CASE(NAME, NODE)                                               \
683   case Intrinsic::loongarch_##NAME: {                                          \
684     return DAG.getMergeValues(                                                 \
685         {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)),  \
686          Op0},                                                                 \
687         DL);                                                                   \
688   }
689     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
690     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
691     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
692 #undef IOCSRRD_CASE
693   case Intrinsic::loongarch_cpucfg: {
694     return DAG.getMergeValues(
695         {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
696          Op0},
697         DL);
698   }
699   case Intrinsic::loongarch_lddir_d: {
700     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
701     if (!isUInt<8>(Imm)) {
702       DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
703                                   "' out of range");
704       return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
705     }
706 
707     return Op;
708   }
709   case Intrinsic::loongarch_movfcsr2gr: {
710     if (!Subtarget.hasBasicF()) {
711       DAG.getContext()->emitError(
712           "llvm.loongarch.movfcsr2gr expects basic f target feature");
713       return DAG.getMergeValues(
714           {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
715     }
716     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
717     if (!isUInt<2>(Imm)) {
718       DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
719                                   "' " + ErrorMsgOOR);
720       return DAG.getMergeValues(
721           {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
722     }
723     return DAG.getMergeValues(
724         {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
725                      DAG.getConstant(Imm, DL, GRLenVT)),
726          Op.getOperand(0)},
727         DL);
728   }
729   }
730 }
731 
732 // Helper function that emits error message for intrinsics with void return
733 // value.
734 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
735                                          SelectionDAG &DAG) {
736 
737   DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
738                               ErrorMsg);
739   return Op.getOperand(0);
740 }
741 
742 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
743                                                      SelectionDAG &DAG) const {
744   SDLoc DL(Op);
745   MVT GRLenVT = Subtarget.getGRLenVT();
746   SDValue Op0 = Op.getOperand(0);
747   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
748   SDValue Op2 = Op.getOperand(2);
749   const StringRef ErrorMsgOOR = "out of range";
750 
751   switch (IntrinsicEnum) {
752   default:
753     // TODO: Add more Intrinsics.
754     return SDValue();
755   case Intrinsic::loongarch_cacop_d:
756   case Intrinsic::loongarch_cacop_w: {
757     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
758       DAG.getContext()->emitError(
759           "llvm.loongarch.cacop.d requires target: loongarch64");
760       return Op.getOperand(0);
761     }
762     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
763       DAG.getContext()->emitError(
764           "llvm.loongarch.cacop.w requires target: loongarch32");
765       return Op.getOperand(0);
766     }
767     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
768     unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
769     if (!isUInt<5>(Imm1))
770       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
771     SDValue Op4 = Op.getOperand(4);
772     int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
773     if (!isInt<12>(Imm2))
774       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
775 
776     return Op;
777   }
778 
779   case Intrinsic::loongarch_dbar: {
780     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
781     if (!isUInt<15>(Imm))
782       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
783 
784     return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
785                        DAG.getConstant(Imm, DL, GRLenVT));
786   }
787   case Intrinsic::loongarch_ibar: {
788     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
789     if (!isUInt<15>(Imm))
790       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
791 
792     return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
793                        DAG.getConstant(Imm, DL, GRLenVT));
794   }
795   case Intrinsic::loongarch_break: {
796     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
797     if (!isUInt<15>(Imm))
798       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
799 
800     return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
801                        DAG.getConstant(Imm, DL, GRLenVT));
802   }
803   case Intrinsic::loongarch_movgr2fcsr: {
804     if (!Subtarget.hasBasicF()) {
805       DAG.getContext()->emitError(
806           "llvm.loongarch.movgr2fcsr expects basic f target feature");
807       return Op0;
808     }
809     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
810     if (!isUInt<2>(Imm))
811       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
812 
813     return DAG.getNode(
814         LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
815         DAG.getConstant(Imm, DL, GRLenVT),
816         DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
817   }
818   case Intrinsic::loongarch_syscall: {
819     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
820     if (!isUInt<15>(Imm))
821       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
822 
823     return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
824                        DAG.getConstant(Imm, DL, GRLenVT));
825   }
826 #define IOCSRWR_CASE(NAME, NODE)                                               \
827   case Intrinsic::loongarch_##NAME: {                                          \
828     SDValue Op3 = Op.getOperand(3);                                            \
829     if (Subtarget.is64Bit())                                                   \
830       return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0,              \
831                          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),      \
832                          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3));     \
833     else                                                                       \
834       return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3);   \
835   }
836     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
837     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
838     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
839 #undef IOCSRWR_CASE
840   case Intrinsic::loongarch_iocsrwr_d: {
841     if (Subtarget.is64Bit())
842       return DAG.getNode(
843           LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
844           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
845     else {
846       DAG.getContext()->emitError(
847           "llvm.loongarch.iocsrwr.d requires target: loongarch64");
848       return Op.getOperand(0);
849     }
850   }
851 #define ASRT_LE_GT_CASE(NAME)                                                  \
852   case Intrinsic::loongarch_##NAME: {                                          \
853     if (!Subtarget.is64Bit()) {                                                \
854       DAG.getContext()->emitError(Op->getOperationName(0) +                    \
855                                   " requires target: loongarch64");            \
856       return Op.getOperand(0);                                                 \
857     }                                                                          \
858     return Op;                                                                 \
859   }
860     ASRT_LE_GT_CASE(asrtle_d)
861     ASRT_LE_GT_CASE(asrtgt_d)
862 #undef ASRT_LE_GT_CASE
863   case Intrinsic::loongarch_ldpte_d: {
864     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
865     if (!isUInt<8>(Imm))
866       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
867     if (!Subtarget.is64Bit()) {
868       DAG.getContext()->emitError(Op->getOperationName(0) +
869                                   " requires target: loongarch64");
870       return Op.getOperand(0);
871     }
872     return Op;
873   }
874   }
875 }
876 
877 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
878                                                      SelectionDAG &DAG) const {
879   SDLoc DL(Op);
880   SDValue Lo = Op.getOperand(0);
881   SDValue Hi = Op.getOperand(1);
882   SDValue Shamt = Op.getOperand(2);
883   EVT VT = Lo.getValueType();
884 
885   // if Shamt-GRLen < 0: // Shamt < GRLen
886   //   Lo = Lo << Shamt
887   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
888   // else:
889   //   Lo = 0
890   //   Hi = Lo << (Shamt-GRLen)
891 
892   SDValue Zero = DAG.getConstant(0, DL, VT);
893   SDValue One = DAG.getConstant(1, DL, VT);
894   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
895   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
896   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
897   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
898 
899   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
900   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
901   SDValue ShiftRightLo =
902       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
903   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
904   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
905   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
906 
907   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
908 
909   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
910   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
911 
912   SDValue Parts[2] = {Lo, Hi};
913   return DAG.getMergeValues(Parts, DL);
914 }
915 
916 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
917                                                       SelectionDAG &DAG,
918                                                       bool IsSRA) const {
919   SDLoc DL(Op);
920   SDValue Lo = Op.getOperand(0);
921   SDValue Hi = Op.getOperand(1);
922   SDValue Shamt = Op.getOperand(2);
923   EVT VT = Lo.getValueType();
924 
925   // SRA expansion:
926   //   if Shamt-GRLen < 0: // Shamt < GRLen
927   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
928   //     Hi = Hi >>s Shamt
929   //   else:
930   //     Lo = Hi >>s (Shamt-GRLen);
931   //     Hi = Hi >>s (GRLen-1)
932   //
933   // SRL expansion:
934   //   if Shamt-GRLen < 0: // Shamt < GRLen
935   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
936   //     Hi = Hi >>u Shamt
937   //   else:
938   //     Lo = Hi >>u (Shamt-GRLen);
939   //     Hi = 0;
940 
941   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
942 
943   SDValue Zero = DAG.getConstant(0, DL, VT);
944   SDValue One = DAG.getConstant(1, DL, VT);
945   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
946   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
947   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
948   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
949 
950   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
951   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
952   SDValue ShiftLeftHi =
953       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
954   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
955   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
956   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
957   SDValue HiFalse =
958       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
959 
960   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
961 
962   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
963   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
964 
965   SDValue Parts[2] = {Lo, Hi};
966   return DAG.getMergeValues(Parts, DL);
967 }
968 
969 // Returns the opcode of the target-specific SDNode that implements the 32-bit
970 // form of the given Opcode.
971 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
972   switch (Opcode) {
973   default:
974     llvm_unreachable("Unexpected opcode");
975   case ISD::SHL:
976     return LoongArchISD::SLL_W;
977   case ISD::SRA:
978     return LoongArchISD::SRA_W;
979   case ISD::SRL:
980     return LoongArchISD::SRL_W;
981   case ISD::ROTR:
982     return LoongArchISD::ROTR_W;
983   case ISD::ROTL:
984     return LoongArchISD::ROTL_W;
985   case ISD::CTTZ:
986     return LoongArchISD::CTZ_W;
987   case ISD::CTLZ:
988     return LoongArchISD::CLZ_W;
989   }
990 }
991 
992 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
993 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
994 // otherwise be promoted to i64, making it difficult to select the
995 // SLL_W/.../*W later one because the fact the operation was originally of
996 // type i8/i16/i32 is lost.
997 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
998                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
999   SDLoc DL(N);
1000   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1001   SDValue NewOp0, NewRes;
1002 
1003   switch (NumOp) {
1004   default:
1005     llvm_unreachable("Unexpected NumOp");
1006   case 1: {
1007     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1008     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1009     break;
1010   }
1011   case 2: {
1012     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1013     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1014     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1015     break;
1016   }
1017     // TODO:Handle more NumOp.
1018   }
1019 
1020   // ReplaceNodeResults requires we maintain the same type for the return
1021   // value.
1022   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1023 }
1024 
1025 void LoongArchTargetLowering::ReplaceNodeResults(
1026     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1027   SDLoc DL(N);
1028   EVT VT = N->getValueType(0);
1029   switch (N->getOpcode()) {
1030   default:
1031     llvm_unreachable("Don't know how to legalize this operation");
1032   case ISD::SHL:
1033   case ISD::SRA:
1034   case ISD::SRL:
1035   case ISD::ROTR:
1036     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1037            "Unexpected custom legalisation");
1038     if (N->getOperand(1).getOpcode() != ISD::Constant) {
1039       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1040       break;
1041     }
1042     break;
1043   case ISD::ROTL:
1044     ConstantSDNode *CN;
1045     if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1046       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1047       break;
1048     }
1049     break;
1050   case ISD::FP_TO_SINT: {
1051     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1052            "Unexpected custom legalisation");
1053     SDValue Src = N->getOperand(0);
1054     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1055     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1056         TargetLowering::TypeSoftenFloat) {
1057       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1058       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1059       return;
1060     }
1061     // If the FP type needs to be softened, emit a library call using the 'si'
1062     // version. If we left it to default legalization we'd end up with 'di'.
1063     RTLIB::Libcall LC;
1064     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1065     MakeLibCallOptions CallOptions;
1066     EVT OpVT = Src.getValueType();
1067     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1068     SDValue Chain = SDValue();
1069     SDValue Result;
1070     std::tie(Result, Chain) =
1071         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1072     Results.push_back(Result);
1073     break;
1074   }
1075   case ISD::BITCAST: {
1076     SDValue Src = N->getOperand(0);
1077     EVT SrcVT = Src.getValueType();
1078     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1079         Subtarget.hasBasicF()) {
1080       SDValue Dst =
1081           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1082       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1083     }
1084     break;
1085   }
1086   case ISD::FP_TO_UINT: {
1087     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1088            "Unexpected custom legalisation");
1089     auto &TLI = DAG.getTargetLoweringInfo();
1090     SDValue Tmp1, Tmp2;
1091     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1092     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1093     break;
1094   }
1095   case ISD::BSWAP: {
1096     SDValue Src = N->getOperand(0);
1097     assert((VT == MVT::i16 || VT == MVT::i32) &&
1098            "Unexpected custom legalization");
1099     MVT GRLenVT = Subtarget.getGRLenVT();
1100     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1101     SDValue Tmp;
1102     switch (VT.getSizeInBits()) {
1103     default:
1104       llvm_unreachable("Unexpected operand width");
1105     case 16:
1106       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1107       break;
1108     case 32:
1109       // Only LA64 will get to here due to the size mismatch between VT and
1110       // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
1111       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1112       break;
1113     }
1114     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1115     break;
1116   }
1117   case ISD::BITREVERSE: {
1118     SDValue Src = N->getOperand(0);
1119     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1120            "Unexpected custom legalization");
1121     MVT GRLenVT = Subtarget.getGRLenVT();
1122     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1123     SDValue Tmp;
1124     switch (VT.getSizeInBits()) {
1125     default:
1126       llvm_unreachable("Unexpected operand width");
1127     case 8:
1128       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1129       break;
1130     case 32:
1131       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1132       break;
1133     }
1134     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1135     break;
1136   }
1137   case ISD::CTLZ:
1138   case ISD::CTTZ: {
1139     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1140            "Unexpected custom legalisation");
1141     Results.push_back(customLegalizeToWOp(N, DAG, 1));
1142     break;
1143   }
1144   case ISD::INTRINSIC_W_CHAIN: {
1145     SDValue Op0 = N->getOperand(0);
1146     EVT VT = N->getValueType(0);
1147     uint64_t Op1 = N->getConstantOperandVal(1);
1148     MVT GRLenVT = Subtarget.getGRLenVT();
1149     if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
1150       if (!Subtarget.hasBasicF()) {
1151         DAG.getContext()->emitError(
1152             "llvm.loongarch.movfcsr2gr expects basic f target feature");
1153         Results.push_back(DAG.getMergeValues(
1154             {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
1155         Results.push_back(N->getOperand(0));
1156         return;
1157       }
1158       unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
1159       if (!isUInt<2>(Imm)) {
1160         DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
1161                                     "' " + "out of range");
1162         Results.push_back(DAG.getMergeValues(
1163             {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
1164         Results.push_back(N->getOperand(0));
1165         return;
1166       }
1167       Results.push_back(
1168           DAG.getNode(ISD::TRUNCATE, DL, VT,
1169                       DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
1170                                   DAG.getConstant(Imm, DL, GRLenVT))));
1171       Results.push_back(N->getOperand(0));
1172       return;
1173     }
1174     SDValue Op2 = N->getOperand(2);
1175     std::string Name = N->getOperationName(0);
1176 
1177     switch (Op1) {
1178     default:
1179       llvm_unreachable("Unexpected Intrinsic.");
1180 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
1181   case Intrinsic::loongarch_##NAME: {                                          \
1182     Results.push_back(DAG.getNode(                                             \
1183         ISD::TRUNCATE, DL, VT,                                                 \
1184         DAG.getNode(                                                           \
1185             LoongArchISD::NODE, DL, MVT::i64,                                  \
1186             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),                   \
1187             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)))));   \
1188     Results.push_back(N->getOperand(0));                                       \
1189     break;                                                                     \
1190   }
1191       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1192       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1193       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1194       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1195       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1196       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1197 #undef CRC_CASE_EXT_BINARYOP
1198 
1199 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
1200   case Intrinsic::loongarch_##NAME: {                                          \
1201     Results.push_back(                                                         \
1202         DAG.getNode(ISD::TRUNCATE, DL, VT,                                     \
1203                     DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2,         \
1204                                 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,     \
1205                                             N->getOperand(3)))));              \
1206     Results.push_back(N->getOperand(0));                                       \
1207     break;                                                                     \
1208   }
1209       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1210       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1211 #undef CRC_CASE_EXT_UNARYOP
1212 #define CSR_CASE(ID)                                                           \
1213   case Intrinsic::loongarch_##ID: {                                            \
1214     if (!Subtarget.is64Bit()) {                                                \
1215       DAG.getContext()->emitError(Name + " requires target: loongarch64");     \
1216       Results.push_back(DAG.getUNDEF(VT));                                     \
1217       Results.push_back(N->getOperand(0));                                     \
1218     }                                                                          \
1219     break;                                                                     \
1220   }
1221       CSR_CASE(csrrd_d);
1222       CSR_CASE(csrwr_d);
1223       CSR_CASE(csrxchg_d);
1224       CSR_CASE(iocsrrd_d);
1225 #undef CSR_CASE
1226     case Intrinsic::loongarch_csrrd_w: {
1227       unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1228       if (!isUInt<14>(Imm)) {
1229         DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1230         Results.push_back(DAG.getUNDEF(VT));
1231         Results.push_back(N->getOperand(0));
1232         break;
1233       }
1234 
1235       Results.push_back(
1236           DAG.getNode(ISD::TRUNCATE, DL, VT,
1237                       DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
1238                                   DAG.getConstant(Imm, DL, GRLenVT))));
1239       Results.push_back(N->getOperand(0));
1240       break;
1241     }
1242     case Intrinsic::loongarch_csrwr_w: {
1243       unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
1244       if (!isUInt<14>(Imm)) {
1245         DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1246         Results.push_back(DAG.getUNDEF(VT));
1247         Results.push_back(N->getOperand(0));
1248         break;
1249       }
1250 
1251       Results.push_back(DAG.getNode(
1252           ISD::TRUNCATE, DL, VT,
1253           DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
1254                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1255                       DAG.getConstant(Imm, DL, GRLenVT))));
1256       Results.push_back(N->getOperand(0));
1257       break;
1258     }
1259     case Intrinsic::loongarch_csrxchg_w: {
1260       unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
1261       if (!isUInt<14>(Imm)) {
1262         DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1263         Results.push_back(DAG.getUNDEF(VT));
1264         Results.push_back(N->getOperand(0));
1265         break;
1266       }
1267 
1268       Results.push_back(DAG.getNode(
1269           ISD::TRUNCATE, DL, VT,
1270           DAG.getNode(
1271               LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
1272               DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1273               DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
1274               DAG.getConstant(Imm, DL, GRLenVT))));
1275       Results.push_back(N->getOperand(0));
1276       break;
1277     }
1278 #define IOCSRRD_CASE(NAME, NODE)                                               \
1279   case Intrinsic::loongarch_##NAME: {                                          \
1280     Results.push_back(DAG.getNode(                                             \
1281         ISD::TRUNCATE, DL, N->getValueType(0),                                 \
1282         DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0,                     \
1283                     DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));        \
1284     Results.push_back(N->getOperand(0));                                       \
1285     break;                                                                     \
1286   }
1287       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1288       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1289       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1290 #undef IOCSRRD_CASE
1291     case Intrinsic::loongarch_cpucfg: {
1292       Results.push_back(DAG.getNode(
1293           ISD::TRUNCATE, DL, VT,
1294           DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
1295                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
1296       Results.push_back(Op0);
1297       break;
1298     }
1299     case Intrinsic::loongarch_lddir_d: {
1300       if (!Subtarget.is64Bit()) {
1301         DAG.getContext()->emitError(N->getOperationName(0) +
1302                                     " requires target: loongarch64");
1303         Results.push_back(DAG.getUNDEF(VT));
1304         Results.push_back(Op0);
1305         break;
1306       }
1307       break;
1308     }
1309     }
1310     break;
1311   }
1312   case ISD::READ_REGISTER: {
1313     if (Subtarget.is64Bit())
1314       DAG.getContext()->emitError(
1315           "On LA64, only 64-bit registers can be read.");
1316     else
1317       DAG.getContext()->emitError(
1318           "On LA32, only 32-bit registers can be read.");
1319     Results.push_back(DAG.getUNDEF(VT));
1320     Results.push_back(N->getOperand(0));
1321     break;
1322   }
1323   }
1324 }
1325 
1326 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
1327                                  TargetLowering::DAGCombinerInfo &DCI,
1328                                  const LoongArchSubtarget &Subtarget) {
1329   if (DCI.isBeforeLegalizeOps())
1330     return SDValue();
1331 
1332   SDValue FirstOperand = N->getOperand(0);
1333   SDValue SecondOperand = N->getOperand(1);
1334   unsigned FirstOperandOpc = FirstOperand.getOpcode();
1335   EVT ValTy = N->getValueType(0);
1336   SDLoc DL(N);
1337   uint64_t lsb, msb;
1338   unsigned SMIdx, SMLen;
1339   ConstantSDNode *CN;
1340   SDValue NewOperand;
1341   MVT GRLenVT = Subtarget.getGRLenVT();
1342 
1343   // Op's second operand must be a shifted mask.
1344   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
1345       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
1346     return SDValue();
1347 
1348   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
1349     // Pattern match BSTRPICK.
1350     //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
1351     //  => BSTRPICK $dst, $src, msb, lsb
1352     //  where msb = lsb + len - 1
1353 
1354     // The second operand of the shift must be an immediate.
1355     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
1356       return SDValue();
1357 
1358     lsb = CN->getZExtValue();
1359 
1360     // Return if the shifted mask does not start at bit 0 or the sum of its
1361     // length and lsb exceeds the word's size.
1362     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
1363       return SDValue();
1364 
1365     NewOperand = FirstOperand.getOperand(0);
1366   } else {
1367     // Pattern match BSTRPICK.
1368     //  $dst = and $src, (2**len- 1) , if len > 12
1369     //  => BSTRPICK $dst, $src, msb, lsb
1370     //  where lsb = 0 and msb = len - 1
1371 
1372     // If the mask is <= 0xfff, andi can be used instead.
1373     if (CN->getZExtValue() <= 0xfff)
1374       return SDValue();
1375 
1376     // Return if the mask doesn't start at position 0.
1377     if (SMIdx)
1378       return SDValue();
1379 
1380     lsb = 0;
1381     NewOperand = FirstOperand;
1382   }
1383   msb = lsb + SMLen - 1;
1384   return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
1385                      DAG.getConstant(msb, DL, GRLenVT),
1386                      DAG.getConstant(lsb, DL, GRLenVT));
1387 }
1388 
1389 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1390                                  TargetLowering::DAGCombinerInfo &DCI,
1391                                  const LoongArchSubtarget &Subtarget) {
1392   if (DCI.isBeforeLegalizeOps())
1393     return SDValue();
1394 
1395   // $dst = srl (and $src, Mask), Shamt
1396   // =>
1397   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
1398   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
1399   //
1400 
1401   SDValue FirstOperand = N->getOperand(0);
1402   ConstantSDNode *CN;
1403   EVT ValTy = N->getValueType(0);
1404   SDLoc DL(N);
1405   MVT GRLenVT = Subtarget.getGRLenVT();
1406   unsigned MaskIdx, MaskLen;
1407   uint64_t Shamt;
1408 
1409   // The first operand must be an AND and the second operand of the AND must be
1410   // a shifted mask.
1411   if (FirstOperand.getOpcode() != ISD::AND ||
1412       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
1413       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
1414     return SDValue();
1415 
1416   // The second operand (shift amount) must be an immediate.
1417   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
1418     return SDValue();
1419 
1420   Shamt = CN->getZExtValue();
1421   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
1422     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
1423                        FirstOperand->getOperand(0),
1424                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1425                        DAG.getConstant(Shamt, DL, GRLenVT));
1426 
1427   return SDValue();
1428 }
1429 
1430 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
1431                                 TargetLowering::DAGCombinerInfo &DCI,
1432                                 const LoongArchSubtarget &Subtarget) {
1433   MVT GRLenVT = Subtarget.getGRLenVT();
1434   EVT ValTy = N->getValueType(0);
1435   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
1436   ConstantSDNode *CN0, *CN1;
1437   SDLoc DL(N);
1438   unsigned ValBits = ValTy.getSizeInBits();
1439   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
1440   unsigned Shamt;
1441   bool SwapAndRetried = false;
1442 
1443   if (DCI.isBeforeLegalizeOps())
1444     return SDValue();
1445 
1446   if (ValBits != 32 && ValBits != 64)
1447     return SDValue();
1448 
1449 Retry:
1450   // 1st pattern to match BSTRINS:
1451   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
1452   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
1453   //  =>
1454   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
1455   if (N0.getOpcode() == ISD::AND &&
1456       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1457       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1458       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
1459       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1460       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1461       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
1462       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1463       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1464       (MaskIdx0 + MaskLen0 <= ValBits)) {
1465     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
1466     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1467                        N1.getOperand(0).getOperand(0),
1468                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1469                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1470   }
1471 
1472   // 2nd pattern to match BSTRINS:
1473   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
1474   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
1475   //  =>
1476   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
1477   if (N0.getOpcode() == ISD::AND &&
1478       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1479       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1480       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1481       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1482       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1483       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1484       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1485       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
1486       (MaskIdx0 + MaskLen0 <= ValBits)) {
1487     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
1488     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1489                        N1.getOperand(0).getOperand(0),
1490                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1491                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1492   }
1493 
1494   // 3rd pattern to match BSTRINS:
1495   //  R = or (and X, mask0), (and Y, mask1)
1496   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
1497   //  =>
1498   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
1499   //  where msb = lsb + size - 1
1500   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
1501       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1502       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1503       (MaskIdx0 + MaskLen0 <= 64) &&
1504       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
1505       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1506     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
1507     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1508                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
1509                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
1510                        DAG.getConstant(ValBits == 32
1511                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
1512                                            : (MaskIdx0 + MaskLen0 - 1),
1513                                        DL, GRLenVT),
1514                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1515   }
1516 
1517   // 4th pattern to match BSTRINS:
1518   //  R = or (and X, mask), (shl Y, shamt)
1519   //  where mask = (2**shamt - 1)
1520   //  =>
1521   //  R = BSTRINS X, Y, ValBits - 1, shamt
1522   //  where ValBits = 32 or 64
1523   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
1524       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1525       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
1526       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1527       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
1528       (MaskIdx0 + MaskLen0 <= ValBits)) {
1529     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
1530     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1531                        N1.getOperand(0),
1532                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
1533                        DAG.getConstant(Shamt, DL, GRLenVT));
1534   }
1535 
1536   // 5th pattern to match BSTRINS:
1537   //  R = or (and X, mask), const
1538   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
1539   //  =>
1540   //  R = BSTRINS X, (const >> lsb), msb, lsb
1541   //  where msb = lsb + size - 1
1542   if (N0.getOpcode() == ISD::AND &&
1543       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1544       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1545       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
1546       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1547     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
1548     return DAG.getNode(
1549         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1550         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
1551         DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1552         DAG.getConstant(MaskIdx0, DL, GRLenVT));
1553   }
1554 
1555   // 6th pattern.
1556   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
1557   // by the incoming bits are known to be zero.
1558   // =>
1559   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
1560   //
1561   // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
1562   // pattern is more common than the 1st. So we put the 1st before the 6th in
1563   // order to match as many nodes as possible.
1564   ConstantSDNode *CNMask, *CNShamt;
1565   unsigned MaskIdx, MaskLen;
1566   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1567       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1568       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1569       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1570       CNShamt->getZExtValue() + MaskLen <= ValBits) {
1571     Shamt = CNShamt->getZExtValue();
1572     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
1573     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1574       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
1575       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1576                          N1.getOperand(0).getOperand(0),
1577                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
1578                          DAG.getConstant(Shamt, DL, GRLenVT));
1579     }
1580   }
1581 
1582   // 7th pattern.
1583   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
1584   // overwritten by the incoming bits are known to be zero.
1585   // =>
1586   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
1587   //
1588   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
1589   // before the 7th in order to match as many nodes as possible.
1590   if (N1.getOpcode() == ISD::AND &&
1591       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1592       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1593       N1.getOperand(0).getOpcode() == ISD::SHL &&
1594       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1595       CNShamt->getZExtValue() == MaskIdx) {
1596     APInt ShMask(ValBits, CNMask->getZExtValue());
1597     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1598       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
1599       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1600                          N1.getOperand(0).getOperand(0),
1601                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1602                          DAG.getConstant(MaskIdx, DL, GRLenVT));
1603     }
1604   }
1605 
1606   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
1607   if (!SwapAndRetried) {
1608     std::swap(N0, N1);
1609     SwapAndRetried = true;
1610     goto Retry;
1611   }
1612 
1613   SwapAndRetried = false;
1614 Retry2:
1615   // 8th pattern.
1616   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
1617   // the incoming bits are known to be zero.
1618   // =>
1619   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
1620   //
1621   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
1622   // we put it here in order to match as many nodes as possible or generate less
1623   // instructions.
1624   if (N1.getOpcode() == ISD::AND &&
1625       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1626       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
1627     APInt ShMask(ValBits, CNMask->getZExtValue());
1628     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1629       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
1630       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1631                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
1632                                      N1->getOperand(0),
1633                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
1634                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1635                          DAG.getConstant(MaskIdx, DL, GRLenVT));
1636     }
1637   }
1638   // Swap N0/N1 and retry.
1639   if (!SwapAndRetried) {
1640     std::swap(N0, N1);
1641     SwapAndRetried = true;
1642     goto Retry2;
1643   }
1644 
1645   return SDValue();
1646 }
1647 
1648 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
1649 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
1650                                       TargetLowering::DAGCombinerInfo &DCI,
1651                                       const LoongArchSubtarget &Subtarget) {
1652   if (DCI.isBeforeLegalizeOps())
1653     return SDValue();
1654 
1655   SDValue Src = N->getOperand(0);
1656   if (Src.getOpcode() != LoongArchISD::REVB_2W)
1657     return SDValue();
1658 
1659   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
1660                      Src.getOperand(0));
1661 }
1662 
1663 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
1664                                                    DAGCombinerInfo &DCI) const {
1665   SelectionDAG &DAG = DCI.DAG;
1666   switch (N->getOpcode()) {
1667   default:
1668     break;
1669   case ISD::AND:
1670     return performANDCombine(N, DAG, DCI, Subtarget);
1671   case ISD::OR:
1672     return performORCombine(N, DAG, DCI, Subtarget);
1673   case ISD::SRL:
1674     return performSRLCombine(N, DAG, DCI, Subtarget);
1675   case LoongArchISD::BITREV_W:
1676     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
1677   }
1678   return SDValue();
1679 }
1680 
1681 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
1682                                               MachineBasicBlock *MBB) {
1683   if (!ZeroDivCheck)
1684     return MBB;
1685 
1686   // Build instructions:
1687   // MBB:
1688   //   div(or mod)   $dst, $dividend, $divisor
1689   //   bnez          $divisor, SinkMBB
1690   // BreakMBB:
1691   //   break         7 // BRK_DIVZERO
1692   // SinkMBB:
1693   //   fallthrough
1694   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1695   MachineFunction::iterator It = ++MBB->getIterator();
1696   MachineFunction *MF = MBB->getParent();
1697   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1698   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1699   MF->insert(It, BreakMBB);
1700   MF->insert(It, SinkMBB);
1701 
1702   // Transfer the remainder of MBB and its successor edges to SinkMBB.
1703   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
1704   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
1705 
1706   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
1707   DebugLoc DL = MI.getDebugLoc();
1708   MachineOperand &Divisor = MI.getOperand(2);
1709   Register DivisorReg = Divisor.getReg();
1710 
1711   // MBB:
1712   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
1713       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
1714       .addMBB(SinkMBB);
1715   MBB->addSuccessor(BreakMBB);
1716   MBB->addSuccessor(SinkMBB);
1717 
1718   // BreakMBB:
1719   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
1720   // definition of BRK_DIVZERO.
1721   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
1722   BreakMBB->addSuccessor(SinkMBB);
1723 
1724   // Clear Divisor's kill flag.
1725   Divisor.setIsKill(false);
1726 
1727   return SinkMBB;
1728 }
1729 
1730 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
1731     MachineInstr &MI, MachineBasicBlock *BB) const {
1732   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1733   DebugLoc DL = MI.getDebugLoc();
1734 
1735   switch (MI.getOpcode()) {
1736   default:
1737     llvm_unreachable("Unexpected instr type to insert");
1738   case LoongArch::DIV_W:
1739   case LoongArch::DIV_WU:
1740   case LoongArch::MOD_W:
1741   case LoongArch::MOD_WU:
1742   case LoongArch::DIV_D:
1743   case LoongArch::DIV_DU:
1744   case LoongArch::MOD_D:
1745   case LoongArch::MOD_DU:
1746     return insertDivByZeroTrap(MI, BB);
1747     break;
1748   case LoongArch::WRFCSR: {
1749     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
1750             LoongArch::FCSR0 + MI.getOperand(0).getImm())
1751         .addReg(MI.getOperand(1).getReg());
1752     MI.eraseFromParent();
1753     return BB;
1754   }
1755   case LoongArch::RDFCSR: {
1756     MachineInstr *ReadFCSR =
1757         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
1758                 MI.getOperand(0).getReg())
1759             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
1760     ReadFCSR->getOperand(1).setIsUndef();
1761     MI.eraseFromParent();
1762     return BB;
1763   }
1764   }
1765 }
1766 
1767 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
1768   switch ((LoongArchISD::NodeType)Opcode) {
1769   case LoongArchISD::FIRST_NUMBER:
1770     break;
1771 
1772 #define NODE_NAME_CASE(node)                                                   \
1773   case LoongArchISD::node:                                                     \
1774     return "LoongArchISD::" #node;
1775 
1776     // TODO: Add more target-dependent nodes later.
1777     NODE_NAME_CASE(CALL)
1778     NODE_NAME_CASE(RET)
1779     NODE_NAME_CASE(TAIL)
1780     NODE_NAME_CASE(SLL_W)
1781     NODE_NAME_CASE(SRA_W)
1782     NODE_NAME_CASE(SRL_W)
1783     NODE_NAME_CASE(BSTRINS)
1784     NODE_NAME_CASE(BSTRPICK)
1785     NODE_NAME_CASE(MOVGR2FR_W_LA64)
1786     NODE_NAME_CASE(MOVFR2GR_S_LA64)
1787     NODE_NAME_CASE(FTINT)
1788     NODE_NAME_CASE(REVB_2H)
1789     NODE_NAME_CASE(REVB_2W)
1790     NODE_NAME_CASE(BITREV_4B)
1791     NODE_NAME_CASE(BITREV_W)
1792     NODE_NAME_CASE(ROTR_W)
1793     NODE_NAME_CASE(ROTL_W)
1794     NODE_NAME_CASE(CLZ_W)
1795     NODE_NAME_CASE(CTZ_W)
1796     NODE_NAME_CASE(DBAR)
1797     NODE_NAME_CASE(IBAR)
1798     NODE_NAME_CASE(BREAK)
1799     NODE_NAME_CASE(SYSCALL)
1800     NODE_NAME_CASE(CRC_W_B_W)
1801     NODE_NAME_CASE(CRC_W_H_W)
1802     NODE_NAME_CASE(CRC_W_W_W)
1803     NODE_NAME_CASE(CRC_W_D_W)
1804     NODE_NAME_CASE(CRCC_W_B_W)
1805     NODE_NAME_CASE(CRCC_W_H_W)
1806     NODE_NAME_CASE(CRCC_W_W_W)
1807     NODE_NAME_CASE(CRCC_W_D_W)
1808     NODE_NAME_CASE(CSRRD)
1809     NODE_NAME_CASE(CSRWR)
1810     NODE_NAME_CASE(CSRXCHG)
1811     NODE_NAME_CASE(IOCSRRD_B)
1812     NODE_NAME_CASE(IOCSRRD_H)
1813     NODE_NAME_CASE(IOCSRRD_W)
1814     NODE_NAME_CASE(IOCSRRD_D)
1815     NODE_NAME_CASE(IOCSRWR_B)
1816     NODE_NAME_CASE(IOCSRWR_H)
1817     NODE_NAME_CASE(IOCSRWR_W)
1818     NODE_NAME_CASE(IOCSRWR_D)
1819     NODE_NAME_CASE(CPUCFG)
1820     NODE_NAME_CASE(MOVGR2FCSR)
1821     NODE_NAME_CASE(MOVFCSR2GR)
1822     NODE_NAME_CASE(CACOP_D)
1823     NODE_NAME_CASE(CACOP_W)
1824   }
1825 #undef NODE_NAME_CASE
1826   return nullptr;
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 //                     Calling Convention Implementation
1831 //===----------------------------------------------------------------------===//
1832 
1833 // Eight general-purpose registers a0-a7 used for passing integer arguments,
1834 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
1835 // fixed-point arguments, and floating-point arguments when no FPR is available
1836 // or with soft float ABI.
1837 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
1838                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
1839                              LoongArch::R10, LoongArch::R11};
1840 // Eight floating-point registers fa0-fa7 used for passing floating-point
1841 // arguments, and fa0-fa1 are also used to return values.
1842 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
1843                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
1844                                LoongArch::F6, LoongArch::F7};
1845 // FPR32 and FPR64 alias each other.
1846 const MCPhysReg ArgFPR64s[] = {
1847     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
1848     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
1849 
1850 // Pass a 2*GRLen argument that has been split into two GRLen values through
1851 // registers or the stack as necessary.
1852 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
1853                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
1854                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
1855                                      ISD::ArgFlagsTy ArgFlags2) {
1856   unsigned GRLenInBytes = GRLen / 8;
1857   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1858     // At least one half can be passed via register.
1859     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1860                                      VA1.getLocVT(), CCValAssign::Full));
1861   } else {
1862     // Both halves must be passed on the stack, with proper alignment.
1863     Align StackAlign =
1864         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1865     State.addLoc(
1866         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1867                             State.AllocateStack(GRLenInBytes, StackAlign),
1868                             VA1.getLocVT(), CCValAssign::Full));
1869     State.addLoc(CCValAssign::getMem(
1870         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1871         LocVT2, CCValAssign::Full));
1872     return false;
1873   }
1874   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1875     // The second half can also be passed via register.
1876     State.addLoc(
1877         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1878   } else {
1879     // The second half is passed via the stack, without additional alignment.
1880     State.addLoc(CCValAssign::getMem(
1881         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1882         LocVT2, CCValAssign::Full));
1883   }
1884   return false;
1885 }
1886 
1887 // Implements the LoongArch calling convention. Returns true upon failure.
1888 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
1889                          unsigned ValNo, MVT ValVT,
1890                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1891                          CCState &State, bool IsFixed, bool IsRet,
1892                          Type *OrigTy) {
1893   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1894   assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
1895   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
1896   MVT LocVT = ValVT;
1897 
1898   // Any return value split into more than two values can't be returned
1899   // directly.
1900   if (IsRet && ValNo > 1)
1901     return true;
1902 
1903   // If passing a variadic argument, or if no FPR is available.
1904   bool UseGPRForFloat = true;
1905 
1906   switch (ABI) {
1907   default:
1908     llvm_unreachable("Unexpected ABI");
1909   case LoongArchABI::ABI_ILP32S:
1910   case LoongArchABI::ABI_LP64S:
1911   case LoongArchABI::ABI_ILP32F:
1912   case LoongArchABI::ABI_LP64F:
1913     report_fatal_error("Unimplemented ABI");
1914     break;
1915   case LoongArchABI::ABI_ILP32D:
1916   case LoongArchABI::ABI_LP64D:
1917     UseGPRForFloat = !IsFixed;
1918     break;
1919   }
1920 
1921   // FPR32 and FPR64 alias each other.
1922   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
1923     UseGPRForFloat = true;
1924 
1925   if (UseGPRForFloat && ValVT == MVT::f32) {
1926     LocVT = GRLenVT;
1927     LocInfo = CCValAssign::BCvt;
1928   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
1929     LocVT = MVT::i64;
1930     LocInfo = CCValAssign::BCvt;
1931   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
1932     // TODO: Handle passing f64 on LA32 with D feature.
1933     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
1934   }
1935 
1936   // If this is a variadic argument, the LoongArch calling convention requires
1937   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
1938   // byte alignment. An aligned register should be used regardless of whether
1939   // the original argument was split during legalisation or not. The argument
1940   // will not be passed by registers if the original type is larger than
1941   // 2*GRLen, so the register alignment rule does not apply.
1942   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
1943   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
1944       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
1945     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1946     // Skip 'odd' register if necessary.
1947     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
1948       State.AllocateReg(ArgGPRs);
1949   }
1950 
1951   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1952   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1953       State.getPendingArgFlags();
1954 
1955   assert(PendingLocs.size() == PendingArgFlags.size() &&
1956          "PendingLocs and PendingArgFlags out of sync");
1957 
1958   // Split arguments might be passed indirectly, so keep track of the pending
1959   // values.
1960   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
1961     LocVT = GRLenVT;
1962     LocInfo = CCValAssign::Indirect;
1963     PendingLocs.push_back(
1964         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1965     PendingArgFlags.push_back(ArgFlags);
1966     if (!ArgFlags.isSplitEnd()) {
1967       return false;
1968     }
1969   }
1970 
1971   // If the split argument only had two elements, it should be passed directly
1972   // in registers or on the stack.
1973   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
1974       PendingLocs.size() <= 2) {
1975     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1976     // Apply the normal calling convention rules to the first half of the
1977     // split argument.
1978     CCValAssign VA = PendingLocs[0];
1979     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1980     PendingLocs.clear();
1981     PendingArgFlags.clear();
1982     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
1983                                     ArgFlags);
1984   }
1985 
1986   // Allocate to a register if possible, or else a stack slot.
1987   Register Reg;
1988   unsigned StoreSizeBytes = GRLen / 8;
1989   Align StackAlign = Align(GRLen / 8);
1990 
1991   if (ValVT == MVT::f32 && !UseGPRForFloat)
1992     Reg = State.AllocateReg(ArgFPR32s);
1993   else if (ValVT == MVT::f64 && !UseGPRForFloat)
1994     Reg = State.AllocateReg(ArgFPR64s);
1995   else
1996     Reg = State.AllocateReg(ArgGPRs);
1997 
1998   unsigned StackOffset =
1999       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
2000 
2001   // If we reach this point and PendingLocs is non-empty, we must be at the
2002   // end of a split argument that must be passed indirectly.
2003   if (!PendingLocs.empty()) {
2004     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
2005     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
2006     for (auto &It : PendingLocs) {
2007       if (Reg)
2008         It.convertToReg(Reg);
2009       else
2010         It.convertToMem(StackOffset);
2011       State.addLoc(It);
2012     }
2013     PendingLocs.clear();
2014     PendingArgFlags.clear();
2015     return false;
2016   }
2017   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
2018          "Expected an GRLenVT at this stage");
2019 
2020   if (Reg) {
2021     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2022     return false;
2023   }
2024 
2025   // When a floating-point value is passed on the stack, no bit-cast is needed.
2026   if (ValVT.isFloatingPoint()) {
2027     LocVT = ValVT;
2028     LocInfo = CCValAssign::Full;
2029   }
2030 
2031   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2032   return false;
2033 }
2034 
2035 void LoongArchTargetLowering::analyzeInputArgs(
2036     MachineFunction &MF, CCState &CCInfo,
2037     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
2038     LoongArchCCAssignFn Fn) const {
2039   FunctionType *FType = MF.getFunction().getFunctionType();
2040   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
2041     MVT ArgVT = Ins[i].VT;
2042     Type *ArgTy = nullptr;
2043     if (IsRet)
2044       ArgTy = FType->getReturnType();
2045     else if (Ins[i].isOrigArg())
2046       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2047     LoongArchABI::ABI ABI =
2048         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2049     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
2050            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
2051       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2052                         << EVT(ArgVT).getEVTString() << '\n');
2053       llvm_unreachable("");
2054     }
2055   }
2056 }
2057 
2058 void LoongArchTargetLowering::analyzeOutputArgs(
2059     MachineFunction &MF, CCState &CCInfo,
2060     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2061     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
2062   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2063     MVT ArgVT = Outs[i].VT;
2064     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2065     LoongArchABI::ABI ABI =
2066         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2067     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
2068            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
2069       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2070                         << EVT(ArgVT).getEVTString() << "\n");
2071       llvm_unreachable("");
2072     }
2073   }
2074 }
2075 
2076 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2077 // values.
2078 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2079                                    const CCValAssign &VA, const SDLoc &DL) {
2080   switch (VA.getLocInfo()) {
2081   default:
2082     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2083   case CCValAssign::Full:
2084   case CCValAssign::Indirect:
2085     break;
2086   case CCValAssign::BCvt:
2087     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2088       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
2089     else
2090       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2091     break;
2092   }
2093   return Val;
2094 }
2095 
2096 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2097                                 const CCValAssign &VA, const SDLoc &DL,
2098                                 const LoongArchTargetLowering &TLI) {
2099   MachineFunction &MF = DAG.getMachineFunction();
2100   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2101   EVT LocVT = VA.getLocVT();
2102   SDValue Val;
2103   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2104   Register VReg = RegInfo.createVirtualRegister(RC);
2105   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2106   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2107 
2108   return convertLocVTToValVT(DAG, Val, VA, DL);
2109 }
2110 
2111 // The caller is responsible for loading the full value if the argument is
2112 // passed with CCValAssign::Indirect.
2113 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2114                                 const CCValAssign &VA, const SDLoc &DL) {
2115   MachineFunction &MF = DAG.getMachineFunction();
2116   MachineFrameInfo &MFI = MF.getFrameInfo();
2117   EVT ValVT = VA.getValVT();
2118   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
2119                                  /*IsImmutable=*/true);
2120   SDValue FIN = DAG.getFrameIndex(
2121       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
2122 
2123   ISD::LoadExtType ExtType;
2124   switch (VA.getLocInfo()) {
2125   default:
2126     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2127   case CCValAssign::Full:
2128   case CCValAssign::Indirect:
2129   case CCValAssign::BCvt:
2130     ExtType = ISD::NON_EXTLOAD;
2131     break;
2132   }
2133   return DAG.getExtLoad(
2134       ExtType, DL, VA.getLocVT(), Chain, FIN,
2135       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2136 }
2137 
2138 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2139                                    const CCValAssign &VA, const SDLoc &DL) {
2140   EVT LocVT = VA.getLocVT();
2141 
2142   switch (VA.getLocInfo()) {
2143   default:
2144     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2145   case CCValAssign::Full:
2146     break;
2147   case CCValAssign::BCvt:
2148     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2149       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
2150     else
2151       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2152     break;
2153   }
2154   return Val;
2155 }
2156 
2157 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2158                             CCValAssign::LocInfo LocInfo,
2159                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
2160   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2161     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
2162     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
2163     static const MCPhysReg GPRList[] = {
2164         LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27,
2165         LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31};
2166     if (unsigned Reg = State.AllocateReg(GPRList)) {
2167       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2168       return false;
2169     }
2170   }
2171 
2172   if (LocVT == MVT::f32) {
2173     // Pass in STG registers: F1, F2, F3, F4
2174     //                        fs0,fs1,fs2,fs3
2175     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
2176                                           LoongArch::F26, LoongArch::F27};
2177     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2178       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2179       return false;
2180     }
2181   }
2182 
2183   if (LocVT == MVT::f64) {
2184     // Pass in STG registers: D1, D2, D3, D4
2185     //                        fs4,fs5,fs6,fs7
2186     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
2187                                           LoongArch::F30_64, LoongArch::F31_64};
2188     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2189       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2190       return false;
2191     }
2192   }
2193 
2194   report_fatal_error("No registers left in GHC calling convention");
2195   return true;
2196 }
2197 
2198 // Transform physical registers into virtual registers.
2199 SDValue LoongArchTargetLowering::LowerFormalArguments(
2200     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2201     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2202     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2203 
2204   MachineFunction &MF = DAG.getMachineFunction();
2205 
2206   switch (CallConv) {
2207   default:
2208     llvm_unreachable("Unsupported calling convention");
2209   case CallingConv::C:
2210   case CallingConv::Fast:
2211     break;
2212   case CallingConv::GHC:
2213     if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
2214         !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
2215       report_fatal_error(
2216         "GHC calling convention requires the F and D extensions");
2217   }
2218 
2219   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2220   MVT GRLenVT = Subtarget.getGRLenVT();
2221   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2222   // Used with varargs to acumulate store chains.
2223   std::vector<SDValue> OutChains;
2224 
2225   // Assign locations to all of the incoming arguments.
2226   SmallVector<CCValAssign> ArgLocs;
2227   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2228 
2229   if (CallConv == CallingConv::GHC)
2230     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
2231   else
2232     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
2233 
2234   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2235     CCValAssign &VA = ArgLocs[i];
2236     SDValue ArgValue;
2237     if (VA.isRegLoc())
2238       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2239     else
2240       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2241     if (VA.getLocInfo() == CCValAssign::Indirect) {
2242       // If the original argument was split and passed by reference, we need to
2243       // load all parts of it here (using the same address).
2244       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2245                                    MachinePointerInfo()));
2246       unsigned ArgIndex = Ins[i].OrigArgIndex;
2247       unsigned ArgPartOffset = Ins[i].PartOffset;
2248       assert(ArgPartOffset == 0);
2249       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2250         CCValAssign &PartVA = ArgLocs[i + 1];
2251         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
2252         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2253         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
2254         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2255                                      MachinePointerInfo()));
2256         ++i;
2257       }
2258       continue;
2259     }
2260     InVals.push_back(ArgValue);
2261   }
2262 
2263   if (IsVarArg) {
2264     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
2265     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2266     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
2267     MachineFrameInfo &MFI = MF.getFrameInfo();
2268     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2269     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
2270 
2271     // Offset of the first variable argument from stack pointer, and size of
2272     // the vararg save area. For now, the varargs save area is either zero or
2273     // large enough to hold a0-a7.
2274     int VaArgOffset, VarArgsSaveSize;
2275 
2276     // If all registers are allocated, then all varargs must be passed on the
2277     // stack and we don't need to save any argregs.
2278     if (ArgRegs.size() == Idx) {
2279       VaArgOffset = CCInfo.getNextStackOffset();
2280       VarArgsSaveSize = 0;
2281     } else {
2282       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
2283       VaArgOffset = -VarArgsSaveSize;
2284     }
2285 
2286     // Record the frame index of the first variable argument
2287     // which is a value necessary to VASTART.
2288     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2289     LoongArchFI->setVarArgsFrameIndex(FI);
2290 
2291     // If saving an odd number of registers then create an extra stack slot to
2292     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2293     // offsets to even-numbered registered remain 2*GRLen-aligned.
2294     if (Idx % 2) {
2295       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
2296                             true);
2297       VarArgsSaveSize += GRLenInBytes;
2298     }
2299 
2300     // Copy the integer registers that may have been used for passing varargs
2301     // to the vararg save area.
2302     for (unsigned I = Idx; I < ArgRegs.size();
2303          ++I, VaArgOffset += GRLenInBytes) {
2304       const Register Reg = RegInfo.createVirtualRegister(RC);
2305       RegInfo.addLiveIn(ArgRegs[I], Reg);
2306       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
2307       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2308       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2309       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2310                                    MachinePointerInfo::getFixedStack(MF, FI));
2311       cast<StoreSDNode>(Store.getNode())
2312           ->getMemOperand()
2313           ->setValue((Value *)nullptr);
2314       OutChains.push_back(Store);
2315     }
2316     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
2317   }
2318 
2319   // All stores are grouped in one node to allow the matching between
2320   // the size of Ins and InVals. This only happens for vararg functions.
2321   if (!OutChains.empty()) {
2322     OutChains.push_back(Chain);
2323     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2324   }
2325 
2326   return Chain;
2327 }
2328 
2329 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2330   return CI->isTailCall();
2331 }
2332 
2333 // Check whether the call is eligible for tail call optimization.
2334 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
2335     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2336     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
2337 
2338   auto CalleeCC = CLI.CallConv;
2339   auto &Outs = CLI.Outs;
2340   auto &Caller = MF.getFunction();
2341   auto CallerCC = Caller.getCallingConv();
2342 
2343   // Do not tail call opt if the stack is used to pass parameters.
2344   if (CCInfo.getNextStackOffset() != 0)
2345     return false;
2346 
2347   // Do not tail call opt if any parameters need to be passed indirectly.
2348   for (auto &VA : ArgLocs)
2349     if (VA.getLocInfo() == CCValAssign::Indirect)
2350       return false;
2351 
2352   // Do not tail call opt if either caller or callee uses struct return
2353   // semantics.
2354   auto IsCallerStructRet = Caller.hasStructRetAttr();
2355   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2356   if (IsCallerStructRet || IsCalleeStructRet)
2357     return false;
2358 
2359   // Do not tail call opt if either the callee or caller has a byval argument.
2360   for (auto &Arg : Outs)
2361     if (Arg.Flags.isByVal())
2362       return false;
2363 
2364   // The callee has to preserve all registers the caller needs to preserve.
2365   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
2366   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2367   if (CalleeCC != CallerCC) {
2368     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2369     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2370       return false;
2371   }
2372   return true;
2373 }
2374 
2375 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
2376   return DAG.getDataLayout().getPrefTypeAlign(
2377       VT.getTypeForEVT(*DAG.getContext()));
2378 }
2379 
2380 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2381 // and output parameter nodes.
2382 SDValue
2383 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
2384                                    SmallVectorImpl<SDValue> &InVals) const {
2385   SelectionDAG &DAG = CLI.DAG;
2386   SDLoc &DL = CLI.DL;
2387   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2388   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2389   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2390   SDValue Chain = CLI.Chain;
2391   SDValue Callee = CLI.Callee;
2392   CallingConv::ID CallConv = CLI.CallConv;
2393   bool IsVarArg = CLI.IsVarArg;
2394   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2395   MVT GRLenVT = Subtarget.getGRLenVT();
2396   bool &IsTailCall = CLI.IsTailCall;
2397 
2398   MachineFunction &MF = DAG.getMachineFunction();
2399 
2400   // Analyze the operands of the call, assigning locations to each operand.
2401   SmallVector<CCValAssign> ArgLocs;
2402   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2403 
2404   if (CallConv == CallingConv::GHC)
2405     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
2406   else
2407     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
2408 
2409   // Check if it's really possible to do a tail call.
2410   if (IsTailCall)
2411     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2412 
2413   if (IsTailCall)
2414     ++NumTailCalls;
2415   else if (CLI.CB && CLI.CB->isMustTailCall())
2416     report_fatal_error("failed to perform tail call elimination on a call "
2417                        "site marked musttail");
2418 
2419   // Get a count of how many bytes are to be pushed on the stack.
2420   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2421 
2422   // Create local copies for byval args.
2423   SmallVector<SDValue> ByValArgs;
2424   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2425     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2426     if (!Flags.isByVal())
2427       continue;
2428 
2429     SDValue Arg = OutVals[i];
2430     unsigned Size = Flags.getByValSize();
2431     Align Alignment = Flags.getNonZeroByValAlign();
2432 
2433     int FI =
2434         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2435     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2436     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
2437 
2438     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2439                           /*IsVolatile=*/false,
2440                           /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
2441                           MachinePointerInfo(), MachinePointerInfo());
2442     ByValArgs.push_back(FIPtr);
2443   }
2444 
2445   if (!IsTailCall)
2446     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2447 
2448   // Copy argument values to their designated locations.
2449   SmallVector<std::pair<Register, SDValue>> RegsToPass;
2450   SmallVector<SDValue> MemOpChains;
2451   SDValue StackPtr;
2452   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2453     CCValAssign &VA = ArgLocs[i];
2454     SDValue ArgValue = OutVals[i];
2455     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2456 
2457     // Promote the value if needed.
2458     // For now, only handle fully promoted and indirect arguments.
2459     if (VA.getLocInfo() == CCValAssign::Indirect) {
2460       // Store the argument in a stack slot and pass its address.
2461       Align StackAlign =
2462           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
2463                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
2464       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
2465       // If the original argument was split and passed by reference, we need to
2466       // store the required parts of it here (and pass just one address).
2467       unsigned ArgIndex = Outs[i].OrigArgIndex;
2468       unsigned ArgPartOffset = Outs[i].PartOffset;
2469       assert(ArgPartOffset == 0);
2470       // Calculate the total size to store. We don't have access to what we're
2471       // actually storing other than performing the loop and collecting the
2472       // info.
2473       SmallVector<std::pair<SDValue, SDValue>> Parts;
2474       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2475         SDValue PartValue = OutVals[i + 1];
2476         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
2477         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2478         EVT PartVT = PartValue.getValueType();
2479 
2480         StoredSize += PartVT.getStoreSize();
2481         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
2482         Parts.push_back(std::make_pair(PartValue, Offset));
2483         ++i;
2484       }
2485       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
2486       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2487       MemOpChains.push_back(
2488           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2489                        MachinePointerInfo::getFixedStack(MF, FI)));
2490       for (const auto &Part : Parts) {
2491         SDValue PartValue = Part.first;
2492         SDValue PartOffset = Part.second;
2493         SDValue Address =
2494             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
2495         MemOpChains.push_back(
2496             DAG.getStore(Chain, DL, PartValue, Address,
2497                          MachinePointerInfo::getFixedStack(MF, FI)));
2498       }
2499       ArgValue = SpillSlot;
2500     } else {
2501       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2502     }
2503 
2504     // Use local copy if it is a byval arg.
2505     if (Flags.isByVal())
2506       ArgValue = ByValArgs[j++];
2507 
2508     if (VA.isRegLoc()) {
2509       // Queue up the argument copies and emit them at the end.
2510       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2511     } else {
2512       assert(VA.isMemLoc() && "Argument not register or memory");
2513       assert(!IsTailCall && "Tail call not allowed if stack is used "
2514                             "for passing parameters");
2515 
2516       // Work out the address of the stack slot.
2517       if (!StackPtr.getNode())
2518         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
2519       SDValue Address =
2520           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2521                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2522 
2523       // Emit the store.
2524       MemOpChains.push_back(
2525           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2526     }
2527   }
2528 
2529   // Join the stores, which are independent of one another.
2530   if (!MemOpChains.empty())
2531     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2532 
2533   SDValue Glue;
2534 
2535   // Build a sequence of copy-to-reg nodes, chained and glued together.
2536   for (auto &Reg : RegsToPass) {
2537     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2538     Glue = Chain.getValue(1);
2539   }
2540 
2541   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2542   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2543   // split it and then direct call can be matched by PseudoCALL.
2544   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2545     const GlobalValue *GV = S->getGlobal();
2546     unsigned OpFlags =
2547         getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
2548             ? LoongArchII::MO_CALL
2549             : LoongArchII::MO_CALL_PLT;
2550     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
2551   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2552     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
2553                            *MF.getFunction().getParent(), nullptr)
2554                            ? LoongArchII::MO_CALL
2555                            : LoongArchII::MO_CALL_PLT;
2556     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2557   }
2558 
2559   // The first call operand is the chain and the second is the target address.
2560   SmallVector<SDValue> Ops;
2561   Ops.push_back(Chain);
2562   Ops.push_back(Callee);
2563 
2564   // Add argument registers to the end of the list so that they are
2565   // known live into the call.
2566   for (auto &Reg : RegsToPass)
2567     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2568 
2569   if (!IsTailCall) {
2570     // Add a register mask operand representing the call-preserved registers.
2571     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2572     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2573     assert(Mask && "Missing call preserved mask for calling convention");
2574     Ops.push_back(DAG.getRegisterMask(Mask));
2575   }
2576 
2577   // Glue the call to the argument copies, if any.
2578   if (Glue.getNode())
2579     Ops.push_back(Glue);
2580 
2581   // Emit the call.
2582   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2583 
2584   if (IsTailCall) {
2585     MF.getFrameInfo().setHasTailCall();
2586     return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
2587   }
2588 
2589   Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
2590   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2591   Glue = Chain.getValue(1);
2592 
2593   // Mark the end of the call, which is glued to the call itself.
2594   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2595   Glue = Chain.getValue(1);
2596 
2597   // Assign locations to each value returned by this call.
2598   SmallVector<CCValAssign> RVLocs;
2599   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2600   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
2601 
2602   // Copy all of the result registers out of their specified physreg.
2603   for (auto &VA : RVLocs) {
2604     // Copy the value out.
2605     SDValue RetValue =
2606         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2607     // Glue the RetValue to the end of the call sequence.
2608     Chain = RetValue.getValue(1);
2609     Glue = RetValue.getValue(2);
2610 
2611     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2612 
2613     InVals.push_back(RetValue);
2614   }
2615 
2616   return Chain;
2617 }
2618 
2619 bool LoongArchTargetLowering::CanLowerReturn(
2620     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2621     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2622   SmallVector<CCValAssign> RVLocs;
2623   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2624 
2625   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2626     LoongArchABI::ABI ABI =
2627         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2628     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
2629                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
2630                      nullptr))
2631       return false;
2632   }
2633   return true;
2634 }
2635 
2636 SDValue LoongArchTargetLowering::LowerReturn(
2637     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2638     const SmallVectorImpl<ISD::OutputArg> &Outs,
2639     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
2640     SelectionDAG &DAG) const {
2641   // Stores the assignment of the return value to a location.
2642   SmallVector<CCValAssign> RVLocs;
2643 
2644   // Info about the registers and stack slot.
2645   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2646                  *DAG.getContext());
2647 
2648   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2649                     nullptr, CC_LoongArch);
2650   if (CallConv == CallingConv::GHC && !RVLocs.empty())
2651     report_fatal_error("GHC functions return void only");
2652   SDValue Glue;
2653   SmallVector<SDValue, 4> RetOps(1, Chain);
2654 
2655   // Copy the result values into the output registers.
2656   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2657     CCValAssign &VA = RVLocs[i];
2658     assert(VA.isRegLoc() && "Can only return in registers!");
2659 
2660     // Handle a 'normal' return.
2661     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
2662     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2663 
2664     // Guarantee that all emitted copies are stuck together.
2665     Glue = Chain.getValue(1);
2666     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2667   }
2668 
2669   RetOps[0] = Chain; // Update chain.
2670 
2671   // Add the glue node if we have it.
2672   if (Glue.getNode())
2673     RetOps.push_back(Glue);
2674 
2675   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
2676 }
2677 
2678 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2679                                            bool ForCodeSize) const {
2680   // TODO: Maybe need more checks here after vector extension is supported.
2681   if (VT == MVT::f32 && !Subtarget.hasBasicF())
2682     return false;
2683   if (VT == MVT::f64 && !Subtarget.hasBasicD())
2684     return false;
2685   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
2686 }
2687 
2688 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
2689   return true;
2690 }
2691 
2692 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
2693   return true;
2694 }
2695 
2696 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
2697     const Instruction *I) const {
2698   if (!Subtarget.is64Bit())
2699     return isa<LoadInst>(I) || isa<StoreInst>(I);
2700 
2701   if (isa<LoadInst>(I))
2702     return true;
2703 
2704   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
2705   // require fences beacuse we can use amswap_db.[w/d].
2706   if (isa<StoreInst>(I)) {
2707     unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
2708     return (Size == 8 || Size == 16);
2709   }
2710 
2711   return false;
2712 }
2713 
2714 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
2715                                                 LLVMContext &Context,
2716                                                 EVT VT) const {
2717   if (!VT.isVector())
2718     return getPointerTy(DL);
2719   return VT.changeVectorElementTypeToInteger();
2720 }
2721 
2722 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
2723   // TODO: Support vectors.
2724   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
2725 }
2726 
2727 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2728                                                  const CallInst &I,
2729                                                  MachineFunction &MF,
2730                                                  unsigned Intrinsic) const {
2731   switch (Intrinsic) {
2732   default:
2733     return false;
2734   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
2735   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
2736   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
2737   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
2738     Info.opc = ISD::INTRINSIC_W_CHAIN;
2739     Info.memVT = MVT::i32;
2740     Info.ptrVal = I.getArgOperand(0);
2741     Info.offset = 0;
2742     Info.align = Align(4);
2743     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
2744                  MachineMemOperand::MOVolatile;
2745     return true;
2746     // TODO: Add more Intrinsics later.
2747   }
2748 }
2749 
2750 TargetLowering::AtomicExpansionKind
2751 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2752   // TODO: Add more AtomicRMWInst that needs to be extended.
2753 
2754   // Since floating-point operation requires a non-trivial set of data
2755   // operations, use CmpXChg to expand.
2756   if (AI->isFloatingPointOperation() ||
2757       AI->getOperation() == AtomicRMWInst::UIncWrap ||
2758       AI->getOperation() == AtomicRMWInst::UDecWrap)
2759     return AtomicExpansionKind::CmpXChg;
2760 
2761   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2762   if (Size == 8 || Size == 16)
2763     return AtomicExpansionKind::MaskedIntrinsic;
2764   return AtomicExpansionKind::None;
2765 }
2766 
2767 static Intrinsic::ID
2768 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
2769                                     AtomicRMWInst::BinOp BinOp) {
2770   if (GRLen == 64) {
2771     switch (BinOp) {
2772     default:
2773       llvm_unreachable("Unexpected AtomicRMW BinOp");
2774     case AtomicRMWInst::Xchg:
2775       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
2776     case AtomicRMWInst::Add:
2777       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
2778     case AtomicRMWInst::Sub:
2779       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
2780     case AtomicRMWInst::Nand:
2781       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
2782     case AtomicRMWInst::UMax:
2783       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
2784     case AtomicRMWInst::UMin:
2785       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
2786     case AtomicRMWInst::Max:
2787       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
2788     case AtomicRMWInst::Min:
2789       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
2790       // TODO: support other AtomicRMWInst.
2791     }
2792   }
2793 
2794   if (GRLen == 32) {
2795     switch (BinOp) {
2796     default:
2797       llvm_unreachable("Unexpected AtomicRMW BinOp");
2798     case AtomicRMWInst::Xchg:
2799       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
2800     case AtomicRMWInst::Add:
2801       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
2802     case AtomicRMWInst::Sub:
2803       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
2804     case AtomicRMWInst::Nand:
2805       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
2806       // TODO: support other AtomicRMWInst.
2807     }
2808   }
2809 
2810   llvm_unreachable("Unexpected GRLen\n");
2811 }
2812 
2813 TargetLowering::AtomicExpansionKind
2814 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
2815     AtomicCmpXchgInst *CI) const {
2816   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2817   if (Size == 8 || Size == 16)
2818     return AtomicExpansionKind::MaskedIntrinsic;
2819   return AtomicExpansionKind::None;
2820 }
2821 
2822 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2823     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2824     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2825   Value *Ordering =
2826       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
2827 
2828   // TODO: Support cmpxchg on LA32.
2829   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
2830   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2831   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2832   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2833   Type *Tys[] = {AlignedAddr->getType()};
2834   Function *MaskedCmpXchg =
2835       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2836   Value *Result = Builder.CreateCall(
2837       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2838   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2839   return Result;
2840 }
2841 
2842 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
2843     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2844     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2845   unsigned GRLen = Subtarget.getGRLen();
2846   Value *Ordering =
2847       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
2848   Type *Tys[] = {AlignedAddr->getType()};
2849   Function *LlwOpScwLoop = Intrinsic::getDeclaration(
2850       AI->getModule(),
2851       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
2852 
2853   if (GRLen == 64) {
2854     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2855     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2856     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2857   }
2858 
2859   Value *Result;
2860 
2861   // Must pass the shift amount needed to sign extend the loaded value prior
2862   // to performing a signed comparison for min/max. ShiftAmt is the number of
2863   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
2864   // is the number of bits to left+right shift the value in order to
2865   // sign-extend.
2866   if (AI->getOperation() == AtomicRMWInst::Min ||
2867       AI->getOperation() == AtomicRMWInst::Max) {
2868     const DataLayout &DL = AI->getModule()->getDataLayout();
2869     unsigned ValWidth =
2870         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2871     Value *SextShamt =
2872         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
2873     Result = Builder.CreateCall(LlwOpScwLoop,
2874                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2875   } else {
2876     Result =
2877         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2878   }
2879 
2880   if (GRLen == 64)
2881     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2882   return Result;
2883 }
2884 
2885 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
2886     const MachineFunction &MF, EVT VT) const {
2887   VT = VT.getScalarType();
2888 
2889   if (!VT.isSimple())
2890     return false;
2891 
2892   switch (VT.getSimpleVT().SimpleTy) {
2893   case MVT::f32:
2894   case MVT::f64:
2895     return true;
2896   default:
2897     break;
2898   }
2899 
2900   return false;
2901 }
2902 
2903 Register LoongArchTargetLowering::getExceptionPointerRegister(
2904     const Constant *PersonalityFn) const {
2905   return LoongArch::R4;
2906 }
2907 
2908 Register LoongArchTargetLowering::getExceptionSelectorRegister(
2909     const Constant *PersonalityFn) const {
2910   return LoongArch::R5;
2911 }
2912 
2913 //===----------------------------------------------------------------------===//
2914 //                           LoongArch Inline Assembly Support
2915 //===----------------------------------------------------------------------===//
2916 
2917 LoongArchTargetLowering::ConstraintType
2918 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
2919   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
2920   //
2921   // 'f':  A floating-point register (if available).
2922   // 'k':  A memory operand whose address is formed by a base register and
2923   //       (optionally scaled) index register.
2924   // 'l':  A signed 16-bit constant.
2925   // 'm':  A memory operand whose address is formed by a base register and
2926   //       offset that is suitable for use in instructions with the same
2927   //       addressing mode as st.w and ld.w.
2928   // 'I':  A signed 12-bit constant (for arithmetic instructions).
2929   // 'J':  Integer zero.
2930   // 'K':  An unsigned 12-bit constant (for logic instructions).
2931   // "ZB": An address that is held in a general-purpose register. The offset is
2932   //       zero.
2933   // "ZC": A memory operand whose address is formed by a base register and
2934   //       offset that is suitable for use in instructions with the same
2935   //       addressing mode as ll.w and sc.w.
2936   if (Constraint.size() == 1) {
2937     switch (Constraint[0]) {
2938     default:
2939       break;
2940     case 'f':
2941       return C_RegisterClass;
2942     case 'l':
2943     case 'I':
2944     case 'J':
2945     case 'K':
2946       return C_Immediate;
2947     case 'k':
2948       return C_Memory;
2949     }
2950   }
2951 
2952   if (Constraint == "ZC" || Constraint == "ZB")
2953     return C_Memory;
2954 
2955   // 'm' is handled here.
2956   return TargetLowering::getConstraintType(Constraint);
2957 }
2958 
2959 unsigned LoongArchTargetLowering::getInlineAsmMemConstraint(
2960     StringRef ConstraintCode) const {
2961   return StringSwitch<unsigned>(ConstraintCode)
2962       .Case("k", InlineAsm::Constraint_k)
2963       .Case("ZB", InlineAsm::Constraint_ZB)
2964       .Case("ZC", InlineAsm::Constraint_ZC)
2965       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
2966 }
2967 
2968 std::pair<unsigned, const TargetRegisterClass *>
2969 LoongArchTargetLowering::getRegForInlineAsmConstraint(
2970     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
2971   // First, see if this is a constraint that directly corresponds to a LoongArch
2972   // register class.
2973   if (Constraint.size() == 1) {
2974     switch (Constraint[0]) {
2975     case 'r':
2976       // TODO: Support fixed vectors up to GRLen?
2977       if (VT.isVector())
2978         break;
2979       return std::make_pair(0U, &LoongArch::GPRRegClass);
2980     case 'f':
2981       if (Subtarget.hasBasicF() && VT == MVT::f32)
2982         return std::make_pair(0U, &LoongArch::FPR32RegClass);
2983       if (Subtarget.hasBasicD() && VT == MVT::f64)
2984         return std::make_pair(0U, &LoongArch::FPR64RegClass);
2985       break;
2986     default:
2987       break;
2988     }
2989   }
2990 
2991   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
2992   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
2993   // constraints while the official register name is prefixed with a '$'. So we
2994   // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
2995   // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
2996   // case insensitive, so no need to convert the constraint to upper case here.
2997   //
2998   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
2999   // decode the usage of register name aliases into their official names. And
3000   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
3001   // official register names.
3002   if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) {
3003     bool IsFP = Constraint[2] == 'f';
3004     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
3005     std::pair<unsigned, const TargetRegisterClass *> R;
3006     R = TargetLowering::getRegForInlineAsmConstraint(
3007         TRI, join_items("", Temp.first, Temp.second), VT);
3008     // Match those names to the widest floating point register type available.
3009     if (IsFP) {
3010       unsigned RegNo = R.first;
3011       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
3012         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
3013           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
3014           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
3015         }
3016       }
3017     }
3018     return R;
3019   }
3020 
3021   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3022 }
3023 
3024 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
3025     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3026     SelectionDAG &DAG) const {
3027   // Currently only support length 1 constraints.
3028   if (Constraint.length() == 1) {
3029     switch (Constraint[0]) {
3030     case 'l':
3031       // Validate & create a 16-bit signed immediate operand.
3032       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3033         uint64_t CVal = C->getSExtValue();
3034         if (isInt<16>(CVal))
3035           Ops.push_back(
3036               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3037       }
3038       return;
3039     case 'I':
3040       // Validate & create a 12-bit signed immediate operand.
3041       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3042         uint64_t CVal = C->getSExtValue();
3043         if (isInt<12>(CVal))
3044           Ops.push_back(
3045               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3046       }
3047       return;
3048     case 'J':
3049       // Validate & create an integer zero operand.
3050       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3051         if (C->getZExtValue() == 0)
3052           Ops.push_back(
3053               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
3054       return;
3055     case 'K':
3056       // Validate & create a 12-bit unsigned immediate operand.
3057       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3058         uint64_t CVal = C->getZExtValue();
3059         if (isUInt<12>(CVal))
3060           Ops.push_back(
3061               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3062       }
3063       return;
3064     default:
3065       break;
3066     }
3067   }
3068   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3069 }
3070 
3071 #define GET_REGISTER_MATCHER
3072 #include "LoongArchGenAsmMatcher.inc"
3073 
3074 Register
3075 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3076                                            const MachineFunction &MF) const {
3077   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
3078   std::string NewRegName = Name.second.str();
3079   Register Reg = MatchRegisterAltName(NewRegName);
3080   if (Reg == LoongArch::NoRegister)
3081     Reg = MatchRegisterName(NewRegName);
3082   if (Reg == LoongArch::NoRegister)
3083     report_fatal_error(
3084         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3085   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3086   if (!ReservedRegs.test(Reg))
3087     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3088                              StringRef(RegName) + "\"."));
3089   return Reg;
3090 }
3091 
3092 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
3093                                                      EVT VT, SDValue C) const {
3094   // TODO: Support vectors.
3095   if (!VT.isScalarInteger())
3096     return false;
3097 
3098   // Omit the optimization if the data size exceeds GRLen.
3099   if (VT.getSizeInBits() > Subtarget.getGRLen())
3100     return false;
3101 
3102   // Break MUL into (SLLI + ADD/SUB) or ALSL.
3103   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3104     const APInt &Imm = ConstNode->getAPIntValue();
3105     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
3106         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
3107       return true;
3108   }
3109 
3110   return false;
3111 }
3112