xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (revision 5fb307d29b364982acbde82cbf77db3cae486f8c)
1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcalls.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "loongarch-isel-lowering"
38 
39 STATISTIC(NumTailCalls, "Number of tail calls");
40 
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42                                   cl::desc("Trap on integer division by zero."),
43                                   cl::init(false));
44 
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46                                                  const LoongArchSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   MVT GRLenVT = Subtarget.getGRLenVT();
50   // Set up the register classes.
51   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
52   if (Subtarget.hasBasicF())
53     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
54   if (Subtarget.hasBasicD())
55     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
56   if (Subtarget.hasExtLSX())
57     for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32,
58                     MVT::v2i64})
59       addRegisterClass(VT, &LoongArch::LSX128RegClass);
60   if (Subtarget.hasExtLASX())
61     for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32,
62                     MVT::v4i64})
63       addRegisterClass(VT, &LoongArch::LASX256RegClass);
64 
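  // i1 is not a legal memory type here, so extending loads from i1 are
  // promoted to wider loads.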
65   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
66                    MVT::i1, Promote);
67 
68   // TODO: add necessary setOperationAction calls later.
69   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
70   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
71   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
72   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
73   setOperationAction(ISD::ROTL, GRLenVT, Expand);
74   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
75   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
76   setOperationAction(ISD::TRAP, MVT::Other, Legal);
77   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
78   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
79 
80   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
81                       ISD::JumpTable},
82                      GRLenVT, Custom);
83 
84   setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
85 
86   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
87 
88   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
89   if (Subtarget.is64Bit())
90     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
91 
92   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
93   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
94   setOperationAction(ISD::VASTART, MVT::Other, Custom);
95   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
96 
97   if (Subtarget.is64Bit()) {
98     setOperationAction(ISD::SHL, MVT::i32, Custom);
99     setOperationAction(ISD::SRA, MVT::i32, Custom);
100     setOperationAction(ISD::SRL, MVT::i32, Custom);
101     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
102     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
103     setOperationAction(ISD::ROTR, MVT::i32, Custom);
104     setOperationAction(ISD::ROTL, MVT::i32, Custom);
105     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
106     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
107     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
108     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
109     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
110     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
111     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
112     if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
113       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
114     if (Subtarget.hasBasicF())
115       setOperationAction(ISD::FRINT, MVT::f32, Legal);
116     if (Subtarget.hasBasicD())
117       setOperationAction(ISD::FRINT, MVT::f64, Legal);
118   }
119 
120   // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
121   // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
122   // and i32 could still be byte-swapped relatively cheaply.
123   setOperationAction(ISD::BSWAP, MVT::i16, Custom);
124   if (Subtarget.is64Bit()) {
125     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
126   }
127 
128   // Expand bitreverse.i16 with native-width bitrev and shift for now, until
129   // we know which of sll and revb.2h is faster.
130   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
131   if (Subtarget.is64Bit()) {
132     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
133     setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
134   } else {
135     setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
136     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
137     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
138     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
139     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
140     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
141   }
142 
143   static const ISD::CondCode FPCCToExpand[] = {
144       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
145       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
146 
147   if (Subtarget.hasBasicF()) {
148     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
149     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
150     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
151     setOperationAction(ISD::FMA, MVT::f32, Legal);
152     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
153     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
154     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
155     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
156     setOperationAction(ISD::FSIN, MVT::f32, Expand);
157     setOperationAction(ISD::FCOS, MVT::f32, Expand);
158     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
159     setOperationAction(ISD::FPOW, MVT::f32, Expand);
160     setOperationAction(ISD::FREM, MVT::f32, Expand);
161   }
162   if (Subtarget.hasBasicD()) {
163     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
164     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
165     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
166     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
167     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
168     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
169     setOperationAction(ISD::FMA, MVT::f64, Legal);
170     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
171     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
172     setOperationAction(ISD::FSIN, MVT::f64, Expand);
173     setOperationAction(ISD::FCOS, MVT::f64, Expand);
174     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
175     setOperationAction(ISD::FPOW, MVT::f64, Expand);
176     setOperationAction(ISD::FREM, MVT::f64, Expand);
177     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
178   }
179 
180   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
181 
182   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
183   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
184   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
185   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
186   if (!Subtarget.is64Bit())
187     setLibcallName(RTLIB::MUL_I128, nullptr);
188 
189   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
190   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
191   if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
192        !Subtarget.hasBasicD())) {
193     setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
194     setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
195   }
196 
197   // Compute derived properties from the register classes.
198   computeRegisterProperties(Subtarget.getRegisterInfo());
199 
200   setStackPointerRegisterToSaveRestore(LoongArch::R3);
201 
202   setBooleanContents(ZeroOrOneBooleanContent);
203 
204   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
205 
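  // LoongArch only provides 32- and 64-bit LL/SC sequences, so cmpxchg
  // operations narrower than 32 bits are left to the generic atomic expansion.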
206   setMinCmpXchgSizeInBits(32);
207 
208   // Function alignments.
209   setMinFunctionAlignment(Align(4));
210   // Set preferred alignments.
211   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
212   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
213   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
214 
215   setTargetDAGCombine(ISD::AND);
216   setTargetDAGCombine(ISD::OR);
217   setTargetDAGCombine(ISD::SRL);
218 }
219 
220 bool LoongArchTargetLowering::isOffsetFoldingLegal(
221     const GlobalAddressSDNode *GA) const {
222   // In order to maximise the opportunity for common subexpression elimination,
223   // keep a separate ADD node for the global address offset instead of folding
224   // it into the global address node. Later peephole optimisations may choose to
225   // fold it back in when profitable.
226   return false;
227 }
228 
229 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
230                                                 SelectionDAG &DAG) const {
231   switch (Op.getOpcode()) {
232   case ISD::EH_DWARF_CFA:
233     return lowerEH_DWARF_CFA(Op, DAG);
234   case ISD::GlobalAddress:
235     return lowerGlobalAddress(Op, DAG);
236   case ISD::GlobalTLSAddress:
237     return lowerGlobalTLSAddress(Op, DAG);
238   case ISD::INTRINSIC_WO_CHAIN:
239     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
240   case ISD::INTRINSIC_W_CHAIN:
241     return lowerINTRINSIC_W_CHAIN(Op, DAG);
242   case ISD::INTRINSIC_VOID:
243     return lowerINTRINSIC_VOID(Op, DAG);
244   case ISD::BlockAddress:
245     return lowerBlockAddress(Op, DAG);
246   case ISD::JumpTable:
247     return lowerJumpTable(Op, DAG);
248   case ISD::SHL_PARTS:
249     return lowerShiftLeftParts(Op, DAG);
250   case ISD::SRA_PARTS:
251     return lowerShiftRightParts(Op, DAG, true);
252   case ISD::SRL_PARTS:
253     return lowerShiftRightParts(Op, DAG, false);
254   case ISD::ConstantPool:
255     return lowerConstantPool(Op, DAG);
256   case ISD::FP_TO_SINT:
257     return lowerFP_TO_SINT(Op, DAG);
258   case ISD::BITCAST:
259     return lowerBITCAST(Op, DAG);
260   case ISD::UINT_TO_FP:
261     return lowerUINT_TO_FP(Op, DAG);
262   case ISD::SINT_TO_FP:
263     return lowerSINT_TO_FP(Op, DAG);
264   case ISD::VASTART:
265     return lowerVASTART(Op, DAG);
266   case ISD::FRAMEADDR:
267     return lowerFRAMEADDR(Op, DAG);
268   case ISD::RETURNADDR:
269     return lowerRETURNADDR(Op, DAG);
270   case ISD::WRITE_REGISTER:
271     return lowerWRITE_REGISTER(Op, DAG);
272   }
273   return SDValue();
274 }
275 
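// WRITE_REGISTER is only accepted when the value width matches GRLen; a
// mismatched width is reported as an error and the incoming chain is returned
// so that compilation can continue.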
276 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
277                                                      SelectionDAG &DAG) const {
278 
279   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
280     DAG.getContext()->emitError(
281         "On LA64, only 64-bit registers can be written.");
282     return Op.getOperand(0);
283   }
284 
285   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
286     DAG.getContext()->emitError(
287         "On LA32, only 32-bit registers can be written.");
288     return Op.getOperand(0);
289   }
290 
291   return Op;
292 }
293 
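// Lower FRAMEADDR by walking saved frame pointers: the parent frame pointer is
// assumed to have been spilled at -2*GRLen bytes from the current frame
// pointer, so non-zero depths are handled with a chain of loads.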
294 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
295                                                 SelectionDAG &DAG) const {
296   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
297     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
298                                 "be a constant integer");
299     return SDValue();
300   }
301 
302   MachineFunction &MF = DAG.getMachineFunction();
303   MF.getFrameInfo().setFrameAddressIsTaken(true);
304   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
305   EVT VT = Op.getValueType();
306   SDLoc DL(Op);
307   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
308   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
309   int GRLenInBytes = Subtarget.getGRLen() / 8;
310 
311   while (Depth--) {
312     int Offset = -(GRLenInBytes * 2);
313     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
314                               DAG.getIntPtrConstant(Offset, DL));
315     FrameAddr =
316         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
317   }
318   return FrameAddr;
319 }
320 
321 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
322                                                  SelectionDAG &DAG) const {
323   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
324     return SDValue();
325 
326   // Currently we only support lowering the return address for the current frame.
327   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
328     DAG.getContext()->emitError(
329         "return address can only be determined for the current frame");
330     return SDValue();
331   }
332 
333   MachineFunction &MF = DAG.getMachineFunction();
334   MF.getFrameInfo().setReturnAddressIsTaken(true);
335   MVT GRLenVT = Subtarget.getGRLenVT();
336 
337   // Return the value of the return address register, marking it an implicit
338   // live-in.
339   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
340                               getRegClassFor(GRLenVT));
341   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
342 }
343 
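// EH_DWARF_CFA is lowered to the address of a GRLen-sized fixed object at
// offset 0, i.e. the value of the stack pointer on entry to the function.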
344 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
345                                                    SelectionDAG &DAG) const {
346   MachineFunction &MF = DAG.getMachineFunction();
347   auto Size = Subtarget.getGRLen() / 8;
348   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
349   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
350 }
351 
352 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
353                                               SelectionDAG &DAG) const {
354   MachineFunction &MF = DAG.getMachineFunction();
355   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
356 
357   SDLoc DL(Op);
358   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
359                                  getPointerTy(MF.getDataLayout()));
360 
361   // vastart just stores the address of the VarArgsFrameIndex slot into the
362   // memory location argument.
363   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
364   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
365                       MachinePointerInfo(SV));
366 }
367 
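// Custom lowering of UINT_TO_FP for LA64 with the F but not the D extension:
// if the i64 source is provably a zero-extended 32-bit value, keep the node so
// it can be matched directly; otherwise soften it into the libcall selected by
// RTLIB::getUINTTOFP.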
368 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
369                                                  SelectionDAG &DAG) const {
370   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
371          !Subtarget.hasBasicD() && "unexpected target features");
372 
373   SDLoc DL(Op);
374   SDValue Op0 = Op.getOperand(0);
375   if (Op0->getOpcode() == ISD::AND) {
376     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
377     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
378       return Op;
379   }
380 
381   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
382       Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
383       Op0.getConstantOperandVal(2) == UINT64_C(0))
384     return Op;
385 
386   if (Op0.getOpcode() == ISD::AssertZext &&
387       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
388     return Op;
389 
390   EVT OpVT = Op0.getValueType();
391   EVT RetVT = Op.getValueType();
392   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
393   MakeLibCallOptions CallOptions;
394   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
395   SDValue Chain = SDValue();
396   SDValue Result;
397   std::tie(Result, Chain) =
398       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
399   return Result;
400 }
401 
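// The signed counterpart: keep the node when the source is known to be
// sign-extended from 32 bits or fewer, otherwise emit the libcall selected by
// RTLIB::getSINTTOFP.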
402 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
403                                                  SelectionDAG &DAG) const {
404   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
405          !Subtarget.hasBasicD() && "unexpected target features");
406 
407   SDLoc DL(Op);
408   SDValue Op0 = Op.getOperand(0);
409 
410   if ((Op0.getOpcode() == ISD::AssertSext ||
411        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
412       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
413     return Op;
414 
415   EVT OpVT = Op0.getValueType();
416   EVT RetVT = Op.getValueType();
417   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
418   MakeLibCallOptions CallOptions;
419   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
420   SDValue Chain = SDValue();
421   SDValue Result;
422   std::tie(Result, Chain) =
423       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
424   return Result;
425 }
426 
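// On LA64 with the F extension, an i32 -> f32 bitcast is implemented by
// any-extending the integer to i64 and moving its low 32 bits into an FPR
// through MOVGR2FR_W_LA64; all other bitcasts are returned unchanged.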
427 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
428                                               SelectionDAG &DAG) const {
429 
430   SDLoc DL(Op);
431   SDValue Op0 = Op.getOperand(0);
432 
433   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
434       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
435     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
436     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
437   }
438   return Op;
439 }
440 
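// FP_TO_SINT is lowered through the target FTINT node: the conversion happens
// in an FPR and the result is moved (F-only LA64) or bitcast back into an
// integer register of the requested width.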
441 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
442                                                  SelectionDAG &DAG) const {
443 
444   SDLoc DL(Op);
445 
446   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
447       !Subtarget.hasBasicD()) {
448     SDValue Dst =
449         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
450     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
451   }
452 
453   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
454   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
455   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
456 }
457 
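// Overloads building the bare target address node for each kind of entity
// handled by getAddr() below: globals, block addresses, constant pools and
// jump tables.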
458 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
459                              SelectionDAG &DAG, unsigned Flags) {
460   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
461 }
462 
463 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
464                              SelectionDAG &DAG, unsigned Flags) {
465   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
466                                    Flags);
467 }
468 
469 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
470                              SelectionDAG &DAG, unsigned Flags) {
471   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
472                                    N->getOffset(), Flags);
473 }
474 
475 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
476                              SelectionDAG &DAG, unsigned Flags) {
477   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
478 }
479 
480 template <class NodeTy>
481 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
482                                          bool IsLocal) const {
483   SDLoc DL(N);
484   EVT Ty = getPointerTy(DAG.getDataLayout());
485   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
486 
487   switch (DAG.getTarget().getCodeModel()) {
488   default:
489     report_fatal_error("Unsupported code model");
490 
491   case CodeModel::Large: {
492     assert(Subtarget.is64Bit() && "Large code model requires LA64");
493 
494     // This is not actually used, but is necessary for successfully matching
495     // the PseudoLA_*_LARGE nodes.
496     SDValue Tmp = DAG.getConstant(0, DL, Ty);
497     if (IsLocal)
498       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), which
499       // eventually becomes the desired 5-insn code sequence.
500       return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
501                                         Tmp, Addr),
502                      0);
503 
504     // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), which eventually
505     // becomes the desired 5-insn code sequence.
506     return SDValue(
507         DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
508         0);
509   }
510 
511   case CodeModel::Small:
512   case CodeModel::Medium:
513     if (IsLocal)
514       // This generates the pattern (PseudoLA_PCREL sym), which expands to
515       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
516       return SDValue(
517           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
518 
519     // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
520     // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
521     return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
522                    0);
523   }
524 }
525 
526 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
527                                                    SelectionDAG &DAG) const {
528   return getAddr(cast<BlockAddressSDNode>(Op), DAG);
529 }
530 
531 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
532                                                 SelectionDAG &DAG) const {
533   return getAddr(cast<JumpTableSDNode>(Op), DAG);
534 }
535 
536 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
537                                                    SelectionDAG &DAG) const {
538   return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
539 }
540 
541 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
542                                                     SelectionDAG &DAG) const {
543   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
544   assert(N->getOffset() == 0 && "unexpected offset in global node");
545   return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
546 }
547 
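// Build the address of a TLS symbol for the static models (initial-exec and
// local-exec): materialize the offset with the given PseudoLA_* opcode and add
// the thread pointer ($tp, i.e. R2).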
548 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
549                                                   SelectionDAG &DAG,
550                                                   unsigned Opc,
551                                                   bool Large) const {
552   SDLoc DL(N);
553   EVT Ty = getPointerTy(DAG.getDataLayout());
554   MVT GRLenVT = Subtarget.getGRLenVT();
555 
556   // This is not actually used, but is necessary for successfully matching the
557   // PseudoLA_*_LARGE nodes.
558   SDValue Tmp = DAG.getConstant(0, DL, Ty);
559   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
560   SDValue Offset = Large
561                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
562                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
563 
564   // Add the thread pointer.
565   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
566                      DAG.getRegister(LoongArch::R2, GRLenVT));
567 }
568 
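// Build the address of a TLS symbol for the dynamic models (general-dynamic
// and local-dynamic): materialize the GOT entry address with the given
// PseudoLA_* opcode and pass it to a __tls_get_addr call.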
569 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
570                                                    SelectionDAG &DAG,
571                                                    unsigned Opc,
572                                                    bool Large) const {
573   SDLoc DL(N);
574   EVT Ty = getPointerTy(DAG.getDataLayout());
575   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
576 
577   // This is not actually used, but is necessary for successfully matching the
578   // PseudoLA_*_LARGE nodes.
579   SDValue Tmp = DAG.getConstant(0, DL, Ty);
580 
581   // Use a PC-relative addressing mode to access the dynamic GOT address.
582   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
583   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
584                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
585 
586   // Prepare argument list to generate call.
587   ArgListTy Args;
588   ArgListEntry Entry;
589   Entry.Node = Load;
590   Entry.Ty = CallTy;
591   Args.push_back(Entry);
592 
593   // Setup call to __tls_get_addr.
594   TargetLowering::CallLoweringInfo CLI(DAG);
595   CLI.setDebugLoc(DL)
596       .setChain(DAG.getEntryNode())
597       .setLibCallee(CallingConv::C, CallTy,
598                     DAG.getExternalSymbol("__tls_get_addr", Ty),
599                     std::move(Args));
600 
601   return LowerCallTo(CLI).first;
602 }
603 
604 SDValue
605 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
606                                                SelectionDAG &DAG) const {
607   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
608       CallingConv::GHC)
609     report_fatal_error("In GHC calling convention TLS is not supported");
610 
611   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
612   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
613 
614   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
615   assert(N->getOffset() == 0 && "unexpected offset in global node");
616 
617   SDValue Addr;
618   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
619   case TLSModel::GeneralDynamic:
620     // In this model, application code calls the dynamic linker function
621     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
622     // runtime.
623     Addr = getDynamicTLSAddr(N, DAG,
624                              Large ? LoongArch::PseudoLA_TLS_GD_LARGE
625                                    : LoongArch::PseudoLA_TLS_GD,
626                              Large);
627     break;
628   case TLSModel::LocalDynamic:
629     // Same as GeneralDynamic, except for assembly modifiers and relocation
630     // records.
631     Addr = getDynamicTLSAddr(N, DAG,
632                              Large ? LoongArch::PseudoLA_TLS_LD_LARGE
633                                    : LoongArch::PseudoLA_TLS_LD,
634                              Large);
635     break;
636   case TLSModel::InitialExec:
637     // This model uses the GOT to resolve TLS offsets.
638     Addr = getStaticTLSAddr(N, DAG,
639                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
640                                   : LoongArch::PseudoLA_TLS_IE,
641                             Large);
642     break;
643   case TLSModel::LocalExec:
644     // This model is used when linking statically, as the TLS offsets are
645     // resolved during program linking.
646     //
647     // This node doesn't need an extra argument for the large code model.
648     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
649     break;
650   }
651 
652   return Addr;
653 }
654 
655 SDValue
656 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
657                                                  SelectionDAG &DAG) const {
658   switch (Op.getConstantOperandVal(0)) {
659   default:
660     return SDValue(); // Don't custom lower most intrinsics.
661   case Intrinsic::thread_pointer: {
662     EVT PtrVT = getPointerTy(DAG.getDataLayout());
663     return DAG.getRegister(LoongArch::R2, PtrVT);
664   }
665   }
666 }
667 
668 // Helper function that emits an error message for intrinsics with a chain, and
669 // returns the merged values of an UNDEF and the chain.
670 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
671                                                   StringRef ErrorMsg,
672                                                   SelectionDAG &DAG) {
673   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
674   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
675                             SDLoc(Op));
676 }
677 
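// Custom lowering for intrinsics that carry a chain: immediate operands and
// subtarget requirements are validated, misuse is diagnosed (returning an
// UNDEF result plus the chain), and valid calls are selected into the matching
// LoongArchISD nodes.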
678 SDValue
679 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
680                                                 SelectionDAG &DAG) const {
681   SDLoc DL(Op);
682   MVT GRLenVT = Subtarget.getGRLenVT();
683   EVT VT = Op.getValueType();
684   SDValue Chain = Op.getOperand(0);
685   const StringRef ErrorMsgOOR = "argument out of range";
686   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
687   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
688 
689   switch (Op.getConstantOperandVal(1)) {
690   default:
691     return Op;
692   case Intrinsic::loongarch_crc_w_b_w:
693   case Intrinsic::loongarch_crc_w_h_w:
694   case Intrinsic::loongarch_crc_w_w_w:
695   case Intrinsic::loongarch_crc_w_d_w:
696   case Intrinsic::loongarch_crcc_w_b_w:
697   case Intrinsic::loongarch_crcc_w_h_w:
698   case Intrinsic::loongarch_crcc_w_w_w:
699   case Intrinsic::loongarch_crcc_w_d_w:
700     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
701   case Intrinsic::loongarch_csrrd_w:
702   case Intrinsic::loongarch_csrrd_d: {
703     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
704     return !isUInt<14>(Imm)
705                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
706                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
707                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
708   }
709   case Intrinsic::loongarch_csrwr_w:
710   case Intrinsic::loongarch_csrwr_d: {
711     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
712     return !isUInt<14>(Imm)
713                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
714                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
715                              {Chain, Op.getOperand(2),
716                               DAG.getConstant(Imm, DL, GRLenVT)});
717   }
718   case Intrinsic::loongarch_csrxchg_w:
719   case Intrinsic::loongarch_csrxchg_d: {
720     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
721     return !isUInt<14>(Imm)
722                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
723                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
724                              {Chain, Op.getOperand(2), Op.getOperand(3),
725                               DAG.getConstant(Imm, DL, GRLenVT)});
726   }
727   case Intrinsic::loongarch_iocsrrd_d: {
728     return DAG.getNode(
729         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
730         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
731   }
732 #define IOCSRRD_CASE(NAME, NODE)                                               \
733   case Intrinsic::loongarch_##NAME: {                                          \
734     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
735                        {Chain, Op.getOperand(2)});                             \
736   }
737     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
738     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
739     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
740 #undef IOCSRRD_CASE
741   case Intrinsic::loongarch_cpucfg: {
742     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
743                        {Chain, Op.getOperand(2)});
744   }
745   case Intrinsic::loongarch_lddir_d: {
746     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
747     return !isUInt<8>(Imm)
748                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
749                : Op;
750   }
751   case Intrinsic::loongarch_movfcsr2gr: {
752     if (!Subtarget.hasBasicF())
753       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
754     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
755     return !isUInt<2>(Imm)
756                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
757                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
758                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
759   }
760   }
761 }
762 
763 // Helper function that emits an error message for intrinsics with a void return
764 // value and returns the chain.
765 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
766                                          SelectionDAG &DAG) {
767 
768   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
769   return Op.getOperand(0);
770 }
771 
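// Void intrinsics follow the same pattern: range-check immediates and
// subtarget features, then either report an error or emit the corresponding
// LoongArchISD node.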
772 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
773                                                      SelectionDAG &DAG) const {
774   SDLoc DL(Op);
775   MVT GRLenVT = Subtarget.getGRLenVT();
776   SDValue Chain = Op.getOperand(0);
777   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
778   SDValue Op2 = Op.getOperand(2);
779   const StringRef ErrorMsgOOR = "argument out of range";
780   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
781   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
782   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
783 
784   switch (IntrinsicEnum) {
785   default:
786     // TODO: Add more Intrinsics.
787     return SDValue();
788   case Intrinsic::loongarch_cacop_d:
789   case Intrinsic::loongarch_cacop_w: {
790     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
791       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
792     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
793       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
794     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
795     unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
796     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
797     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
798       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
799     return Op;
800   }
801   case Intrinsic::loongarch_dbar: {
802     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
803     return !isUInt<15>(Imm)
804                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
805                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
806                              DAG.getConstant(Imm, DL, GRLenVT));
807   }
808   case Intrinsic::loongarch_ibar: {
809     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
810     return !isUInt<15>(Imm)
811                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
812                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
813                              DAG.getConstant(Imm, DL, GRLenVT));
814   }
815   case Intrinsic::loongarch_break: {
816     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
817     return !isUInt<15>(Imm)
818                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
819                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
820                              DAG.getConstant(Imm, DL, GRLenVT));
821   }
822   case Intrinsic::loongarch_movgr2fcsr: {
823     if (!Subtarget.hasBasicF())
824       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
825     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
826     return !isUInt<2>(Imm)
827                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
828                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
829                              DAG.getConstant(Imm, DL, GRLenVT),
830                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
831                                          Op.getOperand(3)));
832   }
833   case Intrinsic::loongarch_syscall: {
834     unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
835     return !isUInt<15>(Imm)
836                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
837                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
838                              DAG.getConstant(Imm, DL, GRLenVT));
839   }
840 #define IOCSRWR_CASE(NAME, NODE)                                               \
841   case Intrinsic::loongarch_##NAME: {                                          \
842     SDValue Op3 = Op.getOperand(3);                                            \
843     return Subtarget.is64Bit()                                                 \
844                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
845                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
846                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
847                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
848                              Op3);                                             \
849   }
850     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
851     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
852     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
853 #undef IOCSRWR_CASE
854   case Intrinsic::loongarch_iocsrwr_d: {
855     return !Subtarget.is64Bit()
856                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
857                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
858                              Op2,
859                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
860                                          Op.getOperand(3)));
861   }
862 #define ASRT_LE_GT_CASE(NAME)                                                  \
863   case Intrinsic::loongarch_##NAME: {                                          \
864     return !Subtarget.is64Bit()                                                \
865                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
866                : Op;                                                           \
867   }
868     ASRT_LE_GT_CASE(asrtle_d)
869     ASRT_LE_GT_CASE(asrtgt_d)
870 #undef ASRT_LE_GT_CASE
871   case Intrinsic::loongarch_ldpte_d: {
872     unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
873     return !Subtarget.is64Bit()
874                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
875            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
876                              : Op;
877   }
878   }
879 }
880 
881 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
882                                                      SelectionDAG &DAG) const {
883   SDLoc DL(Op);
884   SDValue Lo = Op.getOperand(0);
885   SDValue Hi = Op.getOperand(1);
886   SDValue Shamt = Op.getOperand(2);
887   EVT VT = Lo.getValueType();
888 
889   // if Shamt-GRLen < 0: // Shamt < GRLen
890   //   Lo = Lo << Shamt
891   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
892   // else:
893   //   Hi = Lo << (Shamt-GRLen)
894   //   Lo = 0
895 
896   SDValue Zero = DAG.getConstant(0, DL, VT);
897   SDValue One = DAG.getConstant(1, DL, VT);
898   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
899   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
900   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
901   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
902 
903   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
904   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
905   SDValue ShiftRightLo =
906       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
907   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
908   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
909   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
910 
911   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
912 
913   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
914   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
915 
916   SDValue Parts[2] = {Lo, Hi};
917   return DAG.getMergeValues(Parts, DL);
918 }
919 
920 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
921                                                       SelectionDAG &DAG,
922                                                       bool IsSRA) const {
923   SDLoc DL(Op);
924   SDValue Lo = Op.getOperand(0);
925   SDValue Hi = Op.getOperand(1);
926   SDValue Shamt = Op.getOperand(2);
927   EVT VT = Lo.getValueType();
928 
929   // SRA expansion:
930   //   if Shamt-GRLen < 0: // Shamt < GRLen
931   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
932   //     Hi = Hi >>s Shamt
933   //   else:
934   //     Lo = Hi >>s (Shamt-GRLen);
935   //     Hi = Hi >>s (GRLen-1)
936   //
937   // SRL expansion:
938   //   if Shamt-GRLen < 0: // Shamt < GRLen
939   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
940   //     Hi = Hi >>u Shamt
941   //   else:
942   //     Lo = Hi >>u (Shamt-GRLen);
943   //     Hi = 0;
944 
945   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
946 
947   SDValue Zero = DAG.getConstant(0, DL, VT);
948   SDValue One = DAG.getConstant(1, DL, VT);
949   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
950   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
951   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
952   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
953 
954   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
955   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
956   SDValue ShiftLeftHi =
957       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
958   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
959   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
960   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
961   SDValue HiFalse =
962       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
963 
964   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
965 
966   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
967   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
968 
969   SDValue Parts[2] = {Lo, Hi};
970   return DAG.getMergeValues(Parts, DL);
971 }
972 
973 // Returns the opcode of the target-specific SDNode that implements the 32-bit
974 // form of the given Opcode.
975 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
976   switch (Opcode) {
977   default:
978     llvm_unreachable("Unexpected opcode");
979   case ISD::SHL:
980     return LoongArchISD::SLL_W;
981   case ISD::SRA:
982     return LoongArchISD::SRA_W;
983   case ISD::SRL:
984     return LoongArchISD::SRL_W;
985   case ISD::ROTR:
986     return LoongArchISD::ROTR_W;
987   case ISD::ROTL:
988     return LoongArchISD::ROTL_W;
989   case ISD::CTTZ:
990     return LoongArchISD::CTZ_W;
991   case ISD::CTLZ:
992     return LoongArchISD::CLZ_W;
993   }
994 }
995 
996 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
997 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
998 // otherwise be promoted to i64, making it difficult to select the
999 // SLL_W/.../*W nodes later, because the fact that the operation was originally
1000 // of type i8/i16/i32 is lost.
1001 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1002                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1003   SDLoc DL(N);
1004   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1005   SDValue NewOp0, NewRes;
1006 
1007   switch (NumOp) {
1008   default:
1009     llvm_unreachable("Unexpected NumOp");
1010   case 1: {
1011     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1012     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1013     break;
1014   }
1015   case 2: {
1016     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1017     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1018     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1019     break;
1020   }
1021     // TODO: Handle more NumOp values.
1022   }
1023 
1024   // ReplaceNodeResults requires we maintain the same type for the return
1025   // value.
1026   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1027 }
1028 
1029 // Helper function that emits an error message for intrinsics with a chain, and
1030 // replaces the results with an UNDEF and the chain.
1031 static void emitErrorAndReplaceIntrinsicWithChainResults(
1032     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1033     StringRef ErrorMsg) {
1034   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1035   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1036   Results.push_back(N->getOperand(0));
1037 }
1038 
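// Custom type legalization: nodes producing an illegal result type (mostly
// i32 results on LA64) are re-emitted as GRLen-wide target nodes whose results
// are truncated back to the original type.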
1039 void LoongArchTargetLowering::ReplaceNodeResults(
1040     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1041   SDLoc DL(N);
1042   EVT VT = N->getValueType(0);
1043   switch (N->getOpcode()) {
1044   default:
1045     llvm_unreachable("Don't know how to legalize this operation");
1046   case ISD::SHL:
1047   case ISD::SRA:
1048   case ISD::SRL:
1049   case ISD::ROTR:
1050     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1051            "Unexpected custom legalisation");
1052     if (N->getOperand(1).getOpcode() != ISD::Constant) {
1053       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1054       break;
1055     }
1056     break;
1057   case ISD::ROTL:
1058     ConstantSDNode *CN;
1059     if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1060       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1061       break;
1062     }
1063     break;
1064   case ISD::FP_TO_SINT: {
1065     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1066            "Unexpected custom legalisation");
1067     SDValue Src = N->getOperand(0);
1068     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1069     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1070         TargetLowering::TypeSoftenFloat) {
1071       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1072       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1073       return;
1074     }
1075     // If the FP type needs to be softened, emit a library call using the 'si'
1076     // version. If we left it to default legalization we'd end up with 'di'.
1077     RTLIB::Libcall LC;
1078     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1079     MakeLibCallOptions CallOptions;
1080     EVT OpVT = Src.getValueType();
1081     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1082     SDValue Chain = SDValue();
1083     SDValue Result;
1084     std::tie(Result, Chain) =
1085         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1086     Results.push_back(Result);
1087     break;
1088   }
1089   case ISD::BITCAST: {
1090     SDValue Src = N->getOperand(0);
1091     EVT SrcVT = Src.getValueType();
1092     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1093         Subtarget.hasBasicF()) {
1094       SDValue Dst =
1095           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1096       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1097     }
1098     break;
1099   }
1100   case ISD::FP_TO_UINT: {
1101     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1102            "Unexpected custom legalisation");
1103     auto &TLI = DAG.getTargetLoweringInfo();
1104     SDValue Tmp1, Tmp2;
1105     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1106     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1107     break;
1108   }
1109   case ISD::BSWAP: {
1110     SDValue Src = N->getOperand(0);
1111     assert((VT == MVT::i16 || VT == MVT::i32) &&
1112            "Unexpected custom legalization");
1113     MVT GRLenVT = Subtarget.getGRLenVT();
1114     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1115     SDValue Tmp;
1116     switch (VT.getSizeInBits()) {
1117     default:
1118       llvm_unreachable("Unexpected operand width");
1119     case 16:
1120       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1121       break;
1122     case 32:
1123       // Only LA64 reaches here, due to the size mismatch between VT and
1124       // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
1125       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1126       break;
1127     }
1128     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1129     break;
1130   }
1131   case ISD::BITREVERSE: {
1132     SDValue Src = N->getOperand(0);
1133     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1134            "Unexpected custom legalization");
1135     MVT GRLenVT = Subtarget.getGRLenVT();
1136     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1137     SDValue Tmp;
1138     switch (VT.getSizeInBits()) {
1139     default:
1140       llvm_unreachable("Unexpected operand width");
1141     case 8:
1142       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1143       break;
1144     case 32:
1145       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1146       break;
1147     }
1148     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1149     break;
1150   }
1151   case ISD::CTLZ:
1152   case ISD::CTTZ: {
1153     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1154            "Unexpected custom legalisation");
1155     Results.push_back(customLegalizeToWOp(N, DAG, 1));
1156     break;
1157   }
1158   case ISD::INTRINSIC_W_CHAIN: {
1159     SDValue Chain = N->getOperand(0);
1160     SDValue Op2 = N->getOperand(2);
1161     MVT GRLenVT = Subtarget.getGRLenVT();
1162     const StringRef ErrorMsgOOR = "argument out of range";
1163     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1164     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1165 
1166     switch (N->getConstantOperandVal(1)) {
1167     default:
1168       llvm_unreachable("Unexpected Intrinsic.");
1169     case Intrinsic::loongarch_movfcsr2gr: {
1170       if (!Subtarget.hasBasicF()) {
1171         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1172                                                      ErrorMsgReqF);
1173         return;
1174       }
1175       unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1176       if (!isUInt<2>(Imm)) {
1177         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1178                                                      ErrorMsgOOR);
1179         return;
1180       }
1181       SDValue MOVFCSR2GRResults = DAG.getNode(
1182           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1183           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1184       Results.push_back(
1185           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1186       Results.push_back(MOVFCSR2GRResults.getValue(1));
1187       break;
1188     }
1189 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
1190   case Intrinsic::loongarch_##NAME: {                                          \
1191     SDValue NODE = DAG.getNode(                                                \
1192         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1193         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
1194          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1195     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1196     Results.push_back(NODE.getValue(1));                                       \
1197     break;                                                                     \
1198   }
1199       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1200       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1201       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1202       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1203       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1204       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1205 #undef CRC_CASE_EXT_BINARYOP
1206 
1207 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
1208   case Intrinsic::loongarch_##NAME: {                                          \
1209     SDValue NODE = DAG.getNode(                                                \
1210         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1211         {Chain, Op2,                                                           \
1212          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1213     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1214     Results.push_back(NODE.getValue(1));                                       \
1215     break;                                                                     \
1216   }
1217       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1218       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1219 #undef CRC_CASE_EXT_UNARYOP
1220 #define CSR_CASE(ID)                                                           \
1221   case Intrinsic::loongarch_##ID: {                                            \
1222     if (!Subtarget.is64Bit())                                                  \
1223       emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,            \
1224                                                    ErrorMsgReqLA64);           \
1225     break;                                                                     \
1226   }
1227       CSR_CASE(csrrd_d);
1228       CSR_CASE(csrwr_d);
1229       CSR_CASE(csrxchg_d);
1230       CSR_CASE(iocsrrd_d);
1231 #undef CSR_CASE
1232     case Intrinsic::loongarch_csrrd_w: {
1233       unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1234       if (!isUInt<14>(Imm)) {
1235         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1236                                                      ErrorMsgOOR);
1237         return;
1238       }
1239       SDValue CSRRDResults =
1240           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1241                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1242       Results.push_back(
1243           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
1244       Results.push_back(CSRRDResults.getValue(1));
1245       break;
1246     }
1247     case Intrinsic::loongarch_csrwr_w: {
1248       unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
1249       if (!isUInt<14>(Imm)) {
1250         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1251                                                      ErrorMsgOOR);
1252         return;
1253       }
1254       SDValue CSRWRResults =
1255           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1256                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1257                        DAG.getConstant(Imm, DL, GRLenVT)});
1258       Results.push_back(
1259           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
1260       Results.push_back(CSRWRResults.getValue(1));
1261       break;
1262     }
1263     case Intrinsic::loongarch_csrxchg_w: {
1264       unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
1265       if (!isUInt<14>(Imm)) {
1266         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1267                                                      ErrorMsgOOR);
1268         return;
1269       }
1270       SDValue CSRXCHGResults = DAG.getNode(
1271           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1272           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1273            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
1274            DAG.getConstant(Imm, DL, GRLenVT)});
1275       Results.push_back(
1276           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
1277       Results.push_back(CSRXCHGResults.getValue(1));
1278       break;
1279     }
1280 #define IOCSRRD_CASE(NAME, NODE)                                               \
1281   case Intrinsic::loongarch_##NAME: {                                          \
1282     SDValue IOCSRRDResults =                                                   \
1283         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
1284                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
1285     Results.push_back(                                                         \
1286         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
1287     Results.push_back(IOCSRRDResults.getValue(1));                             \
1288     break;                                                                     \
1289   }
1290       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1291       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1292       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1293 #undef IOCSRRD_CASE
1294     case Intrinsic::loongarch_cpucfg: {
1295       SDValue CPUCFGResults =
1296           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1297                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
1298       Results.push_back(
1299           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
1300       Results.push_back(CPUCFGResults.getValue(1));
1301       break;
1302     }
1303     case Intrinsic::loongarch_lddir_d: {
1304       if (!Subtarget.is64Bit()) {
1305         emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
1306                                                      ErrorMsgReqLA64);
1307         return;
1308       }
1309       break;
1310     }
1311     }
1312     break;
1313   }
1314   case ISD::READ_REGISTER: {
1315     if (Subtarget.is64Bit())
1316       DAG.getContext()->emitError(
1317           "On LA64, only 64-bit registers can be read.");
1318     else
1319       DAG.getContext()->emitError(
1320           "On LA32, only 32-bit registers can be read.");
1321     Results.push_back(DAG.getUNDEF(VT));
1322     Results.push_back(N->getOperand(0));
1323     break;
1324   }
1325   }
1326 }
1327 
1328 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
1329                                  TargetLowering::DAGCombinerInfo &DCI,
1330                                  const LoongArchSubtarget &Subtarget) {
1331   if (DCI.isBeforeLegalizeOps())
1332     return SDValue();
1333 
1334   SDValue FirstOperand = N->getOperand(0);
1335   SDValue SecondOperand = N->getOperand(1);
1336   unsigned FirstOperandOpc = FirstOperand.getOpcode();
1337   EVT ValTy = N->getValueType(0);
1338   SDLoc DL(N);
1339   uint64_t lsb, msb;
1340   unsigned SMIdx, SMLen;
1341   ConstantSDNode *CN;
1342   SDValue NewOperand;
1343   MVT GRLenVT = Subtarget.getGRLenVT();
1344 
1345   // Op's second operand must be a shifted mask.
1346   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
1347       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
1348     return SDValue();
1349 
1350   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
1351     // Pattern match BSTRPICK.
1352     //  $dst = and ((sra or srl) $src, lsb), (2**len - 1)
1353     //  => BSTRPICK $dst, $src, msb, lsb
1354     //  where msb = lsb + len - 1
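         //  e.g. (and (srl $src, 8), 0xff) => (BSTRPICK $dst, $src, 15, 8)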
1355 
1356     // The second operand of the shift must be an immediate.
1357     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
1358       return SDValue();
1359 
1360     lsb = CN->getZExtValue();
1361 
1362     // Return if the shifted mask does not start at bit 0 or the sum of its
1363     // length and lsb exceeds the word's size.
1364     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
1365       return SDValue();
1366 
1367     NewOperand = FirstOperand.getOperand(0);
1368   } else {
1369     // Pattern match BSTRPICK.
1370     //  $dst = and $src, (2**len - 1), if len > 12
1371     //  => BSTRPICK $dst, $src, msb, lsb
1372     //  where lsb = 0 and msb = len - 1
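         //  e.g. (and $src, 0xffff) => (BSTRPICK $dst, $src, 15, 0)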
1373 
1374     // If the mask is <= 0xfff, andi can be used instead.
1375     if (CN->getZExtValue() <= 0xfff)
1376       return SDValue();
1377 
1378     // Return if the MSB of the mask exceeds the word's size.
1379     if (SMIdx + SMLen > ValTy.getSizeInBits())
1380       return SDValue();
1381 
1382     if (SMIdx > 0) {
1383       // Omit if the constant has more than 2 uses. This is a conservative
1384       // decision. Whether it is a win depends on the HW microarchitecture.
1385       // However, it should always be better for 1 and 2 uses.
1386       if (CN->use_size() > 2)
1387         return SDValue();
1388       // Return if the constant can be composed by a single LU12I.W.
1389       if ((CN->getZExtValue() & 0xfff) == 0)
1390         return SDValue();
1391       // Return if the constant can be composed by a single ADDI with
1392       // the zero register.
1393       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
1394         return SDValue();
1395     }
1396 
1397     lsb = SMIdx;
1398     NewOperand = FirstOperand;
1399   }
1400 
1401   msb = lsb + SMLen - 1;
1402   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
1403                             DAG.getConstant(msb, DL, GRLenVT),
1404                             DAG.getConstant(lsb, DL, GRLenVT));
1405   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
1406     return NR0;
1407   // Try to optimize to
1408   //   bstrpick $Rd, $Rs, msb, lsb
1409   //   slli     $Rd, $Rd, lsb
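       // e.g. for mask 0xff00 (lsb = 8, len = 8):
       //   bstrpick $Rd, $Rs, 15, 8
       //   slli     $Rd, $Rd, 8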
1410   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
1411                      DAG.getConstant(lsb, DL, GRLenVT));
1412 }
1413 
1414 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1415                                  TargetLowering::DAGCombinerInfo &DCI,
1416                                  const LoongArchSubtarget &Subtarget) {
1417   if (DCI.isBeforeLegalizeOps())
1418     return SDValue();
1419 
1420   // $dst = srl (and $src, Mask), Shamt
1421   // =>
1422   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
1423   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
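       // e.g. (srl (and $src, 0xff00), 8) => (BSTRPICK $dst, $src, 15, 8)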
1424   //
1425 
1426   SDValue FirstOperand = N->getOperand(0);
1427   ConstantSDNode *CN;
1428   EVT ValTy = N->getValueType(0);
1429   SDLoc DL(N);
1430   MVT GRLenVT = Subtarget.getGRLenVT();
1431   unsigned MaskIdx, MaskLen;
1432   uint64_t Shamt;
1433 
1434   // The first operand must be an AND and the second operand of the AND must be
1435   // a shifted mask.
1436   if (FirstOperand.getOpcode() != ISD::AND ||
1437       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
1438       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
1439     return SDValue();
1440 
1441   // The second operand (shift amount) must be an immediate.
1442   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
1443     return SDValue();
1444 
1445   Shamt = CN->getZExtValue();
1446   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
1447     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
1448                        FirstOperand->getOperand(0),
1449                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1450                        DAG.getConstant(Shamt, DL, GRLenVT));
1451 
1452   return SDValue();
1453 }
1454 
1455 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
1456                                 TargetLowering::DAGCombinerInfo &DCI,
1457                                 const LoongArchSubtarget &Subtarget) {
1458   MVT GRLenVT = Subtarget.getGRLenVT();
1459   EVT ValTy = N->getValueType(0);
1460   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
1461   ConstantSDNode *CN0, *CN1;
1462   SDLoc DL(N);
1463   unsigned ValBits = ValTy.getSizeInBits();
1464   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
1465   unsigned Shamt;
1466   bool SwapAndRetried = false;
1467 
1468   if (DCI.isBeforeLegalizeOps())
1469     return SDValue();
1470 
1471   if (ValBits != 32 && ValBits != 64)
1472     return SDValue();
1473 
1474 Retry:
1475   // 1st pattern to match BSTRINS:
1476   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
1477   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
1478   //  =>
1479   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
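       //  e.g. with lsb = 8 and size = 8 (mask1 = 0xff00, mask0 = ~0xff00):
       //  R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
       //  => R = BSTRINS X, Y, 15, 8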
1480   if (N0.getOpcode() == ISD::AND &&
1481       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1482       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1483       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
1484       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1485       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1486       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
1487       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1488       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1489       (MaskIdx0 + MaskLen0 <= ValBits)) {
1490     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
1491     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1492                        N1.getOperand(0).getOperand(0),
1493                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1494                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1495   }
1496 
1497   // 2nd pattern to match BSTRINS:
1498   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
1499   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
1500   //  =>
1501   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
1502   if (N0.getOpcode() == ISD::AND &&
1503       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1504       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1505       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1506       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1507       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1508       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1509       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1510       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
1511       (MaskIdx0 + MaskLen0 <= ValBits)) {
1512     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
1513     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1514                        N1.getOperand(0).getOperand(0),
1515                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1516                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1517   }
1518 
1519   // 3rd pattern to match BSTRINS:
1520   //  R = or (and X, mask0), (and Y, mask1)
1521   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
1522   //  =>
1523   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
1524   //  where msb = lsb + size - 1
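       //  e.g. R = or (and X, ~0xff00), (and Y, 0xff00)
       //  => R = BSTRINS X, (shr (and Y, 0xff00), 8), 15, 8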
1525   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
1526       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1527       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1528       (MaskIdx0 + MaskLen0 <= 64) &&
1529       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
1530       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1531     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
1532     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1533                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
1534                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
1535                        DAG.getConstant(ValBits == 32
1536                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
1537                                            : (MaskIdx0 + MaskLen0 - 1),
1538                                        DL, GRLenVT),
1539                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
1540   }
1541 
1542   // 4th pattern to match BSTRINS:
1543   //  R = or (and X, mask), (shl Y, shamt)
1544   //  where mask = (2**shamt - 1)
1545   //  =>
1546   //  R = BSTRINS X, Y, ValBits - 1, shamt
1547   //  where ValBits = 32 or 64
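       //  e.g. R = or (and X, 0xffff), (shl Y, 16)
       //  => R = BSTRINS X, Y, ValBits - 1, 16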
1548   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
1549       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1550       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
1551       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1552       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
1553       (MaskIdx0 + MaskLen0 <= ValBits)) {
1554     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
1555     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1556                        N1.getOperand(0),
1557                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
1558                        DAG.getConstant(Shamt, DL, GRLenVT));
1559   }
1560 
1561   // 5th pattern to match BSTRINS:
1562   //  R = or (and X, mask), const
1563   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
1564   //  =>
1565   //  R = BSTRINS X, (const >> lsb), msb, lsb
1566   //  where msb = lsb + size - 1
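       //  e.g. R = or (and X, ~0xff00), 0x2a00
       //  => R = BSTRINS X, 0x2a, 15, 8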
1567   if (N0.getOpcode() == ISD::AND &&
1568       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1569       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1570       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
1571       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1572     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
1573     return DAG.getNode(
1574         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1575         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
1576         DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1577         DAG.getConstant(MaskIdx0, DL, GRLenVT));
1578   }
1579 
1580   // 6th pattern.
1581   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
1582   // by the incoming bits are known to be zero.
1583   // =>
1584   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
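       // e.g. a = b | ((c & 0xff) << 16), with bits [23:16] of b known zero
       // => a = BSTRINS b, c, 23, 16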
1585   //
1586   // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
1587   // pattern is more common than the 1st. So we put the 1st before the 6th in
1588   // order to match as many nodes as possible.
1589   ConstantSDNode *CNMask, *CNShamt;
1590   unsigned MaskIdx, MaskLen;
1591   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1592       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1593       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1594       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1595       CNShamt->getZExtValue() + MaskLen <= ValBits) {
1596     Shamt = CNShamt->getZExtValue();
1597     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
1598     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1599       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
1600       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1601                          N1.getOperand(0).getOperand(0),
1602                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
1603                          DAG.getConstant(Shamt, DL, GRLenVT));
1604     }
1605   }
1606 
1607   // 7th pattern.
1608   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
1609   // overwritten by the incoming bits are known to be zero.
1610   // =>
1611   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
1612   //
1613   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
1614   // before the 7th in order to match as many nodes as possible.
1615   if (N1.getOpcode() == ISD::AND &&
1616       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1617       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1618       N1.getOperand(0).getOpcode() == ISD::SHL &&
1619       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1620       CNShamt->getZExtValue() == MaskIdx) {
1621     APInt ShMask(ValBits, CNMask->getZExtValue());
1622     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1623       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
1624       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1625                          N1.getOperand(0).getOperand(0),
1626                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1627                          DAG.getConstant(MaskIdx, DL, GRLenVT));
1628     }
1629   }
1630 
1631   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
1632   if (!SwapAndRetried) {
1633     std::swap(N0, N1);
1634     SwapAndRetried = true;
1635     goto Retry;
1636   }
1637 
1638   SwapAndRetried = false;
1639 Retry2:
1640   // 8th pattern.
1641   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
1642   // the incoming bits are known to be zero.
1643   // =>
1644   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
1645   //
1646   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
1647   // we put it here in order to match as many nodes as possible or generate
1648   // fewer instructions.
1649   if (N1.getOpcode() == ISD::AND &&
1650       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1651       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
1652     APInt ShMask(ValBits, CNMask->getZExtValue());
1653     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1654       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
1655       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1656                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
1657                                      N1->getOperand(0),
1658                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
1659                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1660                          DAG.getConstant(MaskIdx, DL, GRLenVT));
1661     }
1662   }
1663   // Swap N0/N1 and retry.
1664   if (!SwapAndRetried) {
1665     std::swap(N0, N1);
1666     SwapAndRetried = true;
1667     goto Retry2;
1668   }
1669 
1670   return SDValue();
1671 }
1672 
1673 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
1674 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
1675                                       TargetLowering::DAGCombinerInfo &DCI,
1676                                       const LoongArchSubtarget &Subtarget) {
1677   if (DCI.isBeforeLegalizeOps())
1678     return SDValue();
1679 
1680   SDValue Src = N->getOperand(0);
1681   if (Src.getOpcode() != LoongArchISD::REVB_2W)
1682     return SDValue();
1683 
1684   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
1685                      Src.getOperand(0));
1686 }
1687 
1688 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
1689                                                    DAGCombinerInfo &DCI) const {
1690   SelectionDAG &DAG = DCI.DAG;
1691   switch (N->getOpcode()) {
1692   default:
1693     break;
1694   case ISD::AND:
1695     return performANDCombine(N, DAG, DCI, Subtarget);
1696   case ISD::OR:
1697     return performORCombine(N, DAG, DCI, Subtarget);
1698   case ISD::SRL:
1699     return performSRLCombine(N, DAG, DCI, Subtarget);
1700   case LoongArchISD::BITREV_W:
1701     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
1702   }
1703   return SDValue();
1704 }
1705 
1706 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
1707                                               MachineBasicBlock *MBB) {
1708   if (!ZeroDivCheck)
1709     return MBB;
1710 
1711   // Build instructions:
1712   // MBB:
1713   //   div(or mod)   $dst, $dividend, $divisor
1714   //   bnez          $divisor, SinkMBB
1715   // BreakMBB:
1716   //   break         7 // BRK_DIVZERO
1717   // SinkMBB:
1718   //   fallthrough
1719   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1720   MachineFunction::iterator It = ++MBB->getIterator();
1721   MachineFunction *MF = MBB->getParent();
1722   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1723   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1724   MF->insert(It, BreakMBB);
1725   MF->insert(It, SinkMBB);
1726 
1727   // Transfer the remainder of MBB and its successor edges to SinkMBB.
1728   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
1729   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
1730 
1731   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
1732   DebugLoc DL = MI.getDebugLoc();
1733   MachineOperand &Divisor = MI.getOperand(2);
1734   Register DivisorReg = Divisor.getReg();
1735 
1736   // MBB:
1737   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
1738       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
1739       .addMBB(SinkMBB);
1740   MBB->addSuccessor(BreakMBB);
1741   MBB->addSuccessor(SinkMBB);
1742 
1743   // BreakMBB:
1744   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
1745   // definition of BRK_DIVZERO.
1746   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
1747   BreakMBB->addSuccessor(SinkMBB);
1748 
1749   // Clear Divisor's kill flag.
1750   Divisor.setIsKill(false);
1751 
1752   return SinkMBB;
1753 }
1754 
1755 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
1756     MachineInstr &MI, MachineBasicBlock *BB) const {
1757   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1758   DebugLoc DL = MI.getDebugLoc();
1759 
1760   switch (MI.getOpcode()) {
1761   default:
1762     llvm_unreachable("Unexpected instr type to insert");
1763   case LoongArch::DIV_W:
1764   case LoongArch::DIV_WU:
1765   case LoongArch::MOD_W:
1766   case LoongArch::MOD_WU:
1767   case LoongArch::DIV_D:
1768   case LoongArch::DIV_DU:
1769   case LoongArch::MOD_D:
1770   case LoongArch::MOD_DU:
1771     return insertDivByZeroTrap(MI, BB);
1772     break;
1773   case LoongArch::WRFCSR: {
1774     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
1775             LoongArch::FCSR0 + MI.getOperand(0).getImm())
1776         .addReg(MI.getOperand(1).getReg());
1777     MI.eraseFromParent();
1778     return BB;
1779   }
1780   case LoongArch::RDFCSR: {
1781     MachineInstr *ReadFCSR =
1782         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
1783                 MI.getOperand(0).getReg())
1784             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
1785     ReadFCSR->getOperand(1).setIsUndef();
1786     MI.eraseFromParent();
1787     return BB;
1788   }
1789   }
1790 }
1791 
1792 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
1793     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1794     unsigned *Fast) const {
1795   if (!Subtarget.hasUAL())
1796     return false;
1797 
1798   // TODO: set reasonable speed number.
1799   if (Fast)
1800     *Fast = 1;
1801   return true;
1802 }
1803 
1804 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
1805   switch ((LoongArchISD::NodeType)Opcode) {
1806   case LoongArchISD::FIRST_NUMBER:
1807     break;
1808 
1809 #define NODE_NAME_CASE(node)                                                   \
1810   case LoongArchISD::node:                                                     \
1811     return "LoongArchISD::" #node;
1812 
1813     // TODO: Add more target-dependent nodes later.
1814     NODE_NAME_CASE(CALL)
1815     NODE_NAME_CASE(RET)
1816     NODE_NAME_CASE(TAIL)
1817     NODE_NAME_CASE(SLL_W)
1818     NODE_NAME_CASE(SRA_W)
1819     NODE_NAME_CASE(SRL_W)
1820     NODE_NAME_CASE(BSTRINS)
1821     NODE_NAME_CASE(BSTRPICK)
1822     NODE_NAME_CASE(MOVGR2FR_W_LA64)
1823     NODE_NAME_CASE(MOVFR2GR_S_LA64)
1824     NODE_NAME_CASE(FTINT)
1825     NODE_NAME_CASE(REVB_2H)
1826     NODE_NAME_CASE(REVB_2W)
1827     NODE_NAME_CASE(BITREV_4B)
1828     NODE_NAME_CASE(BITREV_W)
1829     NODE_NAME_CASE(ROTR_W)
1830     NODE_NAME_CASE(ROTL_W)
1831     NODE_NAME_CASE(CLZ_W)
1832     NODE_NAME_CASE(CTZ_W)
1833     NODE_NAME_CASE(DBAR)
1834     NODE_NAME_CASE(IBAR)
1835     NODE_NAME_CASE(BREAK)
1836     NODE_NAME_CASE(SYSCALL)
1837     NODE_NAME_CASE(CRC_W_B_W)
1838     NODE_NAME_CASE(CRC_W_H_W)
1839     NODE_NAME_CASE(CRC_W_W_W)
1840     NODE_NAME_CASE(CRC_W_D_W)
1841     NODE_NAME_CASE(CRCC_W_B_W)
1842     NODE_NAME_CASE(CRCC_W_H_W)
1843     NODE_NAME_CASE(CRCC_W_W_W)
1844     NODE_NAME_CASE(CRCC_W_D_W)
1845     NODE_NAME_CASE(CSRRD)
1846     NODE_NAME_CASE(CSRWR)
1847     NODE_NAME_CASE(CSRXCHG)
1848     NODE_NAME_CASE(IOCSRRD_B)
1849     NODE_NAME_CASE(IOCSRRD_H)
1850     NODE_NAME_CASE(IOCSRRD_W)
1851     NODE_NAME_CASE(IOCSRRD_D)
1852     NODE_NAME_CASE(IOCSRWR_B)
1853     NODE_NAME_CASE(IOCSRWR_H)
1854     NODE_NAME_CASE(IOCSRWR_W)
1855     NODE_NAME_CASE(IOCSRWR_D)
1856     NODE_NAME_CASE(CPUCFG)
1857     NODE_NAME_CASE(MOVGR2FCSR)
1858     NODE_NAME_CASE(MOVFCSR2GR)
1859     NODE_NAME_CASE(CACOP_D)
1860     NODE_NAME_CASE(CACOP_W)
1861   }
1862 #undef NODE_NAME_CASE
1863   return nullptr;
1864 }
1865 
1866 //===----------------------------------------------------------------------===//
1867 //                     Calling Convention Implementation
1868 //===----------------------------------------------------------------------===//
1869 
1870 // Eight general-purpose registers a0-a7 are used for passing integer
1871 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
1872 // to pass fixed-point arguments, and floating-point arguments when no FPR is
1873 // available or with the soft-float ABI.
1874 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
1875                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
1876                              LoongArch::R10, LoongArch::R11};
1877 // Eight floating-point registers fa0-fa7 are used for passing floating-point
1878 // arguments, and fa0-fa1 are also used to return values.
1879 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
1880                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
1881                                LoongArch::F6, LoongArch::F7};
1882 // FPR32 and FPR64 alias each other.
1883 const MCPhysReg ArgFPR64s[] = {
1884     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
1885     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
1886 
1887 // Pass a 2*GRLen argument that has been split into two GRLen values through
1888 // registers or the stack as necessary.
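     // For example, with GRLen == 64 an i128 argument arrives here as two i64
     // halves: the first half takes the next free a-register (or both halves go
     // on the stack if none is left), and the second half likewise uses a
     // register if one remains, or a stack slot otherwise.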
1889 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
1890                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
1891                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
1892                                      ISD::ArgFlagsTy ArgFlags2) {
1893   unsigned GRLenInBytes = GRLen / 8;
1894   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1895     // At least one half can be passed via register.
1896     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1897                                      VA1.getLocVT(), CCValAssign::Full));
1898   } else {
1899     // Both halves must be passed on the stack, with proper alignment.
1900     Align StackAlign =
1901         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1902     State.addLoc(
1903         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1904                             State.AllocateStack(GRLenInBytes, StackAlign),
1905                             VA1.getLocVT(), CCValAssign::Full));
1906     State.addLoc(CCValAssign::getMem(
1907         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1908         LocVT2, CCValAssign::Full));
1909     return false;
1910   }
1911   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1912     // The second half can also be passed via register.
1913     State.addLoc(
1914         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1915   } else {
1916     // The second half is passed via the stack, without additional alignment.
1917     State.addLoc(CCValAssign::getMem(
1918         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1919         LocVT2, CCValAssign::Full));
1920   }
1921   return false;
1922 }
1923 
1924 // Implements the LoongArch calling convention. Returns true upon failure.
1925 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
1926                          unsigned ValNo, MVT ValVT,
1927                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1928                          CCState &State, bool IsFixed, bool IsRet,
1929                          Type *OrigTy) {
1930   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1931   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
1932   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
1933   MVT LocVT = ValVT;
1934 
1935   // Any return value split into more than two values can't be returned
1936   // directly.
1937   if (IsRet && ValNo > 1)
1938     return true;
1939 
1940   // If passing a variadic argument, or if no FPR is available.
1941   bool UseGPRForFloat = true;
1942 
1943   switch (ABI) {
1944   default:
1945     llvm_unreachable("Unexpected ABI");
1946   case LoongArchABI::ABI_ILP32S:
1947   case LoongArchABI::ABI_ILP32F:
1948   case LoongArchABI::ABI_LP64F:
1949     report_fatal_error("Unimplemented ABI");
1950     break;
1951   case LoongArchABI::ABI_ILP32D:
1952   case LoongArchABI::ABI_LP64D:
1953     UseGPRForFloat = !IsFixed;
1954     break;
1955   case LoongArchABI::ABI_LP64S:
1956     break;
1957   }
1958 
1959   // FPR32 and FPR64 alias each other.
1960   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
1961     UseGPRForFloat = true;
1962 
1963   if (UseGPRForFloat && ValVT == MVT::f32) {
1964     LocVT = GRLenVT;
1965     LocInfo = CCValAssign::BCvt;
1966   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
1967     LocVT = MVT::i64;
1968     LocInfo = CCValAssign::BCvt;
1969   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
1970     // TODO: Handle passing f64 on LA32 with D feature.
1971     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
1972   }
1973 
1974   // If this is a variadic argument, the LoongArch calling convention requires
1975   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
1976   // byte alignment. An aligned register should be used regardless of whether
1977   // the original argument was split during legalisation or not. The argument
1978   // will not be passed by registers if the original type is larger than
1979   // 2*GRLen, so the register alignment rule does not apply.
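       // For example, on LA64 a variadic 16-byte argument with 16-byte alignment
       // whose next free register would be a5 (an odd index) skips a5 and is
       // passed starting in a6.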
1980   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
1981   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
1982       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
1983     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1984     // Skip 'odd' register if necessary.
1985     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
1986       State.AllocateReg(ArgGPRs);
1987   }
1988 
1989   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1990   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1991       State.getPendingArgFlags();
1992 
1993   assert(PendingLocs.size() == PendingArgFlags.size() &&
1994          "PendingLocs and PendingArgFlags out of sync");
1995 
1996   // Split arguments might be passed indirectly, so keep track of the pending
1997   // values.
1998   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
1999     LocVT = GRLenVT;
2000     LocInfo = CCValAssign::Indirect;
2001     PendingLocs.push_back(
2002         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
2003     PendingArgFlags.push_back(ArgFlags);
2004     if (!ArgFlags.isSplitEnd()) {
2005       return false;
2006     }
2007   }
2008 
2009   // If the split argument only had two elements, it should be passed directly
2010   // in registers or on the stack.
2011   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
2012       PendingLocs.size() <= 2) {
2013     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
2014     // Apply the normal calling convention rules to the first half of the
2015     // split argument.
2016     CCValAssign VA = PendingLocs[0];
2017     ISD::ArgFlagsTy AF = PendingArgFlags[0];
2018     PendingLocs.clear();
2019     PendingArgFlags.clear();
2020     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
2021                                     ArgFlags);
2022   }
2023 
2024   // Allocate to a register if possible, or else a stack slot.
2025   Register Reg;
2026   unsigned StoreSizeBytes = GRLen / 8;
2027   Align StackAlign = Align(GRLen / 8);
2028 
2029   if (ValVT == MVT::f32 && !UseGPRForFloat)
2030     Reg = State.AllocateReg(ArgFPR32s);
2031   else if (ValVT == MVT::f64 && !UseGPRForFloat)
2032     Reg = State.AllocateReg(ArgFPR64s);
2033   else
2034     Reg = State.AllocateReg(ArgGPRs);
2035 
2036   unsigned StackOffset =
2037       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
2038 
2039   // If we reach this point and PendingLocs is non-empty, we must be at the
2040   // end of a split argument that must be passed indirectly.
2041   if (!PendingLocs.empty()) {
2042     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
2043     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
2044     for (auto &It : PendingLocs) {
2045       if (Reg)
2046         It.convertToReg(Reg);
2047       else
2048         It.convertToMem(StackOffset);
2049       State.addLoc(It);
2050     }
2051     PendingLocs.clear();
2052     PendingArgFlags.clear();
2053     return false;
2054   }
2055   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
2056          "Expected a GRLenVT at this stage");
2057 
2058   if (Reg) {
2059     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2060     return false;
2061   }
2062 
2063   // When a floating-point value is passed on the stack, no bit-cast is needed.
2064   if (ValVT.isFloatingPoint()) {
2065     LocVT = ValVT;
2066     LocInfo = CCValAssign::Full;
2067   }
2068 
2069   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2070   return false;
2071 }
2072 
2073 void LoongArchTargetLowering::analyzeInputArgs(
2074     MachineFunction &MF, CCState &CCInfo,
2075     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
2076     LoongArchCCAssignFn Fn) const {
2077   FunctionType *FType = MF.getFunction().getFunctionType();
2078   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
2079     MVT ArgVT = Ins[i].VT;
2080     Type *ArgTy = nullptr;
2081     if (IsRet)
2082       ArgTy = FType->getReturnType();
2083     else if (Ins[i].isOrigArg())
2084       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2085     LoongArchABI::ABI ABI =
2086         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2087     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
2088            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
2089       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
2090                         << '\n');
2091       llvm_unreachable("");
2092     }
2093   }
2094 }
2095 
2096 void LoongArchTargetLowering::analyzeOutputArgs(
2097     MachineFunction &MF, CCState &CCInfo,
2098     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2099     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
2100   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2101     MVT ArgVT = Outs[i].VT;
2102     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2103     LoongArchABI::ABI ABI =
2104         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2105     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
2106            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
2107       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
2108                         << "\n");
2109       llvm_unreachable("");
2110     }
2111   }
2112 }
2113 
2114 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2115 // values.
2116 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2117                                    const CCValAssign &VA, const SDLoc &DL) {
2118   switch (VA.getLocInfo()) {
2119   default:
2120     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2121   case CCValAssign::Full:
2122   case CCValAssign::Indirect:
2123     break;
2124   case CCValAssign::BCvt:
2125     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2126       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
2127     else
2128       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2129     break;
2130   }
2131   return Val;
2132 }
2133 
2134 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2135                                 const CCValAssign &VA, const SDLoc &DL,
2136                                 const LoongArchTargetLowering &TLI) {
2137   MachineFunction &MF = DAG.getMachineFunction();
2138   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2139   EVT LocVT = VA.getLocVT();
2140   SDValue Val;
2141   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2142   Register VReg = RegInfo.createVirtualRegister(RC);
2143   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2144   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2145 
2146   return convertLocVTToValVT(DAG, Val, VA, DL);
2147 }
2148 
2149 // The caller is responsible for loading the full value if the argument is
2150 // passed with CCValAssign::Indirect.
2151 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2152                                 const CCValAssign &VA, const SDLoc &DL) {
2153   MachineFunction &MF = DAG.getMachineFunction();
2154   MachineFrameInfo &MFI = MF.getFrameInfo();
2155   EVT ValVT = VA.getValVT();
2156   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
2157                                  /*IsImmutable=*/true);
2158   SDValue FIN = DAG.getFrameIndex(
2159       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
2160 
2161   ISD::LoadExtType ExtType;
2162   switch (VA.getLocInfo()) {
2163   default:
2164     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2165   case CCValAssign::Full:
2166   case CCValAssign::Indirect:
2167   case CCValAssign::BCvt:
2168     ExtType = ISD::NON_EXTLOAD;
2169     break;
2170   }
2171   return DAG.getExtLoad(
2172       ExtType, DL, VA.getLocVT(), Chain, FIN,
2173       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2174 }
2175 
2176 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2177                                    const CCValAssign &VA, const SDLoc &DL) {
2178   EVT LocVT = VA.getLocVT();
2179 
2180   switch (VA.getLocInfo()) {
2181   default:
2182     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2183   case CCValAssign::Full:
2184     break;
2185   case CCValAssign::BCvt:
2186     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2187       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
2188     else
2189       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2190     break;
2191   }
2192   return Val;
2193 }
2194 
2195 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2196                              CCValAssign::LocInfo LocInfo,
2197                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
2198   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2199     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
2200     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
2201     static const MCPhysReg GPRList[] = {
2202         LoongArch::R23, LoongArch::R24, LoongArch::R25,
2203         LoongArch::R26, LoongArch::R27, LoongArch::R28,
2204         LoongArch::R29, LoongArch::R30, LoongArch::R31};
2205     if (unsigned Reg = State.AllocateReg(GPRList)) {
2206       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2207       return false;
2208     }
2209   }
2210 
2211   if (LocVT == MVT::f32) {
2212     // Pass in STG registers: F1, F2, F3, F4
2213     //                        fs0,fs1,fs2,fs3
2214     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
2215                                           LoongArch::F26, LoongArch::F27};
2216     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2217       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2218       return false;
2219     }
2220   }
2221 
2222   if (LocVT == MVT::f64) {
2223     // Pass in STG registers: D1, D2, D3, D4
2224     //                        fs4,fs5,fs6,fs7
2225     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
2226                                           LoongArch::F30_64, LoongArch::F31_64};
2227     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2228       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2229       return false;
2230     }
2231   }
2232 
2233   report_fatal_error("No registers left in GHC calling convention");
2234   return true;
2235 }
2236 
2237 // Transform physical registers into virtual registers.
2238 SDValue LoongArchTargetLowering::LowerFormalArguments(
2239     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2240     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2241     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2242 
2243   MachineFunction &MF = DAG.getMachineFunction();
2244 
2245   switch (CallConv) {
2246   default:
2247     llvm_unreachable("Unsupported calling convention");
2248   case CallingConv::C:
2249   case CallingConv::Fast:
2250     break;
2251   case CallingConv::GHC:
2252     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
2253         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
2254       report_fatal_error(
2255           "GHC calling convention requires the F and D extensions");
2256   }
2257 
2258   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2259   MVT GRLenVT = Subtarget.getGRLenVT();
2260   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2261   // Used with varargs to accumulate store chains.
2262   std::vector<SDValue> OutChains;
2263 
2264   // Assign locations to all of the incoming arguments.
2265   SmallVector<CCValAssign> ArgLocs;
2266   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2267 
2268   if (CallConv == CallingConv::GHC)
2269     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
2270   else
2271     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
2272 
2273   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2274     CCValAssign &VA = ArgLocs[i];
2275     SDValue ArgValue;
2276     if (VA.isRegLoc())
2277       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2278     else
2279       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2280     if (VA.getLocInfo() == CCValAssign::Indirect) {
2281       // If the original argument was split and passed by reference, we need to
2282       // load all parts of it here (using the same address).
2283       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2284                                    MachinePointerInfo()));
2285       unsigned ArgIndex = Ins[i].OrigArgIndex;
2286       unsigned ArgPartOffset = Ins[i].PartOffset;
2287       assert(ArgPartOffset == 0);
2288       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2289         CCValAssign &PartVA = ArgLocs[i + 1];
2290         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
2291         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2292         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
2293         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2294                                      MachinePointerInfo()));
2295         ++i;
2296       }
2297       continue;
2298     }
2299     InVals.push_back(ArgValue);
2300   }
2301 
2302   if (IsVarArg) {
2303     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
2304     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2305     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
2306     MachineFrameInfo &MFI = MF.getFrameInfo();
2307     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2308     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
2309 
2310     // Offset of the first variable argument from stack pointer, and size of
2311     // the vararg save area. For now, the varargs save area is either zero or
2312     // large enough to hold a0-a7.
2313     int VaArgOffset, VarArgsSaveSize;
2314 
2315     // If all registers are allocated, then all varargs must be passed on the
2316     // stack and we don't need to save any argregs.
2317     if (ArgRegs.size() == Idx) {
2318       VaArgOffset = CCInfo.getStackSize();
2319       VarArgsSaveSize = 0;
2320     } else {
2321       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
2322       VaArgOffset = -VarArgsSaveSize;
2323     }
2324 
2325     // Record the frame index of the first variable argument,
2326     // which is a value needed by VASTART.
2327     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2328     LoongArchFI->setVarArgsFrameIndex(FI);
2329 
2330     // If saving an odd number of registers, create an extra stack slot to
2331     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2332     // offsets to even-numbered registers remain 2*GRLen-aligned.
2333     if (Idx % 2) {
2334       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
2335                             true);
2336       VarArgsSaveSize += GRLenInBytes;
2337     }
2338 
2339     // Copy the integer registers that may have been used for passing varargs
2340     // to the vararg save area.
2341     for (unsigned I = Idx; I < ArgRegs.size();
2342          ++I, VaArgOffset += GRLenInBytes) {
2343       const Register Reg = RegInfo.createVirtualRegister(RC);
2344       RegInfo.addLiveIn(ArgRegs[I], Reg);
2345       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
2346       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2347       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2348       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2349                                    MachinePointerInfo::getFixedStack(MF, FI));
2350       cast<StoreSDNode>(Store.getNode())
2351           ->getMemOperand()
2352           ->setValue((Value *)nullptr);
2353       OutChains.push_back(Store);
2354     }
2355     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
2356   }
2357 
2358   // All stores are grouped in one node to allow the matching between
2359   // the size of Ins and InVals. This only happens for vararg functions.
2360   if (!OutChains.empty()) {
2361     OutChains.push_back(Chain);
2362     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2363   }
2364 
2365   return Chain;
2366 }
2367 
2368 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2369   return CI->isTailCall();
2370 }
2371 
2372 // Check if the return value is used only as a return value, as otherwise
2373 // we can't perform a tail call.
2374 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
2375                                                  SDValue &Chain) const {
2376   if (N->getNumValues() != 1)
2377     return false;
2378   if (!N->hasNUsesOfValue(1, 0))
2379     return false;
2380 
2381   SDNode *Copy = *N->use_begin();
2382   if (Copy->getOpcode() != ISD::CopyToReg)
2383     return false;
2384 
2385   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
2386   // isn't safe to perform a tail call.
2387   if (Copy->getGluedNode())
2388     return false;
2389 
2390   // The copy must be used by a LoongArchISD::RET, and nothing else.
2391   bool HasRet = false;
2392   for (SDNode *Node : Copy->uses()) {
2393     if (Node->getOpcode() != LoongArchISD::RET)
2394       return false;
2395     HasRet = true;
2396   }
2397 
2398   if (!HasRet)
2399     return false;
2400 
2401   Chain = Copy->getOperand(0);
2402   return true;
2403 }
2404 
2405 // Check whether the call is eligible for tail call optimization.
2406 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
2407     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2408     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
2409 
2410   auto CalleeCC = CLI.CallConv;
2411   auto &Outs = CLI.Outs;
2412   auto &Caller = MF.getFunction();
2413   auto CallerCC = Caller.getCallingConv();
2414 
2415   // Do not tail call opt if the stack is used to pass parameters.
2416   if (CCInfo.getStackSize() != 0)
2417     return false;
2418 
2419   // Do not tail call opt if any parameters need to be passed indirectly.
2420   for (auto &VA : ArgLocs)
2421     if (VA.getLocInfo() == CCValAssign::Indirect)
2422       return false;
2423 
2424   // Do not tail call opt if either caller or callee uses struct return
2425   // semantics.
2426   auto IsCallerStructRet = Caller.hasStructRetAttr();
2427   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2428   if (IsCallerStructRet || IsCalleeStructRet)
2429     return false;
2430 
2431   // Do not tail call opt if either the callee or caller has a byval argument.
2432   for (auto &Arg : Outs)
2433     if (Arg.Flags.isByVal())
2434       return false;
2435 
2436   // The callee has to preserve all registers the caller needs to preserve.
2437   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
2438   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2439   if (CalleeCC != CallerCC) {
2440     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2441     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2442       return false;
2443   }
2444   return true;
2445 }
2446 
2447 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
2448   return DAG.getDataLayout().getPrefTypeAlign(
2449       VT.getTypeForEVT(*DAG.getContext()));
2450 }
2451 
2452 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2453 // and output parameter nodes.
2454 SDValue
2455 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
2456                                    SmallVectorImpl<SDValue> &InVals) const {
2457   SelectionDAG &DAG = CLI.DAG;
2458   SDLoc &DL = CLI.DL;
2459   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2460   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2461   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2462   SDValue Chain = CLI.Chain;
2463   SDValue Callee = CLI.Callee;
2464   CallingConv::ID CallConv = CLI.CallConv;
2465   bool IsVarArg = CLI.IsVarArg;
2466   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2467   MVT GRLenVT = Subtarget.getGRLenVT();
2468   bool &IsTailCall = CLI.IsTailCall;
2469 
2470   MachineFunction &MF = DAG.getMachineFunction();
2471 
2472   // Analyze the operands of the call, assigning locations to each operand.
2473   SmallVector<CCValAssign> ArgLocs;
2474   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2475 
2476   if (CallConv == CallingConv::GHC)
2477     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
2478   else
2479     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
2480 
2481   // Check if it's really possible to do a tail call.
2482   if (IsTailCall)
2483     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2484 
2485   if (IsTailCall)
2486     ++NumTailCalls;
2487   else if (CLI.CB && CLI.CB->isMustTailCall())
2488     report_fatal_error("failed to perform tail call elimination on a call "
2489                        "site marked musttail");
2490 
2491   // Get a count of how many bytes are to be pushed on the stack.
2492   unsigned NumBytes = ArgCCInfo.getStackSize();
2493 
2494   // Create local copies for byval args.
2495   SmallVector<SDValue> ByValArgs;
2496   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2497     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2498     if (!Flags.isByVal())
2499       continue;
2500 
2501     SDValue Arg = OutVals[i];
2502     unsigned Size = Flags.getByValSize();
2503     Align Alignment = Flags.getNonZeroByValAlign();
2504 
2505     int FI =
2506         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2507     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2508     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
2509 
2510     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2511                           /*IsVolatile=*/false,
2512                           /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
2513                           MachinePointerInfo(), MachinePointerInfo());
2514     ByValArgs.push_back(FIPtr);
2515   }
2516 
2517   if (!IsTailCall)
2518     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2519 
2520   // Copy argument values to their designated locations.
2521   SmallVector<std::pair<Register, SDValue>> RegsToPass;
2522   SmallVector<SDValue> MemOpChains;
2523   SDValue StackPtr;
2524   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2525     CCValAssign &VA = ArgLocs[i];
2526     SDValue ArgValue = OutVals[i];
2527     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2528 
2529     // Promote the value if needed.
2530     // For now, only handle fully promoted and indirect arguments.
2531     if (VA.getLocInfo() == CCValAssign::Indirect) {
2532       // Store the argument in a stack slot and pass its address.
2533       Align StackAlign =
2534           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
2535                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
2536       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
2537       // If the original argument was split and passed by reference, we need to
2538       // store the required parts of it here (and pass just one address).
2539       unsigned ArgIndex = Outs[i].OrigArgIndex;
2540       unsigned ArgPartOffset = Outs[i].PartOffset;
2541       assert(ArgPartOffset == 0);
2542       // Calculate the total size to store. We can only determine this by
2543       // iterating over the remaining parts of the split argument and
2544       // accumulating their sizes and alignments.
2545       SmallVector<std::pair<SDValue, SDValue>> Parts;
2546       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2547         SDValue PartValue = OutVals[i + 1];
2548         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
2549         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2550         EVT PartVT = PartValue.getValueType();
2551 
2552         StoredSize += PartVT.getStoreSize();
2553         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
2554         Parts.push_back(std::make_pair(PartValue, Offset));
2555         ++i;
2556       }
2557       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
2558       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2559       MemOpChains.push_back(
2560           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2561                        MachinePointerInfo::getFixedStack(MF, FI)));
2562       for (const auto &Part : Parts) {
2563         SDValue PartValue = Part.first;
2564         SDValue PartOffset = Part.second;
2565         SDValue Address =
2566             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
2567         MemOpChains.push_back(
2568             DAG.getStore(Chain, DL, PartValue, Address,
2569                          MachinePointerInfo::getFixedStack(MF, FI)));
2570       }
2571       ArgValue = SpillSlot;
2572     } else {
2573       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2574     }
2575 
2576     // Use local copy if it is a byval arg.
2577     if (Flags.isByVal())
2578       ArgValue = ByValArgs[j++];
2579 
2580     if (VA.isRegLoc()) {
2581       // Queue up the argument copies and emit them at the end.
2582       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2583     } else {
2584       assert(VA.isMemLoc() && "Argument not register or memory");
2585       assert(!IsTailCall && "Tail call not allowed if stack is used "
2586                             "for passing parameters");
2587 
2588       // Work out the address of the stack slot.
2589       if (!StackPtr.getNode())
2590         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
2591       SDValue Address =
2592           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2593                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2594 
2595       // Emit the store.
2596       MemOpChains.push_back(
2597           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2598     }
2599   }
2600 
2601   // Join the stores, which are independent of one another.
2602   if (!MemOpChains.empty())
2603     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2604 
2605   SDValue Glue;
2606 
2607   // Build a sequence of copy-to-reg nodes, chained and glued together.
2608   for (auto &Reg : RegsToPass) {
2609     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2610     Glue = Chain.getValue(1);
2611   }
2612 
2613   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2614   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2615   // split it and then direct call can be matched by PseudoCALL.
2616   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2617     const GlobalValue *GV = S->getGlobal();
2618     unsigned OpFlags =
2619         getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
2620             ? LoongArchII::MO_CALL
2621             : LoongArchII::MO_CALL_PLT;
2622     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
2623   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2624     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
2625                            *MF.getFunction().getParent(), nullptr)
2626                            ? LoongArchII::MO_CALL
2627                            : LoongArchII::MO_CALL_PLT;
2628     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2629   }
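
  // Illustration (a sketch; the exact relocation spelling may vary): a
  // dso_local callee tagged MO_CALL is expected to become a direct
  // `bl callee`, while a preemptible callee tagged MO_CALL_PLT conceptually
  // becomes `bl %plt(callee)` so the call is routed through the PLT.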
2630 
2631   // The first call operand is the chain and the second is the target address.
2632   SmallVector<SDValue> Ops;
2633   Ops.push_back(Chain);
2634   Ops.push_back(Callee);
2635 
2636   // Add argument registers to the end of the list so that they are
2637   // known live into the call.
2638   for (auto &Reg : RegsToPass)
2639     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2640 
2641   if (!IsTailCall) {
2642     // Add a register mask operand representing the call-preserved registers.
2643     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2644     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2645     assert(Mask && "Missing call preserved mask for calling convention");
2646     Ops.push_back(DAG.getRegisterMask(Mask));
2647   }
2648 
2649   // Glue the call to the argument copies, if any.
2650   if (Glue.getNode())
2651     Ops.push_back(Glue);
2652 
2653   // Emit the call.
2654   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2655 
2656   if (IsTailCall) {
2657     MF.getFrameInfo().setHasTailCall();
2658     SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
2659     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2660     return Ret;
2661   }
2662 
2663   Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
2664   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2665   Glue = Chain.getValue(1);
2666 
2667   // Mark the end of the call, which is glued to the call itself.
2668   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2669   Glue = Chain.getValue(1);
2670 
2671   // Assign locations to each value returned by this call.
2672   SmallVector<CCValAssign> RVLocs;
2673   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2674   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
2675 
2676   // Copy all of the result registers out of their specified physreg.
2677   for (auto &VA : RVLocs) {
2678     // Copy the value out.
2679     SDValue RetValue =
2680         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2681     // Glue the RetValue to the end of the call sequence.
2682     Chain = RetValue.getValue(1);
2683     Glue = RetValue.getValue(2);
2684 
2685     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2686 
2687     InVals.push_back(RetValue);
2688   }
2689 
2690   return Chain;
2691 }
2692 
2693 bool LoongArchTargetLowering::CanLowerReturn(
2694     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2695     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2696   SmallVector<CCValAssign> RVLocs;
2697   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2698 
2699   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2700     LoongArchABI::ABI ABI =
2701         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2702     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
2703                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
2704                      nullptr))
2705       return false;
2706   }
2707   return true;
2708 }
2709 
2710 SDValue LoongArchTargetLowering::LowerReturn(
2711     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2712     const SmallVectorImpl<ISD::OutputArg> &Outs,
2713     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
2714     SelectionDAG &DAG) const {
2715   // Stores the assignment of the return value to a location.
2716   SmallVector<CCValAssign> RVLocs;
2717 
2718   // Info about the registers and stack slot.
2719   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2720                  *DAG.getContext());
2721 
2722   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2723                     nullptr, CC_LoongArch);
2724   if (CallConv == CallingConv::GHC && !RVLocs.empty())
2725     report_fatal_error("GHC functions return void only");
2726   SDValue Glue;
2727   SmallVector<SDValue, 4> RetOps(1, Chain);
2728 
2729   // Copy the result values into the output registers.
2730   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2731     CCValAssign &VA = RVLocs[i];
2732     assert(VA.isRegLoc() && "Can only return in registers!");
2733 
2734     // Handle a 'normal' return.
2735     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
2736     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2737 
2738     // Guarantee that all emitted copies are stuck together.
2739     Glue = Chain.getValue(1);
2740     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2741   }
2742 
2743   RetOps[0] = Chain; // Update chain.
2744 
2745   // Add the glue node if we have it.
2746   if (Glue.getNode())
2747     RetOps.push_back(Glue);
2748 
2749   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
2750 }
2751 
2752 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2753                                            bool ForCodeSize) const {
2754   // TODO: Maybe need more checks here after vector extension is supported.
2755   if (VT == MVT::f32 && !Subtarget.hasBasicF())
2756     return false;
2757   if (VT == MVT::f64 && !Subtarget.hasBasicD())
2758     return false;
2759   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
2760 }
2761 
2762 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
2763   return true;
2764 }
2765 
2766 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
2767   return true;
2768 }
2769 
2770 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
2771     const Instruction *I) const {
2772   if (!Subtarget.is64Bit())
2773     return isa<LoadInst>(I) || isa<StoreInst>(I);
2774 
2775   if (isa<LoadInst>(I))
2776     return true;
2777 
2778   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
2779   // require fences because we can use amswap_db.[w/d].
2780   if (isa<StoreInst>(I)) {
2781     unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
2782     return (Size == 8 || Size == 16);
2783   }
2784 
2785   return false;
2786 }
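
// Worked example (illustration only): on LA64 a seq_cst `store atomic i32`
// can be selected as amswap_db.w, whose built-in barrier makes explicit
// fences unnecessary, so false is returned for 32/64-bit stores; loads,
// i8/i16 stores, and all LA32 atomic accesses still get fences.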
2787 
2788 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
2789                                                 LLVMContext &Context,
2790                                                 EVT VT) const {
2791   if (!VT.isVector())
2792     return getPointerTy(DL);
2793   return VT.changeVectorElementTypeToInteger();
2794 }
2795 
2796 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
2797   // TODO: Support vectors.
2798   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
2799 }
2800 
2801 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2802                                                  const CallInst &I,
2803                                                  MachineFunction &MF,
2804                                                  unsigned Intrinsic) const {
2805   switch (Intrinsic) {
2806   default:
2807     return false;
2808   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
2809   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
2810   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
2811   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
2812     Info.opc = ISD::INTRINSIC_W_CHAIN;
2813     Info.memVT = MVT::i32;
2814     Info.ptrVal = I.getArgOperand(0);
2815     Info.offset = 0;
2816     Info.align = Align(4);
2817     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
2818                  MachineMemOperand::MOVolatile;
2819     return true;
2820     // TODO: Add more Intrinsics later.
2821   }
2822 }
2823 
2824 TargetLowering::AtomicExpansionKind
2825 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2826   // TODO: Add more AtomicRMWInst that needs to be extended.
2827 
2828   // Since a floating-point operation requires a non-trivial set of data
2829   // operations, use CmpXChg to expand it.
2830   if (AI->isFloatingPointOperation() ||
2831       AI->getOperation() == AtomicRMWInst::UIncWrap ||
2832       AI->getOperation() == AtomicRMWInst::UDecWrap)
2833     return AtomicExpansionKind::CmpXChg;
2834 
2835   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2836   if (Size == 8 || Size == 16)
2837     return AtomicExpansionKind::MaskedIntrinsic;
2838   return AtomicExpansionKind::None;
2839 }
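
// Sketch of the MaskedIntrinsic choice above (the rewriting itself is done by
// AtomicExpandPass, not here): an 8-bit
//   %old = atomicrmw add ptr %p, i8 %v monotonic
// is expanded to operate on the containing aligned word through the masked
// @llvm.loongarch.masked.atomicrmw.add.* intrinsic, whose memory behaviour is
// described by getTgtMemIntrinsic() above.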
2840 
2841 static Intrinsic::ID
2842 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
2843                                     AtomicRMWInst::BinOp BinOp) {
2844   if (GRLen == 64) {
2845     switch (BinOp) {
2846     default:
2847       llvm_unreachable("Unexpected AtomicRMW BinOp");
2848     case AtomicRMWInst::Xchg:
2849       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
2850     case AtomicRMWInst::Add:
2851       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
2852     case AtomicRMWInst::Sub:
2853       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
2854     case AtomicRMWInst::Nand:
2855       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
2856     case AtomicRMWInst::UMax:
2857       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
2858     case AtomicRMWInst::UMin:
2859       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
2860     case AtomicRMWInst::Max:
2861       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
2862     case AtomicRMWInst::Min:
2863       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
2864       // TODO: support other AtomicRMWInst.
2865     }
2866   }
2867 
2868   if (GRLen == 32) {
2869     switch (BinOp) {
2870     default:
2871       llvm_unreachable("Unexpected AtomicRMW BinOp");
2872     case AtomicRMWInst::Xchg:
2873       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
2874     case AtomicRMWInst::Add:
2875       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
2876     case AtomicRMWInst::Sub:
2877       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
2878     case AtomicRMWInst::Nand:
2879       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
2880       // TODO: support other AtomicRMWInst.
2881     }
2882   }
2883 
2884   llvm_unreachable("Unexpected GRLen");
2885 }
2886 
2887 TargetLowering::AtomicExpansionKind
2888 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
2889     AtomicCmpXchgInst *CI) const {
2890   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2891   if (Size == 8 || Size == 16)
2892     return AtomicExpansionKind::MaskedIntrinsic;
2893   return AtomicExpansionKind::None;
2894 }
2895 
2896 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2897     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2898     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2899   Value *Ordering =
2900       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
2901 
2902   // TODO: Support cmpxchg on LA32.
2903   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
2904   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2905   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2906   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2907   Type *Tys[] = {AlignedAddr->getType()};
2908   Function *MaskedCmpXchg =
2909       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2910   Value *Result = Builder.CreateCall(
2911       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2912   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2913   return Result;
2914 }
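
// Illustration (a sketch, not verbatim output): for a masked i16 cmpxchg on
// LA64 the calls above produce roughly
//   %res = call i64 @llvm.loongarch.masked.cmpxchg.i64(
//              ptr %aligned, i64 %cmp, i64 %new, i64 %mask, i64 %ordering)
//   %val = trunc i64 %res to i32
// after sign-extending the compare value, new value and mask to i64.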
2915 
2916 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
2917     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2918     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2919   unsigned GRLen = Subtarget.getGRLen();
2920   Value *Ordering =
2921       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
2922   Type *Tys[] = {AlignedAddr->getType()};
2923   Function *LlwOpScwLoop = Intrinsic::getDeclaration(
2924       AI->getModule(),
2925       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
2926 
2927   if (GRLen == 64) {
2928     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2929     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2930     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2931   }
2932 
2933   Value *Result;
2934 
2935   // Must pass the shift amount needed to sign extend the loaded value prior
2936   // to performing a signed comparison for min/max. ShiftAmt is the number of
2937   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
2938   // is the number of bits to left+right shift the value in order to
2939   // sign-extend.
2940   if (AI->getOperation() == AtomicRMWInst::Min ||
2941       AI->getOperation() == AtomicRMWInst::Max) {
2942     const DataLayout &DL = AI->getModule()->getDataLayout();
2943     unsigned ValWidth =
2944         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2945     Value *SextShamt =
2946         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
2947     Result = Builder.CreateCall(LlwOpScwLoop,
2948                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2949   } else {
2950     Result =
2951         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2952   }
2953 
2954   if (GRLen == 64)
2955     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2956   return Result;
2957 }
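
// Worked example for the min/max shift computation above (illustration only):
// with GRLen = 64 and an i8 field at bit offset ShiftAmt = 16, ValWidth = 8,
// so SextShamt = (64 - 8) - 16 = 40: shifting the loaded word left by 40 and
// arithmetically right by 40 sign-extends the field before the signed compare.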
2958 
2959 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
2960     const MachineFunction &MF, EVT VT) const {
2961   VT = VT.getScalarType();
2962 
2963   if (!VT.isSimple())
2964     return false;
2965 
2966   switch (VT.getSimpleVT().SimpleTy) {
2967   case MVT::f32:
2968   case MVT::f64:
2969     return true;
2970   default:
2971     break;
2972   }
2973 
2974   return false;
2975 }
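
// Note (an assumption about the motivation): reporting FMA as faster lets the
// combiner form fused multiply-add nodes for f32/f64, which can then be
// selected to single fmadd.s / fmadd.d instructions instead of fmul + fadd.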
2976 
2977 Register LoongArchTargetLowering::getExceptionPointerRegister(
2978     const Constant *PersonalityFn) const {
2979   return LoongArch::R4;
2980 }
2981 
2982 Register LoongArchTargetLowering::getExceptionSelectorRegister(
2983     const Constant *PersonalityFn) const {
2984   return LoongArch::R5;
2985 }
2986 
2987 //===----------------------------------------------------------------------===//
2988 //                           LoongArch Inline Assembly Support
2989 //===----------------------------------------------------------------------===//
2990 
2991 LoongArchTargetLowering::ConstraintType
2992 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
2993   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
2994   //
2995   // 'f':  A floating-point register (if available).
2996   // 'k':  A memory operand whose address is formed by a base register and
2997   //       (optionally scaled) index register.
2998   // 'l':  A signed 16-bit constant.
2999   // 'm':  A memory operand whose address is formed by a base register and
3000   //       offset that is suitable for use in instructions with the same
3001   //       addressing mode as st.w and ld.w.
3002   // 'I':  A signed 12-bit constant (for arithmetic instructions).
3003   // 'J':  Integer zero.
3004   // 'K':  An unsigned 12-bit constant (for logic instructions).
3005   // "ZB": An address that is held in a general-purpose register. The offset is
3006   //       zero.
3007   // "ZC": A memory operand whose address is formed by a base register and
3008   //       offset that is suitable for use in instructions with the same
3009   //       addressing mode as ll.w and sc.w.
3010   if (Constraint.size() == 1) {
3011     switch (Constraint[0]) {
3012     default:
3013       break;
3014     case 'f':
3015       return C_RegisterClass;
3016     case 'l':
3017     case 'I':
3018     case 'J':
3019     case 'K':
3020       return C_Immediate;
3021     case 'k':
3022       return C_Memory;
3023     }
3024   }
3025 
3026   if (Constraint == "ZC" || Constraint == "ZB")
3027     return C_Memory;
3028 
3029   // 'm' is handled here.
3030   return TargetLowering::getConstraintType(Constraint);
3031 }
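
// Usage illustration (hypothetical user code, not part of this file):
//   int res;
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(val), "I"(12));
// Here "I" classifies as C_Immediate and the constant is validated to fit in
// a signed 12-bit immediate by LowerAsmOperandForConstraint() below.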
3032 
3033 unsigned LoongArchTargetLowering::getInlineAsmMemConstraint(
3034     StringRef ConstraintCode) const {
3035   return StringSwitch<unsigned>(ConstraintCode)
3036       .Case("k", InlineAsm::Constraint_k)
3037       .Case("ZB", InlineAsm::Constraint_ZB)
3038       .Case("ZC", InlineAsm::Constraint_ZC)
3039       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
3040 }
3041 
3042 std::pair<unsigned, const TargetRegisterClass *>
3043 LoongArchTargetLowering::getRegForInlineAsmConstraint(
3044     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3045   // First, see if this is a constraint that directly corresponds to a LoongArch
3046   // register class.
3047   if (Constraint.size() == 1) {
3048     switch (Constraint[0]) {
3049     case 'r':
3050       // TODO: Support fixed vectors up to GRLen?
3051       if (VT.isVector())
3052         break;
3053       return std::make_pair(0U, &LoongArch::GPRRegClass);
3054     case 'f':
3055       if (Subtarget.hasBasicF() && VT == MVT::f32)
3056         return std::make_pair(0U, &LoongArch::FPR32RegClass);
3057       if (Subtarget.hasBasicD() && VT == MVT::f64)
3058         return std::make_pair(0U, &LoongArch::FPR64RegClass);
3059       if (Subtarget.hasExtLSX() &&
3060           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
3061         return std::make_pair(0U, &LoongArch::LSX128RegClass);
3062       if (Subtarget.hasExtLASX() &&
3063           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
3064         return std::make_pair(0U, &LoongArch::LASX256RegClass);
3065       break;
3066     default:
3067       break;
3068     }
3069   }
3070 
3071   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
3072   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
3073   // constraints while the official register name is prefixed with a '$'. So we
3074   // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
3075   // before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
3076   // case insensitive, so no need to convert the constraint to upper case here.
3077   //
3078   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
3079   // decode the usage of register name aliases into their official names. And
3080   // as far as we know, the not-yet-upstreamed `rustc` port for LoongArch will
3081   // always use official register names.
3082   if (Constraint.startswith("{$r") || Constraint.startswith("{$f") ||
3083       Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) {
3084     bool IsFP = Constraint[2] == 'f';
3085     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
3086     std::pair<unsigned, const TargetRegisterClass *> R;
3087     R = TargetLowering::getRegForInlineAsmConstraint(
3088         TRI, join_items("", Temp.first, Temp.second), VT);
3089     // Match those names to the widest floating point register type available.
3090     if (IsFP) {
3091       unsigned RegNo = R.first;
3092       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
3093         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
3094           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
3095           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
3096         }
3097       }
3098     }
3099     return R;
3100   }
3101 
3102   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3103 }
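
// Illustration (hypothetical example): an explicit register operand such as
//   asm("move %0, %1" : "=r"(dst) : "{$r12}"(src));
// goes through the startswith("{$...") path above: the '$' is stripped so the
// generic matcher sees {r12}, and an {$f0}-style f64 operand is mapped onto
// the corresponding FPR64 register when the D extension is available.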
3104 
3105 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
3106     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3107     SelectionDAG &DAG) const {
3108   // Currently only support length 1 constraints.
3109   if (Constraint.length() == 1) {
3110     switch (Constraint[0]) {
3111     case 'l':
3112       // Validate & create a 16-bit signed immediate operand.
3113       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3114         uint64_t CVal = C->getSExtValue();
3115         if (isInt<16>(CVal))
3116           Ops.push_back(
3117               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3118       }
3119       return;
3120     case 'I':
3121       // Validate & create a 12-bit signed immediate operand.
3122       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3123         uint64_t CVal = C->getSExtValue();
3124         if (isInt<12>(CVal))
3125           Ops.push_back(
3126               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3127       }
3128       return;
3129     case 'J':
3130       // Validate & create an integer zero operand.
3131       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3132         if (C->getZExtValue() == 0)
3133           Ops.push_back(
3134               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
3135       return;
3136     case 'K':
3137       // Validate & create a 12-bit unsigned immediate operand.
3138       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3139         uint64_t CVal = C->getZExtValue();
3140         if (isUInt<12>(CVal))
3141           Ops.push_back(
3142               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3143       }
3144       return;
3145     default:
3146       break;
3147     }
3148   }
3149   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3150 }
3151 
3152 #define GET_REGISTER_MATCHER
3153 #include "LoongArchGenAsmMatcher.inc"
3154 
3155 Register
3156 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3157                                            const MachineFunction &MF) const {
3158   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
3159   std::string NewRegName = Name.second.str();
3160   Register Reg = MatchRegisterAltName(NewRegName);
3161   if (Reg == LoongArch::NoRegister)
3162     Reg = MatchRegisterName(NewRegName);
3163   if (Reg == LoongArch::NoRegister)
3164     report_fatal_error(
3165         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3166   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3167   if (!ReservedRegs.test(Reg))
3168     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3169                              StringRef(RegName) + "\"."));
3170   return Reg;
3171 }
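
// Illustration (hypothetical use): this enables named-register intrinsics,
// e.g. reading the stack pointer via
//   call i64 @llvm.read_register.i64(metadata !{!"$r3"})
// Only registers reserved by the target are accepted; asking for an
// allocatable register reports a fatal error.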
3172 
3173 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
3174                                                      EVT VT, SDValue C) const {
3175   // TODO: Support vectors.
3176   if (!VT.isScalarInteger())
3177     return false;
3178 
3179   // Omit the optimization if the data size exceeds GRLen.
3180   if (VT.getSizeInBits() > Subtarget.getGRLen())
3181     return false;
3182 
3183   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3184     const APInt &Imm = ConstNode->getAPIntValue();
3185     // Break MUL into (SLLI + ADD/SUB) or ALSL.
3186     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
3187         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
3188       return true;
3189     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
3190     if (ConstNode->hasOneUse() &&
3191         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
3192          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
3193       return true;
3194     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
3195     // in which the immediate has two set bits. Or Break (MUL x, imm)
3196     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
3197     // equals to (1 << s0) - (1 << s1).
3198     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
3199       unsigned Shifts = Imm.countr_zero();
3200       // Reject immediates which can be composed via a single LUI.
3201       if (Shifts >= 12)
3202         return false;
3203       // Reject multiplications that can be optimized to
3204       // (SLLI (ALSL x, x, 1/2/3/4), s).
3205       APInt ImmPop = Imm.ashr(Shifts);
3206       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
3207         return false;
3208       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
3209       // since it needs one more instruction than the other 3 cases.
3210       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
3211       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
3212           (ImmSmall - Imm).isPowerOf2())
3213         return true;
3214     }
3215   }
3216 
3217   return false;
3218 }
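
// Worked examples for the checks above (illustration only):
//   x * 5    -> (x << 2) + x
//   x * 6    -> (x << 1) + (x << 2)
//   x * 4160 -> (x << 12) + (x << 6)   (two set bits, Imm outside [-2048, 4095])
// whereas e.g. x * 5120 is rejected here because it is already better served
// by (SLLI (ALSL x, x, 2), 10), i.e. ((x << 2) + x) << 10.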
3219 
3220 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3221                                                     const AddrMode &AM,
3222                                                     Type *Ty, unsigned AS,
3223                                                     Instruction *I) const {
3224   // LoongArch has four basic addressing modes:
3225   //  1. reg
3226   //  2. reg + 12-bit signed offset
3227   //  3. reg + 14-bit signed offset left-shifted by 2
3228   //  4. reg1 + reg2
3229   // TODO: Add more checks after supporting the vector extension.
3230 
3231   // No global is ever allowed as a base.
3232   if (AM.BaseGV)
3233     return false;
3234 
3235   // Require a signed 12-bit offset or a signed 14-bit offset left-shifted by 2.
3236   if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
3237     return false;
3238 
3239   switch (AM.Scale) {
3240   case 0:
3241     // "i" is not allowed.
3242     if (!AM.HasBaseReg)
3243       return false;
3244     // Otherwise we have "r+i".
3245     break;
3246   case 1:
3247     // "r+r+i" is not allowed.
3248     if (AM.HasBaseReg && AM.BaseOffs != 0)
3249       return false;
3250     // Otherwise we have "r+r" or "r+i".
3251     break;
3252   case 2:
3253     // "2*r+r" or "2*r+i" is not allowed.
3254     if (AM.HasBaseReg || AM.BaseOffs)
3255       return false;
3256     // Otherwise we have "r+r".
3257     break;
3258   default:
3259     return false;
3260   }
3261 
3262   return true;
3263 }
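
// Illustration (assembly spellings are indicative only): the accepted modes
// correspond to forms like
//   ld.w    $a0, $a1, 40       # reg + signed 12-bit immediate
//   ldptr.w $a0, $a1, 2044     # reg + signed 14-bit immediate scaled by 4
//   ldx.w   $a0, $a1, $a2      # reg + reg
// A scaled index register ("2*r + r"/"2*r + i") or a global base is rejected.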
3264 
3265 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3266   return isInt<12>(Imm);
3267 }
3268 
3269 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
3270   return isInt<12>(Imm);
3271 }
3272 
3273 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
3274   // Zexts are free if they can be combined with a load.
3275   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
3276   // poorly with type legalization of compares preferring sext.
3277   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
3278     EVT MemVT = LD->getMemoryVT();
3279     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
3280         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
3281          LD->getExtensionType() == ISD::ZEXTLOAD))
3282       return true;
3283   }
3284 
3285   return TargetLowering::isZExtFree(Val, VT2);
3286 }
3287 
3288 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
3289   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
3290 }
3291 
3292 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
3293   // TODO: Support vectors.
3294   if (Y.getValueType().isVector())
3295     return false;
3296 
3297   return !isa<ConstantSDNode>(Y);
3298 }
3299