//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck(
    "loongarch-check-zero-division", cl::Hidden,
    cl::desc("Trap on integer division by zero."),
    cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();
  // Set up the register classes.
  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  // TODO: add necessary setOperationAction calls later.
  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     GRLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    if (Subtarget.hasBasicF())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
  }

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
  }
  if (Subtarget.hasBasicD()) {
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
  if (!Subtarget.is64Bit())
    setLibcallName(RTLIB::MUL_I128, nullptr);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
  if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
       !Subtarget.hasBasicD())) {
    setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
    setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
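  // Every LoongArch instruction is 4 bytes wide, so 4 is the smallest useful
  // function alignment.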
  const Align FunctionAlignment(4);
  setMinFunctionAlignment(FunctionAlignment);

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
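  // (Operand 1 is the pointer to the caller's va_list object; FI is the
  // address of the first variadic argument save slot.)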
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  // TODO: Check CodeModel.
  if (IsLocal)
    // This generates the pattern (PseudoLA_PCREL sym), which expands to
    // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
                   0);

  // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
  // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
  return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
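  // The GOT entry address computed above is passed as the single argument,
  // and the call returns the address of the TLS symbol at run time.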
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  }
}

// Helper function that emits error message for intrinsics with chain.
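// An undef value is merged with the incoming chain so the caller still gets a
// well-formed {result, chain} pair back.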
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {

  DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
                              ErrorMsg);
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Op0 = Op.getOperand(0);
  std::string Name = Op->getOperationName(0);
  const StringRef ErrorMsgOOR = "out of range";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w: {
    std::string Name = Op->getOperationName(0);
    DAG.getContext()->emitError(Name + " requires target: loongarch64");
    return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
  }
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
                     Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    if (Subtarget.is64Bit())
      return DAG.getMergeValues(
          {DAG.getNode(
               LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
               DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
           Op0},
          DL);
    else {
      DAG.getContext()->emitError(
          "llvm.loongarch.iocsrrd.d requires target: loongarch64");
      return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
    }
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getMergeValues(                                                 \
        {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)),  \
         Op0},                                                                 \
        DL);                                                                   \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    if (!isUInt<8>(Imm)) {
      DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
                                  "' out of range");
      return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
    }

    return Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.movfcsr2gr expects basic f target feature");
      return DAG.getMergeValues(
          {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
    }
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    if (!isUInt<2>(Imm)) {
      DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
                                  "' " + ErrorMsgOOR);
      return DAG.getMergeValues(
          {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
    }
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op.getOperand(0)},
        DL);
  }
  }
}

// Helper function that emits error message for intrinsics with void return
// value.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
                              ErrorMsg);
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Op0 = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "out of range";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.cacop.d requires target: loongarch64");
      return Op.getOperand(0);
    }
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.cacop.w requires target: loongarch32");
      return Op.getOperand(0);
    }
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<5>(Imm1))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    SDValue Op4 = Op.getOperand(4);
    int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
    if (!isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return Op;
  }

  case Intrinsic::loongarch_dbar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.movgr2fcsr expects basic f target feature");
      return Op0;
    }
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<2>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return DAG.getNode(
        LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
        DAG.getConstant(Imm, DL, GRLenVT),
        DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);

    return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    if (Subtarget.is64Bit())                                                   \
      return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0,              \
                         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),      \
                         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3));     \
    else                                                                       \
      return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3);   \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    if (Subtarget.is64Bit())
      return DAG.getNode(
          LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
    else {
      DAG.getContext()->emitError(
target: loongarch64"); 848 return Op.getOperand(0); 849 } 850 } 851 #define ASRT_LE_GT_CASE(NAME) \ 852 case Intrinsic::loongarch_##NAME: { \ 853 if (!Subtarget.is64Bit()) { \ 854 DAG.getContext()->emitError(Op->getOperationName(0) + \ 855 " requires target: loongarch64"); \ 856 return Op.getOperand(0); \ 857 } \ 858 return Op; \ 859 } 860 ASRT_LE_GT_CASE(asrtle_d) 861 ASRT_LE_GT_CASE(asrtgt_d) 862 #undef ASRT_LE_GT_CASE 863 case Intrinsic::loongarch_ldpte_d: { 864 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 865 if (!isUInt<8>(Imm)) 866 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); 867 if (!Subtarget.is64Bit()) { 868 DAG.getContext()->emitError(Op->getOperationName(0) + 869 " requires target: loongarch64"); 870 return Op.getOperand(0); 871 } 872 return Op; 873 } 874 } 875 } 876 877 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, 878 SelectionDAG &DAG) const { 879 SDLoc DL(Op); 880 SDValue Lo = Op.getOperand(0); 881 SDValue Hi = Op.getOperand(1); 882 SDValue Shamt = Op.getOperand(2); 883 EVT VT = Lo.getValueType(); 884 885 // if Shamt-GRLen < 0: // Shamt < GRLen 886 // Lo = Lo << Shamt 887 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) 888 // else: 889 // Lo = 0 890 // Hi = Lo << (Shamt-GRLen) 891 892 SDValue Zero = DAG.getConstant(0, DL, VT); 893 SDValue One = DAG.getConstant(1, DL, VT); 894 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 895 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 896 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 897 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 898 899 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 900 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 901 SDValue ShiftRightLo = 902 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); 903 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 904 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 905 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); 906 907 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 908 909 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 910 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 911 912 SDValue Parts[2] = {Lo, Hi}; 913 return DAG.getMergeValues(Parts, DL); 914 } 915 916 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, 917 SelectionDAG &DAG, 918 bool IsSRA) const { 919 SDLoc DL(Op); 920 SDValue Lo = Op.getOperand(0); 921 SDValue Hi = Op.getOperand(1); 922 SDValue Shamt = Op.getOperand(2); 923 EVT VT = Lo.getValueType(); 924 925 // SRA expansion: 926 // if Shamt-GRLen < 0: // Shamt < GRLen 927 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 928 // Hi = Hi >>s Shamt 929 // else: 930 // Lo = Hi >>s (Shamt-GRLen); 931 // Hi = Hi >>s (GRLen-1) 932 // 933 // SRL expansion: 934 // if Shamt-GRLen < 0: // Shamt < GRLen 935 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 936 // Hi = Hi >>u Shamt 937 // else: 938 // Lo = Hi >>u (Shamt-GRLen); 939 // Hi = 0; 940 941 unsigned ShiftRightOp = IsSRA ? 
  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::ROTL:
    return LoongArchISD::ROTL_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later on because the fact the operation was originally of
// type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
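  // Truncate the promoted i64 result back to the original narrow type.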
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
    ConstantSDNode *CN;
    if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Op0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    uint64_t Op1 = N->getConstantOperandVal(1);
    MVT GRLenVT = Subtarget.getGRLenVT();
    if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
      if (!Subtarget.hasBasicF()) {
        DAG.getContext()->emitError(
            "llvm.loongarch.movfcsr2gr expects basic f target feature");
        Results.push_back(DAG.getMergeValues(
            {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
        Results.push_back(N->getOperand(0));
        return;
      }
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
      if (!isUInt<2>(Imm)) {
        DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
                                    "' " + "out of range");
        Results.push_back(DAG.getMergeValues(
            {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
        Results.push_back(N->getOperand(0));
        return;
      }
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT,
                      DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
                                  DAG.getConstant(Imm, DL, GRLenVT))));
      Results.push_back(N->getOperand(0));
      return;
    }
    SDValue Op2 = N->getOperand(2);
    std::string Name = N->getOperationName(0);

    switch (Op1) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    Results.push_back(DAG.getNode(                                             \
        ISD::TRUNCATE, DL, VT,                                                 \
        DAG.getNode(                                                           \
            LoongArchISD::NODE, DL, MVT::i64,                                  \
            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),                   \
            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)))));   \
    Results.push_back(N->getOperand(0));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT,                                     \
                    DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2,         \
                                DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,     \
                                            N->getOperand(3)))));              \
    Results.push_back(N->getOperand(0));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
      CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit()) {                                                \
      DAG.getContext()->emitError(Name + " requires target: loongarch64");     \
      Results.push_back(DAG.getUNDEF(VT));                                     \
      Results.push_back(N->getOperand(0));                                     \
    }                                                                          \
    break;                                                                     \
  }
      CSR_CASE(csrrd_d);
      CSR_CASE(csrwr_d);
      CSR_CASE(csrxchg_d);
      CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
      if (!isUInt<14>(Imm)) {
        DAG.getContext()->emitError("argument to '" + Name + "' out of range");
        Results.push_back(DAG.getUNDEF(VT));
        Results.push_back(N->getOperand(0));
        break;
      }

      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT,
                      DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
                                  DAG.getConstant(Imm, DL, GRLenVT))));
      Results.push_back(N->getOperand(0));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
      if (!isUInt<14>(Imm)) {
        DAG.getContext()->emitError("argument to '" + Name + "' out of range");
        Results.push_back(DAG.getUNDEF(VT));
        Results.push_back(N->getOperand(0));
        break;
      }

      Results.push_back(DAG.getNode(
          ISD::TRUNCATE, DL, VT,
          DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                      DAG.getConstant(Imm, DL, GRLenVT))));
      Results.push_back(N->getOperand(0));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
      if (!isUInt<14>(Imm)) {
        DAG.getContext()->emitError("argument to '" + Name + "' out of range");
        Results.push_back(DAG.getUNDEF(VT));
        Results.push_back(N->getOperand(0));
        break;
      }

      Results.push_back(DAG.getNode(
          ISD::TRUNCATE, DL, VT,
          DAG.getNode(
              LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
              DAG.getConstant(Imm, DL, GRLenVT))));
      Results.push_back(N->getOperand(0));
      break;
    }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    Results.push_back(DAG.getNode(                                             \
        ISD::TRUNCATE, DL, N->getValueType(0),                                 \
        DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0,                     \
                    DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));        \
    Results.push_back(N->getOperand(0));                                       \
    break;                                                                     \
  }
      IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
      IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
      IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      Results.push_back(DAG.getNode(
          ISD::TRUNCATE, DL, VT,
          DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
      Results.push_back(Op0);
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        DAG.getContext()->emitError(N->getOperationName(0) +
                                    " requires target: loongarch64");
        Results.push_back(DAG.getUNDEF(VT));
        Results.push_back(Op0);
        break;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
1315 "On LA64, only 64-bit registers can be read."); 1316 else 1317 DAG.getContext()->emitError( 1318 "On LA32, only 32-bit registers can be read."); 1319 Results.push_back(DAG.getUNDEF(VT)); 1320 Results.push_back(N->getOperand(0)); 1321 break; 1322 } 1323 } 1324 } 1325 1326 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 1327 TargetLowering::DAGCombinerInfo &DCI, 1328 const LoongArchSubtarget &Subtarget) { 1329 if (DCI.isBeforeLegalizeOps()) 1330 return SDValue(); 1331 1332 SDValue FirstOperand = N->getOperand(0); 1333 SDValue SecondOperand = N->getOperand(1); 1334 unsigned FirstOperandOpc = FirstOperand.getOpcode(); 1335 EVT ValTy = N->getValueType(0); 1336 SDLoc DL(N); 1337 uint64_t lsb, msb; 1338 unsigned SMIdx, SMLen; 1339 ConstantSDNode *CN; 1340 SDValue NewOperand; 1341 MVT GRLenVT = Subtarget.getGRLenVT(); 1342 1343 // Op's second operand must be a shifted mask. 1344 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) || 1345 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) 1346 return SDValue(); 1347 1348 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { 1349 // Pattern match BSTRPICK. 1350 // $dst = and ((sra or srl) $src , lsb), (2**len - 1) 1351 // => BSTRPICK $dst, $src, msb, lsb 1352 // where msb = lsb + len - 1 1353 1354 // The second operand of the shift must be an immediate. 1355 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1)))) 1356 return SDValue(); 1357 1358 lsb = CN->getZExtValue(); 1359 1360 // Return if the shifted mask does not start at bit 0 or the sum of its 1361 // length and lsb exceeds the word's size. 1362 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) 1363 return SDValue(); 1364 1365 NewOperand = FirstOperand.getOperand(0); 1366 } else { 1367 // Pattern match BSTRPICK. 1368 // $dst = and $src, (2**len- 1) , if len > 12 1369 // => BSTRPICK $dst, $src, msb, lsb 1370 // where lsb = 0 and msb = len - 1 1371 1372 // If the mask is <= 0xfff, andi can be used instead. 1373 if (CN->getZExtValue() <= 0xfff) 1374 return SDValue(); 1375 1376 // Return if the mask doesn't start at position 0. 1377 if (SMIdx) 1378 return SDValue(); 1379 1380 lsb = 0; 1381 NewOperand = FirstOperand; 1382 } 1383 msb = lsb + SMLen - 1; 1384 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, 1385 DAG.getConstant(msb, DL, GRLenVT), 1386 DAG.getConstant(lsb, DL, GRLenVT)); 1387 } 1388 1389 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 1390 TargetLowering::DAGCombinerInfo &DCI, 1391 const LoongArchSubtarget &Subtarget) { 1392 if (DCI.isBeforeLegalizeOps()) 1393 return SDValue(); 1394 1395 // $dst = srl (and $src, Mask), Shamt 1396 // => 1397 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt 1398 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 1399 // 1400 1401 SDValue FirstOperand = N->getOperand(0); 1402 ConstantSDNode *CN; 1403 EVT ValTy = N->getValueType(0); 1404 SDLoc DL(N); 1405 MVT GRLenVT = Subtarget.getGRLenVT(); 1406 unsigned MaskIdx, MaskLen; 1407 uint64_t Shamt; 1408 1409 // The first operand must be an AND and the second operand of the AND must be 1410 // a shifted mask. 1411 if (FirstOperand.getOpcode() != ISD::AND || 1412 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) || 1413 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) 1414 return SDValue(); 1415 1416 // The second operand (shift amount) must be an immediate. 
  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
                       FirstOperand->getOperand(0),
                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));

  return SDValue();
}

static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  //  R = or (and X, mask0), (and Y, mask1)
  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  //  =>
  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
                       DAG.getConstant(ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  //  R = or (and X, mask), (shl Y, shamt)
  //  where mask = (2**shamt - 1)
  //  =>
  //  R = BSTRINS X, Y, ValBits - 1, shamt
  //  where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0),
                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  //  R = or (and X, mask), const
  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
  //  =>
  //  R = BSTRINS X, (const >> lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
    return DAG.getNode(
        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
        DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
  // pattern is more common than the 1st. So we put the 1st before the 6th in
  // order to match as many nodes as possible.
1564 ConstantSDNode *CNMask, *CNShamt; 1565 unsigned MaskIdx, MaskLen; 1566 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 1567 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1568 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 1569 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1570 CNShamt->getZExtValue() + MaskLen <= ValBits) { 1571 Shamt = CNShamt->getZExtValue(); 1572 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); 1573 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1574 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); 1575 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1576 N1.getOperand(0).getOperand(0), 1577 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), 1578 DAG.getConstant(Shamt, DL, GRLenVT)); 1579 } 1580 } 1581 1582 // 7th pattern. 1583 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be 1584 // overwritten by the incoming bits are known to be zero. 1585 // => 1586 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx 1587 // 1588 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd 1589 // before the 7th in order to match as many nodes as possible. 1590 if (N1.getOpcode() == ISD::AND && 1591 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1592 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 1593 N1.getOperand(0).getOpcode() == ISD::SHL && 1594 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1595 CNShamt->getZExtValue() == MaskIdx) { 1596 APInt ShMask(ValBits, CNMask->getZExtValue()); 1597 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1598 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); 1599 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1600 N1.getOperand(0).getOperand(0), 1601 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 1602 DAG.getConstant(MaskIdx, DL, GRLenVT)); 1603 } 1604 } 1605 1606 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. 1607 if (!SwapAndRetried) { 1608 std::swap(N0, N1); 1609 SwapAndRetried = true; 1610 goto Retry; 1611 } 1612 1613 SwapAndRetried = false; 1614 Retry2: 1615 // 8th pattern. 1616 // a = b | (c & shifted_mask), where all positions in b to be overwritten by 1617 // the incoming bits are known to be zero. 1618 // => 1619 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx 1620 // 1621 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So 1622 // we put it here in order to match as many nodes as possible or generate less 1623 // instructions. 1624 if (N1.getOpcode() == ISD::AND && 1625 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1626 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { 1627 APInt ShMask(ValBits, CNMask->getZExtValue()); 1628 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1629 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); 1630 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1631 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), 1632 N1->getOperand(0), 1633 DAG.getConstant(MaskIdx, DL, GRLenVT)), 1634 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 1635 DAG.getConstant(MaskIdx, DL, GRLenVT)); 1636 } 1637 } 1638 // Swap N0/N1 and retry. 
1639 if (!SwapAndRetried) { 1640 std::swap(N0, N1); 1641 SwapAndRetried = true; 1642 goto Retry2; 1643 } 1644 1645 return SDValue(); 1646 } 1647 1648 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 1649 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, 1650 TargetLowering::DAGCombinerInfo &DCI, 1651 const LoongArchSubtarget &Subtarget) { 1652 if (DCI.isBeforeLegalizeOps()) 1653 return SDValue(); 1654 1655 SDValue Src = N->getOperand(0); 1656 if (Src.getOpcode() != LoongArchISD::REVB_2W) 1657 return SDValue(); 1658 1659 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), 1660 Src.getOperand(0)); 1661 } 1662 1663 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, 1664 DAGCombinerInfo &DCI) const { 1665 SelectionDAG &DAG = DCI.DAG; 1666 switch (N->getOpcode()) { 1667 default: 1668 break; 1669 case ISD::AND: 1670 return performANDCombine(N, DAG, DCI, Subtarget); 1671 case ISD::OR: 1672 return performORCombine(N, DAG, DCI, Subtarget); 1673 case ISD::SRL: 1674 return performSRLCombine(N, DAG, DCI, Subtarget); 1675 case LoongArchISD::BITREV_W: 1676 return performBITREV_WCombine(N, DAG, DCI, Subtarget); 1677 } 1678 return SDValue(); 1679 } 1680 1681 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, 1682 MachineBasicBlock *MBB) { 1683 if (!ZeroDivCheck) 1684 return MBB; 1685 1686 // Build instructions: 1687 // MBB: 1688 // div(or mod) $dst, $dividend, $divisor 1689 // bnez $divisor, SinkMBB 1690 // BreakMBB: 1691 // break 7 // BRK_DIVZERO 1692 // SinkMBB: 1693 // fallthrough 1694 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 1695 MachineFunction::iterator It = ++MBB->getIterator(); 1696 MachineFunction *MF = MBB->getParent(); 1697 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); 1698 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); 1699 MF->insert(It, BreakMBB); 1700 MF->insert(It, SinkMBB); 1701 1702 // Transfer the remainder of MBB and its successor edges to SinkMBB. 1703 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); 1704 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 1705 1706 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 1707 DebugLoc DL = MI.getDebugLoc(); 1708 MachineOperand &Divisor = MI.getOperand(2); 1709 Register DivisorReg = Divisor.getReg(); 1710 1711 // MBB: 1712 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) 1713 .addReg(DivisorReg, getKillRegState(Divisor.isKill())) 1714 .addMBB(SinkMBB); 1715 MBB->addSuccessor(BreakMBB); 1716 MBB->addSuccessor(SinkMBB); 1717 1718 // BreakMBB: 1719 // See linux header file arch/loongarch/include/uapi/asm/break.h for the 1720 // definition of BRK_DIVZERO. 1721 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); 1722 BreakMBB->addSuccessor(SinkMBB); 1723 1724 // Clear Divisor's kill flag. 
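  // The BNEZ emitted above reads the divisor again after the division, so the
  // divisor register can no longer be marked as killed on the DIV/MOD
  // instruction.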
1725 Divisor.setIsKill(false); 1726 1727 return SinkMBB; 1728 } 1729 1730 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 1731 MachineInstr &MI, MachineBasicBlock *BB) const { 1732 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 1733 DebugLoc DL = MI.getDebugLoc(); 1734 1735 switch (MI.getOpcode()) { 1736 default: 1737 llvm_unreachable("Unexpected instr type to insert"); 1738 case LoongArch::DIV_W: 1739 case LoongArch::DIV_WU: 1740 case LoongArch::MOD_W: 1741 case LoongArch::MOD_WU: 1742 case LoongArch::DIV_D: 1743 case LoongArch::DIV_DU: 1744 case LoongArch::MOD_D: 1745 case LoongArch::MOD_DU: 1746 return insertDivByZeroTrap(MI, BB); 1747 break; 1748 case LoongArch::WRFCSR: { 1749 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 1750 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 1751 .addReg(MI.getOperand(1).getReg()); 1752 MI.eraseFromParent(); 1753 return BB; 1754 } 1755 case LoongArch::RDFCSR: { 1756 MachineInstr *ReadFCSR = 1757 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 1758 MI.getOperand(0).getReg()) 1759 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 1760 ReadFCSR->getOperand(1).setIsUndef(); 1761 MI.eraseFromParent(); 1762 return BB; 1763 } 1764 } 1765 } 1766 1767 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 1768 switch ((LoongArchISD::NodeType)Opcode) { 1769 case LoongArchISD::FIRST_NUMBER: 1770 break; 1771 1772 #define NODE_NAME_CASE(node) \ 1773 case LoongArchISD::node: \ 1774 return "LoongArchISD::" #node; 1775 1776 // TODO: Add more target-dependent nodes later. 1777 NODE_NAME_CASE(CALL) 1778 NODE_NAME_CASE(RET) 1779 NODE_NAME_CASE(TAIL) 1780 NODE_NAME_CASE(SLL_W) 1781 NODE_NAME_CASE(SRA_W) 1782 NODE_NAME_CASE(SRL_W) 1783 NODE_NAME_CASE(BSTRINS) 1784 NODE_NAME_CASE(BSTRPICK) 1785 NODE_NAME_CASE(MOVGR2FR_W_LA64) 1786 NODE_NAME_CASE(MOVFR2GR_S_LA64) 1787 NODE_NAME_CASE(FTINT) 1788 NODE_NAME_CASE(REVB_2H) 1789 NODE_NAME_CASE(REVB_2W) 1790 NODE_NAME_CASE(BITREV_4B) 1791 NODE_NAME_CASE(BITREV_W) 1792 NODE_NAME_CASE(ROTR_W) 1793 NODE_NAME_CASE(ROTL_W) 1794 NODE_NAME_CASE(CLZ_W) 1795 NODE_NAME_CASE(CTZ_W) 1796 NODE_NAME_CASE(DBAR) 1797 NODE_NAME_CASE(IBAR) 1798 NODE_NAME_CASE(BREAK) 1799 NODE_NAME_CASE(SYSCALL) 1800 NODE_NAME_CASE(CRC_W_B_W) 1801 NODE_NAME_CASE(CRC_W_H_W) 1802 NODE_NAME_CASE(CRC_W_W_W) 1803 NODE_NAME_CASE(CRC_W_D_W) 1804 NODE_NAME_CASE(CRCC_W_B_W) 1805 NODE_NAME_CASE(CRCC_W_H_W) 1806 NODE_NAME_CASE(CRCC_W_W_W) 1807 NODE_NAME_CASE(CRCC_W_D_W) 1808 NODE_NAME_CASE(CSRRD) 1809 NODE_NAME_CASE(CSRWR) 1810 NODE_NAME_CASE(CSRXCHG) 1811 NODE_NAME_CASE(IOCSRRD_B) 1812 NODE_NAME_CASE(IOCSRRD_H) 1813 NODE_NAME_CASE(IOCSRRD_W) 1814 NODE_NAME_CASE(IOCSRRD_D) 1815 NODE_NAME_CASE(IOCSRWR_B) 1816 NODE_NAME_CASE(IOCSRWR_H) 1817 NODE_NAME_CASE(IOCSRWR_W) 1818 NODE_NAME_CASE(IOCSRWR_D) 1819 NODE_NAME_CASE(CPUCFG) 1820 NODE_NAME_CASE(MOVGR2FCSR) 1821 NODE_NAME_CASE(MOVFCSR2GR) 1822 NODE_NAME_CASE(CACOP_D) 1823 NODE_NAME_CASE(CACOP_W) 1824 } 1825 #undef NODE_NAME_CASE 1826 return nullptr; 1827 } 1828 1829 //===----------------------------------------------------------------------===// 1830 // Calling Convention Implementation 1831 //===----------------------------------------------------------------------===// 1832 1833 // Eight general-purpose registers a0-a7 used for passing integer arguments, 1834 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 1835 // fixed-point arguments, and floating-point arguments when no FPR is available 1836 // or with soft float ABI. 
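// Note: in ABI mnemonics, R4-R11 are $a0-$a7, and F0-F7 (F0_64-F7_64) are
// $fa0-$fa7.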
1837 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
1838                              LoongArch::R7, LoongArch::R8, LoongArch::R9,
1839                              LoongArch::R10, LoongArch::R11};
1840 // Eight floating-point registers fa0-fa7 used for passing floating-point
1841 // arguments, and fa0-fa1 are also used to return values.
1842 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
1843                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
1844                                LoongArch::F6, LoongArch::F7};
1845 // FPR32 and FPR64 alias each other.
1846 const MCPhysReg ArgFPR64s[] = {
1847     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
1848     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
1849
1850 // Pass a 2*GRLen argument that has been split into two GRLen values through
1851 // registers or the stack as necessary.
1852 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
1853                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
1854                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
1855                                      ISD::ArgFlagsTy ArgFlags2) {
1856   unsigned GRLenInBytes = GRLen / 8;
1857   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1858     // At least one half can be passed via register.
1859     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1860                                      VA1.getLocVT(), CCValAssign::Full));
1861   } else {
1862     // Both halves must be passed on the stack, with proper alignment.
1863     Align StackAlign =
1864         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1865     State.addLoc(
1866         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1867                             State.AllocateStack(GRLenInBytes, StackAlign),
1868                             VA1.getLocVT(), CCValAssign::Full));
1869     State.addLoc(CCValAssign::getMem(
1870         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1871         LocVT2, CCValAssign::Full));
1872     return false;
1873   }
1874   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1875     // The second half can also be passed via register.
1876     State.addLoc(
1877         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1878   } else {
1879     // The second half is passed via the stack, without additional alignment.
1880     State.addLoc(CCValAssign::getMem(
1881         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1882         LocVT2, CCValAssign::Full));
1883   }
1884   return false;
1885 }
1886
1887 // Implements the LoongArch calling convention. Returns true upon failure.
1888 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
1889                          unsigned ValNo, MVT ValVT,
1890                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1891                          CCState &State, bool IsFixed, bool IsRet,
1892                          Type *OrigTy) {
1893   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1894   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
1895   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
1896   MVT LocVT = ValVT;
1897
1898   // Any return value split into more than two values can't be returned
1899   // directly.
1900   if (IsRet && ValNo > 1)
1901     return true;
1902
1903   // Use a GPR if passing a variadic argument or if no FPR is available.
1904   bool UseGPRForFloat = true;
1905
1906   switch (ABI) {
1907   default:
1908     llvm_unreachable("Unexpected ABI");
1909   case LoongArchABI::ABI_ILP32S:
1910   case LoongArchABI::ABI_LP64S:
1911   case LoongArchABI::ABI_ILP32F:
1912   case LoongArchABI::ABI_LP64F:
1913     report_fatal_error("Unimplemented ABI");
1914     break;
1915   case LoongArchABI::ABI_ILP32D:
1916   case LoongArchABI::ABI_LP64D:
1917     UseGPRForFloat = !IsFixed;
1918     break;
1919   }
1920
1921   // FPR32 and FPR64 alias each other.
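  // Because of that aliasing, checking ArgFPR32s alone is sufficient below:
  // once all eight fa registers have been allocated, no FPR remains for either
  // f32 or f64 values.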
1922 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) 1923 UseGPRForFloat = true; 1924 1925 if (UseGPRForFloat && ValVT == MVT::f32) { 1926 LocVT = GRLenVT; 1927 LocInfo = CCValAssign::BCvt; 1928 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { 1929 LocVT = MVT::i64; 1930 LocInfo = CCValAssign::BCvt; 1931 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { 1932 // TODO: Handle passing f64 on LA32 with D feature. 1933 report_fatal_error("Passing f64 with GPR on LA32 is undefined"); 1934 } 1935 1936 // If this is a variadic argument, the LoongArch calling convention requires 1937 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 1938 // byte alignment. An aligned register should be used regardless of whether 1939 // the original argument was split during legalisation or not. The argument 1940 // will not be passed by registers if the original type is larger than 1941 // 2*GRLen, so the register alignment rule does not apply. 1942 unsigned TwoGRLenInBytes = (2 * GRLen) / 8; 1943 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && 1944 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) { 1945 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 1946 // Skip 'odd' register if necessary. 1947 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 1948 State.AllocateReg(ArgGPRs); 1949 } 1950 1951 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 1952 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 1953 State.getPendingArgFlags(); 1954 1955 assert(PendingLocs.size() == PendingArgFlags.size() && 1956 "PendingLocs and PendingArgFlags out of sync"); 1957 1958 // Split arguments might be passed indirectly, so keep track of the pending 1959 // values. 1960 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 1961 LocVT = GRLenVT; 1962 LocInfo = CCValAssign::Indirect; 1963 PendingLocs.push_back( 1964 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 1965 PendingArgFlags.push_back(ArgFlags); 1966 if (!ArgFlags.isSplitEnd()) { 1967 return false; 1968 } 1969 } 1970 1971 // If the split argument only had two elements, it should be passed directly 1972 // in registers or on the stack. 1973 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 1974 PendingLocs.size() <= 2) { 1975 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 1976 // Apply the normal calling convention rules to the first half of the 1977 // split argument. 1978 CCValAssign VA = PendingLocs[0]; 1979 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 1980 PendingLocs.clear(); 1981 PendingArgFlags.clear(); 1982 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, 1983 ArgFlags); 1984 } 1985 1986 // Allocate to a register if possible, or else a stack slot. 1987 Register Reg; 1988 unsigned StoreSizeBytes = GRLen / 8; 1989 Align StackAlign = Align(GRLen / 8); 1990 1991 if (ValVT == MVT::f32 && !UseGPRForFloat) 1992 Reg = State.AllocateReg(ArgFPR32s); 1993 else if (ValVT == MVT::f64 && !UseGPRForFloat) 1994 Reg = State.AllocateReg(ArgFPR64s); 1995 else 1996 Reg = State.AllocateReg(ArgGPRs); 1997 1998 unsigned StackOffset = 1999 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 2000 2001 // If we reach this point and PendingLocs is non-empty, we must be at the 2002 // end of a split argument that must be passed indirectly. 
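  // For example (assuming LA64): an i256 argument is split into four i64
  // parts, and every pending part below is converted to the single register or
  // stack slot that carries the address of the in-memory copy.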
2003   if (!PendingLocs.empty()) {
2004     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
2005     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
2006     for (auto &It : PendingLocs) {
2007       if (Reg)
2008         It.convertToReg(Reg);
2009       else
2010         It.convertToMem(StackOffset);
2011       State.addLoc(It);
2012     }
2013     PendingLocs.clear();
2014     PendingArgFlags.clear();
2015     return false;
2016   }
2017   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
2018          "Expected a GRLenVT at this stage");
2019
2020   if (Reg) {
2021     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2022     return false;
2023   }
2024
2025   // When a floating-point value is passed on the stack, no bit-cast is needed.
2026   if (ValVT.isFloatingPoint()) {
2027     LocVT = ValVT;
2028     LocInfo = CCValAssign::Full;
2029   }
2030
2031   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2032   return false;
2033 }
2034
2035 void LoongArchTargetLowering::analyzeInputArgs(
2036     MachineFunction &MF, CCState &CCInfo,
2037     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
2038     LoongArchCCAssignFn Fn) const {
2039   FunctionType *FType = MF.getFunction().getFunctionType();
2040   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
2041     MVT ArgVT = Ins[i].VT;
2042     Type *ArgTy = nullptr;
2043     if (IsRet)
2044       ArgTy = FType->getReturnType();
2045     else if (Ins[i].isOrigArg())
2046       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2047     LoongArchABI::ABI ABI =
2048         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2049     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
2050            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
2051       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2052                         << EVT(ArgVT).getEVTString() << '\n');
2053       llvm_unreachable("");
2054     }
2055   }
2056 }
2057
2058 void LoongArchTargetLowering::analyzeOutputArgs(
2059     MachineFunction &MF, CCState &CCInfo,
2060     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2061     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
2062   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2063     MVT ArgVT = Outs[i].VT;
2064     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2065     LoongArchABI::ABI ABI =
2066         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2067     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
2068            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
2069       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2070                         << EVT(ArgVT).getEVTString() << "\n");
2071       llvm_unreachable("");
2072     }
2073   }
2074 }
2075
2076 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2077 // values.
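// For example, an f32 value passed in a GPR on LA64 arrives here with LocVT ==
// i64 and is moved back into an FPR via LoongArchISD::MOVGR2FR_W_LA64.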
2078 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 2079 const CCValAssign &VA, const SDLoc &DL) { 2080 switch (VA.getLocInfo()) { 2081 default: 2082 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2083 case CCValAssign::Full: 2084 case CCValAssign::Indirect: 2085 break; 2086 case CCValAssign::BCvt: 2087 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2088 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 2089 else 2090 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 2091 break; 2092 } 2093 return Val; 2094 } 2095 2096 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 2097 const CCValAssign &VA, const SDLoc &DL, 2098 const LoongArchTargetLowering &TLI) { 2099 MachineFunction &MF = DAG.getMachineFunction(); 2100 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2101 EVT LocVT = VA.getLocVT(); 2102 SDValue Val; 2103 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 2104 Register VReg = RegInfo.createVirtualRegister(RC); 2105 RegInfo.addLiveIn(VA.getLocReg(), VReg); 2106 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 2107 2108 return convertLocVTToValVT(DAG, Val, VA, DL); 2109 } 2110 2111 // The caller is responsible for loading the full value if the argument is 2112 // passed with CCValAssign::Indirect. 2113 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 2114 const CCValAssign &VA, const SDLoc &DL) { 2115 MachineFunction &MF = DAG.getMachineFunction(); 2116 MachineFrameInfo &MFI = MF.getFrameInfo(); 2117 EVT ValVT = VA.getValVT(); 2118 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 2119 /*IsImmutable=*/true); 2120 SDValue FIN = DAG.getFrameIndex( 2121 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 2122 2123 ISD::LoadExtType ExtType; 2124 switch (VA.getLocInfo()) { 2125 default: 2126 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2127 case CCValAssign::Full: 2128 case CCValAssign::Indirect: 2129 case CCValAssign::BCvt: 2130 ExtType = ISD::NON_EXTLOAD; 2131 break; 2132 } 2133 return DAG.getExtLoad( 2134 ExtType, DL, VA.getLocVT(), Chain, FIN, 2135 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 2136 } 2137 2138 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 2139 const CCValAssign &VA, const SDLoc &DL) { 2140 EVT LocVT = VA.getLocVT(); 2141 2142 switch (VA.getLocInfo()) { 2143 default: 2144 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2145 case CCValAssign::Full: 2146 break; 2147 case CCValAssign::BCvt: 2148 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2149 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 2150 else 2151 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 2152 break; 2153 } 2154 return Val; 2155 } 2156 2157 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 2158 CCValAssign::LocInfo LocInfo, 2159 ISD::ArgFlagsTy ArgFlags, CCState &State) { 2160 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 2161 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 2162 // s0 s1 s2 s3 s4 s5 s6 s7 s8 2163 static const MCPhysReg GPRList[] = { 2164 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, 2165 LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; 2166 if (unsigned Reg = State.AllocateReg(GPRList)) { 2167 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2168 return false; 2169 } 2170 } 2171 2172 if (LocVT == MVT::f32) { 2173 // Pass in STG 
registers: F1, F2, F3, F4
2174     // fs0,fs1,fs2,fs3
2175     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
2176                                           LoongArch::F26, LoongArch::F27};
2177     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2178       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2179       return false;
2180     }
2181   }
2182
2183   if (LocVT == MVT::f64) {
2184     // Pass in STG registers: D1, D2, D3, D4
2185     // fs4,fs5,fs6,fs7
2186     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
2187                                           LoongArch::F30_64, LoongArch::F31_64};
2188     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2189       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2190       return false;
2191     }
2192   }
2193
2194   report_fatal_error("No registers left in GHC calling convention");
2195   return true;
2196 }
2197
2198 // Transform physical registers into virtual registers.
2199 SDValue LoongArchTargetLowering::LowerFormalArguments(
2200     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2201     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2202     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2203
2204   MachineFunction &MF = DAG.getMachineFunction();
2205
2206   switch (CallConv) {
2207   default:
2208     llvm_unreachable("Unsupported calling convention");
2209   case CallingConv::C:
2210   case CallingConv::Fast:
2211     break;
2212   case CallingConv::GHC:
2213     if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
2214         !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
2215       report_fatal_error(
2216           "GHC calling convention requires the F and D extensions");
2217   }
2218
2219   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2220   MVT GRLenVT = Subtarget.getGRLenVT();
2221   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2222   // Used with varargs to accumulate store chains.
2223   std::vector<SDValue> OutChains;
2224
2225   // Assign locations to all of the incoming arguments.
2226   SmallVector<CCValAssign> ArgLocs;
2227   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2228
2229   if (CallConv == CallingConv::GHC)
2230     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
2231   else
2232     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
2233
2234   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2235     CCValAssign &VA = ArgLocs[i];
2236     SDValue ArgValue;
2237     if (VA.isRegLoc())
2238       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2239     else
2240       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2241     if (VA.getLocInfo() == CCValAssign::Indirect) {
2242       // If the original argument was split and passed by reference, we need to
2243       // load all parts of it here (using the same address).
2244       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2245                                    MachinePointerInfo()));
2246       unsigned ArgIndex = Ins[i].OrigArgIndex;
2247       unsigned ArgPartOffset = Ins[i].PartOffset;
2248       assert(ArgPartOffset == 0);
2249       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2250         CCValAssign &PartVA = ArgLocs[i + 1];
2251         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
2252         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2253         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
2254         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2255                                      MachinePointerInfo()));
2256         ++i;
2257       }
2258       continue;
2259     }
2260     InVals.push_back(ArgValue);
2261   }
2262
2263   if (IsVarArg) {
2264     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
2265     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2266     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
2267     MachineFrameInfo &MFI = MF.getFrameInfo();
2268     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2269     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
2270
2271     // Offset of the first variable argument from stack pointer, and size of
2272     // the vararg save area. For now, the varargs save area is either zero or
2273     // large enough to hold a0-a7.
2274     int VaArgOffset, VarArgsSaveSize;
2275
2276     // If all registers are allocated, then all varargs must be passed on the
2277     // stack and we don't need to save any argregs.
2278     if (ArgRegs.size() == Idx) {
2279       VaArgOffset = CCInfo.getNextStackOffset();
2280       VarArgsSaveSize = 0;
2281     } else {
2282       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
2283       VaArgOffset = -VarArgsSaveSize;
2284     }
2285
2286     // Record the frame index of the first variable argument
2287     // which is a value necessary to VASTART.
2288     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2289     LoongArchFI->setVarArgsFrameIndex(FI);
2290
2291     // If saving an odd number of registers, then create an extra stack slot to
2292     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2293     // offsets to even-numbered registers remain 2*GRLen-aligned.
2294     if (Idx % 2) {
2295       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
2296                             true);
2297       VarArgsSaveSize += GRLenInBytes;
2298     }
2299
2300     // Copy the integer registers that may have been used for passing varargs
2301     // to the vararg save area.
2302     for (unsigned I = Idx; I < ArgRegs.size();
2303          ++I, VaArgOffset += GRLenInBytes) {
2304       const Register Reg = RegInfo.createVirtualRegister(RC);
2305       RegInfo.addLiveIn(ArgRegs[I], Reg);
2306       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
2307       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2308       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2309       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2310                                    MachinePointerInfo::getFixedStack(MF, FI));
2311       cast<StoreSDNode>(Store.getNode())
2312           ->getMemOperand()
2313           ->setValue((Value *)nullptr);
2314       OutChains.push_back(Store);
2315     }
2316     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
2317   }
2318
2319   // All stores are grouped in one node to allow the matching between
2320   // the size of Ins and InVals. This only happens for vararg functions.
2321 if (!OutChains.empty()) { 2322 OutChains.push_back(Chain); 2323 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 2324 } 2325 2326 return Chain; 2327 } 2328 2329 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 2330 return CI->isTailCall(); 2331 } 2332 2333 // Check whether the call is eligible for tail call optimization. 2334 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 2335 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 2336 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 2337 2338 auto CalleeCC = CLI.CallConv; 2339 auto &Outs = CLI.Outs; 2340 auto &Caller = MF.getFunction(); 2341 auto CallerCC = Caller.getCallingConv(); 2342 2343 // Do not tail call opt if the stack is used to pass parameters. 2344 if (CCInfo.getNextStackOffset() != 0) 2345 return false; 2346 2347 // Do not tail call opt if any parameters need to be passed indirectly. 2348 for (auto &VA : ArgLocs) 2349 if (VA.getLocInfo() == CCValAssign::Indirect) 2350 return false; 2351 2352 // Do not tail call opt if either caller or callee uses struct return 2353 // semantics. 2354 auto IsCallerStructRet = Caller.hasStructRetAttr(); 2355 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 2356 if (IsCallerStructRet || IsCalleeStructRet) 2357 return false; 2358 2359 // Do not tail call opt if either the callee or caller has a byval argument. 2360 for (auto &Arg : Outs) 2361 if (Arg.Flags.isByVal()) 2362 return false; 2363 2364 // The callee has to preserve all registers the caller needs to preserve. 2365 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2366 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 2367 if (CalleeCC != CallerCC) { 2368 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 2369 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 2370 return false; 2371 } 2372 return true; 2373 } 2374 2375 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 2376 return DAG.getDataLayout().getPrefTypeAlign( 2377 VT.getTypeForEVT(*DAG.getContext())); 2378 } 2379 2380 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 2381 // and output parameter nodes. 2382 SDValue 2383 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 2384 SmallVectorImpl<SDValue> &InVals) const { 2385 SelectionDAG &DAG = CLI.DAG; 2386 SDLoc &DL = CLI.DL; 2387 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2388 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2389 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2390 SDValue Chain = CLI.Chain; 2391 SDValue Callee = CLI.Callee; 2392 CallingConv::ID CallConv = CLI.CallConv; 2393 bool IsVarArg = CLI.IsVarArg; 2394 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2395 MVT GRLenVT = Subtarget.getGRLenVT(); 2396 bool &IsTailCall = CLI.IsTailCall; 2397 2398 MachineFunction &MF = DAG.getMachineFunction(); 2399 2400 // Analyze the operands of the call, assigning locations to each operand. 2401 SmallVector<CCValAssign> ArgLocs; 2402 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 2403 2404 if (CallConv == CallingConv::GHC) 2405 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 2406 else 2407 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 2408 2409 // Check if it's really possible to do a tail call. 
2410 if (IsTailCall) 2411 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 2412 2413 if (IsTailCall) 2414 ++NumTailCalls; 2415 else if (CLI.CB && CLI.CB->isMustTailCall()) 2416 report_fatal_error("failed to perform tail call elimination on a call " 2417 "site marked musttail"); 2418 2419 // Get a count of how many bytes are to be pushed on the stack. 2420 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 2421 2422 // Create local copies for byval args. 2423 SmallVector<SDValue> ByValArgs; 2424 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2425 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2426 if (!Flags.isByVal()) 2427 continue; 2428 2429 SDValue Arg = OutVals[i]; 2430 unsigned Size = Flags.getByValSize(); 2431 Align Alignment = Flags.getNonZeroByValAlign(); 2432 2433 int FI = 2434 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 2435 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2436 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 2437 2438 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 2439 /*IsVolatile=*/false, 2440 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, 2441 MachinePointerInfo(), MachinePointerInfo()); 2442 ByValArgs.push_back(FIPtr); 2443 } 2444 2445 if (!IsTailCall) 2446 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 2447 2448 // Copy argument values to their designated locations. 2449 SmallVector<std::pair<Register, SDValue>> RegsToPass; 2450 SmallVector<SDValue> MemOpChains; 2451 SDValue StackPtr; 2452 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 2453 CCValAssign &VA = ArgLocs[i]; 2454 SDValue ArgValue = OutVals[i]; 2455 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2456 2457 // Promote the value if needed. 2458 // For now, only handle fully promoted and indirect arguments. 2459 if (VA.getLocInfo() == CCValAssign::Indirect) { 2460 // Store the argument in a stack slot and pass its address. 2461 Align StackAlign = 2462 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 2463 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 2464 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 2465 // If the original argument was split and passed by reference, we need to 2466 // store the required parts of it here (and pass just one address). 2467 unsigned ArgIndex = Outs[i].OrigArgIndex; 2468 unsigned ArgPartOffset = Outs[i].PartOffset; 2469 assert(ArgPartOffset == 0); 2470 // Calculate the total size to store. We don't have access to what we're 2471 // actually storing other than performing the loop and collecting the 2472 // info. 
2473 SmallVector<std::pair<SDValue, SDValue>> Parts; 2474 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 2475 SDValue PartValue = OutVals[i + 1]; 2476 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 2477 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 2478 EVT PartVT = PartValue.getValueType(); 2479 2480 StoredSize += PartVT.getStoreSize(); 2481 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 2482 Parts.push_back(std::make_pair(PartValue, Offset)); 2483 ++i; 2484 } 2485 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 2486 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 2487 MemOpChains.push_back( 2488 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 2489 MachinePointerInfo::getFixedStack(MF, FI))); 2490 for (const auto &Part : Parts) { 2491 SDValue PartValue = Part.first; 2492 SDValue PartOffset = Part.second; 2493 SDValue Address = 2494 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 2495 MemOpChains.push_back( 2496 DAG.getStore(Chain, DL, PartValue, Address, 2497 MachinePointerInfo::getFixedStack(MF, FI))); 2498 } 2499 ArgValue = SpillSlot; 2500 } else { 2501 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 2502 } 2503 2504 // Use local copy if it is a byval arg. 2505 if (Flags.isByVal()) 2506 ArgValue = ByValArgs[j++]; 2507 2508 if (VA.isRegLoc()) { 2509 // Queue up the argument copies and emit them at the end. 2510 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 2511 } else { 2512 assert(VA.isMemLoc() && "Argument not register or memory"); 2513 assert(!IsTailCall && "Tail call not allowed if stack is used " 2514 "for passing parameters"); 2515 2516 // Work out the address of the stack slot. 2517 if (!StackPtr.getNode()) 2518 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 2519 SDValue Address = 2520 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 2521 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 2522 2523 // Emit the store. 2524 MemOpChains.push_back( 2525 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2526 } 2527 } 2528 2529 // Join the stores, which are independent of one another. 2530 if (!MemOpChains.empty()) 2531 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2532 2533 SDValue Glue; 2534 2535 // Build a sequence of copy-to-reg nodes, chained and glued together. 2536 for (auto &Reg : RegsToPass) { 2537 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 2538 Glue = Chain.getValue(1); 2539 } 2540 2541 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 2542 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 2543 // split it and then direct call can be matched by PseudoCALL. 2544 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 2545 const GlobalValue *GV = S->getGlobal(); 2546 unsigned OpFlags = 2547 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV) 2548 ? LoongArchII::MO_CALL 2549 : LoongArchII::MO_CALL_PLT; 2550 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 2551 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2552 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal( 2553 *MF.getFunction().getParent(), nullptr) 2554 ? LoongArchII::MO_CALL 2555 : LoongArchII::MO_CALL_PLT; 2556 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 2557 } 2558 2559 // The first call operand is the chain and the second is the target address. 
2560 SmallVector<SDValue> Ops; 2561 Ops.push_back(Chain); 2562 Ops.push_back(Callee); 2563 2564 // Add argument registers to the end of the list so that they are 2565 // known live into the call. 2566 for (auto &Reg : RegsToPass) 2567 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 2568 2569 if (!IsTailCall) { 2570 // Add a register mask operand representing the call-preserved registers. 2571 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2572 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2573 assert(Mask && "Missing call preserved mask for calling convention"); 2574 Ops.push_back(DAG.getRegisterMask(Mask)); 2575 } 2576 2577 // Glue the call to the argument copies, if any. 2578 if (Glue.getNode()) 2579 Ops.push_back(Glue); 2580 2581 // Emit the call. 2582 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2583 2584 if (IsTailCall) { 2585 MF.getFrameInfo().setHasTailCall(); 2586 return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); 2587 } 2588 2589 Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); 2590 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2591 Glue = Chain.getValue(1); 2592 2593 // Mark the end of the call, which is glued to the call itself. 2594 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2595 Glue = Chain.getValue(1); 2596 2597 // Assign locations to each value returned by this call. 2598 SmallVector<CCValAssign> RVLocs; 2599 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 2600 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 2601 2602 // Copy all of the result registers out of their specified physreg. 2603 for (auto &VA : RVLocs) { 2604 // Copy the value out. 2605 SDValue RetValue = 2606 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 2607 // Glue the RetValue to the end of the call sequence. 2608 Chain = RetValue.getValue(1); 2609 Glue = RetValue.getValue(2); 2610 2611 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 2612 2613 InVals.push_back(RetValue); 2614 } 2615 2616 return Chain; 2617 } 2618 2619 bool LoongArchTargetLowering::CanLowerReturn( 2620 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 2621 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 2622 SmallVector<CCValAssign> RVLocs; 2623 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 2624 2625 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2626 LoongArchABI::ABI ABI = 2627 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 2628 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 2629 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 2630 nullptr)) 2631 return false; 2632 } 2633 return true; 2634 } 2635 2636 SDValue LoongArchTargetLowering::LowerReturn( 2637 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 2638 const SmallVectorImpl<ISD::OutputArg> &Outs, 2639 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 2640 SelectionDAG &DAG) const { 2641 // Stores the assignment of the return value to a location. 2642 SmallVector<CCValAssign> RVLocs; 2643 2644 // Info about the registers and stack slot. 
2645   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2646                  *DAG.getContext());
2647
2648   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2649                     nullptr, CC_LoongArch);
2650   if (CallConv == CallingConv::GHC && !RVLocs.empty())
2651     report_fatal_error("GHC functions return void only");
2652   SDValue Glue;
2653   SmallVector<SDValue, 4> RetOps(1, Chain);
2654
2655   // Copy the result values into the output registers.
2656   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2657     CCValAssign &VA = RVLocs[i];
2658     assert(VA.isRegLoc() && "Can only return in registers!");
2659
2660     // Handle a 'normal' return.
2661     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
2662     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2663
2664     // Guarantee that all emitted copies are stuck together.
2665     Glue = Chain.getValue(1);
2666     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2667   }
2668
2669   RetOps[0] = Chain; // Update chain.
2670
2671   // Add the glue node if we have it.
2672   if (Glue.getNode())
2673     RetOps.push_back(Glue);
2674
2675   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
2676 }
2677
2678 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2679                                            bool ForCodeSize) const {
2680   // TODO: Maybe need more checks here after vector extension is supported.
2681   if (VT == MVT::f32 && !Subtarget.hasBasicF())
2682     return false;
2683   if (VT == MVT::f64 && !Subtarget.hasBasicD())
2684     return false;
2685   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
2686 }
2687
2688 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
2689   return true;
2690 }
2691
2692 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
2693   return true;
2694 }
2695
2696 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
2697     const Instruction *I) const {
2698   if (!Subtarget.is64Bit())
2699     return isa<LoadInst>(I) || isa<StoreInst>(I);
2700
2701   if (isa<LoadInst>(I))
2702     return true;
2703
2704   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
2705   // require fences because we can use amswap_db.[w/d].
2706   if (isa<StoreInst>(I)) {
2707     unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
2708     return (Size == 8 || Size == 16);
2709   }
2710
2711   return false;
2712 }
2713
2714 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
2715                                                 LLVMContext &Context,
2716                                                 EVT VT) const {
2717   if (!VT.isVector())
2718     return getPointerTy(DL);
2719   return VT.changeVectorElementTypeToInteger();
2720 }
2721
2722 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
2723   // TODO: Support vectors.
2724   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
2725 }
2726
2727 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2728                                                  const CallInst &I,
2729                                                  MachineFunction &MF,
2730                                                  unsigned Intrinsic) const {
2731   switch (Intrinsic) {
2732   default:
2733     return false;
2734   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
2735   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
2736   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
2737   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
2738     Info.opc = ISD::INTRINSIC_W_CHAIN;
2739     Info.memVT = MVT::i32;
2740     Info.ptrVal = I.getArgOperand(0);
2741     Info.offset = 0;
2742     Info.align = Align(4);
2743     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
2744                  MachineMemOperand::MOVolatile;
2745     return true;
2746     // TODO: Add more Intrinsics later.
2747 } 2748 } 2749 2750 TargetLowering::AtomicExpansionKind 2751 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 2752 // TODO: Add more AtomicRMWInst that needs to be extended. 2753 2754 // Since floating-point operation requires a non-trivial set of data 2755 // operations, use CmpXChg to expand. 2756 if (AI->isFloatingPointOperation() || 2757 AI->getOperation() == AtomicRMWInst::UIncWrap || 2758 AI->getOperation() == AtomicRMWInst::UDecWrap) 2759 return AtomicExpansionKind::CmpXChg; 2760 2761 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 2762 if (Size == 8 || Size == 16) 2763 return AtomicExpansionKind::MaskedIntrinsic; 2764 return AtomicExpansionKind::None; 2765 } 2766 2767 static Intrinsic::ID 2768 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, 2769 AtomicRMWInst::BinOp BinOp) { 2770 if (GRLen == 64) { 2771 switch (BinOp) { 2772 default: 2773 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2774 case AtomicRMWInst::Xchg: 2775 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; 2776 case AtomicRMWInst::Add: 2777 return Intrinsic::loongarch_masked_atomicrmw_add_i64; 2778 case AtomicRMWInst::Sub: 2779 return Intrinsic::loongarch_masked_atomicrmw_sub_i64; 2780 case AtomicRMWInst::Nand: 2781 return Intrinsic::loongarch_masked_atomicrmw_nand_i64; 2782 case AtomicRMWInst::UMax: 2783 return Intrinsic::loongarch_masked_atomicrmw_umax_i64; 2784 case AtomicRMWInst::UMin: 2785 return Intrinsic::loongarch_masked_atomicrmw_umin_i64; 2786 case AtomicRMWInst::Max: 2787 return Intrinsic::loongarch_masked_atomicrmw_max_i64; 2788 case AtomicRMWInst::Min: 2789 return Intrinsic::loongarch_masked_atomicrmw_min_i64; 2790 // TODO: support other AtomicRMWInst. 2791 } 2792 } 2793 2794 if (GRLen == 32) { 2795 switch (BinOp) { 2796 default: 2797 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2798 case AtomicRMWInst::Xchg: 2799 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; 2800 case AtomicRMWInst::Add: 2801 return Intrinsic::loongarch_masked_atomicrmw_add_i32; 2802 case AtomicRMWInst::Sub: 2803 return Intrinsic::loongarch_masked_atomicrmw_sub_i32; 2804 case AtomicRMWInst::Nand: 2805 return Intrinsic::loongarch_masked_atomicrmw_nand_i32; 2806 // TODO: support other AtomicRMWInst. 2807 } 2808 } 2809 2810 llvm_unreachable("Unexpected GRLen\n"); 2811 } 2812 2813 TargetLowering::AtomicExpansionKind 2814 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( 2815 AtomicCmpXchgInst *CI) const { 2816 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 2817 if (Size == 8 || Size == 16) 2818 return AtomicExpansionKind::MaskedIntrinsic; 2819 return AtomicExpansionKind::None; 2820 } 2821 2822 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 2823 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 2824 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 2825 Value *Ordering = 2826 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord)); 2827 2828 // TODO: Support cmpxchg on LA32. 
2829 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; 2830 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 2831 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 2832 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2833 Type *Tys[] = {AlignedAddr->getType()}; 2834 Function *MaskedCmpXchg = 2835 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 2836 Value *Result = Builder.CreateCall( 2837 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 2838 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2839 return Result; 2840 } 2841 2842 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( 2843 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 2844 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 2845 unsigned GRLen = Subtarget.getGRLen(); 2846 Value *Ordering = 2847 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering())); 2848 Type *Tys[] = {AlignedAddr->getType()}; 2849 Function *LlwOpScwLoop = Intrinsic::getDeclaration( 2850 AI->getModule(), 2851 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); 2852 2853 if (GRLen == 64) { 2854 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 2855 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2856 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 2857 } 2858 2859 Value *Result; 2860 2861 // Must pass the shift amount needed to sign extend the loaded value prior 2862 // to performing a signed comparison for min/max. ShiftAmt is the number of 2863 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which 2864 // is the number of bits to left+right shift the value in order to 2865 // sign-extend. 2866 if (AI->getOperation() == AtomicRMWInst::Min || 2867 AI->getOperation() == AtomicRMWInst::Max) { 2868 const DataLayout &DL = AI->getModule()->getDataLayout(); 2869 unsigned ValWidth = 2870 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 2871 Value *SextShamt = 2872 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt); 2873 Result = Builder.CreateCall(LlwOpScwLoop, 2874 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 2875 } else { 2876 Result = 2877 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 2878 } 2879 2880 if (GRLen == 64) 2881 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2882 return Result; 2883 } 2884 2885 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( 2886 const MachineFunction &MF, EVT VT) const { 2887 VT = VT.getScalarType(); 2888 2889 if (!VT.isSimple()) 2890 return false; 2891 2892 switch (VT.getSimpleVT().SimpleTy) { 2893 case MVT::f32: 2894 case MVT::f64: 2895 return true; 2896 default: 2897 break; 2898 } 2899 2900 return false; 2901 } 2902 2903 Register LoongArchTargetLowering::getExceptionPointerRegister( 2904 const Constant *PersonalityFn) const { 2905 return LoongArch::R4; 2906 } 2907 2908 Register LoongArchTargetLowering::getExceptionSelectorRegister( 2909 const Constant *PersonalityFn) const { 2910 return LoongArch::R5; 2911 } 2912 2913 //===----------------------------------------------------------------------===// 2914 // LoongArch Inline Assembly Support 2915 //===----------------------------------------------------------------------===// 2916 2917 LoongArchTargetLowering::ConstraintType 2918 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { 2919 // LoongArch specific constraints in GCC: config/loongarch/constraints.md 2920 // 2921 // 'f': A 
floating-point register (if available). 2922 // 'k': A memory operand whose address is formed by a base register and 2923 // (optionally scaled) index register. 2924 // 'l': A signed 16-bit constant. 2925 // 'm': A memory operand whose address is formed by a base register and 2926 // offset that is suitable for use in instructions with the same 2927 // addressing mode as st.w and ld.w. 2928 // 'I': A signed 12-bit constant (for arithmetic instructions). 2929 // 'J': Integer zero. 2930 // 'K': An unsigned 12-bit constant (for logic instructions). 2931 // "ZB": An address that is held in a general-purpose register. The offset is 2932 // zero. 2933 // "ZC": A memory operand whose address is formed by a base register and 2934 // offset that is suitable for use in instructions with the same 2935 // addressing mode as ll.w and sc.w. 2936 if (Constraint.size() == 1) { 2937 switch (Constraint[0]) { 2938 default: 2939 break; 2940 case 'f': 2941 return C_RegisterClass; 2942 case 'l': 2943 case 'I': 2944 case 'J': 2945 case 'K': 2946 return C_Immediate; 2947 case 'k': 2948 return C_Memory; 2949 } 2950 } 2951 2952 if (Constraint == "ZC" || Constraint == "ZB") 2953 return C_Memory; 2954 2955 // 'm' is handled here. 2956 return TargetLowering::getConstraintType(Constraint); 2957 } 2958 2959 unsigned LoongArchTargetLowering::getInlineAsmMemConstraint( 2960 StringRef ConstraintCode) const { 2961 return StringSwitch<unsigned>(ConstraintCode) 2962 .Case("k", InlineAsm::Constraint_k) 2963 .Case("ZB", InlineAsm::Constraint_ZB) 2964 .Case("ZC", InlineAsm::Constraint_ZC) 2965 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); 2966 } 2967 2968 std::pair<unsigned, const TargetRegisterClass *> 2969 LoongArchTargetLowering::getRegForInlineAsmConstraint( 2970 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 2971 // First, see if this is a constraint that directly corresponds to a LoongArch 2972 // register class. 2973 if (Constraint.size() == 1) { 2974 switch (Constraint[0]) { 2975 case 'r': 2976 // TODO: Support fixed vectors up to GRLen? 2977 if (VT.isVector()) 2978 break; 2979 return std::make_pair(0U, &LoongArch::GPRRegClass); 2980 case 'f': 2981 if (Subtarget.hasBasicF() && VT == MVT::f32) 2982 return std::make_pair(0U, &LoongArch::FPR32RegClass); 2983 if (Subtarget.hasBasicD() && VT == MVT::f64) 2984 return std::make_pair(0U, &LoongArch::FPR64RegClass); 2985 break; 2986 default: 2987 break; 2988 } 2989 } 2990 2991 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen 2992 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm 2993 // constraints while the official register name is prefixed with a '$'. So we 2994 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) 2995 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is 2996 // case insensitive, so no need to convert the constraint to upper case here. 2997 // 2998 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly 2999 // decode the usage of register name aliases into their official names. And 3000 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use 3001 // official register names. 
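  // For example, the constraint "{$r4}" is rewritten to "{r4}" before
  // matching, and "{$f5}" may be promoted to the aliasing F5_64 register when
  // the D feature is available.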
3002 if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) { 3003 bool IsFP = Constraint[2] == 'f'; 3004 std::pair<StringRef, StringRef> Temp = Constraint.split('$'); 3005 std::pair<unsigned, const TargetRegisterClass *> R; 3006 R = TargetLowering::getRegForInlineAsmConstraint( 3007 TRI, join_items("", Temp.first, Temp.second), VT); 3008 // Match those names to the widest floating point register type available. 3009 if (IsFP) { 3010 unsigned RegNo = R.first; 3011 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { 3012 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { 3013 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; 3014 return std::make_pair(DReg, &LoongArch::FPR64RegClass); 3015 } 3016 } 3017 } 3018 return R; 3019 } 3020 3021 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 3022 } 3023 3024 void LoongArchTargetLowering::LowerAsmOperandForConstraint( 3025 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 3026 SelectionDAG &DAG) const { 3027 // Currently only support length 1 constraints. 3028 if (Constraint.length() == 1) { 3029 switch (Constraint[0]) { 3030 case 'l': 3031 // Validate & create a 16-bit signed immediate operand. 3032 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3033 uint64_t CVal = C->getSExtValue(); 3034 if (isInt<16>(CVal)) 3035 Ops.push_back( 3036 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 3037 } 3038 return; 3039 case 'I': 3040 // Validate & create a 12-bit signed immediate operand. 3041 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3042 uint64_t CVal = C->getSExtValue(); 3043 if (isInt<12>(CVal)) 3044 Ops.push_back( 3045 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 3046 } 3047 return; 3048 case 'J': 3049 // Validate & create an integer zero operand. 3050 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 3051 if (C->getZExtValue() == 0) 3052 Ops.push_back( 3053 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT())); 3054 return; 3055 case 'K': 3056 // Validate & create a 12-bit unsigned immediate operand. 3057 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3058 uint64_t CVal = C->getZExtValue(); 3059 if (isUInt<12>(CVal)) 3060 Ops.push_back( 3061 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 3062 } 3063 return; 3064 default: 3065 break; 3066 } 3067 } 3068 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 3069 } 3070 3071 #define GET_REGISTER_MATCHER 3072 #include "LoongArchGenAsmMatcher.inc" 3073 3074 Register 3075 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, 3076 const MachineFunction &MF) const { 3077 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$'); 3078 std::string NewRegName = Name.second.str(); 3079 Register Reg = MatchRegisterAltName(NewRegName); 3080 if (Reg == LoongArch::NoRegister) 3081 Reg = MatchRegisterName(NewRegName); 3082 if (Reg == LoongArch::NoRegister) 3083 report_fatal_error( 3084 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 3085 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 3086 if (!ReservedRegs.test(Reg)) 3087 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 3088 StringRef(RegName) + "\".")); 3089 return Reg; 3090 } 3091 3092 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, 3093 EVT VT, SDValue C) const { 3094 // TODO: Support vectors. 
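  // For example, a multiply by 9 passes the check below because (9 - 1) is a
  // power of two, so it can be lowered to a shift-and-add (e.g. ALSL) sequence.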
3095 if (!VT.isScalarInteger()) 3096 return false; 3097 3098 // Omit the optimization if the data size exceeds GRLen. 3099 if (VT.getSizeInBits() > Subtarget.getGRLen()) 3100 return false; 3101 3102 // Break MUL into (SLLI + ADD/SUB) or ALSL. 3103 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 3104 const APInt &Imm = ConstNode->getAPIntValue(); 3105 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 3106 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 3107 return true; 3108 } 3109 3110 return false; 3111 } 3112