//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();
  // Set up the register classes.
  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
  if (Subtarget.hasExtLSX())
    for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32,
                    MVT::v2i64})
      addRegisterClass(VT, &LoongArch::LSX128RegClass);
  if (Subtarget.hasExtLASX())
    for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32,
                    MVT::v4i64})
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  // TODO: add necessary setOperationAction calls later.
  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     GRLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    if (Subtarget.hasBasicF())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
  }

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
  }
  if (Subtarget.hasBasicD()) {
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
  if (!Subtarget.is64Bit())
    setLibcallName(RTLIB::MUL_I128, nullptr);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
  if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
       !Subtarget.hasBasicD())) {
    setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
    setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
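  // LoongArch instructions are fixed at 4 bytes, so the minimum function
  // alignment is 4 bytes.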
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                   : LoongArch::PseudoLA_TLS_GD,
                             Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                   : LoongArch::PseudoLA_TLS_LD,
                             Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  }
}

// Helper function that emits an error message for intrinsics with chain and
// returns the merge values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                             : Op;
  }
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::ROTL:
    return LoongArchISD::ROTL_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later on, because the fact that the operation was originally of
// type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
    // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Helper function that emits an error message for intrinsics with chain and
// pushes an UNDEF and the chain as the results.
static void emitErrorAndReplaceIntrinsicWithChainResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  Results.push_back(N->getOperand(0));
}

void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
    ConstantSDNode *CN;
    if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
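    // (e.g. __fixsfsi rather than __fixsfdi for an f32 -> i32 conversion.)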
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Chain = N->getOperand(0);
    SDValue Op2 = N->getOperand(2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range";
    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

    switch (N->getConstantOperandVal(1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
    case Intrinsic::loongarch_movfcsr2gr: {
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgReqF);
        return;
      }
      unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
      if (!isUInt<2>(Imm)) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
      Results.push_back(MOVFCSR2GRResults.getValue(1));
      break;
    }
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, Op2,                                                           \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
      CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit())                                                  \
      emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,            \
                                                   ErrorMsgReqLA64);           \
    break;                                                                     \
  }
      CSR_CASE(csrrd_d);
      CSR_CASE(csrwr_d);
      CSR_CASE(csrxchg_d);
      CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
      Results.push_back(CSRRDResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
      Results.push_back(CSRWRResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
      Results.push_back(CSRXCHGResults.getValue(1));
      break;
    }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue IOCSRRDResults =                                                   \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
    Results.push_back(IOCSRRDResults.getValue(1));                             \
    break;                                                                     \
  }
      IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
      IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
      IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
      Results.push_back(CPUCFGResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
                                                     ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          "On LA32, only 32-bit registers can be read.");
    Results.push_back(DAG.getUNDEF(VT));
    Results.push_back(N->getOperand(0));
    break;
  }
  }
}

static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(0);
  SDValue SecondOperand = N->getOperand(1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  uint64_t lsb, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
    // => BSTRPICK $dst, $src, msb, lsb
    // where msb = lsb + len - 1

    // The second operand of the shift must be an immediate.
    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
      return SDValue();

    lsb = CN->getZExtValue();

    // Return if the shifted mask does not start at bit 0 or the sum of its
    // length and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(0);
  } else {
    // Pattern match BSTRPICK.
    // $dst = and $src, (2**len - 1), if len > 12
    // => BSTRPICK $dst, $src, msb, lsb
    // where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the mask's MSB exceeds the word's size.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
                            DAG.getConstant(msb, DL, GRLenVT),
                            DAG.getConstant(lsb, DL, GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //   bstrpick $Rd, $Rs, msb, lsb
  //   slli $Rd, $Rd, lsb
  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                     DAG.getConstant(lsb, DL, GRLenVT));
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
1442 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) 1443 return SDValue(); 1444 1445 Shamt = CN->getZExtValue(); 1446 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) 1447 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, 1448 FirstOperand->getOperand(0), 1449 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 1450 DAG.getConstant(Shamt, DL, GRLenVT)); 1451 1452 return SDValue(); 1453 } 1454 1455 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 1456 TargetLowering::DAGCombinerInfo &DCI, 1457 const LoongArchSubtarget &Subtarget) { 1458 MVT GRLenVT = Subtarget.getGRLenVT(); 1459 EVT ValTy = N->getValueType(0); 1460 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 1461 ConstantSDNode *CN0, *CN1; 1462 SDLoc DL(N); 1463 unsigned ValBits = ValTy.getSizeInBits(); 1464 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; 1465 unsigned Shamt; 1466 bool SwapAndRetried = false; 1467 1468 if (DCI.isBeforeLegalizeOps()) 1469 return SDValue(); 1470 1471 if (ValBits != 32 && ValBits != 64) 1472 return SDValue(); 1473 1474 Retry: 1475 // 1st pattern to match BSTRINS: 1476 // R = or (and X, mask0), (and (shl Y, lsb), mask1) 1477 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 1478 // => 1479 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 1480 if (N0.getOpcode() == ISD::AND && 1481 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 1482 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 1483 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && 1484 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1485 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 1486 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && 1487 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1488 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 1489 (MaskIdx0 + MaskLen0 <= ValBits)) { 1490 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); 1491 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 1492 N1.getOperand(0).getOperand(0), 1493 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 1494 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 1495 } 1496 1497 // 2nd pattern to match BSTRINS: 1498 // R = or (and X, mask0), (shl (and Y, mask1), lsb) 1499 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) 1500 // => 1501 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 1502 if (N0.getOpcode() == ISD::AND && 1503 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 1504 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 1505 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 1506 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1507 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 1508 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1509 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 1510 MaskLen0 == MaskLen1 && MaskIdx1 == 0 && 1511 (MaskIdx0 + MaskLen0 <= ValBits)) { 1512 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); 1513 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 1514 N1.getOperand(0).getOperand(0), 1515 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 1516 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 1517 } 1518 1519 // 3rd pattern to match BSTRINS: 1520 // R = or (and X, mask0), (and Y, mask1) 1521 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 1522 // => 1523 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb 1524 // where msb = lsb + size - 1 1525 if 
(N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 1526 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 1527 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 1528 (MaskIdx0 + MaskLen0 <= 64) && 1529 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && 1530 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 1531 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); 1532 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 1533 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, 1534 DAG.getConstant(MaskIdx0, DL, GRLenVT)), 1535 DAG.getConstant(ValBits == 32 1536 ? (MaskIdx0 + (MaskLen0 & 31) - 1) 1537 : (MaskIdx0 + MaskLen0 - 1), 1538 DL, GRLenVT), 1539 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 1540 } 1541 1542 // 4th pattern to match BSTRINS: 1543 // R = or (and X, mask), (shl Y, shamt) 1544 // where mask = (2**shamt - 1) 1545 // => 1546 // R = BSTRINS X, Y, ValBits - 1, shamt 1547 // where ValBits = 32 or 64 1548 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && 1549 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 1550 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && 1551 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1552 (Shamt = CN1->getZExtValue()) == MaskLen0 && 1553 (MaskIdx0 + MaskLen0 <= ValBits)) { 1554 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); 1555 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 1556 N1.getOperand(0), 1557 DAG.getConstant((ValBits - 1), DL, GRLenVT), 1558 DAG.getConstant(Shamt, DL, GRLenVT)); 1559 } 1560 1561 // 5th pattern to match BSTRINS: 1562 // R = or (and X, mask), const 1563 // where ~mask = (2**size - 1) << lsb, mask & const = 0 1564 // => 1565 // R = BSTRINS X, (const >> lsb), msb, lsb 1566 // where msb = lsb + size - 1 1567 if (N0.getOpcode() == ISD::AND && 1568 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 1569 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 1570 (CN1 = dyn_cast<ConstantSDNode>(N1)) && 1571 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 1572 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); 1573 return DAG.getNode( 1574 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 1575 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), 1576 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 1577 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 1578 } 1579 1580 // 6th pattern. 1581 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten 1582 // by the incoming bits are known to be zero. 1583 // => 1584 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt 1585 // 1586 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th 1587 // pattern is more common than the 1st. So we put the 1st before the 6th in 1588 // order to match as many nodes as possible. 
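// An illustrative sketch of the 6th pattern (hypothetical values, not from
// a test case): with mask = 0xfff (MaskIdx = 0, MaskLen = 12), shamt = 16,
// and bits 27..16 of b known to be zero, b | ((c & 0xfff) << 16) could
// become (BSTRINS b, c, 27, 16).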
1589 ConstantSDNode *CNMask, *CNShamt; 1590 unsigned MaskIdx, MaskLen; 1591 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 1592 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1593 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 1594 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1595 CNShamt->getZExtValue() + MaskLen <= ValBits) { 1596 Shamt = CNShamt->getZExtValue(); 1597 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); 1598 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1599 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); 1600 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1601 N1.getOperand(0).getOperand(0), 1602 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), 1603 DAG.getConstant(Shamt, DL, GRLenVT)); 1604 } 1605 } 1606 1607 // 7th pattern. 1608 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be 1609 // overwritten by the incoming bits are known to be zero. 1610 // => 1611 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx 1612 // 1613 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd 1614 // before the 7th in order to match as many nodes as possible. 1615 if (N1.getOpcode() == ISD::AND && 1616 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1617 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 1618 N1.getOperand(0).getOpcode() == ISD::SHL && 1619 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 1620 CNShamt->getZExtValue() == MaskIdx) { 1621 APInt ShMask(ValBits, CNMask->getZExtValue()); 1622 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1623 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); 1624 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1625 N1.getOperand(0).getOperand(0), 1626 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 1627 DAG.getConstant(MaskIdx, DL, GRLenVT)); 1628 } 1629 } 1630 1631 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. 1632 if (!SwapAndRetried) { 1633 std::swap(N0, N1); 1634 SwapAndRetried = true; 1635 goto Retry; 1636 } 1637 1638 SwapAndRetried = false; 1639 Retry2: 1640 // 8th pattern. 1641 // a = b | (c & shifted_mask), where all positions in b to be overwritten by 1642 // the incoming bits are known to be zero. 1643 // => 1644 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx 1645 // 1646 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So 1647 // we put it here in order to match as many nodes as possible or generate less 1648 // instructions. 1649 if (N1.getOpcode() == ISD::AND && 1650 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 1651 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { 1652 APInt ShMask(ValBits, CNMask->getZExtValue()); 1653 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 1654 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); 1655 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 1656 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), 1657 N1->getOperand(0), 1658 DAG.getConstant(MaskIdx, DL, GRLenVT)), 1659 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 1660 DAG.getConstant(MaskIdx, DL, GRLenVT)); 1661 } 1662 } 1663 // Swap N0/N1 and retry. 
1664 if (!SwapAndRetried) { 1665 std::swap(N0, N1); 1666 SwapAndRetried = true; 1667 goto Retry2; 1668 } 1669 1670 return SDValue(); 1671 } 1672 1673 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 1674 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, 1675 TargetLowering::DAGCombinerInfo &DCI, 1676 const LoongArchSubtarget &Subtarget) { 1677 if (DCI.isBeforeLegalizeOps()) 1678 return SDValue(); 1679 1680 SDValue Src = N->getOperand(0); 1681 if (Src.getOpcode() != LoongArchISD::REVB_2W) 1682 return SDValue(); 1683 1684 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), 1685 Src.getOperand(0)); 1686 } 1687 1688 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, 1689 DAGCombinerInfo &DCI) const { 1690 SelectionDAG &DAG = DCI.DAG; 1691 switch (N->getOpcode()) { 1692 default: 1693 break; 1694 case ISD::AND: 1695 return performANDCombine(N, DAG, DCI, Subtarget); 1696 case ISD::OR: 1697 return performORCombine(N, DAG, DCI, Subtarget); 1698 case ISD::SRL: 1699 return performSRLCombine(N, DAG, DCI, Subtarget); 1700 case LoongArchISD::BITREV_W: 1701 return performBITREV_WCombine(N, DAG, DCI, Subtarget); 1702 } 1703 return SDValue(); 1704 } 1705 1706 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, 1707 MachineBasicBlock *MBB) { 1708 if (!ZeroDivCheck) 1709 return MBB; 1710 1711 // Build instructions: 1712 // MBB: 1713 // div(or mod) $dst, $dividend, $divisor 1714 // bnez $divisor, SinkMBB 1715 // BreakMBB: 1716 // break 7 // BRK_DIVZERO 1717 // SinkMBB: 1718 // fallthrough 1719 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 1720 MachineFunction::iterator It = ++MBB->getIterator(); 1721 MachineFunction *MF = MBB->getParent(); 1722 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); 1723 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); 1724 MF->insert(It, BreakMBB); 1725 MF->insert(It, SinkMBB); 1726 1727 // Transfer the remainder of MBB and its successor edges to SinkMBB. 1728 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); 1729 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 1730 1731 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 1732 DebugLoc DL = MI.getDebugLoc(); 1733 MachineOperand &Divisor = MI.getOperand(2); 1734 Register DivisorReg = Divisor.getReg(); 1735 1736 // MBB: 1737 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) 1738 .addReg(DivisorReg, getKillRegState(Divisor.isKill())) 1739 .addMBB(SinkMBB); 1740 MBB->addSuccessor(BreakMBB); 1741 MBB->addSuccessor(SinkMBB); 1742 1743 // BreakMBB: 1744 // See linux header file arch/loongarch/include/uapi/asm/break.h for the 1745 // definition of BRK_DIVZERO. 1746 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); 1747 BreakMBB->addSuccessor(SinkMBB); 1748 1749 // Clear Divisor's kill flag. 
1750 Divisor.setIsKill(false); 1751 1752 return SinkMBB; 1753 } 1754 1755 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 1756 MachineInstr &MI, MachineBasicBlock *BB) const { 1757 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 1758 DebugLoc DL = MI.getDebugLoc(); 1759 1760 switch (MI.getOpcode()) { 1761 default: 1762 llvm_unreachable("Unexpected instr type to insert"); 1763 case LoongArch::DIV_W: 1764 case LoongArch::DIV_WU: 1765 case LoongArch::MOD_W: 1766 case LoongArch::MOD_WU: 1767 case LoongArch::DIV_D: 1768 case LoongArch::DIV_DU: 1769 case LoongArch::MOD_D: 1770 case LoongArch::MOD_DU: 1771 return insertDivByZeroTrap(MI, BB); 1772 break; 1773 case LoongArch::WRFCSR: { 1774 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 1775 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 1776 .addReg(MI.getOperand(1).getReg()); 1777 MI.eraseFromParent(); 1778 return BB; 1779 } 1780 case LoongArch::RDFCSR: { 1781 MachineInstr *ReadFCSR = 1782 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 1783 MI.getOperand(0).getReg()) 1784 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 1785 ReadFCSR->getOperand(1).setIsUndef(); 1786 MI.eraseFromParent(); 1787 return BB; 1788 } 1789 } 1790 } 1791 1792 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( 1793 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 1794 unsigned *Fast) const { 1795 if (!Subtarget.hasUAL()) 1796 return false; 1797 1798 // TODO: set reasonable speed number. 1799 if (Fast) 1800 *Fast = 1; 1801 return true; 1802 } 1803 1804 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 1805 switch ((LoongArchISD::NodeType)Opcode) { 1806 case LoongArchISD::FIRST_NUMBER: 1807 break; 1808 1809 #define NODE_NAME_CASE(node) \ 1810 case LoongArchISD::node: \ 1811 return "LoongArchISD::" #node; 1812 1813 // TODO: Add more target-dependent nodes later. 
1814 NODE_NAME_CASE(CALL) 1815 NODE_NAME_CASE(RET) 1816 NODE_NAME_CASE(TAIL) 1817 NODE_NAME_CASE(SLL_W) 1818 NODE_NAME_CASE(SRA_W) 1819 NODE_NAME_CASE(SRL_W) 1820 NODE_NAME_CASE(BSTRINS) 1821 NODE_NAME_CASE(BSTRPICK) 1822 NODE_NAME_CASE(MOVGR2FR_W_LA64) 1823 NODE_NAME_CASE(MOVFR2GR_S_LA64) 1824 NODE_NAME_CASE(FTINT) 1825 NODE_NAME_CASE(REVB_2H) 1826 NODE_NAME_CASE(REVB_2W) 1827 NODE_NAME_CASE(BITREV_4B) 1828 NODE_NAME_CASE(BITREV_W) 1829 NODE_NAME_CASE(ROTR_W) 1830 NODE_NAME_CASE(ROTL_W) 1831 NODE_NAME_CASE(CLZ_W) 1832 NODE_NAME_CASE(CTZ_W) 1833 NODE_NAME_CASE(DBAR) 1834 NODE_NAME_CASE(IBAR) 1835 NODE_NAME_CASE(BREAK) 1836 NODE_NAME_CASE(SYSCALL) 1837 NODE_NAME_CASE(CRC_W_B_W) 1838 NODE_NAME_CASE(CRC_W_H_W) 1839 NODE_NAME_CASE(CRC_W_W_W) 1840 NODE_NAME_CASE(CRC_W_D_W) 1841 NODE_NAME_CASE(CRCC_W_B_W) 1842 NODE_NAME_CASE(CRCC_W_H_W) 1843 NODE_NAME_CASE(CRCC_W_W_W) 1844 NODE_NAME_CASE(CRCC_W_D_W) 1845 NODE_NAME_CASE(CSRRD) 1846 NODE_NAME_CASE(CSRWR) 1847 NODE_NAME_CASE(CSRXCHG) 1848 NODE_NAME_CASE(IOCSRRD_B) 1849 NODE_NAME_CASE(IOCSRRD_H) 1850 NODE_NAME_CASE(IOCSRRD_W) 1851 NODE_NAME_CASE(IOCSRRD_D) 1852 NODE_NAME_CASE(IOCSRWR_B) 1853 NODE_NAME_CASE(IOCSRWR_H) 1854 NODE_NAME_CASE(IOCSRWR_W) 1855 NODE_NAME_CASE(IOCSRWR_D) 1856 NODE_NAME_CASE(CPUCFG) 1857 NODE_NAME_CASE(MOVGR2FCSR) 1858 NODE_NAME_CASE(MOVFCSR2GR) 1859 NODE_NAME_CASE(CACOP_D) 1860 NODE_NAME_CASE(CACOP_W) 1861 } 1862 #undef NODE_NAME_CASE 1863 return nullptr; 1864 } 1865 1866 //===----------------------------------------------------------------------===// 1867 // Calling Convention Implementation 1868 //===----------------------------------------------------------------------===// 1869 1870 // Eight general-purpose registers a0-a7 used for passing integer arguments, 1871 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 1872 // fixed-point arguments, and floating-point arguments when no FPR is available 1873 // or with soft float ABI. 1874 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, 1875 LoongArch::R7, LoongArch::R8, LoongArch::R9, 1876 LoongArch::R10, LoongArch::R11}; 1877 // Eight floating-point registers fa0-fa7 used for passing floating-point 1878 // arguments, and fa0-fa1 are also used to return values. 1879 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, 1880 LoongArch::F3, LoongArch::F4, LoongArch::F5, 1881 LoongArch::F6, LoongArch::F7}; 1882 // FPR32 and FPR64 alias each other. 1883 const MCPhysReg ArgFPR64s[] = { 1884 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, 1885 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; 1886 1887 // Pass a 2*GRLen argument that has been split into two GRLen values through 1888 // registers or the stack as necessary. 1889 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, 1890 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, 1891 unsigned ValNo2, MVT ValVT2, MVT LocVT2, 1892 ISD::ArgFlagsTy ArgFlags2) { 1893 unsigned GRLenInBytes = GRLen / 8; 1894 if (Register Reg = State.AllocateReg(ArgGPRs)) { 1895 // At least one half can be passed via register. 1896 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 1897 VA1.getLocVT(), CCValAssign::Full)); 1898 } else { 1899 // Both halves must be passed on the stack, with proper alignment. 
1900 Align StackAlign = 1901 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1902 State.addLoc( 1903 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 1904 State.AllocateStack(GRLenInBytes, StackAlign), 1905 VA1.getLocVT(), CCValAssign::Full));
1906 State.addLoc(CCValAssign::getMem( 1907 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 1908 LocVT2, CCValAssign::Full));
1909 return false;
1910 }
1911 if (Register Reg = State.AllocateReg(ArgGPRs)) {
1912 // The second half can also be passed via register.
1913 State.addLoc( 1914 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1915 } else {
1916 // The second half is passed via the stack, without additional alignment.
1917 State.addLoc(CCValAssign::getMem( 1918 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 1919 LocVT2, CCValAssign::Full));
1920 }
1921 return false;
1922 }
1923
1924 // Implements the LoongArch calling convention. Returns true upon failure.
1925 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, 1926 unsigned ValNo, MVT ValVT, 1927 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 1928 CCState &State, bool IsFixed, bool IsRet, 1929 Type *OrigTy) {
1930 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1931 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
1932 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
1933 MVT LocVT = ValVT;
1934
1935 // Any return value split into more than two values can't be returned
1936 // directly.
1937 if (IsRet && ValNo > 1) 1938 return true;
1939
1940 // Floating-point values are passed in GPRs when this is a variadic argument
1940 // or when no FPR is available.
1941 bool UseGPRForFloat = true;
1942
1943 switch (ABI) {
1944 default: 1945 llvm_unreachable("Unexpected ABI");
1946 case LoongArchABI::ABI_ILP32S:
1947 case LoongArchABI::ABI_ILP32F:
1948 case LoongArchABI::ABI_LP64F:
1949 report_fatal_error("Unimplemented ABI");
1950 break;
1951 case LoongArchABI::ABI_ILP32D:
1952 case LoongArchABI::ABI_LP64D:
1953 UseGPRForFloat = !IsFixed;
1954 break;
1955 case LoongArchABI::ABI_LP64S:
1956 break;
1957 }
1958
1959 // FPR32 and FPR64 alias each other.
1960 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) 1961 UseGPRForFloat = true;
1962
1963 if (UseGPRForFloat && ValVT == MVT::f32) {
1964 LocVT = GRLenVT;
1965 LocInfo = CCValAssign::BCvt;
1966 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
1967 LocVT = MVT::i64;
1968 LocInfo = CCValAssign::BCvt;
1969 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
1970 // TODO: Handle passing f64 on LA32 with D feature.
1971 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
1972 }
1973
1974 // If this is a variadic argument, the LoongArch calling convention requires
1975 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
1976 // byte alignment. An aligned register should be used regardless of whether
1977 // the original argument was split during legalisation or not. The argument
1978 // will not be passed in registers if the original type is larger than
1979 // 2*GRLen, so the register alignment rule does not apply.
1980 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
1981 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && 1982 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
1983 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1984 // Skip 'odd' register if necessary.
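// For example (a sketch with assumed register state): on LP64D, if $a0
// already holds a fixed argument (RegIdx == 1), a variadic 16-byte argument
// with 16-byte alignment (e.g. an i128 split into two i64 halves) skips $a1
// and is passed in the $a2/$a3 pair.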
1985 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 1986 State.AllocateReg(ArgGPRs); 1987 } 1988 1989 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 1990 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 1991 State.getPendingArgFlags(); 1992 1993 assert(PendingLocs.size() == PendingArgFlags.size() && 1994 "PendingLocs and PendingArgFlags out of sync"); 1995 1996 // Split arguments might be passed indirectly, so keep track of the pending 1997 // values. 1998 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 1999 LocVT = GRLenVT; 2000 LocInfo = CCValAssign::Indirect; 2001 PendingLocs.push_back( 2002 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 2003 PendingArgFlags.push_back(ArgFlags); 2004 if (!ArgFlags.isSplitEnd()) { 2005 return false; 2006 } 2007 } 2008 2009 // If the split argument only had two elements, it should be passed directly 2010 // in registers or on the stack. 2011 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 2012 PendingLocs.size() <= 2) { 2013 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 2014 // Apply the normal calling convention rules to the first half of the 2015 // split argument. 2016 CCValAssign VA = PendingLocs[0]; 2017 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 2018 PendingLocs.clear(); 2019 PendingArgFlags.clear(); 2020 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, 2021 ArgFlags); 2022 } 2023 2024 // Allocate to a register if possible, or else a stack slot. 2025 Register Reg; 2026 unsigned StoreSizeBytes = GRLen / 8; 2027 Align StackAlign = Align(GRLen / 8); 2028 2029 if (ValVT == MVT::f32 && !UseGPRForFloat) 2030 Reg = State.AllocateReg(ArgFPR32s); 2031 else if (ValVT == MVT::f64 && !UseGPRForFloat) 2032 Reg = State.AllocateReg(ArgFPR64s); 2033 else 2034 Reg = State.AllocateReg(ArgGPRs); 2035 2036 unsigned StackOffset = 2037 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 2038 2039 // If we reach this point and PendingLocs is non-empty, we must be at the 2040 // end of a split argument that must be passed indirectly. 2041 if (!PendingLocs.empty()) { 2042 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 2043 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 2044 for (auto &It : PendingLocs) { 2045 if (Reg) 2046 It.convertToReg(Reg); 2047 else 2048 It.convertToMem(StackOffset); 2049 State.addLoc(It); 2050 } 2051 PendingLocs.clear(); 2052 PendingArgFlags.clear(); 2053 return false; 2054 } 2055 assert((!UseGPRForFloat || LocVT == GRLenVT) && 2056 "Expected an GRLenVT at this stage"); 2057 2058 if (Reg) { 2059 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2060 return false; 2061 } 2062 2063 // When a floating-point value is passed on the stack, no bit-cast is needed. 
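// For example (a sketch): a variadic f32 that ends up on the stack under
// LP64D is recorded here as a plain f32 slot instead of keeping the
// bit-cast-to-GRLenVT location chosen earlier for GPR passing.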
2064 if (ValVT.isFloatingPoint()) { 2065 LocVT = ValVT; 2066 LocInfo = CCValAssign::Full; 2067 } 2068 2069 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 2070 return false; 2071 } 2072 2073 void LoongArchTargetLowering::analyzeInputArgs( 2074 MachineFunction &MF, CCState &CCInfo, 2075 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 2076 LoongArchCCAssignFn Fn) const { 2077 FunctionType *FType = MF.getFunction().getFunctionType(); 2078 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 2079 MVT ArgVT = Ins[i].VT; 2080 Type *ArgTy = nullptr; 2081 if (IsRet) 2082 ArgTy = FType->getReturnType(); 2083 else if (Ins[i].isOrigArg()) 2084 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 2085 LoongArchABI::ABI ABI = 2086 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 2087 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, 2088 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 2089 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT 2090 << '\n'); 2091 llvm_unreachable(""); 2092 } 2093 } 2094 } 2095 2096 void LoongArchTargetLowering::analyzeOutputArgs( 2097 MachineFunction &MF, CCState &CCInfo, 2098 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 2099 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { 2100 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2101 MVT ArgVT = Outs[i].VT; 2102 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 2103 LoongArchABI::ABI ABI = 2104 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 2105 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, 2106 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 2107 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT 2108 << "\n"); 2109 llvm_unreachable(""); 2110 } 2111 } 2112 } 2113 2114 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 2115 // values. 2116 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 2117 const CCValAssign &VA, const SDLoc &DL) { 2118 switch (VA.getLocInfo()) { 2119 default: 2120 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2121 case CCValAssign::Full: 2122 case CCValAssign::Indirect: 2123 break; 2124 case CCValAssign::BCvt: 2125 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2126 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 2127 else 2128 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 2129 break; 2130 } 2131 return Val; 2132 } 2133 2134 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 2135 const CCValAssign &VA, const SDLoc &DL, 2136 const LoongArchTargetLowering &TLI) { 2137 MachineFunction &MF = DAG.getMachineFunction(); 2138 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2139 EVT LocVT = VA.getLocVT(); 2140 SDValue Val; 2141 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 2142 Register VReg = RegInfo.createVirtualRegister(RC); 2143 RegInfo.addLiveIn(VA.getLocReg(), VReg); 2144 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 2145 2146 return convertLocVTToValVT(DAG, Val, VA, DL); 2147 } 2148 2149 // The caller is responsible for loading the full value if the argument is 2150 // passed with CCValAssign::Indirect. 
2151 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 2152 const CCValAssign &VA, const SDLoc &DL) { 2153 MachineFunction &MF = DAG.getMachineFunction(); 2154 MachineFrameInfo &MFI = MF.getFrameInfo(); 2155 EVT ValVT = VA.getValVT(); 2156 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 2157 /*IsImmutable=*/true); 2158 SDValue FIN = DAG.getFrameIndex( 2159 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 2160 2161 ISD::LoadExtType ExtType; 2162 switch (VA.getLocInfo()) { 2163 default: 2164 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2165 case CCValAssign::Full: 2166 case CCValAssign::Indirect: 2167 case CCValAssign::BCvt: 2168 ExtType = ISD::NON_EXTLOAD; 2169 break; 2170 } 2171 return DAG.getExtLoad( 2172 ExtType, DL, VA.getLocVT(), Chain, FIN, 2173 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 2174 } 2175 2176 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 2177 const CCValAssign &VA, const SDLoc &DL) { 2178 EVT LocVT = VA.getLocVT(); 2179 2180 switch (VA.getLocInfo()) { 2181 default: 2182 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2183 case CCValAssign::Full: 2184 break; 2185 case CCValAssign::BCvt: 2186 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2187 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 2188 else 2189 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 2190 break; 2191 } 2192 return Val; 2193 } 2194 2195 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 2196 CCValAssign::LocInfo LocInfo, 2197 ISD::ArgFlagsTy ArgFlags, CCState &State) { 2198 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 2199 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 2200 // s0 s1 s2 s3 s4 s5 s6 s7 s8 2201 static const MCPhysReg GPRList[] = { 2202 LoongArch::R23, LoongArch::R24, LoongArch::R25, 2203 LoongArch::R26, LoongArch::R27, LoongArch::R28, 2204 LoongArch::R29, LoongArch::R30, LoongArch::R31}; 2205 if (unsigned Reg = State.AllocateReg(GPRList)) { 2206 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2207 return false; 2208 } 2209 } 2210 2211 if (LocVT == MVT::f32) { 2212 // Pass in STG registers: F1, F2, F3, F4 2213 // fs0,fs1,fs2,fs3 2214 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, 2215 LoongArch::F26, LoongArch::F27}; 2216 if (unsigned Reg = State.AllocateReg(FPR32List)) { 2217 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2218 return false; 2219 } 2220 } 2221 2222 if (LocVT == MVT::f64) { 2223 // Pass in STG registers: D1, D2, D3, D4 2224 // fs4,fs5,fs6,fs7 2225 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, 2226 LoongArch::F30_64, LoongArch::F31_64}; 2227 if (unsigned Reg = State.AllocateReg(FPR64List)) { 2228 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2229 return false; 2230 } 2231 } 2232 2233 report_fatal_error("No registers left in GHC calling convention"); 2234 return true; 2235 } 2236 2237 // Transform physical registers into virtual registers. 
2238 SDValue LoongArchTargetLowering::LowerFormalArguments( 2239 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 2240 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 2241 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2242
2243 MachineFunction &MF = DAG.getMachineFunction();
2244
2245 switch (CallConv) {
2246 default: 2247 llvm_unreachable("Unsupported calling convention");
2248 case CallingConv::C:
2249 case CallingConv::Fast:
2250 break;
2251 case CallingConv::GHC:
2252 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) || 2253 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD)) 2254 report_fatal_error( 2255 "GHC calling convention requires the F and D extensions");
2256 }
2257
2258 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2259 MVT GRLenVT = Subtarget.getGRLenVT();
2260 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2261 // Used with varargs to accumulate store chains.
2262 std::vector<SDValue> OutChains;
2263
2264 // Assign locations to all of the incoming arguments.
2265 SmallVector<CCValAssign> ArgLocs;
2266 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2267
2268 if (CallConv == CallingConv::GHC) 2269 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
2270 else 2271 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
2272
2273 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2274 CCValAssign &VA = ArgLocs[i];
2275 SDValue ArgValue;
2276 if (VA.isRegLoc()) 2277 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2278 else 2279 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2280 if (VA.getLocInfo() == CCValAssign::Indirect) {
2281 // If the original argument was split and passed by reference, we need to
2282 // load all parts of it here (using the same address).
2283 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 2284 MachinePointerInfo()));
2285 unsigned ArgIndex = Ins[i].OrigArgIndex;
2286 unsigned ArgPartOffset = Ins[i].PartOffset;
2287 assert(ArgPartOffset == 0);
2288 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2289 CCValAssign &PartVA = ArgLocs[i + 1];
2290 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
2291 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2292 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
2293 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 2294 MachinePointerInfo()));
2295 ++i;
2296 }
2297 continue;
2298 }
2299 InVals.push_back(ArgValue);
2300 }
2301
2302 if (IsVarArg) {
2303 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
2304 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2305 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
2306 MachineFrameInfo &MFI = MF.getFrameInfo();
2307 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2308 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
2309
2310 // Offset of the first variable argument from stack pointer, and size of
2311 // the vararg save area. For now, the varargs save area is either zero or
2312 // large enough to hold a0-a7.
2313 int VaArgOffset, VarArgsSaveSize;
2314
2315 // If all registers are allocated, then all varargs must be passed on the
2316 // stack and we don't need to save any argregs.
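// For example (a sketch): a variadic callee whose fixed arguments already
// occupy all of $a0-$a7 saves nothing here, while one whose only fixed
// argument sits in $a0 saves $a1-$a7 (56 bytes) plus one padding slot so
// the area stays 2*GRLen aligned.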
2317 if (ArgRegs.size() == Idx) {
2318 VaArgOffset = CCInfo.getStackSize();
2319 VarArgsSaveSize = 0;
2320 } else {
2321 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
2322 VaArgOffset = -VarArgsSaveSize;
2323 }
2324
2325 // Record the frame index of the first variable argument,
2326 // which is needed by VASTART.
2327 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2328 LoongArchFI->setVarArgsFrameIndex(FI);
2329
2330 // If saving an odd number of registers, then create an extra stack slot to
2331 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2332 // offsets to even-numbered registers remain 2*GRLen-aligned.
2333 if (Idx % 2) {
2334 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes, 2335 true);
2336 VarArgsSaveSize += GRLenInBytes;
2337 }
2338
2339 // Copy the integer registers that may have been used for passing varargs
2340 // to the vararg save area.
2341 for (unsigned I = Idx; I < ArgRegs.size(); 2342 ++I, VaArgOffset += GRLenInBytes) {
2343 const Register Reg = RegInfo.createVirtualRegister(RC);
2344 RegInfo.addLiveIn(ArgRegs[I], Reg);
2345 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
2346 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2347 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2348 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 2349 MachinePointerInfo::getFixedStack(MF, FI));
2350 cast<StoreSDNode>(Store.getNode()) 2351 ->getMemOperand() 2352 ->setValue((Value *)nullptr);
2353 OutChains.push_back(Store);
2354 }
2355 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
2356 }
2357
2358 // All stores are grouped in one node to allow the matching between
2359 // the size of Ins and InVals. This only happens for vararg functions.
2360 if (!OutChains.empty()) {
2361 OutChains.push_back(Chain);
2362 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2363 }
2364
2365 return Chain;
2366 }
2367
2368 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2369 return CI->isTailCall();
2370 }
2371
2372 // Check if the return value is used only as a return value, as otherwise
2373 // we can't perform a tail call.
2374 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, 2375 SDValue &Chain) const {
2376 if (N->getNumValues() != 1) 2377 return false;
2378 if (!N->hasNUsesOfValue(1, 0)) 2379 return false;
2380
2381 SDNode *Copy = *N->use_begin();
2382 if (Copy->getOpcode() != ISD::CopyToReg) 2383 return false;
2384
2385 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
2386 // isn't safe to perform a tail call.
2387 if (Copy->getGluedNode()) 2388 return false;
2389
2390 // The copy must be used by a LoongArchISD::RET, and nothing else.
2391 bool HasRet = false;
2392 for (SDNode *Node : Copy->uses()) {
2393 if (Node->getOpcode() != LoongArchISD::RET) 2394 return false;
2395 HasRet = true;
2396 }
2397
2398 if (!HasRet) 2399 return false;
2400
2401 Chain = Copy->getOperand(0);
2402 return true;
2403 }
2404
2405 // Check whether the call is eligible for tail call optimization.
2406 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 2407 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 2408 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 2409 2410 auto CalleeCC = CLI.CallConv; 2411 auto &Outs = CLI.Outs; 2412 auto &Caller = MF.getFunction(); 2413 auto CallerCC = Caller.getCallingConv(); 2414 2415 // Do not tail call opt if the stack is used to pass parameters. 2416 if (CCInfo.getStackSize() != 0) 2417 return false; 2418 2419 // Do not tail call opt if any parameters need to be passed indirectly. 2420 for (auto &VA : ArgLocs) 2421 if (VA.getLocInfo() == CCValAssign::Indirect) 2422 return false; 2423 2424 // Do not tail call opt if either caller or callee uses struct return 2425 // semantics. 2426 auto IsCallerStructRet = Caller.hasStructRetAttr(); 2427 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 2428 if (IsCallerStructRet || IsCalleeStructRet) 2429 return false; 2430 2431 // Do not tail call opt if either the callee or caller has a byval argument. 2432 for (auto &Arg : Outs) 2433 if (Arg.Flags.isByVal()) 2434 return false; 2435 2436 // The callee has to preserve all registers the caller needs to preserve. 2437 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2438 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 2439 if (CalleeCC != CallerCC) { 2440 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 2441 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 2442 return false; 2443 } 2444 return true; 2445 } 2446 2447 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 2448 return DAG.getDataLayout().getPrefTypeAlign( 2449 VT.getTypeForEVT(*DAG.getContext())); 2450 } 2451 2452 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 2453 // and output parameter nodes. 2454 SDValue 2455 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 2456 SmallVectorImpl<SDValue> &InVals) const { 2457 SelectionDAG &DAG = CLI.DAG; 2458 SDLoc &DL = CLI.DL; 2459 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2460 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2461 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2462 SDValue Chain = CLI.Chain; 2463 SDValue Callee = CLI.Callee; 2464 CallingConv::ID CallConv = CLI.CallConv; 2465 bool IsVarArg = CLI.IsVarArg; 2466 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2467 MVT GRLenVT = Subtarget.getGRLenVT(); 2468 bool &IsTailCall = CLI.IsTailCall; 2469 2470 MachineFunction &MF = DAG.getMachineFunction(); 2471 2472 // Analyze the operands of the call, assigning locations to each operand. 2473 SmallVector<CCValAssign> ArgLocs; 2474 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 2475 2476 if (CallConv == CallingConv::GHC) 2477 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 2478 else 2479 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 2480 2481 // Check if it's really possible to do a tail call. 2482 if (IsTailCall) 2483 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 2484 2485 if (IsTailCall) 2486 ++NumTailCalls; 2487 else if (CLI.CB && CLI.CB->isMustTailCall()) 2488 report_fatal_error("failed to perform tail call elimination on a call " 2489 "site marked musttail"); 2490 2491 // Get a count of how many bytes are to be pushed on the stack. 2492 unsigned NumBytes = ArgCCInfo.getStackSize(); 2493 2494 // Create local copies for byval args. 
2495 SmallVector<SDValue> ByValArgs; 2496 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2497 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2498 if (!Flags.isByVal()) 2499 continue; 2500 2501 SDValue Arg = OutVals[i]; 2502 unsigned Size = Flags.getByValSize(); 2503 Align Alignment = Flags.getNonZeroByValAlign(); 2504 2505 int FI = 2506 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 2507 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2508 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 2509 2510 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 2511 /*IsVolatile=*/false, 2512 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, 2513 MachinePointerInfo(), MachinePointerInfo()); 2514 ByValArgs.push_back(FIPtr); 2515 } 2516 2517 if (!IsTailCall) 2518 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 2519 2520 // Copy argument values to their designated locations. 2521 SmallVector<std::pair<Register, SDValue>> RegsToPass; 2522 SmallVector<SDValue> MemOpChains; 2523 SDValue StackPtr; 2524 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 2525 CCValAssign &VA = ArgLocs[i]; 2526 SDValue ArgValue = OutVals[i]; 2527 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2528 2529 // Promote the value if needed. 2530 // For now, only handle fully promoted and indirect arguments. 2531 if (VA.getLocInfo() == CCValAssign::Indirect) { 2532 // Store the argument in a stack slot and pass its address. 2533 Align StackAlign = 2534 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 2535 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 2536 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 2537 // If the original argument was split and passed by reference, we need to 2538 // store the required parts of it here (and pass just one address). 2539 unsigned ArgIndex = Outs[i].OrigArgIndex; 2540 unsigned ArgPartOffset = Outs[i].PartOffset; 2541 assert(ArgPartOffset == 0); 2542 // Calculate the total size to store. We don't have access to what we're 2543 // actually storing other than performing the loop and collecting the 2544 // info. 2545 SmallVector<std::pair<SDValue, SDValue>> Parts; 2546 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 2547 SDValue PartValue = OutVals[i + 1]; 2548 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 2549 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 2550 EVT PartVT = PartValue.getValueType(); 2551 2552 StoredSize += PartVT.getStoreSize(); 2553 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 2554 Parts.push_back(std::make_pair(PartValue, Offset)); 2555 ++i; 2556 } 2557 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 2558 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 2559 MemOpChains.push_back( 2560 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 2561 MachinePointerInfo::getFixedStack(MF, FI))); 2562 for (const auto &Part : Parts) { 2563 SDValue PartValue = Part.first; 2564 SDValue PartOffset = Part.second; 2565 SDValue Address = 2566 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 2567 MemOpChains.push_back( 2568 DAG.getStore(Chain, DL, PartValue, Address, 2569 MachinePointerInfo::getFixedStack(MF, FI))); 2570 } 2571 ArgValue = SpillSlot; 2572 } else { 2573 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 2574 } 2575 2576 // Use local copy if it is a byval arg. 
2577 if (Flags.isByVal()) 2578 ArgValue = ByValArgs[j++]; 2579 2580 if (VA.isRegLoc()) { 2581 // Queue up the argument copies and emit them at the end. 2582 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 2583 } else { 2584 assert(VA.isMemLoc() && "Argument not register or memory"); 2585 assert(!IsTailCall && "Tail call not allowed if stack is used " 2586 "for passing parameters"); 2587 2588 // Work out the address of the stack slot. 2589 if (!StackPtr.getNode()) 2590 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 2591 SDValue Address = 2592 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 2593 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 2594 2595 // Emit the store. 2596 MemOpChains.push_back( 2597 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2598 } 2599 } 2600 2601 // Join the stores, which are independent of one another. 2602 if (!MemOpChains.empty()) 2603 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2604 2605 SDValue Glue; 2606 2607 // Build a sequence of copy-to-reg nodes, chained and glued together. 2608 for (auto &Reg : RegsToPass) { 2609 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 2610 Glue = Chain.getValue(1); 2611 } 2612 2613 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 2614 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 2615 // split it and then direct call can be matched by PseudoCALL. 2616 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 2617 const GlobalValue *GV = S->getGlobal(); 2618 unsigned OpFlags = 2619 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV) 2620 ? LoongArchII::MO_CALL 2621 : LoongArchII::MO_CALL_PLT; 2622 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 2623 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2624 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal( 2625 *MF.getFunction().getParent(), nullptr) 2626 ? LoongArchII::MO_CALL 2627 : LoongArchII::MO_CALL_PLT; 2628 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 2629 } 2630 2631 // The first call operand is the chain and the second is the target address. 2632 SmallVector<SDValue> Ops; 2633 Ops.push_back(Chain); 2634 Ops.push_back(Callee); 2635 2636 // Add argument registers to the end of the list so that they are 2637 // known live into the call. 2638 for (auto &Reg : RegsToPass) 2639 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 2640 2641 if (!IsTailCall) { 2642 // Add a register mask operand representing the call-preserved registers. 2643 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2644 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2645 assert(Mask && "Missing call preserved mask for calling convention"); 2646 Ops.push_back(DAG.getRegisterMask(Mask)); 2647 } 2648 2649 // Glue the call to the argument copies, if any. 2650 if (Glue.getNode()) 2651 Ops.push_back(Glue); 2652 2653 // Emit the call. 
2654 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2655 2656 if (IsTailCall) { 2657 MF.getFrameInfo().setHasTailCall(); 2658 SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); 2659 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2660 return Ret; 2661 } 2662 2663 Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); 2664 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2665 Glue = Chain.getValue(1); 2666 2667 // Mark the end of the call, which is glued to the call itself. 2668 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2669 Glue = Chain.getValue(1); 2670 2671 // Assign locations to each value returned by this call. 2672 SmallVector<CCValAssign> RVLocs; 2673 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 2674 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 2675 2676 // Copy all of the result registers out of their specified physreg. 2677 for (auto &VA : RVLocs) { 2678 // Copy the value out. 2679 SDValue RetValue = 2680 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 2681 // Glue the RetValue to the end of the call sequence. 2682 Chain = RetValue.getValue(1); 2683 Glue = RetValue.getValue(2); 2684 2685 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 2686 2687 InVals.push_back(RetValue); 2688 } 2689 2690 return Chain; 2691 } 2692 2693 bool LoongArchTargetLowering::CanLowerReturn( 2694 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 2695 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 2696 SmallVector<CCValAssign> RVLocs; 2697 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 2698 2699 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2700 LoongArchABI::ABI ABI = 2701 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 2702 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 2703 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 2704 nullptr)) 2705 return false; 2706 } 2707 return true; 2708 } 2709 2710 SDValue LoongArchTargetLowering::LowerReturn( 2711 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 2712 const SmallVectorImpl<ISD::OutputArg> &Outs, 2713 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 2714 SelectionDAG &DAG) const { 2715 // Stores the assignment of the return value to a location. 2716 SmallVector<CCValAssign> RVLocs; 2717 2718 // Info about the registers and stack slot. 2719 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 2720 *DAG.getContext()); 2721 2722 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 2723 nullptr, CC_LoongArch); 2724 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 2725 report_fatal_error("GHC functions return void only"); 2726 SDValue Glue; 2727 SmallVector<SDValue, 4> RetOps(1, Chain); 2728 2729 // Copy the result values into the output registers. 2730 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 2731 CCValAssign &VA = RVLocs[i]; 2732 assert(VA.isRegLoc() && "Can only return in registers!"); 2733 2734 // Handle a 'normal' return. 2735 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); 2736 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 2737 2738 // Guarantee that all emitted copies are stuck together. 2739 Glue = Chain.getValue(1); 2740 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2741 } 2742 2743 RetOps[0] = Chain; // Update chain. 2744 2745 // Add the glue node if we have it. 
2746 if (Glue.getNode()) 2747 RetOps.push_back(Glue);
2748
2749 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
2750 }
2751
2752 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 2753 bool ForCodeSize) const {
2754 // TODO: Maybe need more checks here after vector extension is supported.
2755 if (VT == MVT::f32 && !Subtarget.hasBasicF()) 2756 return false;
2757 if (VT == MVT::f64 && !Subtarget.hasBasicD()) 2758 return false;
2759 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
2760 }
2761
2762 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
2763 return true;
2764 }
2765
2766 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
2767 return true;
2768 }
2769
2770 bool LoongArchTargetLowering::shouldInsertFencesForAtomic( 2771 const Instruction *I) const {
2772 if (!Subtarget.is64Bit()) 2773 return isa<LoadInst>(I) || isa<StoreInst>(I);
2774
2775 if (isa<LoadInst>(I)) 2776 return true;
2777
2778 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
2779 // require fences because we can use amswap_db.[w/d].
2780 if (isa<StoreInst>(I)) {
2781 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
2782 return (Size == 8 || Size == 16);
2783 }
2784
2785 return false;
2786 }
2787
2788 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, 2789 LLVMContext &Context, 2790 EVT VT) const {
2791 if (!VT.isVector()) 2792 return getPointerTy(DL);
2793 return VT.changeVectorElementTypeToInteger();
2794 }
2795
2796 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
2797 // TODO: Support vectors.
2798 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
2799 }
2800
2801 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 2802 const CallInst &I, 2803 MachineFunction &MF, 2804 unsigned Intrinsic) const {
2805 switch (Intrinsic) {
2806 default: 2807 return false;
2808 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
2809 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
2810 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
2811 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
2812 Info.opc = ISD::INTRINSIC_W_CHAIN;
2813 Info.memVT = MVT::i32;
2814 Info.ptrVal = I.getArgOperand(0);
2815 Info.offset = 0;
2816 Info.align = Align(4);
2817 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 2818 MachineMemOperand::MOVolatile;
2819 return true;
2820 // TODO: Add more Intrinsics later.
2821 }
2822 }
2823
2824 TargetLowering::AtomicExpansionKind 2825 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2826 // TODO: Add more AtomicRMWInst that need to be extended.
2827
2828 // Since floating-point operations require a non-trivial set of data
2829 // operations, use CmpXChg to expand.
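// For example (a sketch): an atomicrmw fadd on float is expanded via a
// cmpxchg loop, while an i8 or i16 atomicrmw add takes the masked-intrinsic
// path below; plain i32/i64 integer operations are left for normal selection.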
2830 if (AI->isFloatingPointOperation() || 2831 AI->getOperation() == AtomicRMWInst::UIncWrap || 2832 AI->getOperation() == AtomicRMWInst::UDecWrap) 2833 return AtomicExpansionKind::CmpXChg; 2834 2835 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 2836 if (Size == 8 || Size == 16) 2837 return AtomicExpansionKind::MaskedIntrinsic; 2838 return AtomicExpansionKind::None; 2839 } 2840 2841 static Intrinsic::ID 2842 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, 2843 AtomicRMWInst::BinOp BinOp) { 2844 if (GRLen == 64) { 2845 switch (BinOp) { 2846 default: 2847 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2848 case AtomicRMWInst::Xchg: 2849 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; 2850 case AtomicRMWInst::Add: 2851 return Intrinsic::loongarch_masked_atomicrmw_add_i64; 2852 case AtomicRMWInst::Sub: 2853 return Intrinsic::loongarch_masked_atomicrmw_sub_i64; 2854 case AtomicRMWInst::Nand: 2855 return Intrinsic::loongarch_masked_atomicrmw_nand_i64; 2856 case AtomicRMWInst::UMax: 2857 return Intrinsic::loongarch_masked_atomicrmw_umax_i64; 2858 case AtomicRMWInst::UMin: 2859 return Intrinsic::loongarch_masked_atomicrmw_umin_i64; 2860 case AtomicRMWInst::Max: 2861 return Intrinsic::loongarch_masked_atomicrmw_max_i64; 2862 case AtomicRMWInst::Min: 2863 return Intrinsic::loongarch_masked_atomicrmw_min_i64; 2864 // TODO: support other AtomicRMWInst. 2865 } 2866 } 2867 2868 if (GRLen == 32) { 2869 switch (BinOp) { 2870 default: 2871 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2872 case AtomicRMWInst::Xchg: 2873 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; 2874 case AtomicRMWInst::Add: 2875 return Intrinsic::loongarch_masked_atomicrmw_add_i32; 2876 case AtomicRMWInst::Sub: 2877 return Intrinsic::loongarch_masked_atomicrmw_sub_i32; 2878 case AtomicRMWInst::Nand: 2879 return Intrinsic::loongarch_masked_atomicrmw_nand_i32; 2880 // TODO: support other AtomicRMWInst. 2881 } 2882 } 2883 2884 llvm_unreachable("Unexpected GRLen\n"); 2885 } 2886 2887 TargetLowering::AtomicExpansionKind 2888 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( 2889 AtomicCmpXchgInst *CI) const { 2890 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 2891 if (Size == 8 || Size == 16) 2892 return AtomicExpansionKind::MaskedIntrinsic; 2893 return AtomicExpansionKind::None; 2894 } 2895 2896 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 2897 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 2898 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 2899 Value *Ordering = 2900 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord)); 2901 2902 // TODO: Support cmpxchg on LA32. 
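// Roughly (an illustrative sketch; the exact mangled intrinsic name may
// differ), an i8 cmpxchg on LA64 is rewritten into IR of the form:
//   %res = call i64 @llvm.loongarch.masked.cmpxchg.i64(
//              ptr %aligned, i64 %cmp, i64 %new, i64 %mask, i64 %ordering)
//   %ret = trunc i64 %res to i32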
2903 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; 2904 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 2905 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 2906 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2907 Type *Tys[] = {AlignedAddr->getType()}; 2908 Function *MaskedCmpXchg = 2909 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 2910 Value *Result = Builder.CreateCall( 2911 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 2912 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2913 return Result; 2914 } 2915 2916 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( 2917 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 2918 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 2919 unsigned GRLen = Subtarget.getGRLen(); 2920 Value *Ordering = 2921 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering())); 2922 Type *Tys[] = {AlignedAddr->getType()}; 2923 Function *LlwOpScwLoop = Intrinsic::getDeclaration( 2924 AI->getModule(), 2925 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); 2926 2927 if (GRLen == 64) { 2928 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 2929 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2930 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 2931 } 2932 2933 Value *Result; 2934 2935 // Must pass the shift amount needed to sign extend the loaded value prior 2936 // to performing a signed comparison for min/max. ShiftAmt is the number of 2937 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which 2938 // is the number of bits to left+right shift the value in order to 2939 // sign-extend. 2940 if (AI->getOperation() == AtomicRMWInst::Min || 2941 AI->getOperation() == AtomicRMWInst::Max) { 2942 const DataLayout &DL = AI->getModule()->getDataLayout(); 2943 unsigned ValWidth = 2944 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 2945 Value *SextShamt = 2946 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt); 2947 Result = Builder.CreateCall(LlwOpScwLoop, 2948 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 2949 } else { 2950 Result = 2951 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 2952 } 2953 2954 if (GRLen == 64) 2955 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2956 return Result; 2957 } 2958 2959 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( 2960 const MachineFunction &MF, EVT VT) const { 2961 VT = VT.getScalarType(); 2962 2963 if (!VT.isSimple()) 2964 return false; 2965 2966 switch (VT.getSimpleVT().SimpleTy) { 2967 case MVT::f32: 2968 case MVT::f64: 2969 return true; 2970 default: 2971 break; 2972 } 2973 2974 return false; 2975 } 2976 2977 Register LoongArchTargetLowering::getExceptionPointerRegister( 2978 const Constant *PersonalityFn) const { 2979 return LoongArch::R4; 2980 } 2981 2982 Register LoongArchTargetLowering::getExceptionSelectorRegister( 2983 const Constant *PersonalityFn) const { 2984 return LoongArch::R5; 2985 } 2986 2987 //===----------------------------------------------------------------------===// 2988 // LoongArch Inline Assembly Support 2989 //===----------------------------------------------------------------------===// 2990 2991 LoongArchTargetLowering::ConstraintType 2992 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { 2993 // LoongArch specific constraints in GCC: config/loongarch/constraints.md 2994 // 2995 // 'f': A 
floating-point register (if available). 2996 // 'k': A memory operand whose address is formed by a base register and 2997 // (optionally scaled) index register. 2998 // 'l': A signed 16-bit constant. 2999 // 'm': A memory operand whose address is formed by a base register and 3000 // offset that is suitable for use in instructions with the same 3001 // addressing mode as st.w and ld.w. 3002 // 'I': A signed 12-bit constant (for arithmetic instructions). 3003 // 'J': Integer zero. 3004 // 'K': An unsigned 12-bit constant (for logic instructions). 3005 // "ZB": An address that is held in a general-purpose register. The offset is 3006 // zero. 3007 // "ZC": A memory operand whose address is formed by a base register and 3008 // offset that is suitable for use in instructions with the same 3009 // addressing mode as ll.w and sc.w. 3010 if (Constraint.size() == 1) { 3011 switch (Constraint[0]) { 3012 default: 3013 break; 3014 case 'f': 3015 return C_RegisterClass; 3016 case 'l': 3017 case 'I': 3018 case 'J': 3019 case 'K': 3020 return C_Immediate; 3021 case 'k': 3022 return C_Memory; 3023 } 3024 } 3025 3026 if (Constraint == "ZC" || Constraint == "ZB") 3027 return C_Memory; 3028 3029 // 'm' is handled here. 3030 return TargetLowering::getConstraintType(Constraint); 3031 } 3032 3033 unsigned LoongArchTargetLowering::getInlineAsmMemConstraint( 3034 StringRef ConstraintCode) const { 3035 return StringSwitch<unsigned>(ConstraintCode) 3036 .Case("k", InlineAsm::Constraint_k) 3037 .Case("ZB", InlineAsm::Constraint_ZB) 3038 .Case("ZC", InlineAsm::Constraint_ZC) 3039 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); 3040 } 3041 3042 std::pair<unsigned, const TargetRegisterClass *> 3043 LoongArchTargetLowering::getRegForInlineAsmConstraint( 3044 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 3045 // First, see if this is a constraint that directly corresponds to a LoongArch 3046 // register class. 3047 if (Constraint.size() == 1) { 3048 switch (Constraint[0]) { 3049 case 'r': 3050 // TODO: Support fixed vectors up to GRLen? 3051 if (VT.isVector()) 3052 break; 3053 return std::make_pair(0U, &LoongArch::GPRRegClass); 3054 case 'f': 3055 if (Subtarget.hasBasicF() && VT == MVT::f32) 3056 return std::make_pair(0U, &LoongArch::FPR32RegClass); 3057 if (Subtarget.hasBasicD() && VT == MVT::f64) 3058 return std::make_pair(0U, &LoongArch::FPR64RegClass); 3059 if (Subtarget.hasExtLSX() && 3060 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT)) 3061 return std::make_pair(0U, &LoongArch::LSX128RegClass); 3062 if (Subtarget.hasExtLASX() && 3063 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT)) 3064 return std::make_pair(0U, &LoongArch::LASX256RegClass); 3065 break; 3066 default: 3067 break; 3068 } 3069 } 3070 3071 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen 3072 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm 3073 // constraints while the official register name is prefixed with a '$'. So we 3074 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) 3075 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is 3076 // case insensitive, so no need to convert the constraint to upper case here. 3077 // 3078 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly 3079 // decode the usage of register name aliases into their official names. And 3080 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use 3081 // official register names. 
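// For example (a sketch): the constraint "{$f3}" is rewritten to "{f3}"
// below; if the D feature is present and the operand is f64, the matched
// $f3 is then widened to $f3_64 in FPR64RegClass.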
  if (Constraint.startswith("{$r") || Constraint.startswith("{$f") ||
      Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
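  // Rough illustration of what returning true from this hook enables (the
  // rewrite itself is done by the generic DAG combiner), e.g. for C == 17:
  //
  //   (mul x, 17) --> (add (shl x, 4), x)
  //
  // which LoongArch can typically select as a single ALSL.W/ALSL.D instead of
  // a multiply.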
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits. Or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, since it
      // needs one more instruction than the other three cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after supporting the vector extensions.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require the offset to be either a 12-bit signed offset or a 14-bit signed
  // offset left-shifted by 2 (i.e. addressing modes 2 and 3 above).
  if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0:
    // "i" is not allowed.
    if (!AM.HasBaseReg)
      return false;
    // Otherwise we have "r+i".
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs != 0)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Otherwise we have "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
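  // For example, a (zext (load i8)) can usually be folded into the load by
  // selecting ld.bu, which already zero-extends to GRLen, so the separate
  // zext costs nothing.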
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  // On LA64, i32 values are kept sign-extended to 64 bits in registers, so a
  // sign extension from i32 to i64 is effectively free, while a zero
  // extension needs an extra instruction.
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  // LoongArch has ANDN, so folds such as
  //   (X & Y) == Y  -->  (~X & Y) == 0
  // are profitable when Y is not a constant.
  return !isa<ConstantSDNode>(Y);
}