//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.
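  // GRLenVT is the native general-purpose register type (i32 on LA32, i64 on
  // LA64), so the actions below cover the native integer width on both.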

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
  }

  // Set operations for LA32 only.
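  // On LA32, the 64-bit READ/WRITE_REGISTER nodes and i64-typed target
  // intrinsics are custom-lowered (largely to emit diagnostics), and the i128
  // multiplication libcall is disabled.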

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    // Set libcalls.
    setLibcallName(RTLIB::MUL_I128, nullptr);
    // The MULO libcall is not part of libgcc, only compiler-rt.
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  // The MULO libcall is not part of libgcc, only compiler-rt.
  setLibcallName(RTLIB::MULO_I128, nullptr);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
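      // (e.g. a truncating store of v4i32 as v4i16, or an extending load in
      // the other direction, is not selected directly and gets expanded.)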
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.
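  // The 256-bit LASX setup below mirrors the LSX one; note that here
  // EXTRACT_VECTOR_ELT is Custom (see lowerEXTRACT_VECTOR_ELT) rather than
  // Legal as for LSX.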

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
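  // For example, (add (GlobalAddress @g), 8) is kept as an explicit ADD here
  // rather than being folded into a single @g+8 address node.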
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // TODO: custom shuffle.
  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
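    // (The MEMBARRIER node still carries the chain, so compile-time ordering
    // of the surrounding memory operations is preserved.)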
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
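  // Operand 1 is the va_list pointer passed to llvm.va_start; the frame index
  // above is stored through it (operand 2 carries the source Value for alias
  // information).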
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
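    // i.e. roughly: pcalau12i $reg, %got_pc_hi20(sym) followed by
    // ld.d $reg, $reg, %got_pc_lo12(sym) on LA64 (ld.w on LA32).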
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
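  // The descriptor address computed above is passed as the only argument, and
  // the call's result is used directly as the address of the TLS symbol.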
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                   : LoongArch::PseudoLA_TLS_GD,
                             Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                   : LoongArch::PseudoLA_TLS_LD,
                             Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}

template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
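  // Most of the cases below only range-check an immediate operand via
  // checkIntrinsicImmArg; an out-of-range value emits a diagnostic and yields
  // UNDEF, otherwise the intrinsic is left for normal instruction selection.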
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with a chain and
// returns merge values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                             : Op;
  }
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<5>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1494 : SDValue(); 1495 case Intrinsic::loongarch_lasx_xvstelm_h: 1496 return (!isShiftedInt<8, 1>( 1497 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1498 !isUInt<4>(Op.getConstantOperandVal(5))) 1499 ? emitIntrinsicErrorMessage( 1500 Op, "argument out of range or not a multiple of 2", DAG) 1501 : SDValue(); 1502 case Intrinsic::loongarch_lsx_vstelm_h: 1503 return (!isShiftedInt<8, 1>( 1504 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1505 !isUInt<3>(Op.getConstantOperandVal(5))) 1506 ? emitIntrinsicErrorMessage( 1507 Op, "argument out of range or not a multiple of 2", DAG) 1508 : SDValue(); 1509 case Intrinsic::loongarch_lasx_xvstelm_w: 1510 return (!isShiftedInt<8, 2>( 1511 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1512 !isUInt<3>(Op.getConstantOperandVal(5))) 1513 ? emitIntrinsicErrorMessage( 1514 Op, "argument out of range or not a multiple of 4", DAG) 1515 : SDValue(); 1516 case Intrinsic::loongarch_lsx_vstelm_w: 1517 return (!isShiftedInt<8, 2>( 1518 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1519 !isUInt<2>(Op.getConstantOperandVal(5))) 1520 ? emitIntrinsicErrorMessage( 1521 Op, "argument out of range or not a multiple of 4", DAG) 1522 : SDValue(); 1523 case Intrinsic::loongarch_lasx_xvstelm_d: 1524 return (!isShiftedInt<8, 3>( 1525 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1526 !isUInt<2>(Op.getConstantOperandVal(5))) 1527 ? emitIntrinsicErrorMessage( 1528 Op, "argument out of range or not a multiple of 8", DAG) 1529 : SDValue(); 1530 case Intrinsic::loongarch_lsx_vstelm_d: 1531 return (!isShiftedInt<8, 3>( 1532 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1533 !isUInt<1>(Op.getConstantOperandVal(5))) 1534 ? 
emitIntrinsicErrorMessage( 1535 Op, "argument out of range or not a multiple of 8", DAG) 1536 : SDValue(); 1537 } 1538 } 1539 1540 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, 1541 SelectionDAG &DAG) const { 1542 SDLoc DL(Op); 1543 SDValue Lo = Op.getOperand(0); 1544 SDValue Hi = Op.getOperand(1); 1545 SDValue Shamt = Op.getOperand(2); 1546 EVT VT = Lo.getValueType(); 1547 1548 // if Shamt-GRLen < 0: // Shamt < GRLen 1549 // Lo = Lo << Shamt 1550 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) 1551 // else: 1552 // Lo = 0 1553 // Hi = Lo << (Shamt-GRLen) 1554 1555 SDValue Zero = DAG.getConstant(0, DL, VT); 1556 SDValue One = DAG.getConstant(1, DL, VT); 1557 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 1558 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 1559 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 1560 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 1561 1562 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1563 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1564 SDValue ShiftRightLo = 1565 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); 1566 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1567 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1568 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); 1569 1570 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 1571 1572 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1573 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1574 1575 SDValue Parts[2] = {Lo, Hi}; 1576 return DAG.getMergeValues(Parts, DL); 1577 } 1578 1579 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, 1580 SelectionDAG &DAG, 1581 bool IsSRA) const { 1582 SDLoc DL(Op); 1583 SDValue Lo = Op.getOperand(0); 1584 SDValue Hi = Op.getOperand(1); 1585 SDValue Shamt = Op.getOperand(2); 1586 EVT VT = Lo.getValueType(); 1587 1588 // SRA expansion: 1589 // if Shamt-GRLen < 0: // Shamt < GRLen 1590 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 1591 // Hi = Hi >>s Shamt 1592 // else: 1593 // Lo = Hi >>s (Shamt-GRLen); 1594 // Hi = Hi >>s (GRLen-1) 1595 // 1596 // SRL expansion: 1597 // if Shamt-GRLen < 0: // Shamt < GRLen 1598 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 1599 // Hi = Hi >>u Shamt 1600 // else: 1601 // Lo = Hi >>u (Shamt-GRLen); 1602 // Hi = 0; 1603 1604 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 1605 1606 SDValue Zero = DAG.getConstant(0, DL, VT); 1607 SDValue One = DAG.getConstant(1, DL, VT); 1608 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 1609 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 1610 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 1611 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 1612 1613 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1614 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1615 SDValue ShiftLeftHi = 1616 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); 1617 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1618 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1619 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); 1620 SDValue HiFalse = 1621 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::ROTL:
    return LoongArchISD::ROTL_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 are not legal types for LA64, these operations
// would otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W nodes later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
    // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Helper function that emits an error message for intrinsics with or without
// a chain and returns a UNDEF value (plus the chain, when present) as the
// results.
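// When WithChain is true, the incoming chain (operand 0) is also pushed, so
// the chain result of the replaced intrinsic node remains available.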
1690 static void emitErrorAndReplaceIntrinsicResults( 1691 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, 1692 StringRef ErrorMsg, bool WithChain = true) { 1693 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); 1694 Results.push_back(DAG.getUNDEF(N->getValueType(0))); 1695 if (!WithChain) 1696 return; 1697 Results.push_back(N->getOperand(0)); 1698 } 1699 1700 template <unsigned N> 1701 static void 1702 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, 1703 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, 1704 unsigned ResOp) { 1705 const StringRef ErrorMsgOOR = "argument out of range"; 1706 unsigned Imm = Node->getConstantOperandVal(2); 1707 if (!isUInt<N>(Imm)) { 1708 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, 1709 /*WithChain=*/false); 1710 return; 1711 } 1712 SDLoc DL(Node); 1713 SDValue Vec = Node->getOperand(1); 1714 1715 SDValue PickElt = 1716 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, 1717 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), 1718 DAG.getValueType(Vec.getValueType().getVectorElementType())); 1719 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), 1720 PickElt.getValue(0))); 1721 } 1722 1723 static void replaceVecCondBranchResults(SDNode *N, 1724 SmallVectorImpl<SDValue> &Results, 1725 SelectionDAG &DAG, 1726 const LoongArchSubtarget &Subtarget, 1727 unsigned ResOp) { 1728 SDLoc DL(N); 1729 SDValue Vec = N->getOperand(1); 1730 1731 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); 1732 Results.push_back( 1733 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); 1734 } 1735 1736 static void 1737 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 1738 SelectionDAG &DAG, 1739 const LoongArchSubtarget &Subtarget) { 1740 switch (N->getConstantOperandVal(0)) { 1741 default: 1742 llvm_unreachable("Unexpected Intrinsic."); 1743 case Intrinsic::loongarch_lsx_vpickve2gr_b: 1744 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 1745 LoongArchISD::VPICK_SEXT_ELT); 1746 break; 1747 case Intrinsic::loongarch_lsx_vpickve2gr_h: 1748 case Intrinsic::loongarch_lasx_xvpickve2gr_w: 1749 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 1750 LoongArchISD::VPICK_SEXT_ELT); 1751 break; 1752 case Intrinsic::loongarch_lsx_vpickve2gr_w: 1753 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 1754 LoongArchISD::VPICK_SEXT_ELT); 1755 break; 1756 case Intrinsic::loongarch_lsx_vpickve2gr_bu: 1757 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 1758 LoongArchISD::VPICK_ZEXT_ELT); 1759 break; 1760 case Intrinsic::loongarch_lsx_vpickve2gr_hu: 1761 case Intrinsic::loongarch_lasx_xvpickve2gr_wu: 1762 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 1763 LoongArchISD::VPICK_ZEXT_ELT); 1764 break; 1765 case Intrinsic::loongarch_lsx_vpickve2gr_wu: 1766 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 1767 LoongArchISD::VPICK_ZEXT_ELT); 1768 break; 1769 case Intrinsic::loongarch_lsx_bz_b: 1770 case Intrinsic::loongarch_lsx_bz_h: 1771 case Intrinsic::loongarch_lsx_bz_w: 1772 case Intrinsic::loongarch_lsx_bz_d: 1773 case Intrinsic::loongarch_lasx_xbz_b: 1774 case Intrinsic::loongarch_lasx_xbz_h: 1775 case Intrinsic::loongarch_lasx_xbz_w: 1776 case Intrinsic::loongarch_lasx_xbz_d: 1777 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1778 LoongArchISD::VALL_ZERO); 1779 break; 1780 case Intrinsic::loongarch_lsx_bz_v: 1781 case Intrinsic::loongarch_lasx_xbz_v: 1782 
replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1783 LoongArchISD::VANY_ZERO); 1784 break; 1785 case Intrinsic::loongarch_lsx_bnz_b: 1786 case Intrinsic::loongarch_lsx_bnz_h: 1787 case Intrinsic::loongarch_lsx_bnz_w: 1788 case Intrinsic::loongarch_lsx_bnz_d: 1789 case Intrinsic::loongarch_lasx_xbnz_b: 1790 case Intrinsic::loongarch_lasx_xbnz_h: 1791 case Intrinsic::loongarch_lasx_xbnz_w: 1792 case Intrinsic::loongarch_lasx_xbnz_d: 1793 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1794 LoongArchISD::VALL_NONZERO); 1795 break; 1796 case Intrinsic::loongarch_lsx_bnz_v: 1797 case Intrinsic::loongarch_lasx_xbnz_v: 1798 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1799 LoongArchISD::VANY_NONZERO); 1800 break; 1801 } 1802 } 1803 1804 void LoongArchTargetLowering::ReplaceNodeResults( 1805 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 1806 SDLoc DL(N); 1807 EVT VT = N->getValueType(0); 1808 switch (N->getOpcode()) { 1809 default: 1810 llvm_unreachable("Don't know how to legalize this operation"); 1811 case ISD::SHL: 1812 case ISD::SRA: 1813 case ISD::SRL: 1814 case ISD::ROTR: 1815 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1816 "Unexpected custom legalisation"); 1817 if (N->getOperand(1).getOpcode() != ISD::Constant) { 1818 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 1819 break; 1820 } 1821 break; 1822 case ISD::ROTL: 1823 ConstantSDNode *CN; 1824 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) { 1825 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 1826 break; 1827 } 1828 break; 1829 case ISD::FP_TO_SINT: { 1830 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1831 "Unexpected custom legalisation"); 1832 SDValue Src = N->getOperand(0); 1833 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); 1834 if (getTypeAction(*DAG.getContext(), Src.getValueType()) != 1835 TargetLowering::TypeSoftenFloat) { 1836 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); 1837 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); 1838 return; 1839 } 1840 // If the FP type needs to be softened, emit a library call using the 'si' 1841 // version. If we left it to default legalization we'd end up with 'di'. 
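    // For example, a softened f32 -> i32 conversion should end up calling
    // __fixsfsi here, rather than the wider __fixsfdi that the promoted i64
    // result would otherwise select.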
1842 RTLIB::Libcall LC; 1843 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); 1844 MakeLibCallOptions CallOptions; 1845 EVT OpVT = Src.getValueType(); 1846 CallOptions.setTypeListBeforeSoften(OpVT, VT, true); 1847 SDValue Chain = SDValue(); 1848 SDValue Result; 1849 std::tie(Result, Chain) = 1850 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); 1851 Results.push_back(Result); 1852 break; 1853 } 1854 case ISD::BITCAST: { 1855 SDValue Src = N->getOperand(0); 1856 EVT SrcVT = Src.getValueType(); 1857 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && 1858 Subtarget.hasBasicF()) { 1859 SDValue Dst = 1860 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); 1861 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); 1862 } 1863 break; 1864 } 1865 case ISD::FP_TO_UINT: { 1866 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1867 "Unexpected custom legalisation"); 1868 auto &TLI = DAG.getTargetLoweringInfo(); 1869 SDValue Tmp1, Tmp2; 1870 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); 1871 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); 1872 break; 1873 } 1874 case ISD::BSWAP: { 1875 SDValue Src = N->getOperand(0); 1876 assert((VT == MVT::i16 || VT == MVT::i32) && 1877 "Unexpected custom legalization"); 1878 MVT GRLenVT = Subtarget.getGRLenVT(); 1879 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 1880 SDValue Tmp; 1881 switch (VT.getSizeInBits()) { 1882 default: 1883 llvm_unreachable("Unexpected operand width"); 1884 case 16: 1885 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc); 1886 break; 1887 case 32: 1888 // Only LA64 will get to here due to the size mismatch between VT and 1889 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. 1890 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc); 1891 break; 1892 } 1893 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 1894 break; 1895 } 1896 case ISD::BITREVERSE: { 1897 SDValue Src = N->getOperand(0); 1898 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && 1899 "Unexpected custom legalization"); 1900 MVT GRLenVT = Subtarget.getGRLenVT(); 1901 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 1902 SDValue Tmp; 1903 switch (VT.getSizeInBits()) { 1904 default: 1905 llvm_unreachable("Unexpected operand width"); 1906 case 8: 1907 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc); 1908 break; 1909 case 32: 1910 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc); 1911 break; 1912 } 1913 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 1914 break; 1915 } 1916 case ISD::CTLZ: 1917 case ISD::CTTZ: { 1918 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1919 "Unexpected custom legalisation"); 1920 Results.push_back(customLegalizeToWOp(N, DAG, 1)); 1921 break; 1922 } 1923 case ISD::INTRINSIC_W_CHAIN: { 1924 SDValue Chain = N->getOperand(0); 1925 SDValue Op2 = N->getOperand(2); 1926 MVT GRLenVT = Subtarget.getGRLenVT(); 1927 const StringRef ErrorMsgOOR = "argument out of range"; 1928 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 1929 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 1930 1931 switch (N->getConstantOperandVal(1)) { 1932 default: 1933 llvm_unreachable("Unexpected Intrinsic."); 1934 case Intrinsic::loongarch_movfcsr2gr: { 1935 if (!Subtarget.hasBasicF()) { 1936 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); 1937 return; 1938 } 1939 unsigned Imm = Op2->getAsZExtVal(); 1940 if (!isUInt<2>(Imm)) { 1941 
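        // Only fcsr0-fcsr3 exist, so FCSR indices wider than 2 bits are
        // rejected here.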
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 1942 return; 1943 } 1944 SDValue MOVFCSR2GRResults = DAG.getNode( 1945 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other}, 1946 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 1947 Results.push_back( 1948 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0))); 1949 Results.push_back(MOVFCSR2GRResults.getValue(1)); 1950 break; 1951 } 1952 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ 1953 case Intrinsic::loongarch_##NAME: { \ 1954 SDValue NODE = DAG.getNode( \ 1955 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 1956 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 1957 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 1958 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 1959 Results.push_back(NODE.getValue(1)); \ 1960 break; \ 1961 } 1962 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) 1963 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) 1964 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) 1965 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) 1966 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) 1967 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) 1968 #undef CRC_CASE_EXT_BINARYOP 1969 1970 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ 1971 case Intrinsic::loongarch_##NAME: { \ 1972 SDValue NODE = DAG.getNode( \ 1973 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 1974 {Chain, Op2, \ 1975 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 1976 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 1977 Results.push_back(NODE.getValue(1)); \ 1978 break; \ 1979 } 1980 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) 1981 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) 1982 #undef CRC_CASE_EXT_UNARYOP 1983 #define CSR_CASE(ID) \ 1984 case Intrinsic::loongarch_##ID: { \ 1985 if (!Subtarget.is64Bit()) \ 1986 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ 1987 break; \ 1988 } 1989 CSR_CASE(csrrd_d); 1990 CSR_CASE(csrwr_d); 1991 CSR_CASE(csrxchg_d); 1992 CSR_CASE(iocsrrd_d); 1993 #undef CSR_CASE 1994 case Intrinsic::loongarch_csrrd_w: { 1995 unsigned Imm = Op2->getAsZExtVal(); 1996 if (!isUInt<14>(Imm)) { 1997 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 1998 return; 1999 } 2000 SDValue CSRRDResults = 2001 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 2002 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2003 Results.push_back( 2004 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0))); 2005 Results.push_back(CSRRDResults.getValue(1)); 2006 break; 2007 } 2008 case Intrinsic::loongarch_csrwr_w: { 2009 unsigned Imm = N->getConstantOperandVal(3); 2010 if (!isUInt<14>(Imm)) { 2011 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 2012 return; 2013 } 2014 SDValue CSRWRResults = 2015 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 2016 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 2017 DAG.getConstant(Imm, DL, GRLenVT)}); 2018 Results.push_back( 2019 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0))); 2020 Results.push_back(CSRWRResults.getValue(1)); 2021 break; 2022 } 2023 case Intrinsic::loongarch_csrxchg_w: { 2024 unsigned Imm = N->getConstantOperandVal(4); 2025 if (!isUInt<14>(Imm)) { 2026 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 2027 return; 2028 } 2029 SDValue CSRXCHGResults = DAG.getNode( 2030 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 2031 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 2032 
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), 2033 DAG.getConstant(Imm, DL, GRLenVT)}); 2034 Results.push_back( 2035 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0))); 2036 Results.push_back(CSRXCHGResults.getValue(1)); 2037 break; 2038 } 2039 #define IOCSRRD_CASE(NAME, NODE) \ 2040 case Intrinsic::loongarch_##NAME: { \ 2041 SDValue IOCSRRDResults = \ 2042 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 2043 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ 2044 Results.push_back( \ 2045 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ 2046 Results.push_back(IOCSRRDResults.getValue(1)); \ 2047 break; \ 2048 } 2049 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 2050 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 2051 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 2052 #undef IOCSRRD_CASE 2053 case Intrinsic::loongarch_cpucfg: { 2054 SDValue CPUCFGResults = 2055 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 2056 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); 2057 Results.push_back( 2058 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0))); 2059 Results.push_back(CPUCFGResults.getValue(1)); 2060 break; 2061 } 2062 case Intrinsic::loongarch_lddir_d: { 2063 if (!Subtarget.is64Bit()) { 2064 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); 2065 return; 2066 } 2067 break; 2068 } 2069 } 2070 break; 2071 } 2072 case ISD::READ_REGISTER: { 2073 if (Subtarget.is64Bit()) 2074 DAG.getContext()->emitError( 2075 "On LA64, only 64-bit registers can be read."); 2076 else 2077 DAG.getContext()->emitError( 2078 "On LA32, only 32-bit registers can be read."); 2079 Results.push_back(DAG.getUNDEF(VT)); 2080 Results.push_back(N->getOperand(0)); 2081 break; 2082 } 2083 case ISD::INTRINSIC_WO_CHAIN: { 2084 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); 2085 break; 2086 } 2087 } 2088 } 2089 2090 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 2091 TargetLowering::DAGCombinerInfo &DCI, 2092 const LoongArchSubtarget &Subtarget) { 2093 if (DCI.isBeforeLegalizeOps()) 2094 return SDValue(); 2095 2096 SDValue FirstOperand = N->getOperand(0); 2097 SDValue SecondOperand = N->getOperand(1); 2098 unsigned FirstOperandOpc = FirstOperand.getOpcode(); 2099 EVT ValTy = N->getValueType(0); 2100 SDLoc DL(N); 2101 uint64_t lsb, msb; 2102 unsigned SMIdx, SMLen; 2103 ConstantSDNode *CN; 2104 SDValue NewOperand; 2105 MVT GRLenVT = Subtarget.getGRLenVT(); 2106 2107 // Op's second operand must be a shifted mask. 2108 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) || 2109 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) 2110 return SDValue(); 2111 2112 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { 2113 // Pattern match BSTRPICK. 2114 // $dst = and ((sra or srl) $src , lsb), (2**len - 1) 2115 // => BSTRPICK $dst, $src, msb, lsb 2116 // where msb = lsb + len - 1 2117 2118 // The second operand of the shift must be an immediate. 2119 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1)))) 2120 return SDValue(); 2121 2122 lsb = CN->getZExtValue(); 2123 2124 // Return if the shifted mask does not start at bit 0 or the sum of its 2125 // length and lsb exceeds the word's size. 2126 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) 2127 return SDValue(); 2128 2129 NewOperand = FirstOperand.getOperand(0); 2130 } else { 2131 // Pattern match BSTRPICK. 
    // $dst = and $src, (2**len - 1), if len > 12
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds the word's size.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
                            DAG.getConstant(msb, DL, GRLenVT),
                            DAG.getConstant(lsb, DL, GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //  bstrpick $Rd, $Rs, msb, lsb
  //  slli     $Rd, $Rd, lsb
  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                     DAG.getConstant(lsb, DL, GRLenVT));
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
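  // Illustrative example: srl (and $src, 0xff0), 4 yields MaskIdx = 4 and
  // MaskLen = 8, so Shamt = 4 passes the range check below and the node is
  // rewritten to BSTRPICK $dst, $src, 11, 4.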
2204 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) 2205 return SDValue(); 2206 2207 Shamt = CN->getZExtValue(); 2208 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) 2209 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, 2210 FirstOperand->getOperand(0), 2211 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2212 DAG.getConstant(Shamt, DL, GRLenVT)); 2213 2214 return SDValue(); 2215 } 2216 2217 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 2218 TargetLowering::DAGCombinerInfo &DCI, 2219 const LoongArchSubtarget &Subtarget) { 2220 MVT GRLenVT = Subtarget.getGRLenVT(); 2221 EVT ValTy = N->getValueType(0); 2222 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2223 ConstantSDNode *CN0, *CN1; 2224 SDLoc DL(N); 2225 unsigned ValBits = ValTy.getSizeInBits(); 2226 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; 2227 unsigned Shamt; 2228 bool SwapAndRetried = false; 2229 2230 if (DCI.isBeforeLegalizeOps()) 2231 return SDValue(); 2232 2233 if (ValBits != 32 && ValBits != 64) 2234 return SDValue(); 2235 2236 Retry: 2237 // 1st pattern to match BSTRINS: 2238 // R = or (and X, mask0), (and (shl Y, lsb), mask1) 2239 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 2240 // => 2241 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 2242 if (N0.getOpcode() == ISD::AND && 2243 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2244 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2245 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && 2246 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2247 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 2248 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && 2249 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2250 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 2251 (MaskIdx0 + MaskLen0 <= ValBits)) { 2252 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); 2253 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2254 N1.getOperand(0).getOperand(0), 2255 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 2256 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2257 } 2258 2259 // 2nd pattern to match BSTRINS: 2260 // R = or (and X, mask0), (shl (and Y, mask1), lsb) 2261 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) 2262 // => 2263 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 2264 if (N0.getOpcode() == ISD::AND && 2265 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2266 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2267 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 2268 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2269 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 2270 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2271 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 2272 MaskLen0 == MaskLen1 && MaskIdx1 == 0 && 2273 (MaskIdx0 + MaskLen0 <= ValBits)) { 2274 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); 2275 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2276 N1.getOperand(0).getOperand(0), 2277 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 2278 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2279 } 2280 2281 // 3rd pattern to match BSTRINS: 2282 // R = or (and X, mask0), (and Y, mask1) 2283 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 2284 // => 2285 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb 2286 // where msb = lsb + size - 1 2287 if 
(N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 2288 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2289 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2290 (MaskIdx0 + MaskLen0 <= 64) && 2291 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && 2292 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 2293 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); 2294 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2295 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, 2296 DAG.getConstant(MaskIdx0, DL, GRLenVT)), 2297 DAG.getConstant(ValBits == 32 2298 ? (MaskIdx0 + (MaskLen0 & 31) - 1) 2299 : (MaskIdx0 + MaskLen0 - 1), 2300 DL, GRLenVT), 2301 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2302 } 2303 2304 // 4th pattern to match BSTRINS: 2305 // R = or (and X, mask), (shl Y, shamt) 2306 // where mask = (2**shamt - 1) 2307 // => 2308 // R = BSTRINS X, Y, ValBits - 1, shamt 2309 // where ValBits = 32 or 64 2310 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && 2311 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2312 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && 2313 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2314 (Shamt = CN1->getZExtValue()) == MaskLen0 && 2315 (MaskIdx0 + MaskLen0 <= ValBits)) { 2316 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); 2317 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2318 N1.getOperand(0), 2319 DAG.getConstant((ValBits - 1), DL, GRLenVT), 2320 DAG.getConstant(Shamt, DL, GRLenVT)); 2321 } 2322 2323 // 5th pattern to match BSTRINS: 2324 // R = or (and X, mask), const 2325 // where ~mask = (2**size - 1) << lsb, mask & const = 0 2326 // => 2327 // R = BSTRINS X, (const >> lsb), msb, lsb 2328 // where msb = lsb + size - 1 2329 if (N0.getOpcode() == ISD::AND && 2330 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2331 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2332 (CN1 = dyn_cast<ConstantSDNode>(N1)) && 2333 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 2334 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); 2335 return DAG.getNode( 2336 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2337 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), 2338 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 2339 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2340 } 2341 2342 // 6th pattern. 2343 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten 2344 // by the incoming bits are known to be zero. 2345 // => 2346 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt 2347 // 2348 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th 2349 // pattern is more common than the 1st. So we put the 1st before the 6th in 2350 // order to match as many nodes as possible. 
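  // Illustrative example for the 6th pattern: a = b | ((c & 0xff) << 8), with
  // bits 15..8 of b known to be zero, becomes BSTRINS b, c, 15, 8.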
2351 ConstantSDNode *CNMask, *CNShamt; 2352 unsigned MaskIdx, MaskLen; 2353 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 2354 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2355 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 2356 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2357 CNShamt->getZExtValue() + MaskLen <= ValBits) { 2358 Shamt = CNShamt->getZExtValue(); 2359 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); 2360 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2361 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); 2362 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2363 N1.getOperand(0).getOperand(0), 2364 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), 2365 DAG.getConstant(Shamt, DL, GRLenVT)); 2366 } 2367 } 2368 2369 // 7th pattern. 2370 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be 2371 // overwritten by the incoming bits are known to be zero. 2372 // => 2373 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx 2374 // 2375 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd 2376 // before the 7th in order to match as many nodes as possible. 2377 if (N1.getOpcode() == ISD::AND && 2378 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2379 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 2380 N1.getOperand(0).getOpcode() == ISD::SHL && 2381 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2382 CNShamt->getZExtValue() == MaskIdx) { 2383 APInt ShMask(ValBits, CNMask->getZExtValue()); 2384 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2385 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); 2386 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2387 N1.getOperand(0).getOperand(0), 2388 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2389 DAG.getConstant(MaskIdx, DL, GRLenVT)); 2390 } 2391 } 2392 2393 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. 2394 if (!SwapAndRetried) { 2395 std::swap(N0, N1); 2396 SwapAndRetried = true; 2397 goto Retry; 2398 } 2399 2400 SwapAndRetried = false; 2401 Retry2: 2402 // 8th pattern. 2403 // a = b | (c & shifted_mask), where all positions in b to be overwritten by 2404 // the incoming bits are known to be zero. 2405 // => 2406 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx 2407 // 2408 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So 2409 // we put it here in order to match as many nodes as possible or generate less 2410 // instructions. 2411 if (N1.getOpcode() == ISD::AND && 2412 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2413 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { 2414 APInt ShMask(ValBits, CNMask->getZExtValue()); 2415 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2416 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); 2417 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2418 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), 2419 N1->getOperand(0), 2420 DAG.getConstant(MaskIdx, DL, GRLenVT)), 2421 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2422 DAG.getConstant(MaskIdx, DL, GRLenVT)); 2423 } 2424 } 2425 // Swap N0/N1 and retry. 
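  // (For instance, the 8th pattern above turns a = b | (c & 0xff00), when bits
  // 15..8 of b are known to be zero, into BSTRINS b, (c >> 8), 15, 8.)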
2426 if (!SwapAndRetried) { 2427 std::swap(N0, N1); 2428 SwapAndRetried = true; 2429 goto Retry2; 2430 } 2431 2432 return SDValue(); 2433 } 2434 2435 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 2436 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, 2437 TargetLowering::DAGCombinerInfo &DCI, 2438 const LoongArchSubtarget &Subtarget) { 2439 if (DCI.isBeforeLegalizeOps()) 2440 return SDValue(); 2441 2442 SDValue Src = N->getOperand(0); 2443 if (Src.getOpcode() != LoongArchISD::REVB_2W) 2444 return SDValue(); 2445 2446 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), 2447 Src.getOperand(0)); 2448 } 2449 2450 template <unsigned N> 2451 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, 2452 SelectionDAG &DAG, 2453 const LoongArchSubtarget &Subtarget, 2454 bool IsSigned = false) { 2455 SDLoc DL(Node); 2456 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 2457 // Check the ImmArg. 2458 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 2459 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 2460 DAG.getContext()->emitError(Node->getOperationName(0) + 2461 ": argument out of range."); 2462 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); 2463 } 2464 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); 2465 } 2466 2467 template <unsigned N> 2468 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, 2469 SelectionDAG &DAG, bool IsSigned = false) { 2470 SDLoc DL(Node); 2471 EVT ResTy = Node->getValueType(0); 2472 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 2473 2474 // Check the ImmArg. 2475 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 2476 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 2477 DAG.getContext()->emitError(Node->getOperationName(0) + 2478 ": argument out of range."); 2479 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2480 } 2481 return DAG.getConstant( 2482 APInt(ResTy.getScalarType().getSizeInBits(), 2483 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 2484 DL, ResTy); 2485 } 2486 2487 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { 2488 SDLoc DL(Node); 2489 EVT ResTy = Node->getValueType(0); 2490 SDValue Vec = Node->getOperand(2); 2491 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); 2492 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); 2493 } 2494 2495 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { 2496 SDLoc DL(Node); 2497 EVT ResTy = Node->getValueType(0); 2498 SDValue One = DAG.getConstant(1, DL, ResTy); 2499 SDValue Bit = 2500 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); 2501 2502 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), 2503 DAG.getNOT(DL, Bit, ResTy)); 2504 } 2505 2506 template <unsigned N> 2507 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { 2508 SDLoc DL(Node); 2509 EVT ResTy = Node->getValueType(0); 2510 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2511 // Check the unsigned ImmArg. 
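  // e.g. the byte-element variant is instantiated with N = 3, so only bit
  // positions 0-7 within each lane are valid; anything larger is diagnosed
  // below.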
2512 if (!isUInt<N>(CImm->getZExtValue())) { 2513 DAG.getContext()->emitError(Node->getOperationName(0) + 2514 ": argument out of range."); 2515 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2516 } 2517 2518 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2519 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); 2520 2521 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); 2522 } 2523 2524 template <unsigned N> 2525 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { 2526 SDLoc DL(Node); 2527 EVT ResTy = Node->getValueType(0); 2528 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2529 // Check the unsigned ImmArg. 2530 if (!isUInt<N>(CImm->getZExtValue())) { 2531 DAG.getContext()->emitError(Node->getOperationName(0) + 2532 ": argument out of range."); 2533 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2534 } 2535 2536 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2537 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 2538 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); 2539 } 2540 2541 template <unsigned N> 2542 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { 2543 SDLoc DL(Node); 2544 EVT ResTy = Node->getValueType(0); 2545 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2546 // Check the unsigned ImmArg. 2547 if (!isUInt<N>(CImm->getZExtValue())) { 2548 DAG.getContext()->emitError(Node->getOperationName(0) + 2549 ": argument out of range."); 2550 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2551 } 2552 2553 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2554 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 2555 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); 2556 } 2557 2558 static SDValue 2559 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, 2560 TargetLowering::DAGCombinerInfo &DCI, 2561 const LoongArchSubtarget &Subtarget) { 2562 SDLoc DL(N); 2563 switch (N->getConstantOperandVal(0)) { 2564 default: 2565 break; 2566 case Intrinsic::loongarch_lsx_vadd_b: 2567 case Intrinsic::loongarch_lsx_vadd_h: 2568 case Intrinsic::loongarch_lsx_vadd_w: 2569 case Intrinsic::loongarch_lsx_vadd_d: 2570 case Intrinsic::loongarch_lasx_xvadd_b: 2571 case Intrinsic::loongarch_lasx_xvadd_h: 2572 case Intrinsic::loongarch_lasx_xvadd_w: 2573 case Intrinsic::loongarch_lasx_xvadd_d: 2574 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 2575 N->getOperand(2)); 2576 case Intrinsic::loongarch_lsx_vaddi_bu: 2577 case Intrinsic::loongarch_lsx_vaddi_hu: 2578 case Intrinsic::loongarch_lsx_vaddi_wu: 2579 case Intrinsic::loongarch_lsx_vaddi_du: 2580 case Intrinsic::loongarch_lasx_xvaddi_bu: 2581 case Intrinsic::loongarch_lasx_xvaddi_hu: 2582 case Intrinsic::loongarch_lasx_xvaddi_wu: 2583 case Intrinsic::loongarch_lasx_xvaddi_du: 2584 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 2585 lowerVectorSplatImm<5>(N, 2, DAG)); 2586 case Intrinsic::loongarch_lsx_vsub_b: 2587 case Intrinsic::loongarch_lsx_vsub_h: 2588 case Intrinsic::loongarch_lsx_vsub_w: 2589 case Intrinsic::loongarch_lsx_vsub_d: 2590 case Intrinsic::loongarch_lasx_xvsub_b: 2591 case Intrinsic::loongarch_lasx_xvsub_h: 2592 case Intrinsic::loongarch_lasx_xvsub_w: 2593 case Intrinsic::loongarch_lasx_xvsub_d: 2594 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 2595 N->getOperand(2)); 2596 case Intrinsic::loongarch_lsx_vsubi_bu: 2597 case Intrinsic::loongarch_lsx_vsubi_hu: 2598 case 
Intrinsic::loongarch_lsx_vsubi_wu: 2599 case Intrinsic::loongarch_lsx_vsubi_du: 2600 case Intrinsic::loongarch_lasx_xvsubi_bu: 2601 case Intrinsic::loongarch_lasx_xvsubi_hu: 2602 case Intrinsic::loongarch_lasx_xvsubi_wu: 2603 case Intrinsic::loongarch_lasx_xvsubi_du: 2604 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 2605 lowerVectorSplatImm<5>(N, 2, DAG)); 2606 case Intrinsic::loongarch_lsx_vneg_b: 2607 case Intrinsic::loongarch_lsx_vneg_h: 2608 case Intrinsic::loongarch_lsx_vneg_w: 2609 case Intrinsic::loongarch_lsx_vneg_d: 2610 case Intrinsic::loongarch_lasx_xvneg_b: 2611 case Intrinsic::loongarch_lasx_xvneg_h: 2612 case Intrinsic::loongarch_lasx_xvneg_w: 2613 case Intrinsic::loongarch_lasx_xvneg_d: 2614 return DAG.getNode( 2615 ISD::SUB, DL, N->getValueType(0), 2616 DAG.getConstant( 2617 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, 2618 /*isSigned=*/true), 2619 SDLoc(N), N->getValueType(0)), 2620 N->getOperand(1)); 2621 case Intrinsic::loongarch_lsx_vmax_b: 2622 case Intrinsic::loongarch_lsx_vmax_h: 2623 case Intrinsic::loongarch_lsx_vmax_w: 2624 case Intrinsic::loongarch_lsx_vmax_d: 2625 case Intrinsic::loongarch_lasx_xvmax_b: 2626 case Intrinsic::loongarch_lasx_xvmax_h: 2627 case Intrinsic::loongarch_lasx_xvmax_w: 2628 case Intrinsic::loongarch_lasx_xvmax_d: 2629 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), 2630 N->getOperand(2)); 2631 case Intrinsic::loongarch_lsx_vmax_bu: 2632 case Intrinsic::loongarch_lsx_vmax_hu: 2633 case Intrinsic::loongarch_lsx_vmax_wu: 2634 case Intrinsic::loongarch_lsx_vmax_du: 2635 case Intrinsic::loongarch_lasx_xvmax_bu: 2636 case Intrinsic::loongarch_lasx_xvmax_hu: 2637 case Intrinsic::loongarch_lasx_xvmax_wu: 2638 case Intrinsic::loongarch_lasx_xvmax_du: 2639 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 2640 N->getOperand(2)); 2641 case Intrinsic::loongarch_lsx_vmaxi_b: 2642 case Intrinsic::loongarch_lsx_vmaxi_h: 2643 case Intrinsic::loongarch_lsx_vmaxi_w: 2644 case Intrinsic::loongarch_lsx_vmaxi_d: 2645 case Intrinsic::loongarch_lasx_xvmaxi_b: 2646 case Intrinsic::loongarch_lasx_xvmaxi_h: 2647 case Intrinsic::loongarch_lasx_xvmaxi_w: 2648 case Intrinsic::loongarch_lasx_xvmaxi_d: 2649 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), 2650 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 2651 case Intrinsic::loongarch_lsx_vmaxi_bu: 2652 case Intrinsic::loongarch_lsx_vmaxi_hu: 2653 case Intrinsic::loongarch_lsx_vmaxi_wu: 2654 case Intrinsic::loongarch_lsx_vmaxi_du: 2655 case Intrinsic::loongarch_lasx_xvmaxi_bu: 2656 case Intrinsic::loongarch_lasx_xvmaxi_hu: 2657 case Intrinsic::loongarch_lasx_xvmaxi_wu: 2658 case Intrinsic::loongarch_lasx_xvmaxi_du: 2659 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 2660 lowerVectorSplatImm<5>(N, 2, DAG)); 2661 case Intrinsic::loongarch_lsx_vmin_b: 2662 case Intrinsic::loongarch_lsx_vmin_h: 2663 case Intrinsic::loongarch_lsx_vmin_w: 2664 case Intrinsic::loongarch_lsx_vmin_d: 2665 case Intrinsic::loongarch_lasx_xvmin_b: 2666 case Intrinsic::loongarch_lasx_xvmin_h: 2667 case Intrinsic::loongarch_lasx_xvmin_w: 2668 case Intrinsic::loongarch_lasx_xvmin_d: 2669 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 2670 N->getOperand(2)); 2671 case Intrinsic::loongarch_lsx_vmin_bu: 2672 case Intrinsic::loongarch_lsx_vmin_hu: 2673 case Intrinsic::loongarch_lsx_vmin_wu: 2674 case Intrinsic::loongarch_lsx_vmin_du: 2675 case Intrinsic::loongarch_lasx_xvmin_bu: 2676 
case Intrinsic::loongarch_lasx_xvmin_hu: 2677 case Intrinsic::loongarch_lasx_xvmin_wu: 2678 case Intrinsic::loongarch_lasx_xvmin_du: 2679 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 2680 N->getOperand(2)); 2681 case Intrinsic::loongarch_lsx_vmini_b: 2682 case Intrinsic::loongarch_lsx_vmini_h: 2683 case Intrinsic::loongarch_lsx_vmini_w: 2684 case Intrinsic::loongarch_lsx_vmini_d: 2685 case Intrinsic::loongarch_lasx_xvmini_b: 2686 case Intrinsic::loongarch_lasx_xvmini_h: 2687 case Intrinsic::loongarch_lasx_xvmini_w: 2688 case Intrinsic::loongarch_lasx_xvmini_d: 2689 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 2690 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 2691 case Intrinsic::loongarch_lsx_vmini_bu: 2692 case Intrinsic::loongarch_lsx_vmini_hu: 2693 case Intrinsic::loongarch_lsx_vmini_wu: 2694 case Intrinsic::loongarch_lsx_vmini_du: 2695 case Intrinsic::loongarch_lasx_xvmini_bu: 2696 case Intrinsic::loongarch_lasx_xvmini_hu: 2697 case Intrinsic::loongarch_lasx_xvmini_wu: 2698 case Intrinsic::loongarch_lasx_xvmini_du: 2699 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 2700 lowerVectorSplatImm<5>(N, 2, DAG)); 2701 case Intrinsic::loongarch_lsx_vmul_b: 2702 case Intrinsic::loongarch_lsx_vmul_h: 2703 case Intrinsic::loongarch_lsx_vmul_w: 2704 case Intrinsic::loongarch_lsx_vmul_d: 2705 case Intrinsic::loongarch_lasx_xvmul_b: 2706 case Intrinsic::loongarch_lasx_xvmul_h: 2707 case Intrinsic::loongarch_lasx_xvmul_w: 2708 case Intrinsic::loongarch_lasx_xvmul_d: 2709 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), 2710 N->getOperand(2)); 2711 case Intrinsic::loongarch_lsx_vmadd_b: 2712 case Intrinsic::loongarch_lsx_vmadd_h: 2713 case Intrinsic::loongarch_lsx_vmadd_w: 2714 case Intrinsic::loongarch_lsx_vmadd_d: 2715 case Intrinsic::loongarch_lasx_xvmadd_b: 2716 case Intrinsic::loongarch_lasx_xvmadd_h: 2717 case Intrinsic::loongarch_lasx_xvmadd_w: 2718 case Intrinsic::loongarch_lasx_xvmadd_d: { 2719 EVT ResTy = N->getValueType(0); 2720 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), 2721 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 2722 N->getOperand(3))); 2723 } 2724 case Intrinsic::loongarch_lsx_vmsub_b: 2725 case Intrinsic::loongarch_lsx_vmsub_h: 2726 case Intrinsic::loongarch_lsx_vmsub_w: 2727 case Intrinsic::loongarch_lsx_vmsub_d: 2728 case Intrinsic::loongarch_lasx_xvmsub_b: 2729 case Intrinsic::loongarch_lasx_xvmsub_h: 2730 case Intrinsic::loongarch_lasx_xvmsub_w: 2731 case Intrinsic::loongarch_lasx_xvmsub_d: { 2732 EVT ResTy = N->getValueType(0); 2733 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), 2734 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 2735 N->getOperand(3))); 2736 } 2737 case Intrinsic::loongarch_lsx_vdiv_b: 2738 case Intrinsic::loongarch_lsx_vdiv_h: 2739 case Intrinsic::loongarch_lsx_vdiv_w: 2740 case Intrinsic::loongarch_lsx_vdiv_d: 2741 case Intrinsic::loongarch_lasx_xvdiv_b: 2742 case Intrinsic::loongarch_lasx_xvdiv_h: 2743 case Intrinsic::loongarch_lasx_xvdiv_w: 2744 case Intrinsic::loongarch_lasx_xvdiv_d: 2745 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), 2746 N->getOperand(2)); 2747 case Intrinsic::loongarch_lsx_vdiv_bu: 2748 case Intrinsic::loongarch_lsx_vdiv_hu: 2749 case Intrinsic::loongarch_lsx_vdiv_wu: 2750 case Intrinsic::loongarch_lsx_vdiv_du: 2751 case Intrinsic::loongarch_lasx_xvdiv_bu: 2752 case Intrinsic::loongarch_lasx_xvdiv_hu: 2753 case 
Intrinsic::loongarch_lasx_xvdiv_wu: 2754 case Intrinsic::loongarch_lasx_xvdiv_du: 2755 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), 2756 N->getOperand(2)); 2757 case Intrinsic::loongarch_lsx_vmod_b: 2758 case Intrinsic::loongarch_lsx_vmod_h: 2759 case Intrinsic::loongarch_lsx_vmod_w: 2760 case Intrinsic::loongarch_lsx_vmod_d: 2761 case Intrinsic::loongarch_lasx_xvmod_b: 2762 case Intrinsic::loongarch_lasx_xvmod_h: 2763 case Intrinsic::loongarch_lasx_xvmod_w: 2764 case Intrinsic::loongarch_lasx_xvmod_d: 2765 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), 2766 N->getOperand(2)); 2767 case Intrinsic::loongarch_lsx_vmod_bu: 2768 case Intrinsic::loongarch_lsx_vmod_hu: 2769 case Intrinsic::loongarch_lsx_vmod_wu: 2770 case Intrinsic::loongarch_lsx_vmod_du: 2771 case Intrinsic::loongarch_lasx_xvmod_bu: 2772 case Intrinsic::loongarch_lasx_xvmod_hu: 2773 case Intrinsic::loongarch_lasx_xvmod_wu: 2774 case Intrinsic::loongarch_lasx_xvmod_du: 2775 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), 2776 N->getOperand(2)); 2777 case Intrinsic::loongarch_lsx_vand_v: 2778 case Intrinsic::loongarch_lasx_xvand_v: 2779 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 2780 N->getOperand(2)); 2781 case Intrinsic::loongarch_lsx_vor_v: 2782 case Intrinsic::loongarch_lasx_xvor_v: 2783 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2784 N->getOperand(2)); 2785 case Intrinsic::loongarch_lsx_vxor_v: 2786 case Intrinsic::loongarch_lasx_xvxor_v: 2787 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 2788 N->getOperand(2)); 2789 case Intrinsic::loongarch_lsx_vnor_v: 2790 case Intrinsic::loongarch_lasx_xvnor_v: { 2791 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2792 N->getOperand(2)); 2793 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2794 } 2795 case Intrinsic::loongarch_lsx_vandi_b: 2796 case Intrinsic::loongarch_lasx_xvandi_b: 2797 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 2798 lowerVectorSplatImm<8>(N, 2, DAG)); 2799 case Intrinsic::loongarch_lsx_vori_b: 2800 case Intrinsic::loongarch_lasx_xvori_b: 2801 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2802 lowerVectorSplatImm<8>(N, 2, DAG)); 2803 case Intrinsic::loongarch_lsx_vxori_b: 2804 case Intrinsic::loongarch_lasx_xvxori_b: 2805 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 2806 lowerVectorSplatImm<8>(N, 2, DAG)); 2807 case Intrinsic::loongarch_lsx_vsll_b: 2808 case Intrinsic::loongarch_lsx_vsll_h: 2809 case Intrinsic::loongarch_lsx_vsll_w: 2810 case Intrinsic::loongarch_lsx_vsll_d: 2811 case Intrinsic::loongarch_lasx_xvsll_b: 2812 case Intrinsic::loongarch_lasx_xvsll_h: 2813 case Intrinsic::loongarch_lasx_xvsll_w: 2814 case Intrinsic::loongarch_lasx_xvsll_d: 2815 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2816 truncateVecElts(N, DAG)); 2817 case Intrinsic::loongarch_lsx_vslli_b: 2818 case Intrinsic::loongarch_lasx_xvslli_b: 2819 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2820 lowerVectorSplatImm<3>(N, 2, DAG)); 2821 case Intrinsic::loongarch_lsx_vslli_h: 2822 case Intrinsic::loongarch_lasx_xvslli_h: 2823 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2824 lowerVectorSplatImm<4>(N, 2, DAG)); 2825 case Intrinsic::loongarch_lsx_vslli_w: 2826 case Intrinsic::loongarch_lasx_xvslli_w: 2827 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), 
N->getOperand(1), 2828 lowerVectorSplatImm<5>(N, 2, DAG)); 2829 case Intrinsic::loongarch_lsx_vslli_d: 2830 case Intrinsic::loongarch_lasx_xvslli_d: 2831 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2832 lowerVectorSplatImm<6>(N, 2, DAG)); 2833 case Intrinsic::loongarch_lsx_vsrl_b: 2834 case Intrinsic::loongarch_lsx_vsrl_h: 2835 case Intrinsic::loongarch_lsx_vsrl_w: 2836 case Intrinsic::loongarch_lsx_vsrl_d: 2837 case Intrinsic::loongarch_lasx_xvsrl_b: 2838 case Intrinsic::loongarch_lasx_xvsrl_h: 2839 case Intrinsic::loongarch_lasx_xvsrl_w: 2840 case Intrinsic::loongarch_lasx_xvsrl_d: 2841 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2842 truncateVecElts(N, DAG)); 2843 case Intrinsic::loongarch_lsx_vsrli_b: 2844 case Intrinsic::loongarch_lasx_xvsrli_b: 2845 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2846 lowerVectorSplatImm<3>(N, 2, DAG)); 2847 case Intrinsic::loongarch_lsx_vsrli_h: 2848 case Intrinsic::loongarch_lasx_xvsrli_h: 2849 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2850 lowerVectorSplatImm<4>(N, 2, DAG)); 2851 case Intrinsic::loongarch_lsx_vsrli_w: 2852 case Intrinsic::loongarch_lasx_xvsrli_w: 2853 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2854 lowerVectorSplatImm<5>(N, 2, DAG)); 2855 case Intrinsic::loongarch_lsx_vsrli_d: 2856 case Intrinsic::loongarch_lasx_xvsrli_d: 2857 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2858 lowerVectorSplatImm<6>(N, 2, DAG)); 2859 case Intrinsic::loongarch_lsx_vsra_b: 2860 case Intrinsic::loongarch_lsx_vsra_h: 2861 case Intrinsic::loongarch_lsx_vsra_w: 2862 case Intrinsic::loongarch_lsx_vsra_d: 2863 case Intrinsic::loongarch_lasx_xvsra_b: 2864 case Intrinsic::loongarch_lasx_xvsra_h: 2865 case Intrinsic::loongarch_lasx_xvsra_w: 2866 case Intrinsic::loongarch_lasx_xvsra_d: 2867 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2868 truncateVecElts(N, DAG)); 2869 case Intrinsic::loongarch_lsx_vsrai_b: 2870 case Intrinsic::loongarch_lasx_xvsrai_b: 2871 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2872 lowerVectorSplatImm<3>(N, 2, DAG)); 2873 case Intrinsic::loongarch_lsx_vsrai_h: 2874 case Intrinsic::loongarch_lasx_xvsrai_h: 2875 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2876 lowerVectorSplatImm<4>(N, 2, DAG)); 2877 case Intrinsic::loongarch_lsx_vsrai_w: 2878 case Intrinsic::loongarch_lasx_xvsrai_w: 2879 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2880 lowerVectorSplatImm<5>(N, 2, DAG)); 2881 case Intrinsic::loongarch_lsx_vsrai_d: 2882 case Intrinsic::loongarch_lasx_xvsrai_d: 2883 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2884 lowerVectorSplatImm<6>(N, 2, DAG)); 2885 case Intrinsic::loongarch_lsx_vclz_b: 2886 case Intrinsic::loongarch_lsx_vclz_h: 2887 case Intrinsic::loongarch_lsx_vclz_w: 2888 case Intrinsic::loongarch_lsx_vclz_d: 2889 case Intrinsic::loongarch_lasx_xvclz_b: 2890 case Intrinsic::loongarch_lasx_xvclz_h: 2891 case Intrinsic::loongarch_lasx_xvclz_w: 2892 case Intrinsic::loongarch_lasx_xvclz_d: 2893 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); 2894 case Intrinsic::loongarch_lsx_vpcnt_b: 2895 case Intrinsic::loongarch_lsx_vpcnt_h: 2896 case Intrinsic::loongarch_lsx_vpcnt_w: 2897 case Intrinsic::loongarch_lsx_vpcnt_d: 2898 case Intrinsic::loongarch_lasx_xvpcnt_b: 2899 case Intrinsic::loongarch_lasx_xvpcnt_h: 2900 case 
Intrinsic::loongarch_lasx_xvpcnt_w: 2901 case Intrinsic::loongarch_lasx_xvpcnt_d: 2902 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); 2903 case Intrinsic::loongarch_lsx_vbitclr_b: 2904 case Intrinsic::loongarch_lsx_vbitclr_h: 2905 case Intrinsic::loongarch_lsx_vbitclr_w: 2906 case Intrinsic::loongarch_lsx_vbitclr_d: 2907 case Intrinsic::loongarch_lasx_xvbitclr_b: 2908 case Intrinsic::loongarch_lasx_xvbitclr_h: 2909 case Intrinsic::loongarch_lasx_xvbitclr_w: 2910 case Intrinsic::loongarch_lasx_xvbitclr_d: 2911 return lowerVectorBitClear(N, DAG); 2912 case Intrinsic::loongarch_lsx_vbitclri_b: 2913 case Intrinsic::loongarch_lasx_xvbitclri_b: 2914 return lowerVectorBitClearImm<3>(N, DAG); 2915 case Intrinsic::loongarch_lsx_vbitclri_h: 2916 case Intrinsic::loongarch_lasx_xvbitclri_h: 2917 return lowerVectorBitClearImm<4>(N, DAG); 2918 case Intrinsic::loongarch_lsx_vbitclri_w: 2919 case Intrinsic::loongarch_lasx_xvbitclri_w: 2920 return lowerVectorBitClearImm<5>(N, DAG); 2921 case Intrinsic::loongarch_lsx_vbitclri_d: 2922 case Intrinsic::loongarch_lasx_xvbitclri_d: 2923 return lowerVectorBitClearImm<6>(N, DAG); 2924 case Intrinsic::loongarch_lsx_vbitset_b: 2925 case Intrinsic::loongarch_lsx_vbitset_h: 2926 case Intrinsic::loongarch_lsx_vbitset_w: 2927 case Intrinsic::loongarch_lsx_vbitset_d: 2928 case Intrinsic::loongarch_lasx_xvbitset_b: 2929 case Intrinsic::loongarch_lasx_xvbitset_h: 2930 case Intrinsic::loongarch_lasx_xvbitset_w: 2931 case Intrinsic::loongarch_lasx_xvbitset_d: { 2932 EVT VecTy = N->getValueType(0); 2933 SDValue One = DAG.getConstant(1, DL, VecTy); 2934 return DAG.getNode( 2935 ISD::OR, DL, VecTy, N->getOperand(1), 2936 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 2937 } 2938 case Intrinsic::loongarch_lsx_vbitseti_b: 2939 case Intrinsic::loongarch_lasx_xvbitseti_b: 2940 return lowerVectorBitSetImm<3>(N, DAG); 2941 case Intrinsic::loongarch_lsx_vbitseti_h: 2942 case Intrinsic::loongarch_lasx_xvbitseti_h: 2943 return lowerVectorBitSetImm<4>(N, DAG); 2944 case Intrinsic::loongarch_lsx_vbitseti_w: 2945 case Intrinsic::loongarch_lasx_xvbitseti_w: 2946 return lowerVectorBitSetImm<5>(N, DAG); 2947 case Intrinsic::loongarch_lsx_vbitseti_d: 2948 case Intrinsic::loongarch_lasx_xvbitseti_d: 2949 return lowerVectorBitSetImm<6>(N, DAG); 2950 case Intrinsic::loongarch_lsx_vbitrev_b: 2951 case Intrinsic::loongarch_lsx_vbitrev_h: 2952 case Intrinsic::loongarch_lsx_vbitrev_w: 2953 case Intrinsic::loongarch_lsx_vbitrev_d: 2954 case Intrinsic::loongarch_lasx_xvbitrev_b: 2955 case Intrinsic::loongarch_lasx_xvbitrev_h: 2956 case Intrinsic::loongarch_lasx_xvbitrev_w: 2957 case Intrinsic::loongarch_lasx_xvbitrev_d: { 2958 EVT VecTy = N->getValueType(0); 2959 SDValue One = DAG.getConstant(1, DL, VecTy); 2960 return DAG.getNode( 2961 ISD::XOR, DL, VecTy, N->getOperand(1), 2962 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 2963 } 2964 case Intrinsic::loongarch_lsx_vbitrevi_b: 2965 case Intrinsic::loongarch_lasx_xvbitrevi_b: 2966 return lowerVectorBitRevImm<3>(N, DAG); 2967 case Intrinsic::loongarch_lsx_vbitrevi_h: 2968 case Intrinsic::loongarch_lasx_xvbitrevi_h: 2969 return lowerVectorBitRevImm<4>(N, DAG); 2970 case Intrinsic::loongarch_lsx_vbitrevi_w: 2971 case Intrinsic::loongarch_lasx_xvbitrevi_w: 2972 return lowerVectorBitRevImm<5>(N, DAG); 2973 case Intrinsic::loongarch_lsx_vbitrevi_d: 2974 case Intrinsic::loongarch_lasx_xvbitrevi_d: 2975 return lowerVectorBitRevImm<6>(N, DAG); 2976 case Intrinsic::loongarch_lsx_vfadd_s: 2977 case 
Intrinsic::loongarch_lsx_vfadd_d: 2978 case Intrinsic::loongarch_lasx_xvfadd_s: 2979 case Intrinsic::loongarch_lasx_xvfadd_d: 2980 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), 2981 N->getOperand(2)); 2982 case Intrinsic::loongarch_lsx_vfsub_s: 2983 case Intrinsic::loongarch_lsx_vfsub_d: 2984 case Intrinsic::loongarch_lasx_xvfsub_s: 2985 case Intrinsic::loongarch_lasx_xvfsub_d: 2986 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), 2987 N->getOperand(2)); 2988 case Intrinsic::loongarch_lsx_vfmul_s: 2989 case Intrinsic::loongarch_lsx_vfmul_d: 2990 case Intrinsic::loongarch_lasx_xvfmul_s: 2991 case Intrinsic::loongarch_lasx_xvfmul_d: 2992 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), 2993 N->getOperand(2)); 2994 case Intrinsic::loongarch_lsx_vfdiv_s: 2995 case Intrinsic::loongarch_lsx_vfdiv_d: 2996 case Intrinsic::loongarch_lasx_xvfdiv_s: 2997 case Intrinsic::loongarch_lasx_xvfdiv_d: 2998 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), 2999 N->getOperand(2)); 3000 case Intrinsic::loongarch_lsx_vfmadd_s: 3001 case Intrinsic::loongarch_lsx_vfmadd_d: 3002 case Intrinsic::loongarch_lasx_xvfmadd_s: 3003 case Intrinsic::loongarch_lasx_xvfmadd_d: 3004 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), 3005 N->getOperand(2), N->getOperand(3)); 3006 case Intrinsic::loongarch_lsx_vinsgr2vr_b: 3007 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3008 N->getOperand(1), N->getOperand(2), 3009 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); 3010 case Intrinsic::loongarch_lsx_vinsgr2vr_h: 3011 case Intrinsic::loongarch_lasx_xvinsgr2vr_w: 3012 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3013 N->getOperand(1), N->getOperand(2), 3014 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); 3015 case Intrinsic::loongarch_lsx_vinsgr2vr_w: 3016 case Intrinsic::loongarch_lasx_xvinsgr2vr_d: 3017 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3018 N->getOperand(1), N->getOperand(2), 3019 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); 3020 case Intrinsic::loongarch_lsx_vinsgr2vr_d: 3021 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3022 N->getOperand(1), N->getOperand(2), 3023 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); 3024 case Intrinsic::loongarch_lsx_vreplgr2vr_b: 3025 case Intrinsic::loongarch_lsx_vreplgr2vr_h: 3026 case Intrinsic::loongarch_lsx_vreplgr2vr_w: 3027 case Intrinsic::loongarch_lsx_vreplgr2vr_d: 3028 case Intrinsic::loongarch_lasx_xvreplgr2vr_b: 3029 case Intrinsic::loongarch_lasx_xvreplgr2vr_h: 3030 case Intrinsic::loongarch_lasx_xvreplgr2vr_w: 3031 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { 3032 EVT ResTy = N->getValueType(0); 3033 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1)); 3034 return DAG.getBuildVector(ResTy, DL, Ops); 3035 } 3036 case Intrinsic::loongarch_lsx_vreplve_b: 3037 case Intrinsic::loongarch_lsx_vreplve_h: 3038 case Intrinsic::loongarch_lsx_vreplve_w: 3039 case Intrinsic::loongarch_lsx_vreplve_d: 3040 case Intrinsic::loongarch_lasx_xvreplve_b: 3041 case Intrinsic::loongarch_lasx_xvreplve_h: 3042 case Intrinsic::loongarch_lasx_xvreplve_w: 3043 case Intrinsic::loongarch_lasx_xvreplve_d: 3044 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), 3045 N->getOperand(1), 3046 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), 3047 N->getOperand(2))); 3048 } 3049 return SDValue(); 3050 } 3051 3052 SDValue 
LoongArchTargetLowering::PerformDAGCombine(SDNode *N, 3053 DAGCombinerInfo &DCI) const { 3054 SelectionDAG &DAG = DCI.DAG; 3055 switch (N->getOpcode()) { 3056 default: 3057 break; 3058 case ISD::AND: 3059 return performANDCombine(N, DAG, DCI, Subtarget); 3060 case ISD::OR: 3061 return performORCombine(N, DAG, DCI, Subtarget); 3062 case ISD::SRL: 3063 return performSRLCombine(N, DAG, DCI, Subtarget); 3064 case LoongArchISD::BITREV_W: 3065 return performBITREV_WCombine(N, DAG, DCI, Subtarget); 3066 case ISD::INTRINSIC_WO_CHAIN: 3067 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); 3068 } 3069 return SDValue(); 3070 } 3071 3072 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, 3073 MachineBasicBlock *MBB) { 3074 if (!ZeroDivCheck) 3075 return MBB; 3076 3077 // Build instructions: 3078 // MBB: 3079 // div(or mod) $dst, $dividend, $divisor 3080 // bnez $divisor, SinkMBB 3081 // BreakMBB: 3082 // break 7 // BRK_DIVZERO 3083 // SinkMBB: 3084 // fallthrough 3085 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 3086 MachineFunction::iterator It = ++MBB->getIterator(); 3087 MachineFunction *MF = MBB->getParent(); 3088 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3089 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3090 MF->insert(It, BreakMBB); 3091 MF->insert(It, SinkMBB); 3092 3093 // Transfer the remainder of MBB and its successor edges to SinkMBB. 3094 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); 3095 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 3096 3097 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 3098 DebugLoc DL = MI.getDebugLoc(); 3099 MachineOperand &Divisor = MI.getOperand(2); 3100 Register DivisorReg = Divisor.getReg(); 3101 3102 // MBB: 3103 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) 3104 .addReg(DivisorReg, getKillRegState(Divisor.isKill())) 3105 .addMBB(SinkMBB); 3106 MBB->addSuccessor(BreakMBB); 3107 MBB->addSuccessor(SinkMBB); 3108 3109 // BreakMBB: 3110 // See linux header file arch/loongarch/include/uapi/asm/break.h for the 3111 // definition of BRK_DIVZERO. 3112 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); 3113 BreakMBB->addSuccessor(SinkMBB); 3114 3115 // Clear Divisor's kill flag. 
3116 Divisor.setIsKill(false); 3117 3118 return SinkMBB; 3119 } 3120 3121 static MachineBasicBlock * 3122 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, 3123 const LoongArchSubtarget &Subtarget) { 3124 unsigned CondOpc; 3125 switch (MI.getOpcode()) { 3126 default: 3127 llvm_unreachable("Unexpected opcode"); 3128 case LoongArch::PseudoVBZ: 3129 CondOpc = LoongArch::VSETEQZ_V; 3130 break; 3131 case LoongArch::PseudoVBZ_B: 3132 CondOpc = LoongArch::VSETANYEQZ_B; 3133 break; 3134 case LoongArch::PseudoVBZ_H: 3135 CondOpc = LoongArch::VSETANYEQZ_H; 3136 break; 3137 case LoongArch::PseudoVBZ_W: 3138 CondOpc = LoongArch::VSETANYEQZ_W; 3139 break; 3140 case LoongArch::PseudoVBZ_D: 3141 CondOpc = LoongArch::VSETANYEQZ_D; 3142 break; 3143 case LoongArch::PseudoVBNZ: 3144 CondOpc = LoongArch::VSETNEZ_V; 3145 break; 3146 case LoongArch::PseudoVBNZ_B: 3147 CondOpc = LoongArch::VSETALLNEZ_B; 3148 break; 3149 case LoongArch::PseudoVBNZ_H: 3150 CondOpc = LoongArch::VSETALLNEZ_H; 3151 break; 3152 case LoongArch::PseudoVBNZ_W: 3153 CondOpc = LoongArch::VSETALLNEZ_W; 3154 break; 3155 case LoongArch::PseudoVBNZ_D: 3156 CondOpc = LoongArch::VSETALLNEZ_D; 3157 break; 3158 case LoongArch::PseudoXVBZ: 3159 CondOpc = LoongArch::XVSETEQZ_V; 3160 break; 3161 case LoongArch::PseudoXVBZ_B: 3162 CondOpc = LoongArch::XVSETANYEQZ_B; 3163 break; 3164 case LoongArch::PseudoXVBZ_H: 3165 CondOpc = LoongArch::XVSETANYEQZ_H; 3166 break; 3167 case LoongArch::PseudoXVBZ_W: 3168 CondOpc = LoongArch::XVSETANYEQZ_W; 3169 break; 3170 case LoongArch::PseudoXVBZ_D: 3171 CondOpc = LoongArch::XVSETANYEQZ_D; 3172 break; 3173 case LoongArch::PseudoXVBNZ: 3174 CondOpc = LoongArch::XVSETNEZ_V; 3175 break; 3176 case LoongArch::PseudoXVBNZ_B: 3177 CondOpc = LoongArch::XVSETALLNEZ_B; 3178 break; 3179 case LoongArch::PseudoXVBNZ_H: 3180 CondOpc = LoongArch::XVSETALLNEZ_H; 3181 break; 3182 case LoongArch::PseudoXVBNZ_W: 3183 CondOpc = LoongArch::XVSETALLNEZ_W; 3184 break; 3185 case LoongArch::PseudoXVBNZ_D: 3186 CondOpc = LoongArch::XVSETALLNEZ_D; 3187 break; 3188 } 3189 3190 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3191 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3192 DebugLoc DL = MI.getDebugLoc(); 3193 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 3194 MachineFunction::iterator It = ++BB->getIterator(); 3195 3196 MachineFunction *F = BB->getParent(); 3197 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); 3198 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); 3199 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); 3200 3201 F->insert(It, FalseBB); 3202 F->insert(It, TrueBB); 3203 F->insert(It, SinkBB); 3204 3205 // Transfer the remainder of MBB and its successor edges to Sink. 3206 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); 3207 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 3208 3209 // Insert the real instruction to BB. 3210 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); 3211 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); 3212 3213 // Insert branch. 3214 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); 3215 BB->addSuccessor(FalseBB); 3216 BB->addSuccessor(TrueBB); 3217 3218 // FalseBB. 
3219 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 3220 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) 3221 .addReg(LoongArch::R0) 3222 .addImm(0); 3223 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); 3224 FalseBB->addSuccessor(SinkBB); 3225 3226 // TrueBB. 3227 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 3228 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) 3229 .addReg(LoongArch::R0) 3230 .addImm(1); 3231 TrueBB->addSuccessor(SinkBB); 3232 3233 // SinkBB: merge the results. 3234 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), 3235 MI.getOperand(0).getReg()) 3236 .addReg(RD1) 3237 .addMBB(FalseBB) 3238 .addReg(RD2) 3239 .addMBB(TrueBB); 3240 3241 // The pseudo instruction is gone now. 3242 MI.eraseFromParent(); 3243 return SinkBB; 3244 } 3245 3246 static MachineBasicBlock * 3247 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, 3248 const LoongArchSubtarget &Subtarget) { 3249 unsigned InsOp; 3250 unsigned HalfSize; 3251 switch (MI.getOpcode()) { 3252 default: 3253 llvm_unreachable("Unexpected opcode"); 3254 case LoongArch::PseudoXVINSGR2VR_B: 3255 HalfSize = 16; 3256 InsOp = LoongArch::VINSGR2VR_B; 3257 break; 3258 case LoongArch::PseudoXVINSGR2VR_H: 3259 HalfSize = 8; 3260 InsOp = LoongArch::VINSGR2VR_H; 3261 break; 3262 } 3263 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3264 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; 3265 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; 3266 DebugLoc DL = MI.getDebugLoc(); 3267 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 3268 // XDst = vector_insert XSrc, Elt, Idx 3269 Register XDst = MI.getOperand(0).getReg(); 3270 Register XSrc = MI.getOperand(1).getReg(); 3271 Register Elt = MI.getOperand(2).getReg(); 3272 unsigned Idx = MI.getOperand(3).getImm(); 3273 3274 Register ScratchReg1 = XSrc; 3275 if (Idx >= HalfSize) { 3276 ScratchReg1 = MRI.createVirtualRegister(RC); 3277 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) 3278 .addReg(XSrc) 3279 .addReg(XSrc) 3280 .addImm(1); 3281 } 3282 3283 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); 3284 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); 3285 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) 3286 .addReg(ScratchReg1, 0, LoongArch::sub_128); 3287 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) 3288 .addReg(ScratchSubReg1) 3289 .addReg(Elt) 3290 .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); 3291 3292 Register ScratchReg2 = XDst; 3293 if (Idx >= HalfSize) 3294 ScratchReg2 = MRI.createVirtualRegister(RC); 3295 3296 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) 3297 .addImm(0) 3298 .addReg(ScratchSubReg2) 3299 .addImm(LoongArch::sub_128); 3300 3301 if (Idx >= HalfSize) 3302 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) 3303 .addReg(XSrc) 3304 .addReg(ScratchReg2) 3305 .addImm(2); 3306 3307 MI.eraseFromParent(); 3308 return BB; 3309 } 3310 3311 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 3312 MachineInstr &MI, MachineBasicBlock *BB) const { 3313 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3314 DebugLoc DL = MI.getDebugLoc(); 3315 3316 switch (MI.getOpcode()) { 3317 default: 3318 llvm_unreachable("Unexpected instr type to insert"); 3319 case LoongArch::DIV_W: 3320 case LoongArch::DIV_WU: 3321 case LoongArch::MOD_W: 3322 case LoongArch::MOD_WU: 3323 case LoongArch::DIV_D: 3324 case LoongArch::DIV_DU: 3325 case LoongArch::MOD_D: 3326 case LoongArch::MOD_DU: 3327 return insertDivByZeroTrap(MI, BB); 3328 break; 3329 case LoongArch::WRFCSR: { 3330 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 3331 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 3332 .addReg(MI.getOperand(1).getReg()); 3333 MI.eraseFromParent(); 3334 return BB; 3335 } 3336 case LoongArch::RDFCSR: { 3337 MachineInstr *ReadFCSR = 3338 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 3339 MI.getOperand(0).getReg()) 3340 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 3341 ReadFCSR->getOperand(1).setIsUndef(); 3342 MI.eraseFromParent(); 3343 return BB; 3344 } 3345 case LoongArch::PseudoVBZ: 3346 case LoongArch::PseudoVBZ_B: 3347 case LoongArch::PseudoVBZ_H: 3348 case LoongArch::PseudoVBZ_W: 3349 case LoongArch::PseudoVBZ_D: 3350 case LoongArch::PseudoVBNZ: 3351 case LoongArch::PseudoVBNZ_B: 3352 case LoongArch::PseudoVBNZ_H: 3353 case LoongArch::PseudoVBNZ_W: 3354 case LoongArch::PseudoVBNZ_D: 3355 case LoongArch::PseudoXVBZ: 3356 case LoongArch::PseudoXVBZ_B: 3357 case LoongArch::PseudoXVBZ_H: 3358 case LoongArch::PseudoXVBZ_W: 3359 case LoongArch::PseudoXVBZ_D: 3360 case LoongArch::PseudoXVBNZ: 3361 case LoongArch::PseudoXVBNZ_B: 3362 case LoongArch::PseudoXVBNZ_H: 3363 case LoongArch::PseudoXVBNZ_W: 3364 case LoongArch::PseudoXVBNZ_D: 3365 return emitVecCondBranchPseudo(MI, BB, Subtarget); 3366 case LoongArch::PseudoXVINSGR2VR_B: 3367 case LoongArch::PseudoXVINSGR2VR_H: 3368 return emitPseudoXVINSGR2VR(MI, BB, Subtarget); 3369 } 3370 } 3371 3372 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( 3373 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 3374 unsigned *Fast) const { 3375 if (!Subtarget.hasUAL()) 3376 return false; 3377 3378 // TODO: set reasonable speed number. 3379 if (Fast) 3380 *Fast = 1; 3381 return true; 3382 } 3383 3384 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 3385 switch ((LoongArchISD::NodeType)Opcode) { 3386 case LoongArchISD::FIRST_NUMBER: 3387 break; 3388 3389 #define NODE_NAME_CASE(node) \ 3390 case LoongArchISD::node: \ 3391 return "LoongArchISD::" #node; 3392 3393 // TODO: Add more target-dependent nodes later. 
3394 NODE_NAME_CASE(CALL) 3395 NODE_NAME_CASE(CALL_MEDIUM) 3396 NODE_NAME_CASE(CALL_LARGE) 3397 NODE_NAME_CASE(RET) 3398 NODE_NAME_CASE(TAIL) 3399 NODE_NAME_CASE(TAIL_MEDIUM) 3400 NODE_NAME_CASE(TAIL_LARGE) 3401 NODE_NAME_CASE(SLL_W) 3402 NODE_NAME_CASE(SRA_W) 3403 NODE_NAME_CASE(SRL_W) 3404 NODE_NAME_CASE(BSTRINS) 3405 NODE_NAME_CASE(BSTRPICK) 3406 NODE_NAME_CASE(MOVGR2FR_W_LA64) 3407 NODE_NAME_CASE(MOVFR2GR_S_LA64) 3408 NODE_NAME_CASE(FTINT) 3409 NODE_NAME_CASE(REVB_2H) 3410 NODE_NAME_CASE(REVB_2W) 3411 NODE_NAME_CASE(BITREV_4B) 3412 NODE_NAME_CASE(BITREV_W) 3413 NODE_NAME_CASE(ROTR_W) 3414 NODE_NAME_CASE(ROTL_W) 3415 NODE_NAME_CASE(CLZ_W) 3416 NODE_NAME_CASE(CTZ_W) 3417 NODE_NAME_CASE(DBAR) 3418 NODE_NAME_CASE(IBAR) 3419 NODE_NAME_CASE(BREAK) 3420 NODE_NAME_CASE(SYSCALL) 3421 NODE_NAME_CASE(CRC_W_B_W) 3422 NODE_NAME_CASE(CRC_W_H_W) 3423 NODE_NAME_CASE(CRC_W_W_W) 3424 NODE_NAME_CASE(CRC_W_D_W) 3425 NODE_NAME_CASE(CRCC_W_B_W) 3426 NODE_NAME_CASE(CRCC_W_H_W) 3427 NODE_NAME_CASE(CRCC_W_W_W) 3428 NODE_NAME_CASE(CRCC_W_D_W) 3429 NODE_NAME_CASE(CSRRD) 3430 NODE_NAME_CASE(CSRWR) 3431 NODE_NAME_CASE(CSRXCHG) 3432 NODE_NAME_CASE(IOCSRRD_B) 3433 NODE_NAME_CASE(IOCSRRD_H) 3434 NODE_NAME_CASE(IOCSRRD_W) 3435 NODE_NAME_CASE(IOCSRRD_D) 3436 NODE_NAME_CASE(IOCSRWR_B) 3437 NODE_NAME_CASE(IOCSRWR_H) 3438 NODE_NAME_CASE(IOCSRWR_W) 3439 NODE_NAME_CASE(IOCSRWR_D) 3440 NODE_NAME_CASE(CPUCFG) 3441 NODE_NAME_CASE(MOVGR2FCSR) 3442 NODE_NAME_CASE(MOVFCSR2GR) 3443 NODE_NAME_CASE(CACOP_D) 3444 NODE_NAME_CASE(CACOP_W) 3445 NODE_NAME_CASE(VPICK_SEXT_ELT) 3446 NODE_NAME_CASE(VPICK_ZEXT_ELT) 3447 NODE_NAME_CASE(VREPLVE) 3448 NODE_NAME_CASE(VALL_ZERO) 3449 NODE_NAME_CASE(VANY_ZERO) 3450 NODE_NAME_CASE(VALL_NONZERO) 3451 NODE_NAME_CASE(VANY_NONZERO) 3452 } 3453 #undef NODE_NAME_CASE 3454 return nullptr; 3455 } 3456 3457 //===----------------------------------------------------------------------===// 3458 // Calling Convention Implementation 3459 //===----------------------------------------------------------------------===// 3460 3461 // Eight general-purpose registers a0-a7 used for passing integer arguments, 3462 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 3463 // fixed-point arguments, and floating-point arguments when no FPR is available 3464 // or with soft float ABI. 3465 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, 3466 LoongArch::R7, LoongArch::R8, LoongArch::R9, 3467 LoongArch::R10, LoongArch::R11}; 3468 // Eight floating-point registers fa0-fa7 used for passing floating-point 3469 // arguments, and fa0-fa1 are also used to return values. 3470 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, 3471 LoongArch::F3, LoongArch::F4, LoongArch::F5, 3472 LoongArch::F6, LoongArch::F7}; 3473 // FPR32 and FPR64 alias each other. 3474 const MCPhysReg ArgFPR64s[] = { 3475 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, 3476 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; 3477 3478 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, 3479 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, 3480 LoongArch::VR6, LoongArch::VR7}; 3481 3482 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, 3483 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, 3484 LoongArch::XR6, LoongArch::XR7}; 3485 3486 // Pass a 2*GRLen argument that has been split into two GRLen values through 3487 // registers or the stack as necessary. 
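// For illustration (not an additional code path): on an ILP32 ABI where
// GRLen == 32, an i64 argument is legalised into two i32 halves. If a GPR is
// still free, the first half is assigned to it and the second half takes the
// next GPR or, failing that, a GRLen-sized stack slot; if no GPR is free at
// all, both halves are placed on the stack, with the first slot aligned to the
// original argument's alignment, as implemented below.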
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(GRLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}

// Implements the LoongArch calling convention. Returns true upon failure.
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsFixed, bool IsRet,
                         Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // A floating-point value is passed in a GPR if it is a variadic argument or
  // if no FPR is left.
  bool UseGPRForFloat = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case LoongArchABI::ABI_ILP32S:
  case LoongArchABI::ABI_ILP32F:
  case LoongArchABI::ABI_LP64F:
    report_fatal_error("Unimplemented ABI");
    break;
  case LoongArchABI::ABI_ILP32D:
  case LoongArchABI::ABI_LP64D:
    UseGPRForFloat = !IsFixed;
    break;
  case LoongArchABI::ABI_LP64S:
    break;
  }

  // FPR32 and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
    UseGPRForFloat = true;

  if (UseGPRForFloat && ValVT == MVT::f32) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
    // TODO: Handle passing f64 on LA32 with D feature.
    report_fatal_error("Passing f64 with GPR on LA32 is undefined");
  }

  // If this is a variadic argument, the LoongArch calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  // byte alignment. An aligned register should be used regardless of whether
  // the original argument was split during legalisation or not. The argument
  // will not be passed by registers if the original type is larger than
  // 2*GRLen, so the register alignment rule does not apply.
  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
                                    ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = GRLen / 8;
  Align StackAlign = Align(GRLen / 8);

  if (ValVT == MVT::f32 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.is128BitVector())
    Reg = State.AllocateReg(ArgVRs);
  else if (ValVT.is256BitVector())
    Reg = State.AllocateReg(ArgXRs);
  else
    Reg = State.AllocateReg(ArgGPRs);

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }
  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
         "Expected a GRLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-cast is needed.
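  // For example, a variadic f64 that spills to the stack under LP64D ends up
  // with LocVT == MVT::f64 and CCValAssign::Full, so it is simply stored and
  // later reloaded with a normal (non-extending) load; only values that stay
  // mapped to GRLenVT go through the BCvt conversion in convertLocVTToValVT.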
3667 if (ValVT.isFloatingPoint()) { 3668 LocVT = ValVT; 3669 LocInfo = CCValAssign::Full; 3670 } 3671 3672 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3673 return false; 3674 } 3675 3676 void LoongArchTargetLowering::analyzeInputArgs( 3677 MachineFunction &MF, CCState &CCInfo, 3678 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 3679 LoongArchCCAssignFn Fn) const { 3680 FunctionType *FType = MF.getFunction().getFunctionType(); 3681 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3682 MVT ArgVT = Ins[i].VT; 3683 Type *ArgTy = nullptr; 3684 if (IsRet) 3685 ArgTy = FType->getReturnType(); 3686 else if (Ins[i].isOrigArg()) 3687 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3688 LoongArchABI::ABI ABI = 3689 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 3690 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, 3691 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 3692 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT 3693 << '\n'); 3694 llvm_unreachable(""); 3695 } 3696 } 3697 } 3698 3699 void LoongArchTargetLowering::analyzeOutputArgs( 3700 MachineFunction &MF, CCState &CCInfo, 3701 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3702 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { 3703 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3704 MVT ArgVT = Outs[i].VT; 3705 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3706 LoongArchABI::ABI ABI = 3707 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 3708 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, 3709 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 3710 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT 3711 << "\n"); 3712 llvm_unreachable(""); 3713 } 3714 } 3715 } 3716 3717 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 3718 // values. 3719 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3720 const CCValAssign &VA, const SDLoc &DL) { 3721 switch (VA.getLocInfo()) { 3722 default: 3723 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3724 case CCValAssign::Full: 3725 case CCValAssign::Indirect: 3726 break; 3727 case CCValAssign::BCvt: 3728 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3729 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 3730 else 3731 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3732 break; 3733 } 3734 return Val; 3735 } 3736 3737 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3738 const CCValAssign &VA, const SDLoc &DL, 3739 const LoongArchTargetLowering &TLI) { 3740 MachineFunction &MF = DAG.getMachineFunction(); 3741 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3742 EVT LocVT = VA.getLocVT(); 3743 SDValue Val; 3744 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3745 Register VReg = RegInfo.createVirtualRegister(RC); 3746 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3747 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3748 3749 return convertLocVTToValVT(DAG, Val, VA, DL); 3750 } 3751 3752 // The caller is responsible for loading the full value if the argument is 3753 // passed with CCValAssign::Indirect. 
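// Illustrative example: a scalar that legalisation splits into more than two
// GRLen-sized parts (say an i256 on LA64) is passed CCValAssign::Indirect, so
// its assigned location only holds a pointer; the loop in LowerFormalArguments
// below reloads each part from consecutive offsets off that pointer.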
3754 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 3755 const CCValAssign &VA, const SDLoc &DL) { 3756 MachineFunction &MF = DAG.getMachineFunction(); 3757 MachineFrameInfo &MFI = MF.getFrameInfo(); 3758 EVT ValVT = VA.getValVT(); 3759 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 3760 /*IsImmutable=*/true); 3761 SDValue FIN = DAG.getFrameIndex( 3762 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 3763 3764 ISD::LoadExtType ExtType; 3765 switch (VA.getLocInfo()) { 3766 default: 3767 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3768 case CCValAssign::Full: 3769 case CCValAssign::Indirect: 3770 case CCValAssign::BCvt: 3771 ExtType = ISD::NON_EXTLOAD; 3772 break; 3773 } 3774 return DAG.getExtLoad( 3775 ExtType, DL, VA.getLocVT(), Chain, FIN, 3776 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 3777 } 3778 3779 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3780 const CCValAssign &VA, const SDLoc &DL) { 3781 EVT LocVT = VA.getLocVT(); 3782 3783 switch (VA.getLocInfo()) { 3784 default: 3785 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3786 case CCValAssign::Full: 3787 break; 3788 case CCValAssign::BCvt: 3789 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3790 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 3791 else 3792 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3793 break; 3794 } 3795 return Val; 3796 } 3797 3798 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3799 CCValAssign::LocInfo LocInfo, 3800 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3801 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3802 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 3803 // s0 s1 s2 s3 s4 s5 s6 s7 s8 3804 static const MCPhysReg GPRList[] = { 3805 LoongArch::R23, LoongArch::R24, LoongArch::R25, 3806 LoongArch::R26, LoongArch::R27, LoongArch::R28, 3807 LoongArch::R29, LoongArch::R30, LoongArch::R31}; 3808 if (unsigned Reg = State.AllocateReg(GPRList)) { 3809 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3810 return false; 3811 } 3812 } 3813 3814 if (LocVT == MVT::f32) { 3815 // Pass in STG registers: F1, F2, F3, F4 3816 // fs0,fs1,fs2,fs3 3817 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, 3818 LoongArch::F26, LoongArch::F27}; 3819 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3820 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3821 return false; 3822 } 3823 } 3824 3825 if (LocVT == MVT::f64) { 3826 // Pass in STG registers: D1, D2, D3, D4 3827 // fs4,fs5,fs6,fs7 3828 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, 3829 LoongArch::F30_64, LoongArch::F31_64}; 3830 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3831 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3832 return false; 3833 } 3834 } 3835 3836 report_fatal_error("No registers left in GHC calling convention"); 3837 return true; 3838 } 3839 3840 // Transform physical registers into virtual registers. 
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      report_fatal_error(
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(ArgPartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
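    // Illustrative numbers: on LA64 (GRLenInBytes == 8), if the fixed
    // arguments consume a0-a2 then Idx == 3, VarArgsSaveSize starts at
    // 8 * (8 - 3) == 40 and VaArgOffset == -40; because Idx is odd, an extra
    // 8-byte slot is reserved below so that offsets to even-numbered argument
    // registers stay 2*GRLen-aligned.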
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument, which is needed
    // by VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers, then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Check if the return value is used only as a return value, as otherwise
// we can't perform a tail-call.
bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
                                                 SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

// Check whether the call is eligible for tail call optimization.
4009 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 4010 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4011 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 4012 4013 auto CalleeCC = CLI.CallConv; 4014 auto &Outs = CLI.Outs; 4015 auto &Caller = MF.getFunction(); 4016 auto CallerCC = Caller.getCallingConv(); 4017 4018 // Do not tail call opt if the stack is used to pass parameters. 4019 if (CCInfo.getStackSize() != 0) 4020 return false; 4021 4022 // Do not tail call opt if any parameters need to be passed indirectly. 4023 for (auto &VA : ArgLocs) 4024 if (VA.getLocInfo() == CCValAssign::Indirect) 4025 return false; 4026 4027 // Do not tail call opt if either caller or callee uses struct return 4028 // semantics. 4029 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4030 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4031 if (IsCallerStructRet || IsCalleeStructRet) 4032 return false; 4033 4034 // Do not tail call opt if either the callee or caller has a byval argument. 4035 for (auto &Arg : Outs) 4036 if (Arg.Flags.isByVal()) 4037 return false; 4038 4039 // The callee has to preserve all registers the caller needs to preserve. 4040 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4041 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4042 if (CalleeCC != CallerCC) { 4043 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4044 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4045 return false; 4046 } 4047 return true; 4048 } 4049 4050 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 4051 return DAG.getDataLayout().getPrefTypeAlign( 4052 VT.getTypeForEVT(*DAG.getContext())); 4053 } 4054 4055 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4056 // and output parameter nodes. 4057 SDValue 4058 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 4059 SmallVectorImpl<SDValue> &InVals) const { 4060 SelectionDAG &DAG = CLI.DAG; 4061 SDLoc &DL = CLI.DL; 4062 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4063 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4064 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4065 SDValue Chain = CLI.Chain; 4066 SDValue Callee = CLI.Callee; 4067 CallingConv::ID CallConv = CLI.CallConv; 4068 bool IsVarArg = CLI.IsVarArg; 4069 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4070 MVT GRLenVT = Subtarget.getGRLenVT(); 4071 bool &IsTailCall = CLI.IsTailCall; 4072 4073 MachineFunction &MF = DAG.getMachineFunction(); 4074 4075 // Analyze the operands of the call, assigning locations to each operand. 4076 SmallVector<CCValAssign> ArgLocs; 4077 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4078 4079 if (CallConv == CallingConv::GHC) 4080 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 4081 else 4082 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 4083 4084 // Check if it's really possible to do a tail call. 4085 if (IsTailCall) 4086 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4087 4088 if (IsTailCall) 4089 ++NumTailCalls; 4090 else if (CLI.CB && CLI.CB->isMustTailCall()) 4091 report_fatal_error("failed to perform tail call elimination on a call " 4092 "site marked musttail"); 4093 4094 // Get a count of how many bytes are to be pushed on the stack. 4095 unsigned NumBytes = ArgCCInfo.getStackSize(); 4096 4097 // Create local copies for byval args. 
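  // For instance, an aggregate passed byval is copied with DAG.getMemcpy into
  // a fresh stack object in this function's frame, and the frame-index address
  // of that copy (FIPtr) is what gets passed to the callee below in place of
  // the caller's original pointer.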
4098 SmallVector<SDValue> ByValArgs; 4099 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4100 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4101 if (!Flags.isByVal()) 4102 continue; 4103 4104 SDValue Arg = OutVals[i]; 4105 unsigned Size = Flags.getByValSize(); 4106 Align Alignment = Flags.getNonZeroByValAlign(); 4107 4108 int FI = 4109 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4110 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4111 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 4112 4113 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4114 /*IsVolatile=*/false, 4115 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, 4116 MachinePointerInfo(), MachinePointerInfo()); 4117 ByValArgs.push_back(FIPtr); 4118 } 4119 4120 if (!IsTailCall) 4121 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4122 4123 // Copy argument values to their designated locations. 4124 SmallVector<std::pair<Register, SDValue>> RegsToPass; 4125 SmallVector<SDValue> MemOpChains; 4126 SDValue StackPtr; 4127 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4128 CCValAssign &VA = ArgLocs[i]; 4129 SDValue ArgValue = OutVals[i]; 4130 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4131 4132 // Promote the value if needed. 4133 // For now, only handle fully promoted and indirect arguments. 4134 if (VA.getLocInfo() == CCValAssign::Indirect) { 4135 // Store the argument in a stack slot and pass its address. 4136 Align StackAlign = 4137 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 4138 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 4139 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 4140 // If the original argument was split and passed by reference, we need to 4141 // store the required parts of it here (and pass just one address). 4142 unsigned ArgIndex = Outs[i].OrigArgIndex; 4143 unsigned ArgPartOffset = Outs[i].PartOffset; 4144 assert(ArgPartOffset == 0); 4145 // Calculate the total size to store. We don't have access to what we're 4146 // actually storing other than performing the loop and collecting the 4147 // info. 4148 SmallVector<std::pair<SDValue, SDValue>> Parts; 4149 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4150 SDValue PartValue = OutVals[i + 1]; 4151 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 4152 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 4153 EVT PartVT = PartValue.getValueType(); 4154 4155 StoredSize += PartVT.getStoreSize(); 4156 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 4157 Parts.push_back(std::make_pair(PartValue, Offset)); 4158 ++i; 4159 } 4160 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 4161 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4162 MemOpChains.push_back( 4163 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4164 MachinePointerInfo::getFixedStack(MF, FI))); 4165 for (const auto &Part : Parts) { 4166 SDValue PartValue = Part.first; 4167 SDValue PartOffset = Part.second; 4168 SDValue Address = 4169 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 4170 MemOpChains.push_back( 4171 DAG.getStore(Chain, DL, PartValue, Address, 4172 MachinePointerInfo::getFixedStack(MF, FI))); 4173 } 4174 ArgValue = SpillSlot; 4175 } else { 4176 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 4177 } 4178 4179 // Use local copy if it is a byval arg. 
4180 if (Flags.isByVal()) 4181 ArgValue = ByValArgs[j++]; 4182 4183 if (VA.isRegLoc()) { 4184 // Queue up the argument copies and emit them at the end. 4185 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 4186 } else { 4187 assert(VA.isMemLoc() && "Argument not register or memory"); 4188 assert(!IsTailCall && "Tail call not allowed if stack is used " 4189 "for passing parameters"); 4190 4191 // Work out the address of the stack slot. 4192 if (!StackPtr.getNode()) 4193 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 4194 SDValue Address = 4195 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 4196 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 4197 4198 // Emit the store. 4199 MemOpChains.push_back( 4200 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 4201 } 4202 } 4203 4204 // Join the stores, which are independent of one another. 4205 if (!MemOpChains.empty()) 4206 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 4207 4208 SDValue Glue; 4209 4210 // Build a sequence of copy-to-reg nodes, chained and glued together. 4211 for (auto &Reg : RegsToPass) { 4212 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 4213 Glue = Chain.getValue(1); 4214 } 4215 4216 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 4217 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 4218 // split it and then direct call can be matched by PseudoCALL. 4219 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 4220 const GlobalValue *GV = S->getGlobal(); 4221 unsigned OpFlags = 4222 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV) 4223 ? LoongArchII::MO_CALL 4224 : LoongArchII::MO_CALL_PLT; 4225 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 4226 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4227 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal( 4228 *MF.getFunction().getParent(), nullptr) 4229 ? LoongArchII::MO_CALL 4230 : LoongArchII::MO_CALL_PLT; 4231 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 4232 } 4233 4234 // The first call operand is the chain and the second is the target address. 4235 SmallVector<SDValue> Ops; 4236 Ops.push_back(Chain); 4237 Ops.push_back(Callee); 4238 4239 // Add argument registers to the end of the list so that they are 4240 // known live into the call. 4241 for (auto &Reg : RegsToPass) 4242 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 4243 4244 if (!IsTailCall) { 4245 // Add a register mask operand representing the call-preserved registers. 4246 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4247 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 4248 assert(Mask && "Missing call preserved mask for calling convention"); 4249 Ops.push_back(DAG.getRegisterMask(Mask)); 4250 } 4251 4252 // Glue the call to the argument copies, if any. 4253 if (Glue.getNode()) 4254 Ops.push_back(Glue); 4255 4256 // Emit the call. 4257 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 4258 unsigned Op; 4259 switch (DAG.getTarget().getCodeModel()) { 4260 default: 4261 report_fatal_error("Unsupported code model"); 4262 case CodeModel::Small: 4263 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; 4264 break; 4265 case CodeModel::Medium: 4266 assert(Subtarget.is64Bit() && "Medium code model requires LA64"); 4267 Op = IsTailCall ? 
LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; 4268 break; 4269 case CodeModel::Large: 4270 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 4271 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; 4272 break; 4273 } 4274 4275 if (IsTailCall) { 4276 MF.getFrameInfo().setHasTailCall(); 4277 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); 4278 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 4279 return Ret; 4280 } 4281 4282 Chain = DAG.getNode(Op, DL, NodeTys, Ops); 4283 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 4284 Glue = Chain.getValue(1); 4285 4286 // Mark the end of the call, which is glued to the call itself. 4287 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 4288 Glue = Chain.getValue(1); 4289 4290 // Assign locations to each value returned by this call. 4291 SmallVector<CCValAssign> RVLocs; 4292 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 4293 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 4294 4295 // Copy all of the result registers out of their specified physreg. 4296 for (auto &VA : RVLocs) { 4297 // Copy the value out. 4298 SDValue RetValue = 4299 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 4300 // Glue the RetValue to the end of the call sequence. 4301 Chain = RetValue.getValue(1); 4302 Glue = RetValue.getValue(2); 4303 4304 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 4305 4306 InVals.push_back(RetValue); 4307 } 4308 4309 return Chain; 4310 } 4311 4312 bool LoongArchTargetLowering::CanLowerReturn( 4313 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 4314 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 4315 SmallVector<CCValAssign> RVLocs; 4316 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 4317 4318 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4319 LoongArchABI::ABI ABI = 4320 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 4321 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 4322 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 4323 nullptr)) 4324 return false; 4325 } 4326 return true; 4327 } 4328 4329 SDValue LoongArchTargetLowering::LowerReturn( 4330 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 4331 const SmallVectorImpl<ISD::OutputArg> &Outs, 4332 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 4333 SelectionDAG &DAG) const { 4334 // Stores the assignment of the return value to a location. 4335 SmallVector<CCValAssign> RVLocs; 4336 4337 // Info about the registers and stack slot. 4338 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 4339 *DAG.getContext()); 4340 4341 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 4342 nullptr, CC_LoongArch); 4343 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 4344 report_fatal_error("GHC functions return void only"); 4345 SDValue Glue; 4346 SmallVector<SDValue, 4> RetOps(1, Chain); 4347 4348 // Copy the result values into the output registers. 4349 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 4350 CCValAssign &VA = RVLocs[i]; 4351 assert(VA.isRegLoc() && "Can only return in registers!"); 4352 4353 // Handle a 'normal' return. 4354 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); 4355 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 4356 4357 // Guarantee that all emitted copies are stuck together. 
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}

bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                           bool ForCodeSize) const {
  // TODO: Maybe need more checks here after vector extension is supported.
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return false;
  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
}

bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  if (isa<StoreInst>(I)) {
    unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}

EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInsts that need to be extended.

  // Since a floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
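  // For example, returning CmpXChg here makes AtomicExpandPass rewrite
  //   atomicrmw fadd ptr %p, float %x acq_rel
  // into a load + fadd + cmpxchg retry loop rather than a dedicated LL/SC
  // sequence; UIncWrap/UDecWrap get the same treatment.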
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
      // TODO: support other AtomicRMWInst.
    }
  }

  if (GRLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
      // TODO: support other AtomicRMWInst.
    }
  }

  llvm_unreachable("Unexpected GRLen\n");
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  AtomicOrdering FailOrd = CI->getFailureOrdering();
  Value *FailureOrdering =
      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));

  // TODO: Support cmpxchg on LA32.
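  // The masked cmpxchg intrinsic operates on whole GRLen-wide (here i64)
  // words: the i32 compare value, new value and mask are sign-extended below,
  // and the emitted LL/SC loop only updates the bits selected by Mask inside
  // the aligned word.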
  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LL/SC loop emitted by
  // int_loongarch_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned GRLen = Subtarget.getGRLen();
  Value *Ordering =
      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);

  if (GRLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
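  // Worked example: for an i16 field at bit offset 16 of a 64-bit aligned
  // word, ShiftAmt = 16 and ValWidth = 16, so SextShamt = 64 - 16 - 16 = 32.
  // Shifting the loaded word left and then arithmetically right by 32 keeps
  // the field at bits [31:16] while propagating its sign bit upwards, which is
  // what the signed min/max comparison in the LL/SC loop needs.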
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LlwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (GRLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//

LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f': A floating-point register (if available).
  // 'k': A memory operand whose address is formed by a base register and
  //      (optionally scaled) index register.
  // 'l': A signed 16-bit constant.
  // 'm': A memory operand whose address is formed by a base register and
  //      offset that is suitable for use in instructions with the same
  //      addressing mode as st.w and ld.w.
  // 'I': A signed 12-bit constant (for arithmetic instructions).
  // 'J': Integer zero.
  // 'K': An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset
  //       is zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB")
    return C_Memory;

  // 'm' is handled here.
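  // e.g. an inline-asm ll.w/sc.w sequence whose memory operand uses the "ZC"
  // constraint is classified as C_Memory above, while a plain "m" operand
  // falls through to the generic handling below.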
  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
      .Case("k", InlineAsm::ConstraintCode::k)
      .Case("ZB", InlineAsm::ConstraintCode::ZB)
      .Case("ZC", InlineAsm::ConstraintCode::ZC)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}

std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // LoongArch register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
  // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
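  // e.g. (illustrative) `asm("addi.w %0, %1, %2" : "=r"(d) : "r"(s), "I"(42))`
  // reaches this hook with a ConstantSDNode of 42; since it fits isInt<12>, a
  // target constant operand is produced for it below.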
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits. Or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
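      // e.g. (illustrative) Imm = 0x5000 has 12 trailing zeros, so it can be
      // materialized with a single lu12i.w; keeping the plain MUL is then
      // preferable to a shift/add decomposition.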
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after supporting the vector extension.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require the offset to be either a 12-bit signed offset or a 14-bit signed
  // offset left-shifted by 2; reject it only if it fits neither form.
  if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0:
    // "i" is not allowed.
    if (!AM.HasBaseReg)
      return false;
    // Otherwise we have "r+i".
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs != 0)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Otherwise we have "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}
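// Note (illustrative): returning true above lets DAGCombiner rewrite
// (X & Y) == Y as (~X & Y) == 0 when Y is not a constant, which maps onto
// LoongArch's `andn` followed by a compare/branch against zero.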