1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that LoongArch uses to lower LLVM code into 10 // a selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LoongArchISelLowering.h" 15 #include "LoongArch.h" 16 #include "LoongArchMachineFunctionInfo.h" 17 #include "LoongArchRegisterInfo.h" 18 #include "LoongArchSubtarget.h" 19 #include "LoongArchTargetMachine.h" 20 #include "MCTargetDesc/LoongArchBaseInfo.h" 21 #include "MCTargetDesc/LoongArchMCTargetDesc.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/CodeGen/ISDOpcodes.h" 25 #include "llvm/CodeGen/RuntimeLibcalls.h" 26 #include "llvm/CodeGen/SelectionDAGNodes.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/IR/IntrinsicsLoongArch.h" 29 #include "llvm/Support/CodeGen.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/KnownBits.h" 33 #include "llvm/Support/MathExtras.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "loongarch-isel-lowering" 38 39 STATISTIC(NumTailCalls, "Number of tail calls"); 40 41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, 42 cl::desc("Trap on integer division by zero."), 43 cl::init(false)); 44 45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, 46 const LoongArchSubtarget &STI) 47 : TargetLowering(TM), Subtarget(STI) { 48 49 MVT GRLenVT = Subtarget.getGRLenVT(); 50 51 // Set up the register classes. 52 53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); 54 if (Subtarget.hasBasicF()) 55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); 56 if (Subtarget.hasBasicD()) 57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); 58 59 static const MVT::SimpleValueType LSXVTs[] = { 60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; 61 static const MVT::SimpleValueType LASXVTs[] = { 62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; 63 64 if (Subtarget.hasExtLSX()) 65 for (MVT VT : LSXVTs) 66 addRegisterClass(VT, &LoongArch::LSX128RegClass); 67 68 if (Subtarget.hasExtLASX()) 69 for (MVT VT : LASXVTs) 70 addRegisterClass(VT, &LoongArch::LASX256RegClass); 71 72 // Set operations for LA32 and LA64. 73 74 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, 75 MVT::i1, Promote); 76 77 setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); 78 setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); 79 setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); 80 setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); 81 setOperationAction(ISD::ROTL, GRLenVT, Expand); 82 setOperationAction(ISD::CTPOP, GRLenVT, Expand); 83 84 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, 85 ISD::JumpTable, ISD::GlobalTLSAddress}, 86 GRLenVT, Custom); 87 88 setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); 89 90 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); 91 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); 92 setOperationAction(ISD::VASTART, MVT::Other, Custom); 93 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); 94 95 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 96 setOperationAction(ISD::TRAP, MVT::Other, Legal); 97 98 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 99 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 100 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 101 102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before 103 // we get to know which of sll and revb.2h is faster. 104 setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); 105 setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); 106 107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and 108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 109 // and i32 could still be byte-swapped relatively cheaply. 110 setOperationAction(ISD::BSWAP, MVT::i16, Custom); 111 112 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 113 setOperationAction(ISD::BR_CC, GRLenVT, Expand); 114 setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); 115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 116 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); 117 118 setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); 119 setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); 120 121 // Set operations for LA64 only. 122 123 if (Subtarget.is64Bit()) { 124 setOperationAction(ISD::SHL, MVT::i32, Custom); 125 setOperationAction(ISD::SRA, MVT::i32, Custom); 126 setOperationAction(ISD::SRL, MVT::i32, Custom); 127 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 128 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 129 setOperationAction(ISD::ROTR, MVT::i32, Custom); 130 setOperationAction(ISD::ROTL, MVT::i32, Custom); 131 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 132 setOperationAction(ISD::CTLZ, MVT::i32, Custom); 133 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 134 setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); 135 setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); 136 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); 137 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 138 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 139 140 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 141 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 142 } 143 144 // Set operations for LA32 only. 145 146 if (!Subtarget.is64Bit()) { 147 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); 148 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); 149 setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); 150 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 151 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 152 153 // Set libcalls. 154 setLibcallName(RTLIB::MUL_I128, nullptr); 155 // The MULO libcall is not part of libgcc, only compiler-rt. 156 setLibcallName(RTLIB::MULO_I64, nullptr); 157 } 158 159 // The MULO libcall is not part of libgcc, only compiler-rt. 160 setLibcallName(RTLIB::MULO_I128, nullptr); 161 162 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 163 164 static const ISD::CondCode FPCCToExpand[] = { 165 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, 166 ISD::SETGE, ISD::SETNE, ISD::SETGT}; 167 168 // Set operations for 'F' feature. 169 170 if (Subtarget.hasBasicF()) { 171 setCondCodeAction(FPCCToExpand, MVT::f32, Expand); 172 173 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 174 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 175 setOperationAction(ISD::FMA, MVT::f32, Legal); 176 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); 177 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); 178 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); 179 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); 180 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal); 181 setOperationAction(ISD::FSIN, MVT::f32, Expand); 182 setOperationAction(ISD::FCOS, MVT::f32, Expand); 183 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 184 setOperationAction(ISD::FPOW, MVT::f32, Expand); 185 setOperationAction(ISD::FREM, MVT::f32, Expand); 186 187 if (Subtarget.is64Bit()) 188 setOperationAction(ISD::FRINT, MVT::f32, Legal); 189 190 if (!Subtarget.hasBasicD()) { 191 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 192 if (Subtarget.is64Bit()) { 193 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 194 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 195 } 196 } 197 } 198 199 // Set operations for 'D' feature. 200 201 if (Subtarget.hasBasicD()) { 202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 203 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 204 setCondCodeAction(FPCCToExpand, MVT::f64, Expand); 205 206 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 207 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 208 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); 209 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); 210 setOperationAction(ISD::FMA, MVT::f64, Legal); 211 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); 212 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); 213 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal); 214 setOperationAction(ISD::FSIN, MVT::f64, Expand); 215 setOperationAction(ISD::FCOS, MVT::f64, Expand); 216 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 217 setOperationAction(ISD::FPOW, MVT::f64, Expand); 218 setOperationAction(ISD::FREM, MVT::f64, Expand); 219 220 if (Subtarget.is64Bit()) 221 setOperationAction(ISD::FRINT, MVT::f64, Legal); 222 } 223 224 // Set operations for 'LSX' feature. 225 226 if (Subtarget.hasExtLSX()) { 227 for (MVT VT : MVT::fixedlen_vector_valuetypes()) { 228 // Expand all truncating stores and extending loads. 229 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { 230 setTruncStoreAction(VT, InnerVT, Expand); 231 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 232 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 233 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 234 } 235 // By default everything must be expanded. Then we will selectively turn 236 // on ones that can be effectively codegen'd. 237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 238 setOperationAction(Op, VT, Expand); 239 } 240 241 for (MVT VT : LSXVTs) { 242 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); 243 setOperationAction(ISD::BITCAST, VT, Legal); 244 setOperationAction(ISD::UNDEF, VT, Legal); 245 246 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 247 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); 248 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 249 250 setOperationAction(ISD::SETCC, VT, Legal); 251 setOperationAction(ISD::VSELECT, VT, Legal); 252 } 253 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { 254 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 255 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); 256 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, 257 Legal); 258 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, 259 VT, Legal); 260 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); 261 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); 262 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); 263 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); 264 setCondCodeAction( 265 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, 266 Expand); 267 } 268 for (MVT VT : {MVT::v4i32, MVT::v2i64}) { 269 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); 270 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); 271 } 272 for (MVT VT : {MVT::v4f32, MVT::v2f64}) { 273 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); 274 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); 275 setOperationAction(ISD::FMA, VT, Legal); 276 setOperationAction(ISD::FSQRT, VT, Legal); 277 setOperationAction(ISD::FNEG, VT, Legal); 278 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, 279 ISD::SETUGE, ISD::SETUGT}, 280 VT, Expand); 281 } 282 } 283 284 // Set operations for 'LASX' feature. 285 286 if (Subtarget.hasExtLASX()) { 287 for (MVT VT : LASXVTs) { 288 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); 289 setOperationAction(ISD::BITCAST, VT, Legal); 290 setOperationAction(ISD::UNDEF, VT, Legal); 291 292 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 293 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 294 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 295 296 setOperationAction(ISD::SETCC, VT, Legal); 297 setOperationAction(ISD::VSELECT, VT, Legal); 298 } 299 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { 300 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 301 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); 302 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, 303 Legal); 304 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, 305 VT, Legal); 306 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); 307 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); 308 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); 309 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); 310 setCondCodeAction( 311 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, 312 Expand); 313 } 314 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { 315 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); 316 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); 317 } 318 for (MVT VT : {MVT::v8f32, MVT::v4f64}) { 319 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); 320 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); 321 setOperationAction(ISD::FMA, VT, Legal); 322 setOperationAction(ISD::FSQRT, VT, Legal); 323 setOperationAction(ISD::FNEG, VT, Legal); 324 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, 325 ISD::SETUGE, ISD::SETUGT}, 326 VT, Expand); 327 } 328 } 329 330 // Set DAG combine for LA32 and LA64. 331 332 setTargetDAGCombine(ISD::AND); 333 setTargetDAGCombine(ISD::OR); 334 setTargetDAGCombine(ISD::SRL); 335 336 // Set DAG combine for 'LSX' feature. 337 338 if (Subtarget.hasExtLSX()) 339 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 340 341 // Compute derived properties from the register classes. 342 computeRegisterProperties(Subtarget.getRegisterInfo()); 343 344 setStackPointerRegisterToSaveRestore(LoongArch::R3); 345 346 setBooleanContents(ZeroOrOneBooleanContent); 347 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 348 349 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); 350 351 setMinCmpXchgSizeInBits(32); 352 353 // Function alignments. 354 setMinFunctionAlignment(Align(4)); 355 // Set preferred alignments. 356 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); 357 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); 358 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); 359 } 360 361 bool LoongArchTargetLowering::isOffsetFoldingLegal( 362 const GlobalAddressSDNode *GA) const { 363 // In order to maximise the opportunity for common subexpression elimination, 364 // keep a separate ADD node for the global address offset instead of folding 365 // it in the global address node. Later peephole optimisations may choose to 366 // fold it back in when profitable. 367 return false; 368 } 369 370 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, 371 SelectionDAG &DAG) const { 372 switch (Op.getOpcode()) { 373 case ISD::ATOMIC_FENCE: 374 return lowerATOMIC_FENCE(Op, DAG); 375 case ISD::EH_DWARF_CFA: 376 return lowerEH_DWARF_CFA(Op, DAG); 377 case ISD::GlobalAddress: 378 return lowerGlobalAddress(Op, DAG); 379 case ISD::GlobalTLSAddress: 380 return lowerGlobalTLSAddress(Op, DAG); 381 case ISD::INTRINSIC_WO_CHAIN: 382 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 383 case ISD::INTRINSIC_W_CHAIN: 384 return lowerINTRINSIC_W_CHAIN(Op, DAG); 385 case ISD::INTRINSIC_VOID: 386 return lowerINTRINSIC_VOID(Op, DAG); 387 case ISD::BlockAddress: 388 return lowerBlockAddress(Op, DAG); 389 case ISD::JumpTable: 390 return lowerJumpTable(Op, DAG); 391 case ISD::SHL_PARTS: 392 return lowerShiftLeftParts(Op, DAG); 393 case ISD::SRA_PARTS: 394 return lowerShiftRightParts(Op, DAG, true); 395 case ISD::SRL_PARTS: 396 return lowerShiftRightParts(Op, DAG, false); 397 case ISD::ConstantPool: 398 return lowerConstantPool(Op, DAG); 399 case ISD::FP_TO_SINT: 400 return lowerFP_TO_SINT(Op, DAG); 401 case ISD::BITCAST: 402 return lowerBITCAST(Op, DAG); 403 case ISD::UINT_TO_FP: 404 return lowerUINT_TO_FP(Op, DAG); 405 case ISD::SINT_TO_FP: 406 return lowerSINT_TO_FP(Op, DAG); 407 case ISD::VASTART: 408 return lowerVASTART(Op, DAG); 409 case ISD::FRAMEADDR: 410 return lowerFRAMEADDR(Op, DAG); 411 case ISD::RETURNADDR: 412 return lowerRETURNADDR(Op, DAG); 413 case ISD::WRITE_REGISTER: 414 return lowerWRITE_REGISTER(Op, DAG); 415 case ISD::INSERT_VECTOR_ELT: 416 return lowerINSERT_VECTOR_ELT(Op, DAG); 417 case ISD::EXTRACT_VECTOR_ELT: 418 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 419 case ISD::BUILD_VECTOR: 420 return lowerBUILD_VECTOR(Op, DAG); 421 case ISD::VECTOR_SHUFFLE: 422 return lowerVECTOR_SHUFFLE(Op, DAG); 423 } 424 return SDValue(); 425 } 426 427 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 428 SelectionDAG &DAG) const { 429 // TODO: custom shuffle. 430 return SDValue(); 431 } 432 433 static bool isConstantOrUndef(const SDValue Op) { 434 if (Op->isUndef()) 435 return true; 436 if (isa<ConstantSDNode>(Op)) 437 return true; 438 if (isa<ConstantFPSDNode>(Op)) 439 return true; 440 return false; 441 } 442 443 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 444 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 445 if (isConstantOrUndef(Op->getOperand(i))) 446 return true; 447 return false; 448 } 449 450 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, 451 SelectionDAG &DAG) const { 452 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 453 EVT ResTy = Op->getValueType(0); 454 SDLoc DL(Op); 455 APInt SplatValue, SplatUndef; 456 unsigned SplatBitSize; 457 bool HasAnyUndefs; 458 bool Is128Vec = ResTy.is128BitVector(); 459 bool Is256Vec = ResTy.is256BitVector(); 460 461 if ((!Subtarget.hasExtLSX() || !Is128Vec) && 462 (!Subtarget.hasExtLASX() || !Is256Vec)) 463 return SDValue(); 464 465 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 466 /*MinSplatBits=*/8) && 467 SplatBitSize <= 64) { 468 // We can only cope with 8, 16, 32, or 64-bit elements. 469 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 470 SplatBitSize != 64) 471 return SDValue(); 472 473 EVT ViaVecTy; 474 475 switch (SplatBitSize) { 476 default: 477 return SDValue(); 478 case 8: 479 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; 480 break; 481 case 16: 482 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; 483 break; 484 case 32: 485 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; 486 break; 487 case 64: 488 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; 489 break; 490 } 491 492 // SelectionDAG::getConstant will promote SplatValue appropriately. 493 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 494 495 // Bitcast to the type we originally wanted. 496 if (ViaVecTy != ResTy) 497 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 498 499 return Result; 500 } 501 502 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) 503 return Op; 504 505 if (!isConstantOrUndefBUILD_VECTOR(Node)) { 506 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 507 // The resulting code is the same length as the expansion, but it doesn't 508 // use memory operations. 509 EVT ResTy = Node->getValueType(0); 510 511 assert(ResTy.isVector()); 512 513 unsigned NumElts = ResTy.getVectorNumElements(); 514 SDValue Vector = DAG.getUNDEF(ResTy); 515 for (unsigned i = 0; i < NumElts; ++i) { 516 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 517 Node->getOperand(i), 518 DAG.getConstant(i, DL, Subtarget.getGRLenVT())); 519 } 520 return Vector; 521 } 522 523 return SDValue(); 524 } 525 526 SDValue 527 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 528 SelectionDAG &DAG) const { 529 EVT VecTy = Op->getOperand(0)->getValueType(0); 530 SDValue Idx = Op->getOperand(1); 531 EVT EltTy = VecTy.getVectorElementType(); 532 unsigned NumElts = VecTy.getVectorNumElements(); 533 534 if (isa<ConstantSDNode>(Idx) && 535 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || 536 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) 537 return Op; 538 539 return SDValue(); 540 } 541 542 SDValue 543 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 544 SelectionDAG &DAG) const { 545 if (isa<ConstantSDNode>(Op->getOperand(2))) 546 return Op; 547 return SDValue(); 548 } 549 550 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, 551 SelectionDAG &DAG) const { 552 SDLoc DL(Op); 553 SyncScope::ID FenceSSID = 554 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 555 556 // singlethread fences only synchronize with signal handlers on the same 557 // thread and thus only need to preserve instruction order, not actually 558 // enforce memory ordering. 559 if (FenceSSID == SyncScope::SingleThread) 560 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 561 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 562 563 return Op; 564 } 565 566 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, 567 SelectionDAG &DAG) const { 568 569 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) { 570 DAG.getContext()->emitError( 571 "On LA64, only 64-bit registers can be written."); 572 return Op.getOperand(0); 573 } 574 575 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) { 576 DAG.getContext()->emitError( 577 "On LA32, only 32-bit registers can be written."); 578 return Op.getOperand(0); 579 } 580 581 return Op; 582 } 583 584 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, 585 SelectionDAG &DAG) const { 586 if (!isa<ConstantSDNode>(Op.getOperand(0))) { 587 DAG.getContext()->emitError("argument to '__builtin_frame_address' must " 588 "be a constant integer"); 589 return SDValue(); 590 } 591 592 MachineFunction &MF = DAG.getMachineFunction(); 593 MF.getFrameInfo().setFrameAddressIsTaken(true); 594 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); 595 EVT VT = Op.getValueType(); 596 SDLoc DL(Op); 597 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 598 unsigned Depth = Op.getConstantOperandVal(0); 599 int GRLenInBytes = Subtarget.getGRLen() / 8; 600 601 while (Depth--) { 602 int Offset = -(GRLenInBytes * 2); 603 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 604 DAG.getIntPtrConstant(Offset, DL)); 605 FrameAddr = 606 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 607 } 608 return FrameAddr; 609 } 610 611 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, 612 SelectionDAG &DAG) const { 613 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 614 return SDValue(); 615 616 // Currently only support lowering return address for current frame. 617 if (Op.getConstantOperandVal(0) != 0) { 618 DAG.getContext()->emitError( 619 "return address can only be determined for the current frame"); 620 return SDValue(); 621 } 622 623 MachineFunction &MF = DAG.getMachineFunction(); 624 MF.getFrameInfo().setReturnAddressIsTaken(true); 625 MVT GRLenVT = Subtarget.getGRLenVT(); 626 627 // Return the value of the return address register, marking it an implicit 628 // live-in. 629 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(), 630 getRegClassFor(GRLenVT)); 631 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT); 632 } 633 634 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 635 SelectionDAG &DAG) const { 636 MachineFunction &MF = DAG.getMachineFunction(); 637 auto Size = Subtarget.getGRLen() / 8; 638 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false); 639 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 640 } 641 642 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, 643 SelectionDAG &DAG) const { 644 MachineFunction &MF = DAG.getMachineFunction(); 645 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); 646 647 SDLoc DL(Op); 648 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 649 getPointerTy(MF.getDataLayout())); 650 651 // vastart just stores the address of the VarArgsFrameIndex slot into the 652 // memory location argument. 653 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 654 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 655 MachinePointerInfo(SV)); 656 } 657 658 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, 659 SelectionDAG &DAG) const { 660 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 661 !Subtarget.hasBasicD() && "unexpected target features"); 662 663 SDLoc DL(Op); 664 SDValue Op0 = Op.getOperand(0); 665 if (Op0->getOpcode() == ISD::AND) { 666 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); 667 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) 668 return Op; 669 } 670 671 if (Op0->getOpcode() == LoongArchISD::BSTRPICK && 672 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && 673 Op0.getConstantOperandVal(2) == UINT64_C(0)) 674 return Op; 675 676 if (Op0.getOpcode() == ISD::AssertZext && 677 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) 678 return Op; 679 680 EVT OpVT = Op0.getValueType(); 681 EVT RetVT = Op.getValueType(); 682 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); 683 MakeLibCallOptions CallOptions; 684 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 685 SDValue Chain = SDValue(); 686 SDValue Result; 687 std::tie(Result, Chain) = 688 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 689 return Result; 690 } 691 692 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, 693 SelectionDAG &DAG) const { 694 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 695 !Subtarget.hasBasicD() && "unexpected target features"); 696 697 SDLoc DL(Op); 698 SDValue Op0 = Op.getOperand(0); 699 700 if ((Op0.getOpcode() == ISD::AssertSext || 701 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && 702 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) 703 return Op; 704 705 EVT OpVT = Op0.getValueType(); 706 EVT RetVT = Op.getValueType(); 707 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); 708 MakeLibCallOptions CallOptions; 709 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 710 SDValue Chain = SDValue(); 711 SDValue Result; 712 std::tie(Result, Chain) = 713 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 714 return Result; 715 } 716 717 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, 718 SelectionDAG &DAG) const { 719 720 SDLoc DL(Op); 721 SDValue Op0 = Op.getOperand(0); 722 723 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && 724 Subtarget.is64Bit() && Subtarget.hasBasicF()) { 725 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 726 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); 727 } 728 return Op; 729 } 730 731 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, 732 SelectionDAG &DAG) const { 733 734 SDLoc DL(Op); 735 736 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && 737 !Subtarget.hasBasicD()) { 738 SDValue Dst = 739 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); 740 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); 741 } 742 743 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); 744 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); 745 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); 746 } 747 748 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 749 SelectionDAG &DAG, unsigned Flags) { 750 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 751 } 752 753 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 754 SelectionDAG &DAG, unsigned Flags) { 755 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 756 Flags); 757 } 758 759 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 760 SelectionDAG &DAG, unsigned Flags) { 761 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 762 N->getOffset(), Flags); 763 } 764 765 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 766 SelectionDAG &DAG, unsigned Flags) { 767 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 768 } 769 770 template <class NodeTy> 771 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 772 CodeModel::Model M, 773 bool IsLocal) const { 774 SDLoc DL(N); 775 EVT Ty = getPointerTy(DAG.getDataLayout()); 776 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 777 778 switch (M) { 779 default: 780 report_fatal_error("Unsupported code model"); 781 782 case CodeModel::Large: { 783 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 784 785 // This is not actually used, but is necessary for successfully matching 786 // the PseudoLA_*_LARGE nodes. 787 SDValue Tmp = DAG.getConstant(0, DL, Ty); 788 if (IsLocal) 789 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that 790 // eventually becomes the desired 5-insn code sequence. 791 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty, 792 Tmp, Addr), 793 0); 794 795 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually 796 // becomes the desired 5-insn code sequence. 797 return SDValue( 798 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr), 799 0); 800 } 801 802 case CodeModel::Small: 803 case CodeModel::Medium: 804 if (IsLocal) 805 // This generates the pattern (PseudoLA_PCREL sym), which expands to 806 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). 807 return SDValue( 808 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0); 809 810 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d 811 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). 812 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 813 0); 814 } 815 } 816 817 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, 818 SelectionDAG &DAG) const { 819 return getAddr(cast<BlockAddressSDNode>(Op), DAG, 820 DAG.getTarget().getCodeModel()); 821 } 822 823 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, 824 SelectionDAG &DAG) const { 825 return getAddr(cast<JumpTableSDNode>(Op), DAG, 826 DAG.getTarget().getCodeModel()); 827 } 828 829 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, 830 SelectionDAG &DAG) const { 831 return getAddr(cast<ConstantPoolSDNode>(Op), DAG, 832 DAG.getTarget().getCodeModel()); 833 } 834 835 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, 836 SelectionDAG &DAG) const { 837 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 838 assert(N->getOffset() == 0 && "unexpected offset in global node"); 839 auto CM = DAG.getTarget().getCodeModel(); 840 const GlobalValue *GV = N->getGlobal(); 841 842 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) { 843 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel()) 844 CM = *GCM; 845 } 846 847 return getAddr(N, DAG, CM, GV->isDSOLocal()); 848 } 849 850 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 851 SelectionDAG &DAG, 852 unsigned Opc, 853 bool Large) const { 854 SDLoc DL(N); 855 EVT Ty = getPointerTy(DAG.getDataLayout()); 856 MVT GRLenVT = Subtarget.getGRLenVT(); 857 858 // This is not actually used, but is necessary for successfully matching the 859 // PseudoLA_*_LARGE nodes. 860 SDValue Tmp = DAG.getConstant(0, DL, Ty); 861 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 862 SDValue Offset = Large 863 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 864 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 865 866 // Add the thread pointer. 867 return DAG.getNode(ISD::ADD, DL, Ty, Offset, 868 DAG.getRegister(LoongArch::R2, GRLenVT)); 869 } 870 871 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 872 SelectionDAG &DAG, 873 unsigned Opc, 874 bool Large) const { 875 SDLoc DL(N); 876 EVT Ty = getPointerTy(DAG.getDataLayout()); 877 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 878 879 // This is not actually used, but is necessary for successfully matching the 880 // PseudoLA_*_LARGE nodes. 881 SDValue Tmp = DAG.getConstant(0, DL, Ty); 882 883 // Use a PC-relative addressing mode to access the dynamic GOT address. 884 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 885 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 886 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 887 888 // Prepare argument list to generate call. 889 ArgListTy Args; 890 ArgListEntry Entry; 891 Entry.Node = Load; 892 Entry.Ty = CallTy; 893 Args.push_back(Entry); 894 895 // Setup call to __tls_get_addr. 896 TargetLowering::CallLoweringInfo CLI(DAG); 897 CLI.setDebugLoc(DL) 898 .setChain(DAG.getEntryNode()) 899 .setLibCallee(CallingConv::C, CallTy, 900 DAG.getExternalSymbol("__tls_get_addr", Ty), 901 std::move(Args)); 902 903 return LowerCallTo(CLI).first; 904 } 905 906 SDValue 907 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, 908 SelectionDAG &DAG) const { 909 if (DAG.getMachineFunction().getFunction().getCallingConv() == 910 CallingConv::GHC) 911 report_fatal_error("In GHC calling convention TLS is not supported"); 912 913 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; 914 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64"); 915 916 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 917 assert(N->getOffset() == 0 && "unexpected offset in global node"); 918 919 SDValue Addr; 920 switch (getTargetMachine().getTLSModel(N->getGlobal())) { 921 case TLSModel::GeneralDynamic: 922 // In this model, application code calls the dynamic linker function 923 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at 924 // runtime. 925 Addr = getDynamicTLSAddr(N, DAG, 926 Large ? LoongArch::PseudoLA_TLS_GD_LARGE 927 : LoongArch::PseudoLA_TLS_GD, 928 Large); 929 break; 930 case TLSModel::LocalDynamic: 931 // Same as GeneralDynamic, except for assembly modifiers and relocation 932 // records. 933 Addr = getDynamicTLSAddr(N, DAG, 934 Large ? LoongArch::PseudoLA_TLS_LD_LARGE 935 : LoongArch::PseudoLA_TLS_LD, 936 Large); 937 break; 938 case TLSModel::InitialExec: 939 // This model uses the GOT to resolve TLS offsets. 940 Addr = getStaticTLSAddr(N, DAG, 941 Large ? LoongArch::PseudoLA_TLS_IE_LARGE 942 : LoongArch::PseudoLA_TLS_IE, 943 Large); 944 break; 945 case TLSModel::LocalExec: 946 // This model is used when static linking as the TLS offsets are resolved 947 // during program linking. 948 // 949 // This node doesn't need an extra argument for the large code model. 950 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE); 951 break; 952 } 953 954 return Addr; 955 } 956 957 template <unsigned N> 958 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, 959 SelectionDAG &DAG, bool IsSigned = false) { 960 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 961 // Check the ImmArg. 962 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 963 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 964 DAG.getContext()->emitError(Op->getOperationName(0) + 965 ": argument out of range."); 966 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); 967 } 968 return SDValue(); 969 } 970 971 SDValue 972 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 973 SelectionDAG &DAG) const { 974 SDLoc DL(Op); 975 switch (Op.getConstantOperandVal(0)) { 976 default: 977 return SDValue(); // Don't custom lower most intrinsics. 978 case Intrinsic::thread_pointer: { 979 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 980 return DAG.getRegister(LoongArch::R2, PtrVT); 981 } 982 case Intrinsic::loongarch_lsx_vpickve2gr_d: 983 case Intrinsic::loongarch_lsx_vpickve2gr_du: 984 case Intrinsic::loongarch_lsx_vreplvei_d: 985 case Intrinsic::loongarch_lasx_xvrepl128vei_d: 986 return checkIntrinsicImmArg<1>(Op, 2, DAG); 987 case Intrinsic::loongarch_lsx_vreplvei_w: 988 case Intrinsic::loongarch_lasx_xvrepl128vei_w: 989 case Intrinsic::loongarch_lasx_xvpickve2gr_d: 990 case Intrinsic::loongarch_lasx_xvpickve2gr_du: 991 case Intrinsic::loongarch_lasx_xvpickve_d: 992 case Intrinsic::loongarch_lasx_xvpickve_d_f: 993 return checkIntrinsicImmArg<2>(Op, 2, DAG); 994 case Intrinsic::loongarch_lasx_xvinsve0_d: 995 return checkIntrinsicImmArg<2>(Op, 3, DAG); 996 case Intrinsic::loongarch_lsx_vsat_b: 997 case Intrinsic::loongarch_lsx_vsat_bu: 998 case Intrinsic::loongarch_lsx_vrotri_b: 999 case Intrinsic::loongarch_lsx_vsllwil_h_b: 1000 case Intrinsic::loongarch_lsx_vsllwil_hu_bu: 1001 case Intrinsic::loongarch_lsx_vsrlri_b: 1002 case Intrinsic::loongarch_lsx_vsrari_b: 1003 case Intrinsic::loongarch_lsx_vreplvei_h: 1004 case Intrinsic::loongarch_lasx_xvsat_b: 1005 case Intrinsic::loongarch_lasx_xvsat_bu: 1006 case Intrinsic::loongarch_lasx_xvrotri_b: 1007 case Intrinsic::loongarch_lasx_xvsllwil_h_b: 1008 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: 1009 case Intrinsic::loongarch_lasx_xvsrlri_b: 1010 case Intrinsic::loongarch_lasx_xvsrari_b: 1011 case Intrinsic::loongarch_lasx_xvrepl128vei_h: 1012 case Intrinsic::loongarch_lasx_xvpickve_w: 1013 case Intrinsic::loongarch_lasx_xvpickve_w_f: 1014 return checkIntrinsicImmArg<3>(Op, 2, DAG); 1015 case Intrinsic::loongarch_lasx_xvinsve0_w: 1016 return checkIntrinsicImmArg<3>(Op, 3, DAG); 1017 case Intrinsic::loongarch_lsx_vsat_h: 1018 case Intrinsic::loongarch_lsx_vsat_hu: 1019 case Intrinsic::loongarch_lsx_vrotri_h: 1020 case Intrinsic::loongarch_lsx_vsllwil_w_h: 1021 case Intrinsic::loongarch_lsx_vsllwil_wu_hu: 1022 case Intrinsic::loongarch_lsx_vsrlri_h: 1023 case Intrinsic::loongarch_lsx_vsrari_h: 1024 case Intrinsic::loongarch_lsx_vreplvei_b: 1025 case Intrinsic::loongarch_lasx_xvsat_h: 1026 case Intrinsic::loongarch_lasx_xvsat_hu: 1027 case Intrinsic::loongarch_lasx_xvrotri_h: 1028 case Intrinsic::loongarch_lasx_xvsllwil_w_h: 1029 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: 1030 case Intrinsic::loongarch_lasx_xvsrlri_h: 1031 case Intrinsic::loongarch_lasx_xvsrari_h: 1032 case Intrinsic::loongarch_lasx_xvrepl128vei_b: 1033 return checkIntrinsicImmArg<4>(Op, 2, DAG); 1034 case Intrinsic::loongarch_lsx_vsrlni_b_h: 1035 case Intrinsic::loongarch_lsx_vsrani_b_h: 1036 case Intrinsic::loongarch_lsx_vsrlrni_b_h: 1037 case Intrinsic::loongarch_lsx_vsrarni_b_h: 1038 case Intrinsic::loongarch_lsx_vssrlni_b_h: 1039 case Intrinsic::loongarch_lsx_vssrani_b_h: 1040 case Intrinsic::loongarch_lsx_vssrlni_bu_h: 1041 case Intrinsic::loongarch_lsx_vssrani_bu_h: 1042 case Intrinsic::loongarch_lsx_vssrlrni_b_h: 1043 case Intrinsic::loongarch_lsx_vssrarni_b_h: 1044 case Intrinsic::loongarch_lsx_vssrlrni_bu_h: 1045 case Intrinsic::loongarch_lsx_vssrarni_bu_h: 1046 case Intrinsic::loongarch_lasx_xvsrlni_b_h: 1047 case Intrinsic::loongarch_lasx_xvsrani_b_h: 1048 case Intrinsic::loongarch_lasx_xvsrlrni_b_h: 1049 case Intrinsic::loongarch_lasx_xvsrarni_b_h: 1050 case Intrinsic::loongarch_lasx_xvssrlni_b_h: 1051 case Intrinsic::loongarch_lasx_xvssrani_b_h: 1052 case Intrinsic::loongarch_lasx_xvssrlni_bu_h: 1053 case Intrinsic::loongarch_lasx_xvssrani_bu_h: 1054 case Intrinsic::loongarch_lasx_xvssrlrni_b_h: 1055 case Intrinsic::loongarch_lasx_xvssrarni_b_h: 1056 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: 1057 case Intrinsic::loongarch_lasx_xvssrarni_bu_h: 1058 return checkIntrinsicImmArg<4>(Op, 3, DAG); 1059 case Intrinsic::loongarch_lsx_vsat_w: 1060 case Intrinsic::loongarch_lsx_vsat_wu: 1061 case Intrinsic::loongarch_lsx_vrotri_w: 1062 case Intrinsic::loongarch_lsx_vsllwil_d_w: 1063 case Intrinsic::loongarch_lsx_vsllwil_du_wu: 1064 case Intrinsic::loongarch_lsx_vsrlri_w: 1065 case Intrinsic::loongarch_lsx_vsrari_w: 1066 case Intrinsic::loongarch_lsx_vslei_bu: 1067 case Intrinsic::loongarch_lsx_vslei_hu: 1068 case Intrinsic::loongarch_lsx_vslei_wu: 1069 case Intrinsic::loongarch_lsx_vslei_du: 1070 case Intrinsic::loongarch_lsx_vslti_bu: 1071 case Intrinsic::loongarch_lsx_vslti_hu: 1072 case Intrinsic::loongarch_lsx_vslti_wu: 1073 case Intrinsic::loongarch_lsx_vslti_du: 1074 case Intrinsic::loongarch_lsx_vbsll_v: 1075 case Intrinsic::loongarch_lsx_vbsrl_v: 1076 case Intrinsic::loongarch_lasx_xvsat_w: 1077 case Intrinsic::loongarch_lasx_xvsat_wu: 1078 case Intrinsic::loongarch_lasx_xvrotri_w: 1079 case Intrinsic::loongarch_lasx_xvsllwil_d_w: 1080 case Intrinsic::loongarch_lasx_xvsllwil_du_wu: 1081 case Intrinsic::loongarch_lasx_xvsrlri_w: 1082 case Intrinsic::loongarch_lasx_xvsrari_w: 1083 case Intrinsic::loongarch_lasx_xvslei_bu: 1084 case Intrinsic::loongarch_lasx_xvslei_hu: 1085 case Intrinsic::loongarch_lasx_xvslei_wu: 1086 case Intrinsic::loongarch_lasx_xvslei_du: 1087 case Intrinsic::loongarch_lasx_xvslti_bu: 1088 case Intrinsic::loongarch_lasx_xvslti_hu: 1089 case Intrinsic::loongarch_lasx_xvslti_wu: 1090 case Intrinsic::loongarch_lasx_xvslti_du: 1091 case Intrinsic::loongarch_lasx_xvbsll_v: 1092 case Intrinsic::loongarch_lasx_xvbsrl_v: 1093 return checkIntrinsicImmArg<5>(Op, 2, DAG); 1094 case Intrinsic::loongarch_lsx_vseqi_b: 1095 case Intrinsic::loongarch_lsx_vseqi_h: 1096 case Intrinsic::loongarch_lsx_vseqi_w: 1097 case Intrinsic::loongarch_lsx_vseqi_d: 1098 case Intrinsic::loongarch_lsx_vslei_b: 1099 case Intrinsic::loongarch_lsx_vslei_h: 1100 case Intrinsic::loongarch_lsx_vslei_w: 1101 case Intrinsic::loongarch_lsx_vslei_d: 1102 case Intrinsic::loongarch_lsx_vslti_b: 1103 case Intrinsic::loongarch_lsx_vslti_h: 1104 case Intrinsic::loongarch_lsx_vslti_w: 1105 case Intrinsic::loongarch_lsx_vslti_d: 1106 case Intrinsic::loongarch_lasx_xvseqi_b: 1107 case Intrinsic::loongarch_lasx_xvseqi_h: 1108 case Intrinsic::loongarch_lasx_xvseqi_w: 1109 case Intrinsic::loongarch_lasx_xvseqi_d: 1110 case Intrinsic::loongarch_lasx_xvslei_b: 1111 case Intrinsic::loongarch_lasx_xvslei_h: 1112 case Intrinsic::loongarch_lasx_xvslei_w: 1113 case Intrinsic::loongarch_lasx_xvslei_d: 1114 case Intrinsic::loongarch_lasx_xvslti_b: 1115 case Intrinsic::loongarch_lasx_xvslti_h: 1116 case Intrinsic::loongarch_lasx_xvslti_w: 1117 case Intrinsic::loongarch_lasx_xvslti_d: 1118 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); 1119 case Intrinsic::loongarch_lsx_vsrlni_h_w: 1120 case Intrinsic::loongarch_lsx_vsrani_h_w: 1121 case Intrinsic::loongarch_lsx_vsrlrni_h_w: 1122 case Intrinsic::loongarch_lsx_vsrarni_h_w: 1123 case Intrinsic::loongarch_lsx_vssrlni_h_w: 1124 case Intrinsic::loongarch_lsx_vssrani_h_w: 1125 case Intrinsic::loongarch_lsx_vssrlni_hu_w: 1126 case Intrinsic::loongarch_lsx_vssrani_hu_w: 1127 case Intrinsic::loongarch_lsx_vssrlrni_h_w: 1128 case Intrinsic::loongarch_lsx_vssrarni_h_w: 1129 case Intrinsic::loongarch_lsx_vssrlrni_hu_w: 1130 case Intrinsic::loongarch_lsx_vssrarni_hu_w: 1131 case Intrinsic::loongarch_lsx_vfrstpi_b: 1132 case Intrinsic::loongarch_lsx_vfrstpi_h: 1133 case Intrinsic::loongarch_lasx_xvsrlni_h_w: 1134 case Intrinsic::loongarch_lasx_xvsrani_h_w: 1135 case Intrinsic::loongarch_lasx_xvsrlrni_h_w: 1136 case Intrinsic::loongarch_lasx_xvsrarni_h_w: 1137 case Intrinsic::loongarch_lasx_xvssrlni_h_w: 1138 case Intrinsic::loongarch_lasx_xvssrani_h_w: 1139 case Intrinsic::loongarch_lasx_xvssrlni_hu_w: 1140 case Intrinsic::loongarch_lasx_xvssrani_hu_w: 1141 case Intrinsic::loongarch_lasx_xvssrlrni_h_w: 1142 case Intrinsic::loongarch_lasx_xvssrarni_h_w: 1143 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: 1144 case Intrinsic::loongarch_lasx_xvssrarni_hu_w: 1145 case Intrinsic::loongarch_lasx_xvfrstpi_b: 1146 case Intrinsic::loongarch_lasx_xvfrstpi_h: 1147 return checkIntrinsicImmArg<5>(Op, 3, DAG); 1148 case Intrinsic::loongarch_lsx_vsat_d: 1149 case Intrinsic::loongarch_lsx_vsat_du: 1150 case Intrinsic::loongarch_lsx_vrotri_d: 1151 case Intrinsic::loongarch_lsx_vsrlri_d: 1152 case Intrinsic::loongarch_lsx_vsrari_d: 1153 case Intrinsic::loongarch_lasx_xvsat_d: 1154 case Intrinsic::loongarch_lasx_xvsat_du: 1155 case Intrinsic::loongarch_lasx_xvrotri_d: 1156 case Intrinsic::loongarch_lasx_xvsrlri_d: 1157 case Intrinsic::loongarch_lasx_xvsrari_d: 1158 return checkIntrinsicImmArg<6>(Op, 2, DAG); 1159 case Intrinsic::loongarch_lsx_vsrlni_w_d: 1160 case Intrinsic::loongarch_lsx_vsrani_w_d: 1161 case Intrinsic::loongarch_lsx_vsrlrni_w_d: 1162 case Intrinsic::loongarch_lsx_vsrarni_w_d: 1163 case Intrinsic::loongarch_lsx_vssrlni_w_d: 1164 case Intrinsic::loongarch_lsx_vssrani_w_d: 1165 case Intrinsic::loongarch_lsx_vssrlni_wu_d: 1166 case Intrinsic::loongarch_lsx_vssrani_wu_d: 1167 case Intrinsic::loongarch_lsx_vssrlrni_w_d: 1168 case Intrinsic::loongarch_lsx_vssrarni_w_d: 1169 case Intrinsic::loongarch_lsx_vssrlrni_wu_d: 1170 case Intrinsic::loongarch_lsx_vssrarni_wu_d: 1171 case Intrinsic::loongarch_lasx_xvsrlni_w_d: 1172 case Intrinsic::loongarch_lasx_xvsrani_w_d: 1173 case Intrinsic::loongarch_lasx_xvsrlrni_w_d: 1174 case Intrinsic::loongarch_lasx_xvsrarni_w_d: 1175 case Intrinsic::loongarch_lasx_xvssrlni_w_d: 1176 case Intrinsic::loongarch_lasx_xvssrani_w_d: 1177 case Intrinsic::loongarch_lasx_xvssrlni_wu_d: 1178 case Intrinsic::loongarch_lasx_xvssrani_wu_d: 1179 case Intrinsic::loongarch_lasx_xvssrlrni_w_d: 1180 case Intrinsic::loongarch_lasx_xvssrarni_w_d: 1181 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: 1182 case Intrinsic::loongarch_lasx_xvssrarni_wu_d: 1183 return checkIntrinsicImmArg<6>(Op, 3, DAG); 1184 case Intrinsic::loongarch_lsx_vsrlni_d_q: 1185 case Intrinsic::loongarch_lsx_vsrani_d_q: 1186 case Intrinsic::loongarch_lsx_vsrlrni_d_q: 1187 case Intrinsic::loongarch_lsx_vsrarni_d_q: 1188 case Intrinsic::loongarch_lsx_vssrlni_d_q: 1189 case Intrinsic::loongarch_lsx_vssrani_d_q: 1190 case Intrinsic::loongarch_lsx_vssrlni_du_q: 1191 case Intrinsic::loongarch_lsx_vssrani_du_q: 1192 case Intrinsic::loongarch_lsx_vssrlrni_d_q: 1193 case Intrinsic::loongarch_lsx_vssrarni_d_q: 1194 case Intrinsic::loongarch_lsx_vssrlrni_du_q: 1195 case Intrinsic::loongarch_lsx_vssrarni_du_q: 1196 case Intrinsic::loongarch_lasx_xvsrlni_d_q: 1197 case Intrinsic::loongarch_lasx_xvsrani_d_q: 1198 case Intrinsic::loongarch_lasx_xvsrlrni_d_q: 1199 case Intrinsic::loongarch_lasx_xvsrarni_d_q: 1200 case Intrinsic::loongarch_lasx_xvssrlni_d_q: 1201 case Intrinsic::loongarch_lasx_xvssrani_d_q: 1202 case Intrinsic::loongarch_lasx_xvssrlni_du_q: 1203 case Intrinsic::loongarch_lasx_xvssrani_du_q: 1204 case Intrinsic::loongarch_lasx_xvssrlrni_d_q: 1205 case Intrinsic::loongarch_lasx_xvssrarni_d_q: 1206 case Intrinsic::loongarch_lasx_xvssrlrni_du_q: 1207 case Intrinsic::loongarch_lasx_xvssrarni_du_q: 1208 return checkIntrinsicImmArg<7>(Op, 3, DAG); 1209 case Intrinsic::loongarch_lsx_vnori_b: 1210 case Intrinsic::loongarch_lsx_vshuf4i_b: 1211 case Intrinsic::loongarch_lsx_vshuf4i_h: 1212 case Intrinsic::loongarch_lsx_vshuf4i_w: 1213 case Intrinsic::loongarch_lasx_xvnori_b: 1214 case Intrinsic::loongarch_lasx_xvshuf4i_b: 1215 case Intrinsic::loongarch_lasx_xvshuf4i_h: 1216 case Intrinsic::loongarch_lasx_xvshuf4i_w: 1217 case Intrinsic::loongarch_lasx_xvpermi_d: 1218 return checkIntrinsicImmArg<8>(Op, 2, DAG); 1219 case Intrinsic::loongarch_lsx_vshuf4i_d: 1220 case Intrinsic::loongarch_lsx_vpermi_w: 1221 case Intrinsic::loongarch_lsx_vbitseli_b: 1222 case Intrinsic::loongarch_lsx_vextrins_b: 1223 case Intrinsic::loongarch_lsx_vextrins_h: 1224 case Intrinsic::loongarch_lsx_vextrins_w: 1225 case Intrinsic::loongarch_lsx_vextrins_d: 1226 case Intrinsic::loongarch_lasx_xvshuf4i_d: 1227 case Intrinsic::loongarch_lasx_xvpermi_w: 1228 case Intrinsic::loongarch_lasx_xvpermi_q: 1229 case Intrinsic::loongarch_lasx_xvbitseli_b: 1230 case Intrinsic::loongarch_lasx_xvextrins_b: 1231 case Intrinsic::loongarch_lasx_xvextrins_h: 1232 case Intrinsic::loongarch_lasx_xvextrins_w: 1233 case Intrinsic::loongarch_lasx_xvextrins_d: 1234 return checkIntrinsicImmArg<8>(Op, 3, DAG); 1235 case Intrinsic::loongarch_lsx_vrepli_b: 1236 case Intrinsic::loongarch_lsx_vrepli_h: 1237 case Intrinsic::loongarch_lsx_vrepli_w: 1238 case Intrinsic::loongarch_lsx_vrepli_d: 1239 case Intrinsic::loongarch_lasx_xvrepli_b: 1240 case Intrinsic::loongarch_lasx_xvrepli_h: 1241 case Intrinsic::loongarch_lasx_xvrepli_w: 1242 case Intrinsic::loongarch_lasx_xvrepli_d: 1243 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); 1244 case Intrinsic::loongarch_lsx_vldi: 1245 case Intrinsic::loongarch_lasx_xvldi: 1246 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); 1247 } 1248 } 1249 1250 // Helper function that emits error message for intrinsics with chain and return 1251 // merge values of a UNDEF and the chain. 1252 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, 1253 StringRef ErrorMsg, 1254 SelectionDAG &DAG) { 1255 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 1256 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, 1257 SDLoc(Op)); 1258 } 1259 1260 SDValue 1261 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 1262 SelectionDAG &DAG) const { 1263 SDLoc DL(Op); 1264 MVT GRLenVT = Subtarget.getGRLenVT(); 1265 EVT VT = Op.getValueType(); 1266 SDValue Chain = Op.getOperand(0); 1267 const StringRef ErrorMsgOOR = "argument out of range"; 1268 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 1269 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 1270 1271 switch (Op.getConstantOperandVal(1)) { 1272 default: 1273 return Op; 1274 case Intrinsic::loongarch_crc_w_b_w: 1275 case Intrinsic::loongarch_crc_w_h_w: 1276 case Intrinsic::loongarch_crc_w_w_w: 1277 case Intrinsic::loongarch_crc_w_d_w: 1278 case Intrinsic::loongarch_crcc_w_b_w: 1279 case Intrinsic::loongarch_crcc_w_h_w: 1280 case Intrinsic::loongarch_crcc_w_w_w: 1281 case Intrinsic::loongarch_crcc_w_d_w: 1282 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); 1283 case Intrinsic::loongarch_csrrd_w: 1284 case Intrinsic::loongarch_csrrd_d: { 1285 unsigned Imm = Op.getConstantOperandVal(2); 1286 return !isUInt<14>(Imm) 1287 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1288 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 1289 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 1290 } 1291 case Intrinsic::loongarch_csrwr_w: 1292 case Intrinsic::loongarch_csrwr_d: { 1293 unsigned Imm = Op.getConstantOperandVal(3); 1294 return !isUInt<14>(Imm) 1295 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1296 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 1297 {Chain, Op.getOperand(2), 1298 DAG.getConstant(Imm, DL, GRLenVT)}); 1299 } 1300 case Intrinsic::loongarch_csrxchg_w: 1301 case Intrinsic::loongarch_csrxchg_d: { 1302 unsigned Imm = Op.getConstantOperandVal(4); 1303 return !isUInt<14>(Imm) 1304 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1305 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 1306 {Chain, Op.getOperand(2), Op.getOperand(3), 1307 DAG.getConstant(Imm, DL, GRLenVT)}); 1308 } 1309 case Intrinsic::loongarch_iocsrrd_d: { 1310 return DAG.getNode( 1311 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other}, 1312 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))}); 1313 } 1314 #define IOCSRRD_CASE(NAME, NODE) \ 1315 case Intrinsic::loongarch_##NAME: { \ 1316 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ 1317 {Chain, Op.getOperand(2)}); \ 1318 } 1319 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 1320 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 1321 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 1322 #undef IOCSRRD_CASE 1323 case Intrinsic::loongarch_cpucfg: { 1324 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 1325 {Chain, Op.getOperand(2)}); 1326 } 1327 case Intrinsic::loongarch_lddir_d: { 1328 unsigned Imm = Op.getConstantOperandVal(3); 1329 return !isUInt<8>(Imm) 1330 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1331 : Op; 1332 } 1333 case Intrinsic::loongarch_movfcsr2gr: { 1334 if (!Subtarget.hasBasicF()) 1335 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); 1336 unsigned Imm = Op.getConstantOperandVal(2); 1337 return !isUInt<2>(Imm) 1338 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1339 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, 1340 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 1341 } 1342 case Intrinsic::loongarch_lsx_vld: 1343 case Intrinsic::loongarch_lsx_vldrepl_b: 1344 case Intrinsic::loongarch_lasx_xvld: 1345 case Intrinsic::loongarch_lasx_xvldrepl_b: 1346 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 1347 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 1348 : SDValue(); 1349 case Intrinsic::loongarch_lsx_vldrepl_h: 1350 case Intrinsic::loongarch_lasx_xvldrepl_h: 1351 return !isShiftedInt<11, 1>( 1352 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 1353 ? emitIntrinsicWithChainErrorMessage( 1354 Op, "argument out of range or not a multiple of 2", DAG) 1355 : SDValue(); 1356 case Intrinsic::loongarch_lsx_vldrepl_w: 1357 case Intrinsic::loongarch_lasx_xvldrepl_w: 1358 return !isShiftedInt<10, 2>( 1359 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 1360 ? emitIntrinsicWithChainErrorMessage( 1361 Op, "argument out of range or not a multiple of 4", DAG) 1362 : SDValue(); 1363 case Intrinsic::loongarch_lsx_vldrepl_d: 1364 case Intrinsic::loongarch_lasx_xvldrepl_d: 1365 return !isShiftedInt<9, 3>( 1366 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 1367 ? emitIntrinsicWithChainErrorMessage( 1368 Op, "argument out of range or not a multiple of 8", DAG) 1369 : SDValue(); 1370 } 1371 } 1372 1373 // Helper function that emits error message for intrinsics with void return 1374 // value and return the chain. 1375 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, 1376 SelectionDAG &DAG) { 1377 1378 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 1379 return Op.getOperand(0); 1380 } 1381 1382 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, 1383 SelectionDAG &DAG) const { 1384 SDLoc DL(Op); 1385 MVT GRLenVT = Subtarget.getGRLenVT(); 1386 SDValue Chain = Op.getOperand(0); 1387 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1); 1388 SDValue Op2 = Op.getOperand(2); 1389 const StringRef ErrorMsgOOR = "argument out of range"; 1390 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 1391 const StringRef ErrorMsgReqLA32 = "requires loongarch32"; 1392 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 1393 1394 switch (IntrinsicEnum) { 1395 default: 1396 // TODO: Add more Intrinsics. 1397 return SDValue(); 1398 case Intrinsic::loongarch_cacop_d: 1399 case Intrinsic::loongarch_cacop_w: { 1400 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) 1401 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG); 1402 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) 1403 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); 1404 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) 1405 unsigned Imm1 = Op2->getAsZExtVal(); 1406 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue(); 1407 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) 1408 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); 1409 return Op; 1410 } 1411 case Intrinsic::loongarch_dbar: { 1412 unsigned Imm = Op2->getAsZExtVal(); 1413 return !isUInt<15>(Imm) 1414 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1415 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, 1416 DAG.getConstant(Imm, DL, GRLenVT)); 1417 } 1418 case Intrinsic::loongarch_ibar: { 1419 unsigned Imm = Op2->getAsZExtVal(); 1420 return !isUInt<15>(Imm) 1421 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1422 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, 1423 DAG.getConstant(Imm, DL, GRLenVT)); 1424 } 1425 case Intrinsic::loongarch_break: { 1426 unsigned Imm = Op2->getAsZExtVal(); 1427 return !isUInt<15>(Imm) 1428 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1429 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, 1430 DAG.getConstant(Imm, DL, GRLenVT)); 1431 } 1432 case Intrinsic::loongarch_movgr2fcsr: { 1433 if (!Subtarget.hasBasicF()) 1434 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); 1435 unsigned Imm = Op2->getAsZExtVal(); 1436 return !isUInt<2>(Imm) 1437 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1438 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, 1439 DAG.getConstant(Imm, DL, GRLenVT), 1440 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, 1441 Op.getOperand(3))); 1442 } 1443 case Intrinsic::loongarch_syscall: { 1444 unsigned Imm = Op2->getAsZExtVal(); 1445 return !isUInt<15>(Imm) 1446 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1447 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, 1448 DAG.getConstant(Imm, DL, GRLenVT)); 1449 } 1450 #define IOCSRWR_CASE(NAME, NODE) \ 1451 case Intrinsic::loongarch_##NAME: { \ 1452 SDValue Op3 = Op.getOperand(3); \ 1453 return Subtarget.is64Bit() \ 1454 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ 1455 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 1456 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ 1457 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ 1458 Op3); \ 1459 } 1460 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); 1461 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); 1462 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); 1463 #undef IOCSRWR_CASE 1464 case Intrinsic::loongarch_iocsrwr_d: { 1465 return !Subtarget.is64Bit() 1466 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 1467 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, 1468 Op2, 1469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 1470 Op.getOperand(3))); 1471 } 1472 #define ASRT_LE_GT_CASE(NAME) \ 1473 case Intrinsic::loongarch_##NAME: { \ 1474 return !Subtarget.is64Bit() \ 1475 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ 1476 : Op; \ 1477 } 1478 ASRT_LE_GT_CASE(asrtle_d) 1479 ASRT_LE_GT_CASE(asrtgt_d) 1480 #undef ASRT_LE_GT_CASE 1481 case Intrinsic::loongarch_ldpte_d: { 1482 unsigned Imm = Op.getConstantOperandVal(3); 1483 return !Subtarget.is64Bit() 1484 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 1485 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1486 : Op; 1487 } 1488 case Intrinsic::loongarch_lsx_vst: 1489 case Intrinsic::loongarch_lasx_xvst: 1490 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) 1491 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1492 : SDValue(); 1493 case Intrinsic::loongarch_lasx_xvstelm_b: 1494 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1495 !isUInt<5>(Op.getConstantOperandVal(5))) 1496 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1497 : SDValue(); 1498 case Intrinsic::loongarch_lsx_vstelm_b: 1499 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1500 !isUInt<4>(Op.getConstantOperandVal(5))) 1501 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 1502 : SDValue(); 1503 case Intrinsic::loongarch_lasx_xvstelm_h: 1504 return (!isShiftedInt<8, 1>( 1505 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1506 !isUInt<4>(Op.getConstantOperandVal(5))) 1507 ? emitIntrinsicErrorMessage( 1508 Op, "argument out of range or not a multiple of 2", DAG) 1509 : SDValue(); 1510 case Intrinsic::loongarch_lsx_vstelm_h: 1511 return (!isShiftedInt<8, 1>( 1512 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1513 !isUInt<3>(Op.getConstantOperandVal(5))) 1514 ? emitIntrinsicErrorMessage( 1515 Op, "argument out of range or not a multiple of 2", DAG) 1516 : SDValue(); 1517 case Intrinsic::loongarch_lasx_xvstelm_w: 1518 return (!isShiftedInt<8, 2>( 1519 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1520 !isUInt<3>(Op.getConstantOperandVal(5))) 1521 ? emitIntrinsicErrorMessage( 1522 Op, "argument out of range or not a multiple of 4", DAG) 1523 : SDValue(); 1524 case Intrinsic::loongarch_lsx_vstelm_w: 1525 return (!isShiftedInt<8, 2>( 1526 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1527 !isUInt<2>(Op.getConstantOperandVal(5))) 1528 ? emitIntrinsicErrorMessage( 1529 Op, "argument out of range or not a multiple of 4", DAG) 1530 : SDValue(); 1531 case Intrinsic::loongarch_lasx_xvstelm_d: 1532 return (!isShiftedInt<8, 3>( 1533 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1534 !isUInt<2>(Op.getConstantOperandVal(5))) 1535 ? emitIntrinsicErrorMessage( 1536 Op, "argument out of range or not a multiple of 8", DAG) 1537 : SDValue(); 1538 case Intrinsic::loongarch_lsx_vstelm_d: 1539 return (!isShiftedInt<8, 3>( 1540 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 1541 !isUInt<1>(Op.getConstantOperandVal(5))) 1542 ? emitIntrinsicErrorMessage( 1543 Op, "argument out of range or not a multiple of 8", DAG) 1544 : SDValue(); 1545 } 1546 } 1547 1548 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, 1549 SelectionDAG &DAG) const { 1550 SDLoc DL(Op); 1551 SDValue Lo = Op.getOperand(0); 1552 SDValue Hi = Op.getOperand(1); 1553 SDValue Shamt = Op.getOperand(2); 1554 EVT VT = Lo.getValueType(); 1555 1556 // if Shamt-GRLen < 0: // Shamt < GRLen 1557 // Lo = Lo << Shamt 1558 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) 1559 // else: 1560 // Lo = 0 1561 // Hi = Lo << (Shamt-GRLen) 1562 1563 SDValue Zero = DAG.getConstant(0, DL, VT); 1564 SDValue One = DAG.getConstant(1, DL, VT); 1565 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 1566 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 1567 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 1568 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 1569 1570 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1571 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1572 SDValue ShiftRightLo = 1573 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); 1574 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1575 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1576 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); 1577 1578 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 1579 1580 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1581 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1582 1583 SDValue Parts[2] = {Lo, Hi}; 1584 return DAG.getMergeValues(Parts, DL); 1585 } 1586 1587 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, 1588 SelectionDAG &DAG, 1589 bool IsSRA) const { 1590 SDLoc DL(Op); 1591 SDValue Lo = Op.getOperand(0); 1592 SDValue Hi = Op.getOperand(1); 1593 SDValue Shamt = Op.getOperand(2); 1594 EVT VT = Lo.getValueType(); 1595 1596 // SRA expansion: 1597 // if Shamt-GRLen < 0: // Shamt < GRLen 1598 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 1599 // Hi = Hi >>s Shamt 1600 // else: 1601 // Lo = Hi >>s (Shamt-GRLen); 1602 // Hi = Hi >>s (GRLen-1) 1603 // 1604 // SRL expansion: 1605 // if Shamt-GRLen < 0: // Shamt < GRLen 1606 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 1607 // Hi = Hi >>u Shamt 1608 // else: 1609 // Lo = Hi >>u (Shamt-GRLen); 1610 // Hi = 0; 1611 1612 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 1613 1614 SDValue Zero = DAG.getConstant(0, DL, VT); 1615 SDValue One = DAG.getConstant(1, DL, VT); 1616 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 1617 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 1618 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 1619 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 1620 1621 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1622 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1623 SDValue ShiftLeftHi = 1624 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); 1625 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1626 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1627 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); 1628 SDValue HiFalse = 1629 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero; 1630 1631 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 1632 1633 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1634 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1635 1636 SDValue Parts[2] = {Lo, Hi}; 1637 return DAG.getMergeValues(Parts, DL); 1638 } 1639 1640 // Returns the opcode of the target-specific SDNode that implements the 32-bit 1641 // form of the given Opcode. 1642 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { 1643 switch (Opcode) { 1644 default: 1645 llvm_unreachable("Unexpected opcode"); 1646 case ISD::SHL: 1647 return LoongArchISD::SLL_W; 1648 case ISD::SRA: 1649 return LoongArchISD::SRA_W; 1650 case ISD::SRL: 1651 return LoongArchISD::SRL_W; 1652 case ISD::ROTR: 1653 return LoongArchISD::ROTR_W; 1654 case ISD::ROTL: 1655 return LoongArchISD::ROTL_W; 1656 case ISD::CTTZ: 1657 return LoongArchISD::CTZ_W; 1658 case ISD::CTLZ: 1659 return LoongArchISD::CLZ_W; 1660 } 1661 } 1662 1663 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 1664 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would 1665 // otherwise be promoted to i64, making it difficult to select the 1666 // SLL_W/.../*W later one because the fact the operation was originally of 1667 // type i8/i16/i32 is lost. 1668 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, 1669 unsigned ExtOpc = ISD::ANY_EXTEND) { 1670 SDLoc DL(N); 1671 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); 1672 SDValue NewOp0, NewRes; 1673 1674 switch (NumOp) { 1675 default: 1676 llvm_unreachable("Unexpected NumOp"); 1677 case 1: { 1678 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 1679 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0); 1680 break; 1681 } 1682 case 2: { 1683 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 1684 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 1685 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 1686 break; 1687 } 1688 // TODO:Handle more NumOp. 1689 } 1690 1691 // ReplaceNodeResults requires we maintain the same type for the return 1692 // value. 1693 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 1694 } 1695 1696 // Helper function that emits error message for intrinsics with/without chain 1697 // and return a UNDEF or and the chain as the results. 1698 static void emitErrorAndReplaceIntrinsicResults( 1699 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, 1700 StringRef ErrorMsg, bool WithChain = true) { 1701 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); 1702 Results.push_back(DAG.getUNDEF(N->getValueType(0))); 1703 if (!WithChain) 1704 return; 1705 Results.push_back(N->getOperand(0)); 1706 } 1707 1708 template <unsigned N> 1709 static void 1710 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, 1711 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, 1712 unsigned ResOp) { 1713 const StringRef ErrorMsgOOR = "argument out of range"; 1714 unsigned Imm = Node->getConstantOperandVal(2); 1715 if (!isUInt<N>(Imm)) { 1716 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, 1717 /*WithChain=*/false); 1718 return; 1719 } 1720 SDLoc DL(Node); 1721 SDValue Vec = Node->getOperand(1); 1722 1723 SDValue PickElt = 1724 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, 1725 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), 1726 DAG.getValueType(Vec.getValueType().getVectorElementType())); 1727 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), 1728 PickElt.getValue(0))); 1729 } 1730 1731 static void replaceVecCondBranchResults(SDNode *N, 1732 SmallVectorImpl<SDValue> &Results, 1733 SelectionDAG &DAG, 1734 const LoongArchSubtarget &Subtarget, 1735 unsigned ResOp) { 1736 SDLoc DL(N); 1737 SDValue Vec = N->getOperand(1); 1738 1739 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); 1740 Results.push_back( 1741 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); 1742 } 1743 1744 static void 1745 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 1746 SelectionDAG &DAG, 1747 const LoongArchSubtarget &Subtarget) { 1748 switch (N->getConstantOperandVal(0)) { 1749 default: 1750 llvm_unreachable("Unexpected Intrinsic."); 1751 case Intrinsic::loongarch_lsx_vpickve2gr_b: 1752 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 1753 LoongArchISD::VPICK_SEXT_ELT); 1754 break; 1755 case Intrinsic::loongarch_lsx_vpickve2gr_h: 1756 case Intrinsic::loongarch_lasx_xvpickve2gr_w: 1757 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 1758 LoongArchISD::VPICK_SEXT_ELT); 1759 break; 1760 case Intrinsic::loongarch_lsx_vpickve2gr_w: 1761 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 1762 LoongArchISD::VPICK_SEXT_ELT); 1763 break; 1764 case Intrinsic::loongarch_lsx_vpickve2gr_bu: 1765 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 1766 LoongArchISD::VPICK_ZEXT_ELT); 1767 break; 1768 case Intrinsic::loongarch_lsx_vpickve2gr_hu: 1769 case Intrinsic::loongarch_lasx_xvpickve2gr_wu: 1770 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 1771 LoongArchISD::VPICK_ZEXT_ELT); 1772 break; 1773 case Intrinsic::loongarch_lsx_vpickve2gr_wu: 1774 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 1775 LoongArchISD::VPICK_ZEXT_ELT); 1776 break; 1777 case Intrinsic::loongarch_lsx_bz_b: 1778 case Intrinsic::loongarch_lsx_bz_h: 1779 case Intrinsic::loongarch_lsx_bz_w: 1780 case Intrinsic::loongarch_lsx_bz_d: 1781 case Intrinsic::loongarch_lasx_xbz_b: 1782 case Intrinsic::loongarch_lasx_xbz_h: 1783 case Intrinsic::loongarch_lasx_xbz_w: 1784 case Intrinsic::loongarch_lasx_xbz_d: 1785 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1786 LoongArchISD::VALL_ZERO); 1787 break; 1788 case Intrinsic::loongarch_lsx_bz_v: 1789 case Intrinsic::loongarch_lasx_xbz_v: 1790 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1791 LoongArchISD::VANY_ZERO); 1792 break; 1793 case Intrinsic::loongarch_lsx_bnz_b: 1794 case Intrinsic::loongarch_lsx_bnz_h: 1795 case Intrinsic::loongarch_lsx_bnz_w: 1796 case Intrinsic::loongarch_lsx_bnz_d: 1797 case Intrinsic::loongarch_lasx_xbnz_b: 1798 case Intrinsic::loongarch_lasx_xbnz_h: 1799 case Intrinsic::loongarch_lasx_xbnz_w: 1800 case Intrinsic::loongarch_lasx_xbnz_d: 1801 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1802 LoongArchISD::VALL_NONZERO); 1803 break; 1804 case Intrinsic::loongarch_lsx_bnz_v: 1805 case Intrinsic::loongarch_lasx_xbnz_v: 1806 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 1807 LoongArchISD::VANY_NONZERO); 1808 break; 1809 } 1810 } 1811 1812 void LoongArchTargetLowering::ReplaceNodeResults( 1813 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 1814 SDLoc DL(N); 1815 EVT VT = N->getValueType(0); 1816 switch (N->getOpcode()) { 1817 default: 1818 llvm_unreachable("Don't know how to legalize this operation"); 1819 case ISD::SHL: 1820 case ISD::SRA: 1821 case ISD::SRL: 1822 case ISD::ROTR: 1823 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1824 "Unexpected custom legalisation"); 1825 if (N->getOperand(1).getOpcode() != ISD::Constant) { 1826 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 1827 break; 1828 } 1829 break; 1830 case ISD::ROTL: 1831 ConstantSDNode *CN; 1832 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) { 1833 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 1834 break; 1835 } 1836 break; 1837 case ISD::FP_TO_SINT: { 1838 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1839 "Unexpected custom legalisation"); 1840 SDValue Src = N->getOperand(0); 1841 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); 1842 if (getTypeAction(*DAG.getContext(), Src.getValueType()) != 1843 TargetLowering::TypeSoftenFloat) { 1844 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); 1845 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); 1846 return; 1847 } 1848 // If the FP type needs to be softened, emit a library call using the 'si' 1849 // version. If we left it to default legalization we'd end up with 'di'. 1850 RTLIB::Libcall LC; 1851 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); 1852 MakeLibCallOptions CallOptions; 1853 EVT OpVT = Src.getValueType(); 1854 CallOptions.setTypeListBeforeSoften(OpVT, VT, true); 1855 SDValue Chain = SDValue(); 1856 SDValue Result; 1857 std::tie(Result, Chain) = 1858 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); 1859 Results.push_back(Result); 1860 break; 1861 } 1862 case ISD::BITCAST: { 1863 SDValue Src = N->getOperand(0); 1864 EVT SrcVT = Src.getValueType(); 1865 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && 1866 Subtarget.hasBasicF()) { 1867 SDValue Dst = 1868 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); 1869 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); 1870 } 1871 break; 1872 } 1873 case ISD::FP_TO_UINT: { 1874 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1875 "Unexpected custom legalisation"); 1876 auto &TLI = DAG.getTargetLoweringInfo(); 1877 SDValue Tmp1, Tmp2; 1878 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); 1879 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); 1880 break; 1881 } 1882 case ISD::BSWAP: { 1883 SDValue Src = N->getOperand(0); 1884 assert((VT == MVT::i16 || VT == MVT::i32) && 1885 "Unexpected custom legalization"); 1886 MVT GRLenVT = Subtarget.getGRLenVT(); 1887 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 1888 SDValue Tmp; 1889 switch (VT.getSizeInBits()) { 1890 default: 1891 llvm_unreachable("Unexpected operand width"); 1892 case 16: 1893 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc); 1894 break; 1895 case 32: 1896 // Only LA64 will get to here due to the size mismatch between VT and 1897 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. 1898 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc); 1899 break; 1900 } 1901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 1902 break; 1903 } 1904 case ISD::BITREVERSE: { 1905 SDValue Src = N->getOperand(0); 1906 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && 1907 "Unexpected custom legalization"); 1908 MVT GRLenVT = Subtarget.getGRLenVT(); 1909 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 1910 SDValue Tmp; 1911 switch (VT.getSizeInBits()) { 1912 default: 1913 llvm_unreachable("Unexpected operand width"); 1914 case 8: 1915 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc); 1916 break; 1917 case 32: 1918 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc); 1919 break; 1920 } 1921 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 1922 break; 1923 } 1924 case ISD::CTLZ: 1925 case ISD::CTTZ: { 1926 assert(VT == MVT::i32 && Subtarget.is64Bit() && 1927 "Unexpected custom legalisation"); 1928 Results.push_back(customLegalizeToWOp(N, DAG, 1)); 1929 break; 1930 } 1931 case ISD::INTRINSIC_W_CHAIN: { 1932 SDValue Chain = N->getOperand(0); 1933 SDValue Op2 = N->getOperand(2); 1934 MVT GRLenVT = Subtarget.getGRLenVT(); 1935 const StringRef ErrorMsgOOR = "argument out of range"; 1936 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 1937 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 1938 1939 switch (N->getConstantOperandVal(1)) { 1940 default: 1941 llvm_unreachable("Unexpected Intrinsic."); 1942 case Intrinsic::loongarch_movfcsr2gr: { 1943 if (!Subtarget.hasBasicF()) { 1944 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); 1945 return; 1946 } 1947 unsigned Imm = Op2->getAsZExtVal(); 1948 if (!isUInt<2>(Imm)) { 1949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 1950 return; 1951 } 1952 SDValue MOVFCSR2GRResults = DAG.getNode( 1953 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other}, 1954 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 1955 Results.push_back( 1956 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0))); 1957 Results.push_back(MOVFCSR2GRResults.getValue(1)); 1958 break; 1959 } 1960 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ 1961 case Intrinsic::loongarch_##NAME: { \ 1962 SDValue NODE = DAG.getNode( \ 1963 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 1964 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 1965 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 1966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 1967 Results.push_back(NODE.getValue(1)); \ 1968 break; \ 1969 } 1970 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) 1971 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) 1972 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) 1973 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) 1974 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) 1975 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) 1976 #undef CRC_CASE_EXT_BINARYOP 1977 1978 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ 1979 case Intrinsic::loongarch_##NAME: { \ 1980 SDValue NODE = DAG.getNode( \ 1981 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 1982 {Chain, Op2, \ 1983 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 1984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 1985 Results.push_back(NODE.getValue(1)); \ 1986 break; \ 1987 } 1988 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) 1989 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) 1990 #undef CRC_CASE_EXT_UNARYOP 1991 #define CSR_CASE(ID) \ 1992 case Intrinsic::loongarch_##ID: { \ 1993 if (!Subtarget.is64Bit()) \ 1994 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ 1995 break; \ 1996 } 1997 CSR_CASE(csrrd_d); 1998 CSR_CASE(csrwr_d); 1999 CSR_CASE(csrxchg_d); 2000 CSR_CASE(iocsrrd_d); 2001 #undef CSR_CASE 2002 case Intrinsic::loongarch_csrrd_w: { 2003 unsigned Imm = Op2->getAsZExtVal(); 2004 if (!isUInt<14>(Imm)) { 2005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 2006 return; 2007 } 2008 SDValue CSRRDResults = 2009 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 2010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2011 Results.push_back( 2012 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0))); 2013 Results.push_back(CSRRDResults.getValue(1)); 2014 break; 2015 } 2016 case Intrinsic::loongarch_csrwr_w: { 2017 unsigned Imm = N->getConstantOperandVal(3); 2018 if (!isUInt<14>(Imm)) { 2019 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 2020 return; 2021 } 2022 SDValue CSRWRResults = 2023 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 2024 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 2025 DAG.getConstant(Imm, DL, GRLenVT)}); 2026 Results.push_back( 2027 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0))); 2028 Results.push_back(CSRWRResults.getValue(1)); 2029 break; 2030 } 2031 case Intrinsic::loongarch_csrxchg_w: { 2032 unsigned Imm = N->getConstantOperandVal(4); 2033 if (!isUInt<14>(Imm)) { 2034 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 2035 return; 2036 } 2037 SDValue CSRXCHGResults = DAG.getNode( 2038 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 2039 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 2040 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), 2041 DAG.getConstant(Imm, DL, GRLenVT)}); 2042 Results.push_back( 2043 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0))); 2044 Results.push_back(CSRXCHGResults.getValue(1)); 2045 break; 2046 } 2047 #define IOCSRRD_CASE(NAME, NODE) \ 2048 case Intrinsic::loongarch_##NAME: { \ 2049 SDValue IOCSRRDResults = \ 2050 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 2051 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ 2052 Results.push_back( \ 2053 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ 2054 Results.push_back(IOCSRRDResults.getValue(1)); \ 2055 break; \ 2056 } 2057 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 2058 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 2059 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 2060 #undef IOCSRRD_CASE 2061 case Intrinsic::loongarch_cpucfg: { 2062 SDValue CPUCFGResults = 2063 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 2064 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); 2065 Results.push_back( 2066 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0))); 2067 Results.push_back(CPUCFGResults.getValue(1)); 2068 break; 2069 } 2070 case Intrinsic::loongarch_lddir_d: { 2071 if (!Subtarget.is64Bit()) { 2072 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); 2073 return; 2074 } 2075 break; 2076 } 2077 } 2078 break; 2079 } 2080 case ISD::READ_REGISTER: { 2081 if (Subtarget.is64Bit()) 2082 DAG.getContext()->emitError( 2083 "On LA64, only 64-bit registers can be read."); 2084 else 2085 DAG.getContext()->emitError( 2086 "On LA32, only 32-bit registers can be read."); 2087 Results.push_back(DAG.getUNDEF(VT)); 2088 Results.push_back(N->getOperand(0)); 2089 break; 2090 } 2091 case ISD::INTRINSIC_WO_CHAIN: { 2092 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); 2093 break; 2094 } 2095 } 2096 } 2097 2098 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 2099 TargetLowering::DAGCombinerInfo &DCI, 2100 const LoongArchSubtarget &Subtarget) { 2101 if (DCI.isBeforeLegalizeOps()) 2102 return SDValue(); 2103 2104 SDValue FirstOperand = N->getOperand(0); 2105 SDValue SecondOperand = N->getOperand(1); 2106 unsigned FirstOperandOpc = FirstOperand.getOpcode(); 2107 EVT ValTy = N->getValueType(0); 2108 SDLoc DL(N); 2109 uint64_t lsb, msb; 2110 unsigned SMIdx, SMLen; 2111 ConstantSDNode *CN; 2112 SDValue NewOperand; 2113 MVT GRLenVT = Subtarget.getGRLenVT(); 2114 2115 // Op's second operand must be a shifted mask. 2116 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) || 2117 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) 2118 return SDValue(); 2119 2120 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { 2121 // Pattern match BSTRPICK. 2122 // $dst = and ((sra or srl) $src , lsb), (2**len - 1) 2123 // => BSTRPICK $dst, $src, msb, lsb 2124 // where msb = lsb + len - 1 2125 2126 // The second operand of the shift must be an immediate. 2127 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1)))) 2128 return SDValue(); 2129 2130 lsb = CN->getZExtValue(); 2131 2132 // Return if the shifted mask does not start at bit 0 or the sum of its 2133 // length and lsb exceeds the word's size. 2134 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) 2135 return SDValue(); 2136 2137 NewOperand = FirstOperand.getOperand(0); 2138 } else { 2139 // Pattern match BSTRPICK. 2140 // $dst = and $src, (2**len- 1) , if len > 12 2141 // => BSTRPICK $dst, $src, msb, lsb 2142 // where lsb = 0 and msb = len - 1 2143 2144 // If the mask is <= 0xfff, andi can be used instead. 2145 if (CN->getZExtValue() <= 0xfff) 2146 return SDValue(); 2147 2148 // Return if the MSB exceeds. 2149 if (SMIdx + SMLen > ValTy.getSizeInBits()) 2150 return SDValue(); 2151 2152 if (SMIdx > 0) { 2153 // Omit if the constant has more than 2 uses. This a conservative 2154 // decision. Whether it is a win depends on the HW microarchitecture. 2155 // However it should always be better for 1 and 2 uses. 2156 if (CN->use_size() > 2) 2157 return SDValue(); 2158 // Return if the constant can be composed by a single LU12I.W. 2159 if ((CN->getZExtValue() & 0xfff) == 0) 2160 return SDValue(); 2161 // Return if the constand can be composed by a single ADDI with 2162 // the zero register. 2163 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) 2164 return SDValue(); 2165 } 2166 2167 lsb = SMIdx; 2168 NewOperand = FirstOperand; 2169 } 2170 2171 msb = lsb + SMLen - 1; 2172 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, 2173 DAG.getConstant(msb, DL, GRLenVT), 2174 DAG.getConstant(lsb, DL, GRLenVT)); 2175 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) 2176 return NR0; 2177 // Try to optimize to 2178 // bstrpick $Rd, $Rs, msb, lsb 2179 // slli $Rd, $Rd, lsb 2180 return DAG.getNode(ISD::SHL, DL, ValTy, NR0, 2181 DAG.getConstant(lsb, DL, GRLenVT)); 2182 } 2183 2184 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 2185 TargetLowering::DAGCombinerInfo &DCI, 2186 const LoongArchSubtarget &Subtarget) { 2187 if (DCI.isBeforeLegalizeOps()) 2188 return SDValue(); 2189 2190 // $dst = srl (and $src, Mask), Shamt 2191 // => 2192 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt 2193 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 2194 // 2195 2196 SDValue FirstOperand = N->getOperand(0); 2197 ConstantSDNode *CN; 2198 EVT ValTy = N->getValueType(0); 2199 SDLoc DL(N); 2200 MVT GRLenVT = Subtarget.getGRLenVT(); 2201 unsigned MaskIdx, MaskLen; 2202 uint64_t Shamt; 2203 2204 // The first operand must be an AND and the second operand of the AND must be 2205 // a shifted mask. 2206 if (FirstOperand.getOpcode() != ISD::AND || 2207 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) || 2208 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) 2209 return SDValue(); 2210 2211 // The second operand (shift amount) must be an immediate. 2212 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) 2213 return SDValue(); 2214 2215 Shamt = CN->getZExtValue(); 2216 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) 2217 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, 2218 FirstOperand->getOperand(0), 2219 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2220 DAG.getConstant(Shamt, DL, GRLenVT)); 2221 2222 return SDValue(); 2223 } 2224 2225 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 2226 TargetLowering::DAGCombinerInfo &DCI, 2227 const LoongArchSubtarget &Subtarget) { 2228 MVT GRLenVT = Subtarget.getGRLenVT(); 2229 EVT ValTy = N->getValueType(0); 2230 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2231 ConstantSDNode *CN0, *CN1; 2232 SDLoc DL(N); 2233 unsigned ValBits = ValTy.getSizeInBits(); 2234 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; 2235 unsigned Shamt; 2236 bool SwapAndRetried = false; 2237 2238 if (DCI.isBeforeLegalizeOps()) 2239 return SDValue(); 2240 2241 if (ValBits != 32 && ValBits != 64) 2242 return SDValue(); 2243 2244 Retry: 2245 // 1st pattern to match BSTRINS: 2246 // R = or (and X, mask0), (and (shl Y, lsb), mask1) 2247 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 2248 // => 2249 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 2250 if (N0.getOpcode() == ISD::AND && 2251 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2252 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2253 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && 2254 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2255 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 2256 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && 2257 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2258 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 2259 (MaskIdx0 + MaskLen0 <= ValBits)) { 2260 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); 2261 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2262 N1.getOperand(0).getOperand(0), 2263 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 2264 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2265 } 2266 2267 // 2nd pattern to match BSTRINS: 2268 // R = or (and X, mask0), (shl (and Y, mask1), lsb) 2269 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) 2270 // => 2271 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 2272 if (N0.getOpcode() == ISD::AND && 2273 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2274 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2275 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 2276 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2277 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 2278 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2279 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 2280 MaskLen0 == MaskLen1 && MaskIdx1 == 0 && 2281 (MaskIdx0 + MaskLen0 <= ValBits)) { 2282 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); 2283 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2284 N1.getOperand(0).getOperand(0), 2285 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 2286 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2287 } 2288 2289 // 3rd pattern to match BSTRINS: 2290 // R = or (and X, mask0), (and Y, mask1) 2291 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 2292 // => 2293 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb 2294 // where msb = lsb + size - 1 2295 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 2296 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2297 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2298 (MaskIdx0 + MaskLen0 <= 64) && 2299 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && 2300 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 2301 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); 2302 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2303 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, 2304 DAG.getConstant(MaskIdx0, DL, GRLenVT)), 2305 DAG.getConstant(ValBits == 32 2306 ? (MaskIdx0 + (MaskLen0 & 31) - 1) 2307 : (MaskIdx0 + MaskLen0 - 1), 2308 DL, GRLenVT), 2309 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2310 } 2311 2312 // 4th pattern to match BSTRINS: 2313 // R = or (and X, mask), (shl Y, shamt) 2314 // where mask = (2**shamt - 1) 2315 // => 2316 // R = BSTRINS X, Y, ValBits - 1, shamt 2317 // where ValBits = 32 or 64 2318 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && 2319 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2320 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && 2321 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2322 (Shamt = CN1->getZExtValue()) == MaskLen0 && 2323 (MaskIdx0 + MaskLen0 <= ValBits)) { 2324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); 2325 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2326 N1.getOperand(0), 2327 DAG.getConstant((ValBits - 1), DL, GRLenVT), 2328 DAG.getConstant(Shamt, DL, GRLenVT)); 2329 } 2330 2331 // 5th pattern to match BSTRINS: 2332 // R = or (and X, mask), const 2333 // where ~mask = (2**size - 1) << lsb, mask & const = 0 2334 // => 2335 // R = BSTRINS X, (const >> lsb), msb, lsb 2336 // where msb = lsb + size - 1 2337 if (N0.getOpcode() == ISD::AND && 2338 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 2339 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 2340 (CN1 = dyn_cast<ConstantSDNode>(N1)) && 2341 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 2342 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); 2343 return DAG.getNode( 2344 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 2345 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), 2346 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) 2347 : (MaskIdx0 + MaskLen0 - 1), 2348 DL, GRLenVT), 2349 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 2350 } 2351 2352 // 6th pattern. 2353 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten 2354 // by the incoming bits are known to be zero. 2355 // => 2356 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt 2357 // 2358 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th 2359 // pattern is more common than the 1st. So we put the 1st before the 6th in 2360 // order to match as many nodes as possible. 2361 ConstantSDNode *CNMask, *CNShamt; 2362 unsigned MaskIdx, MaskLen; 2363 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 2364 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2365 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 2366 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2367 CNShamt->getZExtValue() + MaskLen <= ValBits) { 2368 Shamt = CNShamt->getZExtValue(); 2369 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); 2370 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2371 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); 2372 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2373 N1.getOperand(0).getOperand(0), 2374 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), 2375 DAG.getConstant(Shamt, DL, GRLenVT)); 2376 } 2377 } 2378 2379 // 7th pattern. 2380 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be 2381 // overwritten by the incoming bits are known to be zero. 2382 // => 2383 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx 2384 // 2385 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd 2386 // before the 7th in order to match as many nodes as possible. 2387 if (N1.getOpcode() == ISD::AND && 2388 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2389 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 2390 N1.getOperand(0).getOpcode() == ISD::SHL && 2391 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 2392 CNShamt->getZExtValue() == MaskIdx) { 2393 APInt ShMask(ValBits, CNMask->getZExtValue()); 2394 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2395 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); 2396 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2397 N1.getOperand(0).getOperand(0), 2398 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2399 DAG.getConstant(MaskIdx, DL, GRLenVT)); 2400 } 2401 } 2402 2403 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. 2404 if (!SwapAndRetried) { 2405 std::swap(N0, N1); 2406 SwapAndRetried = true; 2407 goto Retry; 2408 } 2409 2410 SwapAndRetried = false; 2411 Retry2: 2412 // 8th pattern. 2413 // a = b | (c & shifted_mask), where all positions in b to be overwritten by 2414 // the incoming bits are known to be zero. 2415 // => 2416 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx 2417 // 2418 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So 2419 // we put it here in order to match as many nodes as possible or generate less 2420 // instructions. 2421 if (N1.getOpcode() == ISD::AND && 2422 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 2423 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { 2424 APInt ShMask(ValBits, CNMask->getZExtValue()); 2425 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 2426 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); 2427 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 2428 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), 2429 N1->getOperand(0), 2430 DAG.getConstant(MaskIdx, DL, GRLenVT)), 2431 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 2432 DAG.getConstant(MaskIdx, DL, GRLenVT)); 2433 } 2434 } 2435 // Swap N0/N1 and retry. 2436 if (!SwapAndRetried) { 2437 std::swap(N0, N1); 2438 SwapAndRetried = true; 2439 goto Retry2; 2440 } 2441 2442 return SDValue(); 2443 } 2444 2445 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 2446 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, 2447 TargetLowering::DAGCombinerInfo &DCI, 2448 const LoongArchSubtarget &Subtarget) { 2449 if (DCI.isBeforeLegalizeOps()) 2450 return SDValue(); 2451 2452 SDValue Src = N->getOperand(0); 2453 if (Src.getOpcode() != LoongArchISD::REVB_2W) 2454 return SDValue(); 2455 2456 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), 2457 Src.getOperand(0)); 2458 } 2459 2460 template <unsigned N> 2461 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, 2462 SelectionDAG &DAG, 2463 const LoongArchSubtarget &Subtarget, 2464 bool IsSigned = false) { 2465 SDLoc DL(Node); 2466 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 2467 // Check the ImmArg. 2468 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 2469 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 2470 DAG.getContext()->emitError(Node->getOperationName(0) + 2471 ": argument out of range."); 2472 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); 2473 } 2474 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); 2475 } 2476 2477 template <unsigned N> 2478 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, 2479 SelectionDAG &DAG, bool IsSigned = false) { 2480 SDLoc DL(Node); 2481 EVT ResTy = Node->getValueType(0); 2482 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 2483 2484 // Check the ImmArg. 2485 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 2486 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 2487 DAG.getContext()->emitError(Node->getOperationName(0) + 2488 ": argument out of range."); 2489 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2490 } 2491 return DAG.getConstant( 2492 APInt(ResTy.getScalarType().getSizeInBits(), 2493 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 2494 DL, ResTy); 2495 } 2496 2497 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { 2498 SDLoc DL(Node); 2499 EVT ResTy = Node->getValueType(0); 2500 SDValue Vec = Node->getOperand(2); 2501 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); 2502 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); 2503 } 2504 2505 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { 2506 SDLoc DL(Node); 2507 EVT ResTy = Node->getValueType(0); 2508 SDValue One = DAG.getConstant(1, DL, ResTy); 2509 SDValue Bit = 2510 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); 2511 2512 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), 2513 DAG.getNOT(DL, Bit, ResTy)); 2514 } 2515 2516 template <unsigned N> 2517 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { 2518 SDLoc DL(Node); 2519 EVT ResTy = Node->getValueType(0); 2520 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2521 // Check the unsigned ImmArg. 2522 if (!isUInt<N>(CImm->getZExtValue())) { 2523 DAG.getContext()->emitError(Node->getOperationName(0) + 2524 ": argument out of range."); 2525 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2526 } 2527 2528 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2529 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); 2530 2531 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); 2532 } 2533 2534 template <unsigned N> 2535 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { 2536 SDLoc DL(Node); 2537 EVT ResTy = Node->getValueType(0); 2538 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2539 // Check the unsigned ImmArg. 2540 if (!isUInt<N>(CImm->getZExtValue())) { 2541 DAG.getContext()->emitError(Node->getOperationName(0) + 2542 ": argument out of range."); 2543 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2544 } 2545 2546 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2547 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 2548 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); 2549 } 2550 2551 template <unsigned N> 2552 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { 2553 SDLoc DL(Node); 2554 EVT ResTy = Node->getValueType(0); 2555 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 2556 // Check the unsigned ImmArg. 2557 if (!isUInt<N>(CImm->getZExtValue())) { 2558 DAG.getContext()->emitError(Node->getOperationName(0) + 2559 ": argument out of range."); 2560 return DAG.getNode(ISD::UNDEF, DL, ResTy); 2561 } 2562 2563 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 2564 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 2565 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); 2566 } 2567 2568 static SDValue 2569 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, 2570 TargetLowering::DAGCombinerInfo &DCI, 2571 const LoongArchSubtarget &Subtarget) { 2572 SDLoc DL(N); 2573 switch (N->getConstantOperandVal(0)) { 2574 default: 2575 break; 2576 case Intrinsic::loongarch_lsx_vadd_b: 2577 case Intrinsic::loongarch_lsx_vadd_h: 2578 case Intrinsic::loongarch_lsx_vadd_w: 2579 case Intrinsic::loongarch_lsx_vadd_d: 2580 case Intrinsic::loongarch_lasx_xvadd_b: 2581 case Intrinsic::loongarch_lasx_xvadd_h: 2582 case Intrinsic::loongarch_lasx_xvadd_w: 2583 case Intrinsic::loongarch_lasx_xvadd_d: 2584 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 2585 N->getOperand(2)); 2586 case Intrinsic::loongarch_lsx_vaddi_bu: 2587 case Intrinsic::loongarch_lsx_vaddi_hu: 2588 case Intrinsic::loongarch_lsx_vaddi_wu: 2589 case Intrinsic::loongarch_lsx_vaddi_du: 2590 case Intrinsic::loongarch_lasx_xvaddi_bu: 2591 case Intrinsic::loongarch_lasx_xvaddi_hu: 2592 case Intrinsic::loongarch_lasx_xvaddi_wu: 2593 case Intrinsic::loongarch_lasx_xvaddi_du: 2594 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 2595 lowerVectorSplatImm<5>(N, 2, DAG)); 2596 case Intrinsic::loongarch_lsx_vsub_b: 2597 case Intrinsic::loongarch_lsx_vsub_h: 2598 case Intrinsic::loongarch_lsx_vsub_w: 2599 case Intrinsic::loongarch_lsx_vsub_d: 2600 case Intrinsic::loongarch_lasx_xvsub_b: 2601 case Intrinsic::loongarch_lasx_xvsub_h: 2602 case Intrinsic::loongarch_lasx_xvsub_w: 2603 case Intrinsic::loongarch_lasx_xvsub_d: 2604 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 2605 N->getOperand(2)); 2606 case Intrinsic::loongarch_lsx_vsubi_bu: 2607 case Intrinsic::loongarch_lsx_vsubi_hu: 2608 case Intrinsic::loongarch_lsx_vsubi_wu: 2609 case Intrinsic::loongarch_lsx_vsubi_du: 2610 case Intrinsic::loongarch_lasx_xvsubi_bu: 2611 case Intrinsic::loongarch_lasx_xvsubi_hu: 2612 case Intrinsic::loongarch_lasx_xvsubi_wu: 2613 case Intrinsic::loongarch_lasx_xvsubi_du: 2614 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 2615 lowerVectorSplatImm<5>(N, 2, DAG)); 2616 case Intrinsic::loongarch_lsx_vneg_b: 2617 case Intrinsic::loongarch_lsx_vneg_h: 2618 case Intrinsic::loongarch_lsx_vneg_w: 2619 case Intrinsic::loongarch_lsx_vneg_d: 2620 case Intrinsic::loongarch_lasx_xvneg_b: 2621 case Intrinsic::loongarch_lasx_xvneg_h: 2622 case Intrinsic::loongarch_lasx_xvneg_w: 2623 case Intrinsic::loongarch_lasx_xvneg_d: 2624 return DAG.getNode( 2625 ISD::SUB, DL, N->getValueType(0), 2626 DAG.getConstant( 2627 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, 2628 /*isSigned=*/true), 2629 SDLoc(N), N->getValueType(0)), 2630 N->getOperand(1)); 2631 case Intrinsic::loongarch_lsx_vmax_b: 2632 case Intrinsic::loongarch_lsx_vmax_h: 2633 case Intrinsic::loongarch_lsx_vmax_w: 2634 case Intrinsic::loongarch_lsx_vmax_d: 2635 case Intrinsic::loongarch_lasx_xvmax_b: 2636 case Intrinsic::loongarch_lasx_xvmax_h: 2637 case Intrinsic::loongarch_lasx_xvmax_w: 2638 case Intrinsic::loongarch_lasx_xvmax_d: 2639 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), 2640 N->getOperand(2)); 2641 case Intrinsic::loongarch_lsx_vmax_bu: 2642 case Intrinsic::loongarch_lsx_vmax_hu: 2643 case Intrinsic::loongarch_lsx_vmax_wu: 2644 case Intrinsic::loongarch_lsx_vmax_du: 2645 case Intrinsic::loongarch_lasx_xvmax_bu: 2646 case Intrinsic::loongarch_lasx_xvmax_hu: 2647 case Intrinsic::loongarch_lasx_xvmax_wu: 2648 case Intrinsic::loongarch_lasx_xvmax_du: 2649 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 2650 N->getOperand(2)); 2651 case Intrinsic::loongarch_lsx_vmaxi_b: 2652 case Intrinsic::loongarch_lsx_vmaxi_h: 2653 case Intrinsic::loongarch_lsx_vmaxi_w: 2654 case Intrinsic::loongarch_lsx_vmaxi_d: 2655 case Intrinsic::loongarch_lasx_xvmaxi_b: 2656 case Intrinsic::loongarch_lasx_xvmaxi_h: 2657 case Intrinsic::loongarch_lasx_xvmaxi_w: 2658 case Intrinsic::loongarch_lasx_xvmaxi_d: 2659 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), 2660 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 2661 case Intrinsic::loongarch_lsx_vmaxi_bu: 2662 case Intrinsic::loongarch_lsx_vmaxi_hu: 2663 case Intrinsic::loongarch_lsx_vmaxi_wu: 2664 case Intrinsic::loongarch_lsx_vmaxi_du: 2665 case Intrinsic::loongarch_lasx_xvmaxi_bu: 2666 case Intrinsic::loongarch_lasx_xvmaxi_hu: 2667 case Intrinsic::loongarch_lasx_xvmaxi_wu: 2668 case Intrinsic::loongarch_lasx_xvmaxi_du: 2669 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 2670 lowerVectorSplatImm<5>(N, 2, DAG)); 2671 case Intrinsic::loongarch_lsx_vmin_b: 2672 case Intrinsic::loongarch_lsx_vmin_h: 2673 case Intrinsic::loongarch_lsx_vmin_w: 2674 case Intrinsic::loongarch_lsx_vmin_d: 2675 case Intrinsic::loongarch_lasx_xvmin_b: 2676 case Intrinsic::loongarch_lasx_xvmin_h: 2677 case Intrinsic::loongarch_lasx_xvmin_w: 2678 case Intrinsic::loongarch_lasx_xvmin_d: 2679 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 2680 N->getOperand(2)); 2681 case Intrinsic::loongarch_lsx_vmin_bu: 2682 case Intrinsic::loongarch_lsx_vmin_hu: 2683 case Intrinsic::loongarch_lsx_vmin_wu: 2684 case Intrinsic::loongarch_lsx_vmin_du: 2685 case Intrinsic::loongarch_lasx_xvmin_bu: 2686 case Intrinsic::loongarch_lasx_xvmin_hu: 2687 case Intrinsic::loongarch_lasx_xvmin_wu: 2688 case Intrinsic::loongarch_lasx_xvmin_du: 2689 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 2690 N->getOperand(2)); 2691 case Intrinsic::loongarch_lsx_vmini_b: 2692 case Intrinsic::loongarch_lsx_vmini_h: 2693 case Intrinsic::loongarch_lsx_vmini_w: 2694 case Intrinsic::loongarch_lsx_vmini_d: 2695 case Intrinsic::loongarch_lasx_xvmini_b: 2696 case Intrinsic::loongarch_lasx_xvmini_h: 2697 case Intrinsic::loongarch_lasx_xvmini_w: 2698 case Intrinsic::loongarch_lasx_xvmini_d: 2699 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 2700 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 2701 case Intrinsic::loongarch_lsx_vmini_bu: 2702 case Intrinsic::loongarch_lsx_vmini_hu: 2703 case Intrinsic::loongarch_lsx_vmini_wu: 2704 case Intrinsic::loongarch_lsx_vmini_du: 2705 case Intrinsic::loongarch_lasx_xvmini_bu: 2706 case Intrinsic::loongarch_lasx_xvmini_hu: 2707 case Intrinsic::loongarch_lasx_xvmini_wu: 2708 case Intrinsic::loongarch_lasx_xvmini_du: 2709 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 2710 lowerVectorSplatImm<5>(N, 2, DAG)); 2711 case Intrinsic::loongarch_lsx_vmul_b: 2712 case Intrinsic::loongarch_lsx_vmul_h: 2713 case Intrinsic::loongarch_lsx_vmul_w: 2714 case Intrinsic::loongarch_lsx_vmul_d: 2715 case Intrinsic::loongarch_lasx_xvmul_b: 2716 case Intrinsic::loongarch_lasx_xvmul_h: 2717 case Intrinsic::loongarch_lasx_xvmul_w: 2718 case Intrinsic::loongarch_lasx_xvmul_d: 2719 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), 2720 N->getOperand(2)); 2721 case Intrinsic::loongarch_lsx_vmadd_b: 2722 case Intrinsic::loongarch_lsx_vmadd_h: 2723 case Intrinsic::loongarch_lsx_vmadd_w: 2724 case Intrinsic::loongarch_lsx_vmadd_d: 2725 case Intrinsic::loongarch_lasx_xvmadd_b: 2726 case Intrinsic::loongarch_lasx_xvmadd_h: 2727 case Intrinsic::loongarch_lasx_xvmadd_w: 2728 case Intrinsic::loongarch_lasx_xvmadd_d: { 2729 EVT ResTy = N->getValueType(0); 2730 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), 2731 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 2732 N->getOperand(3))); 2733 } 2734 case Intrinsic::loongarch_lsx_vmsub_b: 2735 case Intrinsic::loongarch_lsx_vmsub_h: 2736 case Intrinsic::loongarch_lsx_vmsub_w: 2737 case Intrinsic::loongarch_lsx_vmsub_d: 2738 case Intrinsic::loongarch_lasx_xvmsub_b: 2739 case Intrinsic::loongarch_lasx_xvmsub_h: 2740 case Intrinsic::loongarch_lasx_xvmsub_w: 2741 case Intrinsic::loongarch_lasx_xvmsub_d: { 2742 EVT ResTy = N->getValueType(0); 2743 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), 2744 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 2745 N->getOperand(3))); 2746 } 2747 case Intrinsic::loongarch_lsx_vdiv_b: 2748 case Intrinsic::loongarch_lsx_vdiv_h: 2749 case Intrinsic::loongarch_lsx_vdiv_w: 2750 case Intrinsic::loongarch_lsx_vdiv_d: 2751 case Intrinsic::loongarch_lasx_xvdiv_b: 2752 case Intrinsic::loongarch_lasx_xvdiv_h: 2753 case Intrinsic::loongarch_lasx_xvdiv_w: 2754 case Intrinsic::loongarch_lasx_xvdiv_d: 2755 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), 2756 N->getOperand(2)); 2757 case Intrinsic::loongarch_lsx_vdiv_bu: 2758 case Intrinsic::loongarch_lsx_vdiv_hu: 2759 case Intrinsic::loongarch_lsx_vdiv_wu: 2760 case Intrinsic::loongarch_lsx_vdiv_du: 2761 case Intrinsic::loongarch_lasx_xvdiv_bu: 2762 case Intrinsic::loongarch_lasx_xvdiv_hu: 2763 case Intrinsic::loongarch_lasx_xvdiv_wu: 2764 case Intrinsic::loongarch_lasx_xvdiv_du: 2765 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), 2766 N->getOperand(2)); 2767 case Intrinsic::loongarch_lsx_vmod_b: 2768 case Intrinsic::loongarch_lsx_vmod_h: 2769 case Intrinsic::loongarch_lsx_vmod_w: 2770 case Intrinsic::loongarch_lsx_vmod_d: 2771 case Intrinsic::loongarch_lasx_xvmod_b: 2772 case Intrinsic::loongarch_lasx_xvmod_h: 2773 case Intrinsic::loongarch_lasx_xvmod_w: 2774 case Intrinsic::loongarch_lasx_xvmod_d: 2775 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), 2776 N->getOperand(2)); 2777 case Intrinsic::loongarch_lsx_vmod_bu: 2778 case Intrinsic::loongarch_lsx_vmod_hu: 2779 case Intrinsic::loongarch_lsx_vmod_wu: 2780 case Intrinsic::loongarch_lsx_vmod_du: 2781 case Intrinsic::loongarch_lasx_xvmod_bu: 2782 case Intrinsic::loongarch_lasx_xvmod_hu: 2783 case Intrinsic::loongarch_lasx_xvmod_wu: 2784 case Intrinsic::loongarch_lasx_xvmod_du: 2785 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), 2786 N->getOperand(2)); 2787 case Intrinsic::loongarch_lsx_vand_v: 2788 case Intrinsic::loongarch_lasx_xvand_v: 2789 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 2790 N->getOperand(2)); 2791 case Intrinsic::loongarch_lsx_vor_v: 2792 case Intrinsic::loongarch_lasx_xvor_v: 2793 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2794 N->getOperand(2)); 2795 case Intrinsic::loongarch_lsx_vxor_v: 2796 case Intrinsic::loongarch_lasx_xvxor_v: 2797 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 2798 N->getOperand(2)); 2799 case Intrinsic::loongarch_lsx_vnor_v: 2800 case Intrinsic::loongarch_lasx_xvnor_v: { 2801 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2802 N->getOperand(2)); 2803 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2804 } 2805 case Intrinsic::loongarch_lsx_vandi_b: 2806 case Intrinsic::loongarch_lasx_xvandi_b: 2807 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 2808 lowerVectorSplatImm<8>(N, 2, DAG)); 2809 case Intrinsic::loongarch_lsx_vori_b: 2810 case Intrinsic::loongarch_lasx_xvori_b: 2811 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 2812 lowerVectorSplatImm<8>(N, 2, DAG)); 2813 case Intrinsic::loongarch_lsx_vxori_b: 2814 case Intrinsic::loongarch_lasx_xvxori_b: 2815 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 2816 lowerVectorSplatImm<8>(N, 2, DAG)); 2817 case Intrinsic::loongarch_lsx_vsll_b: 2818 case Intrinsic::loongarch_lsx_vsll_h: 2819 case Intrinsic::loongarch_lsx_vsll_w: 2820 case Intrinsic::loongarch_lsx_vsll_d: 2821 case Intrinsic::loongarch_lasx_xvsll_b: 2822 case Intrinsic::loongarch_lasx_xvsll_h: 2823 case Intrinsic::loongarch_lasx_xvsll_w: 2824 case Intrinsic::loongarch_lasx_xvsll_d: 2825 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2826 truncateVecElts(N, DAG)); 2827 case Intrinsic::loongarch_lsx_vslli_b: 2828 case Intrinsic::loongarch_lasx_xvslli_b: 2829 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2830 lowerVectorSplatImm<3>(N, 2, DAG)); 2831 case Intrinsic::loongarch_lsx_vslli_h: 2832 case Intrinsic::loongarch_lasx_xvslli_h: 2833 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2834 lowerVectorSplatImm<4>(N, 2, DAG)); 2835 case Intrinsic::loongarch_lsx_vslli_w: 2836 case Intrinsic::loongarch_lasx_xvslli_w: 2837 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2838 lowerVectorSplatImm<5>(N, 2, DAG)); 2839 case Intrinsic::loongarch_lsx_vslli_d: 2840 case Intrinsic::loongarch_lasx_xvslli_d: 2841 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 2842 lowerVectorSplatImm<6>(N, 2, DAG)); 2843 case Intrinsic::loongarch_lsx_vsrl_b: 2844 case Intrinsic::loongarch_lsx_vsrl_h: 2845 case Intrinsic::loongarch_lsx_vsrl_w: 2846 case Intrinsic::loongarch_lsx_vsrl_d: 2847 case Intrinsic::loongarch_lasx_xvsrl_b: 2848 case Intrinsic::loongarch_lasx_xvsrl_h: 2849 case Intrinsic::loongarch_lasx_xvsrl_w: 2850 case Intrinsic::loongarch_lasx_xvsrl_d: 2851 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2852 truncateVecElts(N, DAG)); 2853 case Intrinsic::loongarch_lsx_vsrli_b: 2854 case Intrinsic::loongarch_lasx_xvsrli_b: 2855 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2856 lowerVectorSplatImm<3>(N, 2, DAG)); 2857 case Intrinsic::loongarch_lsx_vsrli_h: 2858 case Intrinsic::loongarch_lasx_xvsrli_h: 2859 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2860 lowerVectorSplatImm<4>(N, 2, DAG)); 2861 case Intrinsic::loongarch_lsx_vsrli_w: 2862 case Intrinsic::loongarch_lasx_xvsrli_w: 2863 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2864 lowerVectorSplatImm<5>(N, 2, DAG)); 2865 case Intrinsic::loongarch_lsx_vsrli_d: 2866 case Intrinsic::loongarch_lasx_xvsrli_d: 2867 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 2868 lowerVectorSplatImm<6>(N, 2, DAG)); 2869 case Intrinsic::loongarch_lsx_vsra_b: 2870 case Intrinsic::loongarch_lsx_vsra_h: 2871 case Intrinsic::loongarch_lsx_vsra_w: 2872 case Intrinsic::loongarch_lsx_vsra_d: 2873 case Intrinsic::loongarch_lasx_xvsra_b: 2874 case Intrinsic::loongarch_lasx_xvsra_h: 2875 case Intrinsic::loongarch_lasx_xvsra_w: 2876 case Intrinsic::loongarch_lasx_xvsra_d: 2877 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2878 truncateVecElts(N, DAG)); 2879 case Intrinsic::loongarch_lsx_vsrai_b: 2880 case Intrinsic::loongarch_lasx_xvsrai_b: 2881 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2882 lowerVectorSplatImm<3>(N, 2, DAG)); 2883 case Intrinsic::loongarch_lsx_vsrai_h: 2884 case Intrinsic::loongarch_lasx_xvsrai_h: 2885 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2886 lowerVectorSplatImm<4>(N, 2, DAG)); 2887 case Intrinsic::loongarch_lsx_vsrai_w: 2888 case Intrinsic::loongarch_lasx_xvsrai_w: 2889 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2890 lowerVectorSplatImm<5>(N, 2, DAG)); 2891 case Intrinsic::loongarch_lsx_vsrai_d: 2892 case Intrinsic::loongarch_lasx_xvsrai_d: 2893 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 2894 lowerVectorSplatImm<6>(N, 2, DAG)); 2895 case Intrinsic::loongarch_lsx_vclz_b: 2896 case Intrinsic::loongarch_lsx_vclz_h: 2897 case Intrinsic::loongarch_lsx_vclz_w: 2898 case Intrinsic::loongarch_lsx_vclz_d: 2899 case Intrinsic::loongarch_lasx_xvclz_b: 2900 case Intrinsic::loongarch_lasx_xvclz_h: 2901 case Intrinsic::loongarch_lasx_xvclz_w: 2902 case Intrinsic::loongarch_lasx_xvclz_d: 2903 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); 2904 case Intrinsic::loongarch_lsx_vpcnt_b: 2905 case Intrinsic::loongarch_lsx_vpcnt_h: 2906 case Intrinsic::loongarch_lsx_vpcnt_w: 2907 case Intrinsic::loongarch_lsx_vpcnt_d: 2908 case Intrinsic::loongarch_lasx_xvpcnt_b: 2909 case Intrinsic::loongarch_lasx_xvpcnt_h: 2910 case Intrinsic::loongarch_lasx_xvpcnt_w: 2911 case Intrinsic::loongarch_lasx_xvpcnt_d: 2912 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); 2913 case Intrinsic::loongarch_lsx_vbitclr_b: 2914 case Intrinsic::loongarch_lsx_vbitclr_h: 2915 case Intrinsic::loongarch_lsx_vbitclr_w: 2916 case Intrinsic::loongarch_lsx_vbitclr_d: 2917 case Intrinsic::loongarch_lasx_xvbitclr_b: 2918 case Intrinsic::loongarch_lasx_xvbitclr_h: 2919 case Intrinsic::loongarch_lasx_xvbitclr_w: 2920 case Intrinsic::loongarch_lasx_xvbitclr_d: 2921 return lowerVectorBitClear(N, DAG); 2922 case Intrinsic::loongarch_lsx_vbitclri_b: 2923 case Intrinsic::loongarch_lasx_xvbitclri_b: 2924 return lowerVectorBitClearImm<3>(N, DAG); 2925 case Intrinsic::loongarch_lsx_vbitclri_h: 2926 case Intrinsic::loongarch_lasx_xvbitclri_h: 2927 return lowerVectorBitClearImm<4>(N, DAG); 2928 case Intrinsic::loongarch_lsx_vbitclri_w: 2929 case Intrinsic::loongarch_lasx_xvbitclri_w: 2930 return lowerVectorBitClearImm<5>(N, DAG); 2931 case Intrinsic::loongarch_lsx_vbitclri_d: 2932 case Intrinsic::loongarch_lasx_xvbitclri_d: 2933 return lowerVectorBitClearImm<6>(N, DAG); 2934 case Intrinsic::loongarch_lsx_vbitset_b: 2935 case Intrinsic::loongarch_lsx_vbitset_h: 2936 case Intrinsic::loongarch_lsx_vbitset_w: 2937 case Intrinsic::loongarch_lsx_vbitset_d: 2938 case Intrinsic::loongarch_lasx_xvbitset_b: 2939 case Intrinsic::loongarch_lasx_xvbitset_h: 2940 case Intrinsic::loongarch_lasx_xvbitset_w: 2941 case Intrinsic::loongarch_lasx_xvbitset_d: { 2942 EVT VecTy = N->getValueType(0); 2943 SDValue One = DAG.getConstant(1, DL, VecTy); 2944 return DAG.getNode( 2945 ISD::OR, DL, VecTy, N->getOperand(1), 2946 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 2947 } 2948 case Intrinsic::loongarch_lsx_vbitseti_b: 2949 case Intrinsic::loongarch_lasx_xvbitseti_b: 2950 return lowerVectorBitSetImm<3>(N, DAG); 2951 case Intrinsic::loongarch_lsx_vbitseti_h: 2952 case Intrinsic::loongarch_lasx_xvbitseti_h: 2953 return lowerVectorBitSetImm<4>(N, DAG); 2954 case Intrinsic::loongarch_lsx_vbitseti_w: 2955 case Intrinsic::loongarch_lasx_xvbitseti_w: 2956 return lowerVectorBitSetImm<5>(N, DAG); 2957 case Intrinsic::loongarch_lsx_vbitseti_d: 2958 case Intrinsic::loongarch_lasx_xvbitseti_d: 2959 return lowerVectorBitSetImm<6>(N, DAG); 2960 case Intrinsic::loongarch_lsx_vbitrev_b: 2961 case Intrinsic::loongarch_lsx_vbitrev_h: 2962 case Intrinsic::loongarch_lsx_vbitrev_w: 2963 case Intrinsic::loongarch_lsx_vbitrev_d: 2964 case Intrinsic::loongarch_lasx_xvbitrev_b: 2965 case Intrinsic::loongarch_lasx_xvbitrev_h: 2966 case Intrinsic::loongarch_lasx_xvbitrev_w: 2967 case Intrinsic::loongarch_lasx_xvbitrev_d: { 2968 EVT VecTy = N->getValueType(0); 2969 SDValue One = DAG.getConstant(1, DL, VecTy); 2970 return DAG.getNode( 2971 ISD::XOR, DL, VecTy, N->getOperand(1), 2972 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 2973 } 2974 case Intrinsic::loongarch_lsx_vbitrevi_b: 2975 case Intrinsic::loongarch_lasx_xvbitrevi_b: 2976 return lowerVectorBitRevImm<3>(N, DAG); 2977 case Intrinsic::loongarch_lsx_vbitrevi_h: 2978 case Intrinsic::loongarch_lasx_xvbitrevi_h: 2979 return lowerVectorBitRevImm<4>(N, DAG); 2980 case Intrinsic::loongarch_lsx_vbitrevi_w: 2981 case Intrinsic::loongarch_lasx_xvbitrevi_w: 2982 return lowerVectorBitRevImm<5>(N, DAG); 2983 case Intrinsic::loongarch_lsx_vbitrevi_d: 2984 case Intrinsic::loongarch_lasx_xvbitrevi_d: 2985 return lowerVectorBitRevImm<6>(N, DAG); 2986 case Intrinsic::loongarch_lsx_vfadd_s: 2987 case Intrinsic::loongarch_lsx_vfadd_d: 2988 case Intrinsic::loongarch_lasx_xvfadd_s: 2989 case Intrinsic::loongarch_lasx_xvfadd_d: 2990 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), 2991 N->getOperand(2)); 2992 case Intrinsic::loongarch_lsx_vfsub_s: 2993 case Intrinsic::loongarch_lsx_vfsub_d: 2994 case Intrinsic::loongarch_lasx_xvfsub_s: 2995 case Intrinsic::loongarch_lasx_xvfsub_d: 2996 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), 2997 N->getOperand(2)); 2998 case Intrinsic::loongarch_lsx_vfmul_s: 2999 case Intrinsic::loongarch_lsx_vfmul_d: 3000 case Intrinsic::loongarch_lasx_xvfmul_s: 3001 case Intrinsic::loongarch_lasx_xvfmul_d: 3002 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), 3003 N->getOperand(2)); 3004 case Intrinsic::loongarch_lsx_vfdiv_s: 3005 case Intrinsic::loongarch_lsx_vfdiv_d: 3006 case Intrinsic::loongarch_lasx_xvfdiv_s: 3007 case Intrinsic::loongarch_lasx_xvfdiv_d: 3008 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), 3009 N->getOperand(2)); 3010 case Intrinsic::loongarch_lsx_vfmadd_s: 3011 case Intrinsic::loongarch_lsx_vfmadd_d: 3012 case Intrinsic::loongarch_lasx_xvfmadd_s: 3013 case Intrinsic::loongarch_lasx_xvfmadd_d: 3014 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), 3015 N->getOperand(2), N->getOperand(3)); 3016 case Intrinsic::loongarch_lsx_vinsgr2vr_b: 3017 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3018 N->getOperand(1), N->getOperand(2), 3019 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); 3020 case Intrinsic::loongarch_lsx_vinsgr2vr_h: 3021 case Intrinsic::loongarch_lasx_xvinsgr2vr_w: 3022 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3023 N->getOperand(1), N->getOperand(2), 3024 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); 3025 case Intrinsic::loongarch_lsx_vinsgr2vr_w: 3026 case Intrinsic::loongarch_lasx_xvinsgr2vr_d: 3027 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3028 N->getOperand(1), N->getOperand(2), 3029 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); 3030 case Intrinsic::loongarch_lsx_vinsgr2vr_d: 3031 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 3032 N->getOperand(1), N->getOperand(2), 3033 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); 3034 case Intrinsic::loongarch_lsx_vreplgr2vr_b: 3035 case Intrinsic::loongarch_lsx_vreplgr2vr_h: 3036 case Intrinsic::loongarch_lsx_vreplgr2vr_w: 3037 case Intrinsic::loongarch_lsx_vreplgr2vr_d: 3038 case Intrinsic::loongarch_lasx_xvreplgr2vr_b: 3039 case Intrinsic::loongarch_lasx_xvreplgr2vr_h: 3040 case Intrinsic::loongarch_lasx_xvreplgr2vr_w: 3041 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { 3042 EVT ResTy = N->getValueType(0); 3043 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1)); 3044 return DAG.getBuildVector(ResTy, DL, Ops); 3045 } 3046 case Intrinsic::loongarch_lsx_vreplve_b: 3047 case Intrinsic::loongarch_lsx_vreplve_h: 3048 case Intrinsic::loongarch_lsx_vreplve_w: 3049 case Intrinsic::loongarch_lsx_vreplve_d: 3050 case Intrinsic::loongarch_lasx_xvreplve_b: 3051 case Intrinsic::loongarch_lasx_xvreplve_h: 3052 case Intrinsic::loongarch_lasx_xvreplve_w: 3053 case Intrinsic::loongarch_lasx_xvreplve_d: 3054 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), 3055 N->getOperand(1), 3056 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), 3057 N->getOperand(2))); 3058 } 3059 return SDValue(); 3060 } 3061 3062 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, 3063 DAGCombinerInfo &DCI) const { 3064 SelectionDAG &DAG = DCI.DAG; 3065 switch (N->getOpcode()) { 3066 default: 3067 break; 3068 case ISD::AND: 3069 return performANDCombine(N, DAG, DCI, Subtarget); 3070 case ISD::OR: 3071 return performORCombine(N, DAG, DCI, Subtarget); 3072 case ISD::SRL: 3073 return performSRLCombine(N, DAG, DCI, Subtarget); 3074 case LoongArchISD::BITREV_W: 3075 return performBITREV_WCombine(N, DAG, DCI, Subtarget); 3076 case ISD::INTRINSIC_WO_CHAIN: 3077 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); 3078 } 3079 return SDValue(); 3080 } 3081 3082 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, 3083 MachineBasicBlock *MBB) { 3084 if (!ZeroDivCheck) 3085 return MBB; 3086 3087 // Build instructions: 3088 // MBB: 3089 // div(or mod) $dst, $dividend, $divisor 3090 // bnez $divisor, SinkMBB 3091 // BreakMBB: 3092 // break 7 // BRK_DIVZERO 3093 // SinkMBB: 3094 // fallthrough 3095 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 3096 MachineFunction::iterator It = ++MBB->getIterator(); 3097 MachineFunction *MF = MBB->getParent(); 3098 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3099 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3100 MF->insert(It, BreakMBB); 3101 MF->insert(It, SinkMBB); 3102 3103 // Transfer the remainder of MBB and its successor edges to SinkMBB. 3104 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); 3105 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 3106 3107 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 3108 DebugLoc DL = MI.getDebugLoc(); 3109 MachineOperand &Divisor = MI.getOperand(2); 3110 Register DivisorReg = Divisor.getReg(); 3111 3112 // MBB: 3113 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) 3114 .addReg(DivisorReg, getKillRegState(Divisor.isKill())) 3115 .addMBB(SinkMBB); 3116 MBB->addSuccessor(BreakMBB); 3117 MBB->addSuccessor(SinkMBB); 3118 3119 // BreakMBB: 3120 // See linux header file arch/loongarch/include/uapi/asm/break.h for the 3121 // definition of BRK_DIVZERO. 3122 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); 3123 BreakMBB->addSuccessor(SinkMBB); 3124 3125 // Clear Divisor's kill flag. 3126 Divisor.setIsKill(false); 3127 3128 return SinkMBB; 3129 } 3130 3131 static MachineBasicBlock * 3132 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, 3133 const LoongArchSubtarget &Subtarget) { 3134 unsigned CondOpc; 3135 switch (MI.getOpcode()) { 3136 default: 3137 llvm_unreachable("Unexpected opcode"); 3138 case LoongArch::PseudoVBZ: 3139 CondOpc = LoongArch::VSETEQZ_V; 3140 break; 3141 case LoongArch::PseudoVBZ_B: 3142 CondOpc = LoongArch::VSETANYEQZ_B; 3143 break; 3144 case LoongArch::PseudoVBZ_H: 3145 CondOpc = LoongArch::VSETANYEQZ_H; 3146 break; 3147 case LoongArch::PseudoVBZ_W: 3148 CondOpc = LoongArch::VSETANYEQZ_W; 3149 break; 3150 case LoongArch::PseudoVBZ_D: 3151 CondOpc = LoongArch::VSETANYEQZ_D; 3152 break; 3153 case LoongArch::PseudoVBNZ: 3154 CondOpc = LoongArch::VSETNEZ_V; 3155 break; 3156 case LoongArch::PseudoVBNZ_B: 3157 CondOpc = LoongArch::VSETALLNEZ_B; 3158 break; 3159 case LoongArch::PseudoVBNZ_H: 3160 CondOpc = LoongArch::VSETALLNEZ_H; 3161 break; 3162 case LoongArch::PseudoVBNZ_W: 3163 CondOpc = LoongArch::VSETALLNEZ_W; 3164 break; 3165 case LoongArch::PseudoVBNZ_D: 3166 CondOpc = LoongArch::VSETALLNEZ_D; 3167 break; 3168 case LoongArch::PseudoXVBZ: 3169 CondOpc = LoongArch::XVSETEQZ_V; 3170 break; 3171 case LoongArch::PseudoXVBZ_B: 3172 CondOpc = LoongArch::XVSETANYEQZ_B; 3173 break; 3174 case LoongArch::PseudoXVBZ_H: 3175 CondOpc = LoongArch::XVSETANYEQZ_H; 3176 break; 3177 case LoongArch::PseudoXVBZ_W: 3178 CondOpc = LoongArch::XVSETANYEQZ_W; 3179 break; 3180 case LoongArch::PseudoXVBZ_D: 3181 CondOpc = LoongArch::XVSETANYEQZ_D; 3182 break; 3183 case LoongArch::PseudoXVBNZ: 3184 CondOpc = LoongArch::XVSETNEZ_V; 3185 break; 3186 case LoongArch::PseudoXVBNZ_B: 3187 CondOpc = LoongArch::XVSETALLNEZ_B; 3188 break; 3189 case LoongArch::PseudoXVBNZ_H: 3190 CondOpc = LoongArch::XVSETALLNEZ_H; 3191 break; 3192 case LoongArch::PseudoXVBNZ_W: 3193 CondOpc = LoongArch::XVSETALLNEZ_W; 3194 break; 3195 case LoongArch::PseudoXVBNZ_D: 3196 CondOpc = LoongArch::XVSETALLNEZ_D; 3197 break; 3198 } 3199 3200 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3201 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3202 DebugLoc DL = MI.getDebugLoc(); 3203 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 3204 MachineFunction::iterator It = ++BB->getIterator(); 3205 3206 MachineFunction *F = BB->getParent(); 3207 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); 3208 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); 3209 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); 3210 3211 F->insert(It, FalseBB); 3212 F->insert(It, TrueBB); 3213 F->insert(It, SinkBB); 3214 3215 // Transfer the remainder of MBB and its successor edges to Sink. 3216 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); 3217 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 3218 3219 // Insert the real instruction to BB. 3220 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); 3221 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); 3222 3223 // Insert branch. 3224 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); 3225 BB->addSuccessor(FalseBB); 3226 BB->addSuccessor(TrueBB); 3227 3228 // FalseBB. 3229 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 3230 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) 3231 .addReg(LoongArch::R0) 3232 .addImm(0); 3233 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); 3234 FalseBB->addSuccessor(SinkBB); 3235 3236 // TrueBB. 3237 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 3238 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) 3239 .addReg(LoongArch::R0) 3240 .addImm(1); 3241 TrueBB->addSuccessor(SinkBB); 3242 3243 // SinkBB: merge the results. 3244 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), 3245 MI.getOperand(0).getReg()) 3246 .addReg(RD1) 3247 .addMBB(FalseBB) 3248 .addReg(RD2) 3249 .addMBB(TrueBB); 3250 3251 // The pseudo instruction is gone now. 3252 MI.eraseFromParent(); 3253 return SinkBB; 3254 } 3255 3256 static MachineBasicBlock * 3257 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, 3258 const LoongArchSubtarget &Subtarget) { 3259 unsigned InsOp; 3260 unsigned HalfSize; 3261 switch (MI.getOpcode()) { 3262 default: 3263 llvm_unreachable("Unexpected opcode"); 3264 case LoongArch::PseudoXVINSGR2VR_B: 3265 HalfSize = 16; 3266 InsOp = LoongArch::VINSGR2VR_B; 3267 break; 3268 case LoongArch::PseudoXVINSGR2VR_H: 3269 HalfSize = 8; 3270 InsOp = LoongArch::VINSGR2VR_H; 3271 break; 3272 } 3273 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3274 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; 3275 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; 3276 DebugLoc DL = MI.getDebugLoc(); 3277 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 3278 // XDst = vector_insert XSrc, Elt, Idx 3279 Register XDst = MI.getOperand(0).getReg(); 3280 Register XSrc = MI.getOperand(1).getReg(); 3281 Register Elt = MI.getOperand(2).getReg(); 3282 unsigned Idx = MI.getOperand(3).getImm(); 3283 3284 Register ScratchReg1 = XSrc; 3285 if (Idx >= HalfSize) { 3286 ScratchReg1 = MRI.createVirtualRegister(RC); 3287 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) 3288 .addReg(XSrc) 3289 .addReg(XSrc) 3290 .addImm(1); 3291 } 3292 3293 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); 3294 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); 3295 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) 3296 .addReg(ScratchReg1, 0, LoongArch::sub_128); 3297 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) 3298 .addReg(ScratchSubReg1) 3299 .addReg(Elt) 3300 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx); 3301 3302 Register ScratchReg2 = XDst; 3303 if (Idx >= HalfSize) 3304 ScratchReg2 = MRI.createVirtualRegister(RC); 3305 3306 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) 3307 .addImm(0) 3308 .addReg(ScratchSubReg2) 3309 .addImm(LoongArch::sub_128); 3310 3311 if (Idx >= HalfSize) 3312 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) 3313 .addReg(XSrc) 3314 .addReg(ScratchReg2) 3315 .addImm(2); 3316 3317 MI.eraseFromParent(); 3318 return BB; 3319 } 3320 3321 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 3322 MachineInstr &MI, MachineBasicBlock *BB) const { 3323 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3324 DebugLoc DL = MI.getDebugLoc(); 3325 3326 switch (MI.getOpcode()) { 3327 default: 3328 llvm_unreachable("Unexpected instr type to insert"); 3329 case LoongArch::DIV_W: 3330 case LoongArch::DIV_WU: 3331 case LoongArch::MOD_W: 3332 case LoongArch::MOD_WU: 3333 case LoongArch::DIV_D: 3334 case LoongArch::DIV_DU: 3335 case LoongArch::MOD_D: 3336 case LoongArch::MOD_DU: 3337 return insertDivByZeroTrap(MI, BB); 3338 break; 3339 case LoongArch::WRFCSR: { 3340 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 3341 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 3342 .addReg(MI.getOperand(1).getReg()); 3343 MI.eraseFromParent(); 3344 return BB; 3345 } 3346 case LoongArch::RDFCSR: { 3347 MachineInstr *ReadFCSR = 3348 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 3349 MI.getOperand(0).getReg()) 3350 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 3351 ReadFCSR->getOperand(1).setIsUndef(); 3352 MI.eraseFromParent(); 3353 return BB; 3354 } 3355 case LoongArch::PseudoVBZ: 3356 case LoongArch::PseudoVBZ_B: 3357 case LoongArch::PseudoVBZ_H: 3358 case LoongArch::PseudoVBZ_W: 3359 case LoongArch::PseudoVBZ_D: 3360 case LoongArch::PseudoVBNZ: 3361 case LoongArch::PseudoVBNZ_B: 3362 case LoongArch::PseudoVBNZ_H: 3363 case LoongArch::PseudoVBNZ_W: 3364 case LoongArch::PseudoVBNZ_D: 3365 case LoongArch::PseudoXVBZ: 3366 case LoongArch::PseudoXVBZ_B: 3367 case LoongArch::PseudoXVBZ_H: 3368 case LoongArch::PseudoXVBZ_W: 3369 case LoongArch::PseudoXVBZ_D: 3370 case LoongArch::PseudoXVBNZ: 3371 case LoongArch::PseudoXVBNZ_B: 3372 case LoongArch::PseudoXVBNZ_H: 3373 case LoongArch::PseudoXVBNZ_W: 3374 case LoongArch::PseudoXVBNZ_D: 3375 return emitVecCondBranchPseudo(MI, BB, Subtarget); 3376 case LoongArch::PseudoXVINSGR2VR_B: 3377 case LoongArch::PseudoXVINSGR2VR_H: 3378 return emitPseudoXVINSGR2VR(MI, BB, Subtarget); 3379 } 3380 } 3381 3382 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( 3383 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 3384 unsigned *Fast) const { 3385 if (!Subtarget.hasUAL()) 3386 return false; 3387 3388 // TODO: set reasonable speed number. 3389 if (Fast) 3390 *Fast = 1; 3391 return true; 3392 } 3393 3394 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 3395 switch ((LoongArchISD::NodeType)Opcode) { 3396 case LoongArchISD::FIRST_NUMBER: 3397 break; 3398 3399 #define NODE_NAME_CASE(node) \ 3400 case LoongArchISD::node: \ 3401 return "LoongArchISD::" #node; 3402 3403 // TODO: Add more target-dependent nodes later. 3404 NODE_NAME_CASE(CALL) 3405 NODE_NAME_CASE(CALL_MEDIUM) 3406 NODE_NAME_CASE(CALL_LARGE) 3407 NODE_NAME_CASE(RET) 3408 NODE_NAME_CASE(TAIL) 3409 NODE_NAME_CASE(TAIL_MEDIUM) 3410 NODE_NAME_CASE(TAIL_LARGE) 3411 NODE_NAME_CASE(SLL_W) 3412 NODE_NAME_CASE(SRA_W) 3413 NODE_NAME_CASE(SRL_W) 3414 NODE_NAME_CASE(BSTRINS) 3415 NODE_NAME_CASE(BSTRPICK) 3416 NODE_NAME_CASE(MOVGR2FR_W_LA64) 3417 NODE_NAME_CASE(MOVFR2GR_S_LA64) 3418 NODE_NAME_CASE(FTINT) 3419 NODE_NAME_CASE(REVB_2H) 3420 NODE_NAME_CASE(REVB_2W) 3421 NODE_NAME_CASE(BITREV_4B) 3422 NODE_NAME_CASE(BITREV_W) 3423 NODE_NAME_CASE(ROTR_W) 3424 NODE_NAME_CASE(ROTL_W) 3425 NODE_NAME_CASE(CLZ_W) 3426 NODE_NAME_CASE(CTZ_W) 3427 NODE_NAME_CASE(DBAR) 3428 NODE_NAME_CASE(IBAR) 3429 NODE_NAME_CASE(BREAK) 3430 NODE_NAME_CASE(SYSCALL) 3431 NODE_NAME_CASE(CRC_W_B_W) 3432 NODE_NAME_CASE(CRC_W_H_W) 3433 NODE_NAME_CASE(CRC_W_W_W) 3434 NODE_NAME_CASE(CRC_W_D_W) 3435 NODE_NAME_CASE(CRCC_W_B_W) 3436 NODE_NAME_CASE(CRCC_W_H_W) 3437 NODE_NAME_CASE(CRCC_W_W_W) 3438 NODE_NAME_CASE(CRCC_W_D_W) 3439 NODE_NAME_CASE(CSRRD) 3440 NODE_NAME_CASE(CSRWR) 3441 NODE_NAME_CASE(CSRXCHG) 3442 NODE_NAME_CASE(IOCSRRD_B) 3443 NODE_NAME_CASE(IOCSRRD_H) 3444 NODE_NAME_CASE(IOCSRRD_W) 3445 NODE_NAME_CASE(IOCSRRD_D) 3446 NODE_NAME_CASE(IOCSRWR_B) 3447 NODE_NAME_CASE(IOCSRWR_H) 3448 NODE_NAME_CASE(IOCSRWR_W) 3449 NODE_NAME_CASE(IOCSRWR_D) 3450 NODE_NAME_CASE(CPUCFG) 3451 NODE_NAME_CASE(MOVGR2FCSR) 3452 NODE_NAME_CASE(MOVFCSR2GR) 3453 NODE_NAME_CASE(CACOP_D) 3454 NODE_NAME_CASE(CACOP_W) 3455 NODE_NAME_CASE(VPICK_SEXT_ELT) 3456 NODE_NAME_CASE(VPICK_ZEXT_ELT) 3457 NODE_NAME_CASE(VREPLVE) 3458 NODE_NAME_CASE(VALL_ZERO) 3459 NODE_NAME_CASE(VANY_ZERO) 3460 NODE_NAME_CASE(VALL_NONZERO) 3461 NODE_NAME_CASE(VANY_NONZERO) 3462 } 3463 #undef NODE_NAME_CASE 3464 return nullptr; 3465 } 3466 3467 //===----------------------------------------------------------------------===// 3468 // Calling Convention Implementation 3469 //===----------------------------------------------------------------------===// 3470 3471 // Eight general-purpose registers a0-a7 used for passing integer arguments, 3472 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 3473 // fixed-point arguments, and floating-point arguments when no FPR is available 3474 // or with soft float ABI. 3475 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, 3476 LoongArch::R7, LoongArch::R8, LoongArch::R9, 3477 LoongArch::R10, LoongArch::R11}; 3478 // Eight floating-point registers fa0-fa7 used for passing floating-point 3479 // arguments, and fa0-fa1 are also used to return values. 3480 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, 3481 LoongArch::F3, LoongArch::F4, LoongArch::F5, 3482 LoongArch::F6, LoongArch::F7}; 3483 // FPR32 and FPR64 alias each other. 3484 const MCPhysReg ArgFPR64s[] = { 3485 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, 3486 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; 3487 3488 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, 3489 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, 3490 LoongArch::VR6, LoongArch::VR7}; 3491 3492 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, 3493 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, 3494 LoongArch::XR6, LoongArch::XR7}; 3495 3496 // Pass a 2*GRLen argument that has been split into two GRLen values through 3497 // registers or the stack as necessary. 3498 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, 3499 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, 3500 unsigned ValNo2, MVT ValVT2, MVT LocVT2, 3501 ISD::ArgFlagsTy ArgFlags2) { 3502 unsigned GRLenInBytes = GRLen / 8; 3503 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3504 // At least one half can be passed via register. 3505 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 3506 VA1.getLocVT(), CCValAssign::Full)); 3507 } else { 3508 // Both halves must be passed on the stack, with proper alignment. 3509 Align StackAlign = 3510 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 3511 State.addLoc( 3512 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 3513 State.AllocateStack(GRLenInBytes, StackAlign), 3514 VA1.getLocVT(), CCValAssign::Full)); 3515 State.addLoc(CCValAssign::getMem( 3516 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 3517 LocVT2, CCValAssign::Full)); 3518 return false; 3519 } 3520 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3521 // The second half can also be passed via register. 3522 State.addLoc( 3523 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 3524 } else { 3525 // The second half is passed via the stack, without additional alignment. 3526 State.addLoc(CCValAssign::getMem( 3527 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 3528 LocVT2, CCValAssign::Full)); 3529 } 3530 return false; 3531 } 3532 3533 // Implements the LoongArch calling convention. Returns true upon failure. 3534 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, 3535 unsigned ValNo, MVT ValVT, 3536 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 3537 CCState &State, bool IsFixed, bool IsRet, 3538 Type *OrigTy) { 3539 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); 3540 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen"); 3541 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; 3542 MVT LocVT = ValVT; 3543 3544 // Any return value split into more than two values can't be returned 3545 // directly. 3546 if (IsRet && ValNo > 1) 3547 return true; 3548 3549 // If passing a variadic argument, or if no FPR is available. 3550 bool UseGPRForFloat = true; 3551 3552 switch (ABI) { 3553 default: 3554 llvm_unreachable("Unexpected ABI"); 3555 case LoongArchABI::ABI_ILP32S: 3556 case LoongArchABI::ABI_ILP32F: 3557 case LoongArchABI::ABI_LP64F: 3558 report_fatal_error("Unimplemented ABI"); 3559 break; 3560 case LoongArchABI::ABI_ILP32D: 3561 case LoongArchABI::ABI_LP64D: 3562 UseGPRForFloat = !IsFixed; 3563 break; 3564 case LoongArchABI::ABI_LP64S: 3565 break; 3566 } 3567 3568 // FPR32 and FPR64 alias each other. 3569 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) 3570 UseGPRForFloat = true; 3571 3572 if (UseGPRForFloat && ValVT == MVT::f32) { 3573 LocVT = GRLenVT; 3574 LocInfo = CCValAssign::BCvt; 3575 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { 3576 LocVT = MVT::i64; 3577 LocInfo = CCValAssign::BCvt; 3578 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { 3579 // TODO: Handle passing f64 on LA32 with D feature. 3580 report_fatal_error("Passing f64 with GPR on LA32 is undefined"); 3581 } 3582 3583 // If this is a variadic argument, the LoongArch calling convention requires 3584 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 3585 // byte alignment. An aligned register should be used regardless of whether 3586 // the original argument was split during legalisation or not. The argument 3587 // will not be passed by registers if the original type is larger than 3588 // 2*GRLen, so the register alignment rule does not apply. 3589 unsigned TwoGRLenInBytes = (2 * GRLen) / 8; 3590 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && 3591 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) { 3592 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 3593 // Skip 'odd' register if necessary. 3594 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 3595 State.AllocateReg(ArgGPRs); 3596 } 3597 3598 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 3599 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 3600 State.getPendingArgFlags(); 3601 3602 assert(PendingLocs.size() == PendingArgFlags.size() && 3603 "PendingLocs and PendingArgFlags out of sync"); 3604 3605 // Split arguments might be passed indirectly, so keep track of the pending 3606 // values. 3607 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 3608 LocVT = GRLenVT; 3609 LocInfo = CCValAssign::Indirect; 3610 PendingLocs.push_back( 3611 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3612 PendingArgFlags.push_back(ArgFlags); 3613 if (!ArgFlags.isSplitEnd()) { 3614 return false; 3615 } 3616 } 3617 3618 // If the split argument only had two elements, it should be passed directly 3619 // in registers or on the stack. 3620 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 3621 PendingLocs.size() <= 2) { 3622 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3623 // Apply the normal calling convention rules to the first half of the 3624 // split argument. 3625 CCValAssign VA = PendingLocs[0]; 3626 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3627 PendingLocs.clear(); 3628 PendingArgFlags.clear(); 3629 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, 3630 ArgFlags); 3631 } 3632 3633 // Allocate to a register if possible, or else a stack slot. 3634 Register Reg; 3635 unsigned StoreSizeBytes = GRLen / 8; 3636 Align StackAlign = Align(GRLen / 8); 3637 3638 if (ValVT == MVT::f32 && !UseGPRForFloat) 3639 Reg = State.AllocateReg(ArgFPR32s); 3640 else if (ValVT == MVT::f64 && !UseGPRForFloat) 3641 Reg = State.AllocateReg(ArgFPR64s); 3642 else if (ValVT.is128BitVector()) 3643 Reg = State.AllocateReg(ArgVRs); 3644 else if (ValVT.is256BitVector()) 3645 Reg = State.AllocateReg(ArgXRs); 3646 else 3647 Reg = State.AllocateReg(ArgGPRs); 3648 3649 unsigned StackOffset = 3650 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 3651 3652 // If we reach this point and PendingLocs is non-empty, we must be at the 3653 // end of a split argument that must be passed indirectly. 3654 if (!PendingLocs.empty()) { 3655 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3656 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3657 for (auto &It : PendingLocs) { 3658 if (Reg) 3659 It.convertToReg(Reg); 3660 else 3661 It.convertToMem(StackOffset); 3662 State.addLoc(It); 3663 } 3664 PendingLocs.clear(); 3665 PendingArgFlags.clear(); 3666 return false; 3667 } 3668 assert((!UseGPRForFloat || LocVT == GRLenVT) && 3669 "Expected an GRLenVT at this stage"); 3670 3671 if (Reg) { 3672 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3673 return false; 3674 } 3675 3676 // When a floating-point value is passed on the stack, no bit-cast is needed. 3677 if (ValVT.isFloatingPoint()) { 3678 LocVT = ValVT; 3679 LocInfo = CCValAssign::Full; 3680 } 3681 3682 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3683 return false; 3684 } 3685 3686 void LoongArchTargetLowering::analyzeInputArgs( 3687 MachineFunction &MF, CCState &CCInfo, 3688 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 3689 LoongArchCCAssignFn Fn) const { 3690 FunctionType *FType = MF.getFunction().getFunctionType(); 3691 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3692 MVT ArgVT = Ins[i].VT; 3693 Type *ArgTy = nullptr; 3694 if (IsRet) 3695 ArgTy = FType->getReturnType(); 3696 else if (Ins[i].isOrigArg()) 3697 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3698 LoongArchABI::ABI ABI = 3699 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 3700 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, 3701 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 3702 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT 3703 << '\n'); 3704 llvm_unreachable(""); 3705 } 3706 } 3707 } 3708 3709 void LoongArchTargetLowering::analyzeOutputArgs( 3710 MachineFunction &MF, CCState &CCInfo, 3711 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3712 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { 3713 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3714 MVT ArgVT = Outs[i].VT; 3715 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3716 LoongArchABI::ABI ABI = 3717 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 3718 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, 3719 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 3720 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT 3721 << "\n"); 3722 llvm_unreachable(""); 3723 } 3724 } 3725 } 3726 3727 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 3728 // values. 3729 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3730 const CCValAssign &VA, const SDLoc &DL) { 3731 switch (VA.getLocInfo()) { 3732 default: 3733 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3734 case CCValAssign::Full: 3735 case CCValAssign::Indirect: 3736 break; 3737 case CCValAssign::BCvt: 3738 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3739 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 3740 else 3741 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3742 break; 3743 } 3744 return Val; 3745 } 3746 3747 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3748 const CCValAssign &VA, const SDLoc &DL, 3749 const LoongArchTargetLowering &TLI) { 3750 MachineFunction &MF = DAG.getMachineFunction(); 3751 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3752 EVT LocVT = VA.getLocVT(); 3753 SDValue Val; 3754 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3755 Register VReg = RegInfo.createVirtualRegister(RC); 3756 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3757 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3758 3759 return convertLocVTToValVT(DAG, Val, VA, DL); 3760 } 3761 3762 // The caller is responsible for loading the full value if the argument is 3763 // passed with CCValAssign::Indirect. 3764 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 3765 const CCValAssign &VA, const SDLoc &DL) { 3766 MachineFunction &MF = DAG.getMachineFunction(); 3767 MachineFrameInfo &MFI = MF.getFrameInfo(); 3768 EVT ValVT = VA.getValVT(); 3769 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 3770 /*IsImmutable=*/true); 3771 SDValue FIN = DAG.getFrameIndex( 3772 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 3773 3774 ISD::LoadExtType ExtType; 3775 switch (VA.getLocInfo()) { 3776 default: 3777 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3778 case CCValAssign::Full: 3779 case CCValAssign::Indirect: 3780 case CCValAssign::BCvt: 3781 ExtType = ISD::NON_EXTLOAD; 3782 break; 3783 } 3784 return DAG.getExtLoad( 3785 ExtType, DL, VA.getLocVT(), Chain, FIN, 3786 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 3787 } 3788 3789 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3790 const CCValAssign &VA, const SDLoc &DL) { 3791 EVT LocVT = VA.getLocVT(); 3792 3793 switch (VA.getLocInfo()) { 3794 default: 3795 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3796 case CCValAssign::Full: 3797 break; 3798 case CCValAssign::BCvt: 3799 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3800 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 3801 else 3802 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3803 break; 3804 } 3805 return Val; 3806 } 3807 3808 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3809 CCValAssign::LocInfo LocInfo, 3810 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3811 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3812 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 3813 // s0 s1 s2 s3 s4 s5 s6 s7 s8 3814 static const MCPhysReg GPRList[] = { 3815 LoongArch::R23, LoongArch::R24, LoongArch::R25, 3816 LoongArch::R26, LoongArch::R27, LoongArch::R28, 3817 LoongArch::R29, LoongArch::R30, LoongArch::R31}; 3818 if (unsigned Reg = State.AllocateReg(GPRList)) { 3819 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3820 return false; 3821 } 3822 } 3823 3824 if (LocVT == MVT::f32) { 3825 // Pass in STG registers: F1, F2, F3, F4 3826 // fs0,fs1,fs2,fs3 3827 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, 3828 LoongArch::F26, LoongArch::F27}; 3829 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3830 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3831 return false; 3832 } 3833 } 3834 3835 if (LocVT == MVT::f64) { 3836 // Pass in STG registers: D1, D2, D3, D4 3837 // fs4,fs5,fs6,fs7 3838 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, 3839 LoongArch::F30_64, LoongArch::F31_64}; 3840 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3841 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3842 return false; 3843 } 3844 } 3845 3846 report_fatal_error("No registers left in GHC calling convention"); 3847 return true; 3848 } 3849 3850 // Transform physical registers into virtual registers. 3851 SDValue LoongArchTargetLowering::LowerFormalArguments( 3852 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 3853 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 3854 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3855 3856 MachineFunction &MF = DAG.getMachineFunction(); 3857 3858 switch (CallConv) { 3859 default: 3860 llvm_unreachable("Unsupported calling convention"); 3861 case CallingConv::C: 3862 case CallingConv::Fast: 3863 break; 3864 case CallingConv::GHC: 3865 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) || 3866 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD)) 3867 report_fatal_error( 3868 "GHC calling convention requires the F and D extensions"); 3869 } 3870 3871 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3872 MVT GRLenVT = Subtarget.getGRLenVT(); 3873 unsigned GRLenInBytes = Subtarget.getGRLen() / 8; 3874 // Used with varargs to acumulate store chains. 3875 std::vector<SDValue> OutChains; 3876 3877 // Assign locations to all of the incoming arguments. 3878 SmallVector<CCValAssign> ArgLocs; 3879 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 3880 3881 if (CallConv == CallingConv::GHC) 3882 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC); 3883 else 3884 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch); 3885 3886 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3887 CCValAssign &VA = ArgLocs[i]; 3888 SDValue ArgValue; 3889 if (VA.isRegLoc()) 3890 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 3891 else 3892 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 3893 if (VA.getLocInfo() == CCValAssign::Indirect) { 3894 // If the original argument was split and passed by reference, we need to 3895 // load all parts of it here (using the same address). 3896 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 3897 MachinePointerInfo())); 3898 unsigned ArgIndex = Ins[i].OrigArgIndex; 3899 unsigned ArgPartOffset = Ins[i].PartOffset; 3900 assert(ArgPartOffset == 0); 3901 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 3902 CCValAssign &PartVA = ArgLocs[i + 1]; 3903 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; 3904 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 3905 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 3906 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 3907 MachinePointerInfo())); 3908 ++i; 3909 } 3910 continue; 3911 } 3912 InVals.push_back(ArgValue); 3913 } 3914 3915 if (IsVarArg) { 3916 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); 3917 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 3918 const TargetRegisterClass *RC = &LoongArch::GPRRegClass; 3919 MachineFrameInfo &MFI = MF.getFrameInfo(); 3920 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3921 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); 3922 3923 // Offset of the first variable argument from stack pointer, and size of 3924 // the vararg save area. For now, the varargs save area is either zero or 3925 // large enough to hold a0-a7. 3926 int VaArgOffset, VarArgsSaveSize; 3927 3928 // If all registers are allocated, then all varargs must be passed on the 3929 // stack and we don't need to save any argregs. 3930 if (ArgRegs.size() == Idx) { 3931 VaArgOffset = CCInfo.getStackSize(); 3932 VarArgsSaveSize = 0; 3933 } else { 3934 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); 3935 VaArgOffset = -VarArgsSaveSize; 3936 } 3937 3938 // Record the frame index of the first variable argument 3939 // which is a value necessary to VASTART. 3940 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); 3941 LoongArchFI->setVarArgsFrameIndex(FI); 3942 3943 // If saving an odd number of registers then create an extra stack slot to 3944 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures 3945 // offsets to even-numbered registered remain 2*GRLen-aligned. 3946 if (Idx % 2) { 3947 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes, 3948 true); 3949 VarArgsSaveSize += GRLenInBytes; 3950 } 3951 3952 // Copy the integer registers that may have been used for passing varargs 3953 // to the vararg save area. 3954 for (unsigned I = Idx; I < ArgRegs.size(); 3955 ++I, VaArgOffset += GRLenInBytes) { 3956 const Register Reg = RegInfo.createVirtualRegister(RC); 3957 RegInfo.addLiveIn(ArgRegs[I], Reg); 3958 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT); 3959 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); 3960 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 3961 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 3962 MachinePointerInfo::getFixedStack(MF, FI)); 3963 cast<StoreSDNode>(Store.getNode()) 3964 ->getMemOperand() 3965 ->setValue((Value *)nullptr); 3966 OutChains.push_back(Store); 3967 } 3968 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); 3969 } 3970 3971 // All stores are grouped in one node to allow the matching between 3972 // the size of Ins and InVals. This only happens for vararg functions. 3973 if (!OutChains.empty()) { 3974 OutChains.push_back(Chain); 3975 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 3976 } 3977 3978 return Chain; 3979 } 3980 3981 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 3982 return CI->isTailCall(); 3983 } 3984 3985 // Check if the return value is used as only a return value, as otherwise 3986 // we can't perform a tail-call. 3987 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, 3988 SDValue &Chain) const { 3989 if (N->getNumValues() != 1) 3990 return false; 3991 if (!N->hasNUsesOfValue(1, 0)) 3992 return false; 3993 3994 SDNode *Copy = *N->use_begin(); 3995 if (Copy->getOpcode() != ISD::CopyToReg) 3996 return false; 3997 3998 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 3999 // isn't safe to perform a tail call. 4000 if (Copy->getGluedNode()) 4001 return false; 4002 4003 // The copy must be used by a LoongArchISD::RET, and nothing else. 4004 bool HasRet = false; 4005 for (SDNode *Node : Copy->uses()) { 4006 if (Node->getOpcode() != LoongArchISD::RET) 4007 return false; 4008 HasRet = true; 4009 } 4010 4011 if (!HasRet) 4012 return false; 4013 4014 Chain = Copy->getOperand(0); 4015 return true; 4016 } 4017 4018 // Check whether the call is eligible for tail call optimization. 4019 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 4020 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4021 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 4022 4023 auto CalleeCC = CLI.CallConv; 4024 auto &Outs = CLI.Outs; 4025 auto &Caller = MF.getFunction(); 4026 auto CallerCC = Caller.getCallingConv(); 4027 4028 // Do not tail call opt if the stack is used to pass parameters. 4029 if (CCInfo.getStackSize() != 0) 4030 return false; 4031 4032 // Do not tail call opt if any parameters need to be passed indirectly. 4033 for (auto &VA : ArgLocs) 4034 if (VA.getLocInfo() == CCValAssign::Indirect) 4035 return false; 4036 4037 // Do not tail call opt if either caller or callee uses struct return 4038 // semantics. 4039 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4040 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4041 if (IsCallerStructRet || IsCalleeStructRet) 4042 return false; 4043 4044 // Do not tail call opt if either the callee or caller has a byval argument. 4045 for (auto &Arg : Outs) 4046 if (Arg.Flags.isByVal()) 4047 return false; 4048 4049 // The callee has to preserve all registers the caller needs to preserve. 4050 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4052 if (CalleeCC != CallerCC) { 4053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4054 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4055 return false; 4056 } 4057 return true; 4058 } 4059 4060 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 4061 return DAG.getDataLayout().getPrefTypeAlign( 4062 VT.getTypeForEVT(*DAG.getContext())); 4063 } 4064 4065 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4066 // and output parameter nodes. 4067 SDValue 4068 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 4069 SmallVectorImpl<SDValue> &InVals) const { 4070 SelectionDAG &DAG = CLI.DAG; 4071 SDLoc &DL = CLI.DL; 4072 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4073 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4074 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4075 SDValue Chain = CLI.Chain; 4076 SDValue Callee = CLI.Callee; 4077 CallingConv::ID CallConv = CLI.CallConv; 4078 bool IsVarArg = CLI.IsVarArg; 4079 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4080 MVT GRLenVT = Subtarget.getGRLenVT(); 4081 bool &IsTailCall = CLI.IsTailCall; 4082 4083 MachineFunction &MF = DAG.getMachineFunction(); 4084 4085 // Analyze the operands of the call, assigning locations to each operand. 4086 SmallVector<CCValAssign> ArgLocs; 4087 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4088 4089 if (CallConv == CallingConv::GHC) 4090 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 4091 else 4092 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 4093 4094 // Check if it's really possible to do a tail call. 4095 if (IsTailCall) 4096 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4097 4098 if (IsTailCall) 4099 ++NumTailCalls; 4100 else if (CLI.CB && CLI.CB->isMustTailCall()) 4101 report_fatal_error("failed to perform tail call elimination on a call " 4102 "site marked musttail"); 4103 4104 // Get a count of how many bytes are to be pushed on the stack. 4105 unsigned NumBytes = ArgCCInfo.getStackSize(); 4106 4107 // Create local copies for byval args. 4108 SmallVector<SDValue> ByValArgs; 4109 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4110 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4111 if (!Flags.isByVal()) 4112 continue; 4113 4114 SDValue Arg = OutVals[i]; 4115 unsigned Size = Flags.getByValSize(); 4116 Align Alignment = Flags.getNonZeroByValAlign(); 4117 4118 int FI = 4119 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4120 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4121 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 4122 4123 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4124 /*IsVolatile=*/false, 4125 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, 4126 MachinePointerInfo(), MachinePointerInfo()); 4127 ByValArgs.push_back(FIPtr); 4128 } 4129 4130 if (!IsTailCall) 4131 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4132 4133 // Copy argument values to their designated locations. 4134 SmallVector<std::pair<Register, SDValue>> RegsToPass; 4135 SmallVector<SDValue> MemOpChains; 4136 SDValue StackPtr; 4137 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4138 CCValAssign &VA = ArgLocs[i]; 4139 SDValue ArgValue = OutVals[i]; 4140 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4141 4142 // Promote the value if needed. 4143 // For now, only handle fully promoted and indirect arguments. 4144 if (VA.getLocInfo() == CCValAssign::Indirect) { 4145 // Store the argument in a stack slot and pass its address. 4146 Align StackAlign = 4147 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 4148 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 4149 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 4150 // If the original argument was split and passed by reference, we need to 4151 // store the required parts of it here (and pass just one address). 4152 unsigned ArgIndex = Outs[i].OrigArgIndex; 4153 unsigned ArgPartOffset = Outs[i].PartOffset; 4154 assert(ArgPartOffset == 0); 4155 // Calculate the total size to store. We don't have access to what we're 4156 // actually storing other than performing the loop and collecting the 4157 // info. 4158 SmallVector<std::pair<SDValue, SDValue>> Parts; 4159 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4160 SDValue PartValue = OutVals[i + 1]; 4161 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 4162 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 4163 EVT PartVT = PartValue.getValueType(); 4164 4165 StoredSize += PartVT.getStoreSize(); 4166 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 4167 Parts.push_back(std::make_pair(PartValue, Offset)); 4168 ++i; 4169 } 4170 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 4171 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4172 MemOpChains.push_back( 4173 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4174 MachinePointerInfo::getFixedStack(MF, FI))); 4175 for (const auto &Part : Parts) { 4176 SDValue PartValue = Part.first; 4177 SDValue PartOffset = Part.second; 4178 SDValue Address = 4179 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 4180 MemOpChains.push_back( 4181 DAG.getStore(Chain, DL, PartValue, Address, 4182 MachinePointerInfo::getFixedStack(MF, FI))); 4183 } 4184 ArgValue = SpillSlot; 4185 } else { 4186 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 4187 } 4188 4189 // Use local copy if it is a byval arg. 4190 if (Flags.isByVal()) 4191 ArgValue = ByValArgs[j++]; 4192 4193 if (VA.isRegLoc()) { 4194 // Queue up the argument copies and emit them at the end. 4195 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 4196 } else { 4197 assert(VA.isMemLoc() && "Argument not register or memory"); 4198 assert(!IsTailCall && "Tail call not allowed if stack is used " 4199 "for passing parameters"); 4200 4201 // Work out the address of the stack slot. 4202 if (!StackPtr.getNode()) 4203 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 4204 SDValue Address = 4205 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 4206 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 4207 4208 // Emit the store. 4209 MemOpChains.push_back( 4210 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 4211 } 4212 } 4213 4214 // Join the stores, which are independent of one another. 4215 if (!MemOpChains.empty()) 4216 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 4217 4218 SDValue Glue; 4219 4220 // Build a sequence of copy-to-reg nodes, chained and glued together. 4221 for (auto &Reg : RegsToPass) { 4222 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 4223 Glue = Chain.getValue(1); 4224 } 4225 4226 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 4227 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 4228 // split it and then direct call can be matched by PseudoCALL. 4229 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 4230 const GlobalValue *GV = S->getGlobal(); 4231 unsigned OpFlags = 4232 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV) 4233 ? LoongArchII::MO_CALL 4234 : LoongArchII::MO_CALL_PLT; 4235 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 4236 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4237 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal( 4238 *MF.getFunction().getParent(), nullptr) 4239 ? LoongArchII::MO_CALL 4240 : LoongArchII::MO_CALL_PLT; 4241 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 4242 } 4243 4244 // The first call operand is the chain and the second is the target address. 4245 SmallVector<SDValue> Ops; 4246 Ops.push_back(Chain); 4247 Ops.push_back(Callee); 4248 4249 // Add argument registers to the end of the list so that they are 4250 // known live into the call. 4251 for (auto &Reg : RegsToPass) 4252 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 4253 4254 if (!IsTailCall) { 4255 // Add a register mask operand representing the call-preserved registers. 4256 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4257 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 4258 assert(Mask && "Missing call preserved mask for calling convention"); 4259 Ops.push_back(DAG.getRegisterMask(Mask)); 4260 } 4261 4262 // Glue the call to the argument copies, if any. 4263 if (Glue.getNode()) 4264 Ops.push_back(Glue); 4265 4266 // Emit the call. 4267 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 4268 unsigned Op; 4269 switch (DAG.getTarget().getCodeModel()) { 4270 default: 4271 report_fatal_error("Unsupported code model"); 4272 case CodeModel::Small: 4273 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; 4274 break; 4275 case CodeModel::Medium: 4276 assert(Subtarget.is64Bit() && "Medium code model requires LA64"); 4277 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; 4278 break; 4279 case CodeModel::Large: 4280 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 4281 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; 4282 break; 4283 } 4284 4285 if (IsTailCall) { 4286 MF.getFrameInfo().setHasTailCall(); 4287 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); 4288 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 4289 return Ret; 4290 } 4291 4292 Chain = DAG.getNode(Op, DL, NodeTys, Ops); 4293 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 4294 Glue = Chain.getValue(1); 4295 4296 // Mark the end of the call, which is glued to the call itself. 4297 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 4298 Glue = Chain.getValue(1); 4299 4300 // Assign locations to each value returned by this call. 4301 SmallVector<CCValAssign> RVLocs; 4302 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 4303 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 4304 4305 // Copy all of the result registers out of their specified physreg. 4306 for (auto &VA : RVLocs) { 4307 // Copy the value out. 4308 SDValue RetValue = 4309 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 4310 // Glue the RetValue to the end of the call sequence. 4311 Chain = RetValue.getValue(1); 4312 Glue = RetValue.getValue(2); 4313 4314 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 4315 4316 InVals.push_back(RetValue); 4317 } 4318 4319 return Chain; 4320 } 4321 4322 bool LoongArchTargetLowering::CanLowerReturn( 4323 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 4324 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 4325 SmallVector<CCValAssign> RVLocs; 4326 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 4327 4328 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4329 LoongArchABI::ABI ABI = 4330 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 4331 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 4332 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 4333 nullptr)) 4334 return false; 4335 } 4336 return true; 4337 } 4338 4339 SDValue LoongArchTargetLowering::LowerReturn( 4340 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 4341 const SmallVectorImpl<ISD::OutputArg> &Outs, 4342 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 4343 SelectionDAG &DAG) const { 4344 // Stores the assignment of the return value to a location. 4345 SmallVector<CCValAssign> RVLocs; 4346 4347 // Info about the registers and stack slot. 4348 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 4349 *DAG.getContext()); 4350 4351 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 4352 nullptr, CC_LoongArch); 4353 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 4354 report_fatal_error("GHC functions return void only"); 4355 SDValue Glue; 4356 SmallVector<SDValue, 4> RetOps(1, Chain); 4357 4358 // Copy the result values into the output registers. 4359 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 4360 CCValAssign &VA = RVLocs[i]; 4361 assert(VA.isRegLoc() && "Can only return in registers!"); 4362 4363 // Handle a 'normal' return. 4364 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); 4365 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 4366 4367 // Guarantee that all emitted copies are stuck together. 4368 Glue = Chain.getValue(1); 4369 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4370 } 4371 4372 RetOps[0] = Chain; // Update chain. 4373 4374 // Add the glue node if we have it. 4375 if (Glue.getNode()) 4376 RetOps.push_back(Glue); 4377 4378 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); 4379 } 4380 4381 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 4382 bool ForCodeSize) const { 4383 // TODO: Maybe need more checks here after vector extension is supported. 4384 if (VT == MVT::f32 && !Subtarget.hasBasicF()) 4385 return false; 4386 if (VT == MVT::f64 && !Subtarget.hasBasicD()) 4387 return false; 4388 return (Imm.isZero() || Imm.isExactlyValue(+1.0)); 4389 } 4390 4391 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { 4392 return true; 4393 } 4394 4395 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { 4396 return true; 4397 } 4398 4399 bool LoongArchTargetLowering::shouldInsertFencesForAtomic( 4400 const Instruction *I) const { 4401 if (!Subtarget.is64Bit()) 4402 return isa<LoadInst>(I) || isa<StoreInst>(I); 4403 4404 if (isa<LoadInst>(I)) 4405 return true; 4406 4407 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not 4408 // require fences beacuse we can use amswap_db.[w/d]. 4409 if (isa<StoreInst>(I)) { 4410 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth(); 4411 return (Size == 8 || Size == 16); 4412 } 4413 4414 return false; 4415 } 4416 4417 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, 4418 LLVMContext &Context, 4419 EVT VT) const { 4420 if (!VT.isVector()) 4421 return getPointerTy(DL); 4422 return VT.changeVectorElementTypeToInteger(); 4423 } 4424 4425 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { 4426 // TODO: Support vectors. 4427 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y); 4428 } 4429 4430 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 4431 const CallInst &I, 4432 MachineFunction &MF, 4433 unsigned Intrinsic) const { 4434 switch (Intrinsic) { 4435 default: 4436 return false; 4437 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: 4438 case Intrinsic::loongarch_masked_atomicrmw_add_i32: 4439 case Intrinsic::loongarch_masked_atomicrmw_sub_i32: 4440 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: 4441 Info.opc = ISD::INTRINSIC_W_CHAIN; 4442 Info.memVT = MVT::i32; 4443 Info.ptrVal = I.getArgOperand(0); 4444 Info.offset = 0; 4445 Info.align = Align(4); 4446 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 4447 MachineMemOperand::MOVolatile; 4448 return true; 4449 // TODO: Add more Intrinsics later. 4450 } 4451 } 4452 4453 TargetLowering::AtomicExpansionKind 4454 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 4455 // TODO: Add more AtomicRMWInst that needs to be extended. 4456 4457 // Since floating-point operation requires a non-trivial set of data 4458 // operations, use CmpXChg to expand. 4459 if (AI->isFloatingPointOperation() || 4460 AI->getOperation() == AtomicRMWInst::UIncWrap || 4461 AI->getOperation() == AtomicRMWInst::UDecWrap) 4462 return AtomicExpansionKind::CmpXChg; 4463 4464 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 4465 if (Size == 8 || Size == 16) 4466 return AtomicExpansionKind::MaskedIntrinsic; 4467 return AtomicExpansionKind::None; 4468 } 4469 4470 static Intrinsic::ID 4471 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, 4472 AtomicRMWInst::BinOp BinOp) { 4473 if (GRLen == 64) { 4474 switch (BinOp) { 4475 default: 4476 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4477 case AtomicRMWInst::Xchg: 4478 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; 4479 case AtomicRMWInst::Add: 4480 return Intrinsic::loongarch_masked_atomicrmw_add_i64; 4481 case AtomicRMWInst::Sub: 4482 return Intrinsic::loongarch_masked_atomicrmw_sub_i64; 4483 case AtomicRMWInst::Nand: 4484 return Intrinsic::loongarch_masked_atomicrmw_nand_i64; 4485 case AtomicRMWInst::UMax: 4486 return Intrinsic::loongarch_masked_atomicrmw_umax_i64; 4487 case AtomicRMWInst::UMin: 4488 return Intrinsic::loongarch_masked_atomicrmw_umin_i64; 4489 case AtomicRMWInst::Max: 4490 return Intrinsic::loongarch_masked_atomicrmw_max_i64; 4491 case AtomicRMWInst::Min: 4492 return Intrinsic::loongarch_masked_atomicrmw_min_i64; 4493 // TODO: support other AtomicRMWInst. 4494 } 4495 } 4496 4497 if (GRLen == 32) { 4498 switch (BinOp) { 4499 default: 4500 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4501 case AtomicRMWInst::Xchg: 4502 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; 4503 case AtomicRMWInst::Add: 4504 return Intrinsic::loongarch_masked_atomicrmw_add_i32; 4505 case AtomicRMWInst::Sub: 4506 return Intrinsic::loongarch_masked_atomicrmw_sub_i32; 4507 case AtomicRMWInst::Nand: 4508 return Intrinsic::loongarch_masked_atomicrmw_nand_i32; 4509 // TODO: support other AtomicRMWInst. 4510 } 4511 } 4512 4513 llvm_unreachable("Unexpected GRLen\n"); 4514 } 4515 4516 TargetLowering::AtomicExpansionKind 4517 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( 4518 AtomicCmpXchgInst *CI) const { 4519 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 4520 if (Size == 8 || Size == 16) 4521 return AtomicExpansionKind::MaskedIntrinsic; 4522 return AtomicExpansionKind::None; 4523 } 4524 4525 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 4526 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 4527 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 4528 AtomicOrdering FailOrd = CI->getFailureOrdering(); 4529 Value *FailureOrdering = 4530 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd)); 4531 4532 // TODO: Support cmpxchg on LA32. 4533 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; 4534 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 4535 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 4536 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 4537 Type *Tys[] = {AlignedAddr->getType()}; 4538 Function *MaskedCmpXchg = 4539 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 4540 Value *Result = Builder.CreateCall( 4541 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); 4542 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 4543 return Result; 4544 } 4545 4546 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( 4547 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 4548 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 4549 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 4550 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 4551 // mask, as this produces better code than the LL/SC loop emitted by 4552 // int_loongarch_masked_atomicrmw_xchg. 4553 if (AI->getOperation() == AtomicRMWInst::Xchg && 4554 isa<ConstantInt>(AI->getValOperand())) { 4555 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 4556 if (CVal->isZero()) 4557 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 4558 Builder.CreateNot(Mask, "Inv_Mask"), 4559 AI->getAlign(), Ord); 4560 if (CVal->isMinusOne()) 4561 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 4562 AI->getAlign(), Ord); 4563 } 4564 4565 unsigned GRLen = Subtarget.getGRLen(); 4566 Value *Ordering = 4567 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering())); 4568 Type *Tys[] = {AlignedAddr->getType()}; 4569 Function *LlwOpScwLoop = Intrinsic::getDeclaration( 4570 AI->getModule(), 4571 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); 4572 4573 if (GRLen == 64) { 4574 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 4575 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 4576 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 4577 } 4578 4579 Value *Result; 4580 4581 // Must pass the shift amount needed to sign extend the loaded value prior 4582 // to performing a signed comparison for min/max. ShiftAmt is the number of 4583 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which 4584 // is the number of bits to left+right shift the value in order to 4585 // sign-extend. 4586 if (AI->getOperation() == AtomicRMWInst::Min || 4587 AI->getOperation() == AtomicRMWInst::Max) { 4588 const DataLayout &DL = AI->getModule()->getDataLayout(); 4589 unsigned ValWidth = 4590 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 4591 Value *SextShamt = 4592 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt); 4593 Result = Builder.CreateCall(LlwOpScwLoop, 4594 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 4595 } else { 4596 Result = 4597 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 4598 } 4599 4600 if (GRLen == 64) 4601 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 4602 return Result; 4603 } 4604 4605 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( 4606 const MachineFunction &MF, EVT VT) const { 4607 VT = VT.getScalarType(); 4608 4609 if (!VT.isSimple()) 4610 return false; 4611 4612 switch (VT.getSimpleVT().SimpleTy) { 4613 case MVT::f32: 4614 case MVT::f64: 4615 return true; 4616 default: 4617 break; 4618 } 4619 4620 return false; 4621 } 4622 4623 Register LoongArchTargetLowering::getExceptionPointerRegister( 4624 const Constant *PersonalityFn) const { 4625 return LoongArch::R4; 4626 } 4627 4628 Register LoongArchTargetLowering::getExceptionSelectorRegister( 4629 const Constant *PersonalityFn) const { 4630 return LoongArch::R5; 4631 } 4632 4633 //===----------------------------------------------------------------------===// 4634 // LoongArch Inline Assembly Support 4635 //===----------------------------------------------------------------------===// 4636 4637 LoongArchTargetLowering::ConstraintType 4638 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { 4639 // LoongArch specific constraints in GCC: config/loongarch/constraints.md 4640 // 4641 // 'f': A floating-point register (if available). 4642 // 'k': A memory operand whose address is formed by a base register and 4643 // (optionally scaled) index register. 4644 // 'l': A signed 16-bit constant. 4645 // 'm': A memory operand whose address is formed by a base register and 4646 // offset that is suitable for use in instructions with the same 4647 // addressing mode as st.w and ld.w. 4648 // 'I': A signed 12-bit constant (for arithmetic instructions). 4649 // 'J': Integer zero. 4650 // 'K': An unsigned 12-bit constant (for logic instructions). 4651 // "ZB": An address that is held in a general-purpose register. The offset is 4652 // zero. 4653 // "ZC": A memory operand whose address is formed by a base register and 4654 // offset that is suitable for use in instructions with the same 4655 // addressing mode as ll.w and sc.w. 4656 if (Constraint.size() == 1) { 4657 switch (Constraint[0]) { 4658 default: 4659 break; 4660 case 'f': 4661 return C_RegisterClass; 4662 case 'l': 4663 case 'I': 4664 case 'J': 4665 case 'K': 4666 return C_Immediate; 4667 case 'k': 4668 return C_Memory; 4669 } 4670 } 4671 4672 if (Constraint == "ZC" || Constraint == "ZB") 4673 return C_Memory; 4674 4675 // 'm' is handled here. 4676 return TargetLowering::getConstraintType(Constraint); 4677 } 4678 4679 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( 4680 StringRef ConstraintCode) const { 4681 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) 4682 .Case("k", InlineAsm::ConstraintCode::k) 4683 .Case("ZB", InlineAsm::ConstraintCode::ZB) 4684 .Case("ZC", InlineAsm::ConstraintCode::ZC) 4685 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); 4686 } 4687 4688 std::pair<unsigned, const TargetRegisterClass *> 4689 LoongArchTargetLowering::getRegForInlineAsmConstraint( 4690 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 4691 // First, see if this is a constraint that directly corresponds to a LoongArch 4692 // register class. 4693 if (Constraint.size() == 1) { 4694 switch (Constraint[0]) { 4695 case 'r': 4696 // TODO: Support fixed vectors up to GRLen? 4697 if (VT.isVector()) 4698 break; 4699 return std::make_pair(0U, &LoongArch::GPRRegClass); 4700 case 'f': 4701 if (Subtarget.hasBasicF() && VT == MVT::f32) 4702 return std::make_pair(0U, &LoongArch::FPR32RegClass); 4703 if (Subtarget.hasBasicD() && VT == MVT::f64) 4704 return std::make_pair(0U, &LoongArch::FPR64RegClass); 4705 if (Subtarget.hasExtLSX() && 4706 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT)) 4707 return std::make_pair(0U, &LoongArch::LSX128RegClass); 4708 if (Subtarget.hasExtLASX() && 4709 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT)) 4710 return std::make_pair(0U, &LoongArch::LASX256RegClass); 4711 break; 4712 default: 4713 break; 4714 } 4715 } 4716 4717 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen 4718 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm 4719 // constraints while the official register name is prefixed with a '$'. So we 4720 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) 4721 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is 4722 // case insensitive, so no need to convert the constraint to upper case here. 4723 // 4724 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly 4725 // decode the usage of register name aliases into their official names. And 4726 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use 4727 // official register names. 4728 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") || 4729 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) { 4730 bool IsFP = Constraint[2] == 'f'; 4731 std::pair<StringRef, StringRef> Temp = Constraint.split('$'); 4732 std::pair<unsigned, const TargetRegisterClass *> R; 4733 R = TargetLowering::getRegForInlineAsmConstraint( 4734 TRI, join_items("", Temp.first, Temp.second), VT); 4735 // Match those names to the widest floating point register type available. 4736 if (IsFP) { 4737 unsigned RegNo = R.first; 4738 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { 4739 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { 4740 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; 4741 return std::make_pair(DReg, &LoongArch::FPR64RegClass); 4742 } 4743 } 4744 } 4745 return R; 4746 } 4747 4748 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 4749 } 4750 4751 void LoongArchTargetLowering::LowerAsmOperandForConstraint( 4752 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 4753 SelectionDAG &DAG) const { 4754 // Currently only support length 1 constraints. 4755 if (Constraint.size() == 1) { 4756 switch (Constraint[0]) { 4757 case 'l': 4758 // Validate & create a 16-bit signed immediate operand. 4759 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4760 uint64_t CVal = C->getSExtValue(); 4761 if (isInt<16>(CVal)) 4762 Ops.push_back( 4763 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 4764 } 4765 return; 4766 case 'I': 4767 // Validate & create a 12-bit signed immediate operand. 4768 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4769 uint64_t CVal = C->getSExtValue(); 4770 if (isInt<12>(CVal)) 4771 Ops.push_back( 4772 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 4773 } 4774 return; 4775 case 'J': 4776 // Validate & create an integer zero operand. 4777 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 4778 if (C->getZExtValue() == 0) 4779 Ops.push_back( 4780 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT())); 4781 return; 4782 case 'K': 4783 // Validate & create a 12-bit unsigned immediate operand. 4784 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4785 uint64_t CVal = C->getZExtValue(); 4786 if (isUInt<12>(CVal)) 4787 Ops.push_back( 4788 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 4789 } 4790 return; 4791 default: 4792 break; 4793 } 4794 } 4795 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 4796 } 4797 4798 #define GET_REGISTER_MATCHER 4799 #include "LoongArchGenAsmMatcher.inc" 4800 4801 Register 4802 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, 4803 const MachineFunction &MF) const { 4804 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$'); 4805 std::string NewRegName = Name.second.str(); 4806 Register Reg = MatchRegisterAltName(NewRegName); 4807 if (Reg == LoongArch::NoRegister) 4808 Reg = MatchRegisterName(NewRegName); 4809 if (Reg == LoongArch::NoRegister) 4810 report_fatal_error( 4811 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 4812 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 4813 if (!ReservedRegs.test(Reg)) 4814 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 4815 StringRef(RegName) + "\".")); 4816 return Reg; 4817 } 4818 4819 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, 4820 EVT VT, SDValue C) const { 4821 // TODO: Support vectors. 4822 if (!VT.isScalarInteger()) 4823 return false; 4824 4825 // Omit the optimization if the data size exceeds GRLen. 4826 if (VT.getSizeInBits() > Subtarget.getGRLen()) 4827 return false; 4828 4829 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 4830 const APInt &Imm = ConstNode->getAPIntValue(); 4831 // Break MUL into (SLLI + ADD/SUB) or ALSL. 4832 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 4833 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 4834 return true; 4835 // Break MUL into (ALSL x, (SLLI x, imm0), imm1). 4836 if (ConstNode->hasOneUse() && 4837 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 4838 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) 4839 return true; 4840 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), 4841 // in which the immediate has two set bits. Or Break (MUL x, imm) 4842 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate 4843 // equals to (1 << s0) - (1 << s1). 4844 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) { 4845 unsigned Shifts = Imm.countr_zero(); 4846 // Reject immediates which can be composed via a single LUI. 4847 if (Shifts >= 12) 4848 return false; 4849 // Reject multiplications can be optimized to 4850 // (SLLI (ALSL x, x, 1/2/3/4), s). 4851 APInt ImmPop = Imm.ashr(Shifts); 4852 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) 4853 return false; 4854 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, 4855 // since it needs one more instruction than other 3 cases. 4856 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); 4857 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || 4858 (ImmSmall - Imm).isPowerOf2()) 4859 return true; 4860 } 4861 } 4862 4863 return false; 4864 } 4865 4866 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, 4867 const AddrMode &AM, 4868 Type *Ty, unsigned AS, 4869 Instruction *I) const { 4870 // LoongArch has four basic addressing modes: 4871 // 1. reg 4872 // 2. reg + 12-bit signed offset 4873 // 3. reg + 14-bit signed offset left-shifted by 2 4874 // 4. reg1 + reg2 4875 // TODO: Add more checks after support vector extension. 4876 4877 // No global is ever allowed as a base. 4878 if (AM.BaseGV) 4879 return false; 4880 4881 // Require a 12 or 14 bit signed offset. 4882 if (!isInt<12>(AM.BaseOffs) || !isShiftedInt<14, 2>(AM.BaseOffs)) 4883 return false; 4884 4885 switch (AM.Scale) { 4886 case 0: 4887 // "i" is not allowed. 4888 if (!AM.HasBaseReg) 4889 return false; 4890 // Otherwise we have "r+i". 4891 break; 4892 case 1: 4893 // "r+r+i" is not allowed. 4894 if (AM.HasBaseReg && AM.BaseOffs != 0) 4895 return false; 4896 // Otherwise we have "r+r" or "r+i". 4897 break; 4898 case 2: 4899 // "2*r+r" or "2*r+i" is not allowed. 4900 if (AM.HasBaseReg || AM.BaseOffs) 4901 return false; 4902 // Otherwise we have "r+r". 4903 break; 4904 default: 4905 return false; 4906 } 4907 4908 return true; 4909 } 4910 4911 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 4912 return isInt<12>(Imm); 4913 } 4914 4915 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { 4916 return isInt<12>(Imm); 4917 } 4918 4919 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 4920 // Zexts are free if they can be combined with a load. 4921 // Don't advertise i32->i64 zextload as being free for LA64. It interacts 4922 // poorly with type legalization of compares preferring sext. 4923 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 4924 EVT MemVT = LD->getMemoryVT(); 4925 if ((MemVT == MVT::i8 || MemVT == MVT::i16) && 4926 (LD->getExtensionType() == ISD::NON_EXTLOAD || 4927 LD->getExtensionType() == ISD::ZEXTLOAD)) 4928 return true; 4929 } 4930 4931 return TargetLowering::isZExtFree(Val, VT2); 4932 } 4933 4934 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 4935 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 4936 } 4937 4938 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { 4939 // TODO: Support vectors. 4940 if (Y.getValueType().isVector()) 4941 return false; 4942 4943 return !isa<ConstantSDNode>(Y); 4944 } 4945 4946 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { 4947 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. 4948 return ISD::SIGN_EXTEND; 4949 } 4950