//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.
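  // Note: GRLenVT is the native general-purpose register type (i32 on LA32,
  // i64 on LA64), so the actions below configure the native integer type on
  // both targets.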

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.
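  // The LASX block below mirrors the LSX setup above with 256-bit types; the
  // notable differences are that EXTRACT_VECTOR_ELT is Custom rather than
  // Legal and that CONCAT_VECTORS of the 128-bit halves is Legal.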

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
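///
/// For example (illustrative):
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is expected to lower to something like:
///   (VREPLVEI_W $v0, 1)
/// i.e. a broadcast of element 1 of the first operand.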
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
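///
/// As an illustrative case, for <8 x i16> the mask
///   <0, 8, 2, 10, 4, 12, 6, 14>
/// fits the two forms above (n = 8) and is expected to select vpackev.h.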
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
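///
/// As an illustrative case, for <8 x i16> (n = 8, x = 4) the mask
///   <4, 12, 5, 13, 6, 14, 7, 15>
/// fits the two forms above and is expected to select vilvh.h.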
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
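///
/// As an illustrative case, for <8 x i16> the mask
///   <0, 2, 4, 6, 8, 10, 12, 14>
/// concatenates the two forms above (n = 8) and is expected to select
/// vpickev.h.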
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //    0b0100      + 0b1110      -> 0b01001110
  //                                 <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
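///
/// When the second operand is undef, the single-input patterns (VREPLVEI,
/// VSHUF4I) are tried first; otherwise the fixed two-input patterns are tried
/// in order, with VSHUF as the general fallback.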
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of the vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
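///
/// XVPICKEV operates on each 128-bit lane independently, so the mask is
/// validated in quarters (see the LeftMid/RightMid iterators below) rather
/// than in halves as in the 128-bit VPICKEV case.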
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
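///
/// Unlike the 128-bit VSHUF case, XVSHUF selects elements within each 128-bit
/// lane, so the indices are rebuilt per half of the mask (see the comment in
/// the function body); masks that would require crossing lanes are rejected.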
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //   shuffling ->
  //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //   shuffling ->
  //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
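///
/// The conversion swaps or duplicates the 128-bit halves of the inputs via
/// XVPERMI (xvpermi.d) and remaps the mask indices accordingly, as can be
/// seen in the code below.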
///
/// This function may modify V1, V2 and Mask
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of mask corresponds
  // to the lower 128-bit of vector register, and the low lane of mask
  // corresponds to the higher 128-bit of vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01001110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01001110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b11101110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b11101110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01000100, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01000100, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return;
  }
}

/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // canonicalize non cross-lane shuffle vector
  SmallVector<int> NewMask(Mask);
  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
SDValue(); 1537 1538 // Currently only support lowering return address for current frame. 1539 if (Op.getConstantOperandVal(0) != 0) { 1540 DAG.getContext()->emitError( 1541 "return address can only be determined for the current frame"); 1542 return SDValue(); 1543 } 1544 1545 MachineFunction &MF = DAG.getMachineFunction(); 1546 MF.getFrameInfo().setReturnAddressIsTaken(true); 1547 MVT GRLenVT = Subtarget.getGRLenVT(); 1548 1549 // Return the value of the return address register, marking it an implicit 1550 // live-in. 1551 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(), 1552 getRegClassFor(GRLenVT)); 1553 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT); 1554 } 1555 1556 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 1557 SelectionDAG &DAG) const { 1558 MachineFunction &MF = DAG.getMachineFunction(); 1559 auto Size = Subtarget.getGRLen() / 8; 1560 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false); 1561 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1562 } 1563 1564 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, 1565 SelectionDAG &DAG) const { 1566 MachineFunction &MF = DAG.getMachineFunction(); 1567 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); 1568 1569 SDLoc DL(Op); 1570 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1571 getPointerTy(MF.getDataLayout())); 1572 1573 // vastart just stores the address of the VarArgsFrameIndex slot into the 1574 // memory location argument. 1575 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1576 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1577 MachinePointerInfo(SV)); 1578 } 1579 1580 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, 1581 SelectionDAG &DAG) const { 1582 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 1583 !Subtarget.hasBasicD() && "unexpected target features"); 1584 1585 SDLoc DL(Op); 1586 SDValue Op0 = Op.getOperand(0); 1587 if (Op0->getOpcode() == ISD::AND) { 1588 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); 1589 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) 1590 return Op; 1591 } 1592 1593 if (Op0->getOpcode() == LoongArchISD::BSTRPICK && 1594 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && 1595 Op0.getConstantOperandVal(2) == UINT64_C(0)) 1596 return Op; 1597 1598 if (Op0.getOpcode() == ISD::AssertZext && 1599 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) 1600 return Op; 1601 1602 EVT OpVT = Op0.getValueType(); 1603 EVT RetVT = Op.getValueType(); 1604 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); 1605 MakeLibCallOptions CallOptions; 1606 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 1607 SDValue Chain = SDValue(); 1608 SDValue Result; 1609 std::tie(Result, Chain) = 1610 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 1611 return Result; 1612 } 1613 1614 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, 1615 SelectionDAG &DAG) const { 1616 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 1617 !Subtarget.hasBasicD() && "unexpected target features"); 1618 1619 SDLoc DL(Op); 1620 SDValue Op0 = Op.getOperand(0); 1621 1622 if ((Op0.getOpcode() == ISD::AssertSext || 1623 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && 1624 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) 1625 return Op; 1626 1627 EVT OpVT = Op0.getValueType(); 1628 EVT RetVT = Op.getValueType(); 1629 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); 1630 MakeLibCallOptions 
CallOptions; 1631 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 1632 SDValue Chain = SDValue(); 1633 SDValue Result; 1634 std::tie(Result, Chain) = 1635 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 1636 return Result; 1637 } 1638 1639 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, 1640 SelectionDAG &DAG) const { 1641 1642 SDLoc DL(Op); 1643 SDValue Op0 = Op.getOperand(0); 1644 1645 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && 1646 Subtarget.is64Bit() && Subtarget.hasBasicF()) { 1647 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 1648 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); 1649 } 1650 return Op; 1651 } 1652 1653 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, 1654 SelectionDAG &DAG) const { 1655 1656 SDLoc DL(Op); 1657 1658 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && 1659 !Subtarget.hasBasicD()) { 1660 SDValue Dst = 1661 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); 1662 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); 1663 } 1664 1665 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); 1666 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); 1667 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); 1668 } 1669 1670 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1671 SelectionDAG &DAG, unsigned Flags) { 1672 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1673 } 1674 1675 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1676 SelectionDAG &DAG, unsigned Flags) { 1677 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1678 Flags); 1679 } 1680 1681 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1682 SelectionDAG &DAG, unsigned Flags) { 1683 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1684 N->getOffset(), Flags); 1685 } 1686 1687 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1688 SelectionDAG &DAG, unsigned Flags) { 1689 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1690 } 1691 1692 template <class NodeTy> 1693 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1694 CodeModel::Model M, 1695 bool IsLocal) const { 1696 SDLoc DL(N); 1697 EVT Ty = getPointerTy(DAG.getDataLayout()); 1698 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1699 SDValue Load; 1700 1701 switch (M) { 1702 default: 1703 report_fatal_error("Unsupported code model"); 1704 1705 case CodeModel::Large: { 1706 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 1707 1708 // This is not actually used, but is necessary for successfully matching 1709 // the PseudoLA_*_LARGE nodes. 1710 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1711 if (IsLocal) { 1712 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that 1713 // eventually becomes the desired 5-insn code sequence. 1714 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty, 1715 Tmp, Addr), 1716 0); 1717 } else { 1718 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that 1719 // eventually becomes the desired 5-insn code sequence. 
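      // A sketch of that sequence (per the psABI's large-code-model GOT
      // access; the exact relocation spellings and register choices are
      // decided by later passes):
      //   pcalau12i $dst, %got_pc_hi20(sym)
      //   addi.d    $tmp, $zero, %got_pc_lo12(sym)
      //   lu32i.d   $tmp, %got64_pc_lo20(sym)
      //   lu52i.d   $tmp, %got64_pc_hi12(sym)
      //   ldx.d     $dst, $dst, $tmp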
1720 Load = SDValue( 1721 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr), 1722 0); 1723 } 1724 break; 1725 } 1726 1727 case CodeModel::Small: 1728 case CodeModel::Medium: 1729 if (IsLocal) { 1730 // This generates the pattern (PseudoLA_PCREL sym), which expands to 1731 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). 1732 Load = SDValue( 1733 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0); 1734 } else { 1735 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d 1736 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). 1737 Load = 1738 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0); 1739 } 1740 } 1741 1742 if (!IsLocal) { 1743 // Mark the load instruction as invariant to enable hoisting in MachineLICM. 1744 MachineFunction &MF = DAG.getMachineFunction(); 1745 MachineMemOperand *MemOp = MF.getMachineMemOperand( 1746 MachinePointerInfo::getGOT(MF), 1747 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 1748 MachineMemOperand::MOInvariant, 1749 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 1750 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 1751 } 1752 1753 return Load; 1754 } 1755 1756 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, 1757 SelectionDAG &DAG) const { 1758 return getAddr(cast<BlockAddressSDNode>(Op), DAG, 1759 DAG.getTarget().getCodeModel()); 1760 } 1761 1762 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, 1763 SelectionDAG &DAG) const { 1764 return getAddr(cast<JumpTableSDNode>(Op), DAG, 1765 DAG.getTarget().getCodeModel()); 1766 } 1767 1768 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, 1769 SelectionDAG &DAG) const { 1770 return getAddr(cast<ConstantPoolSDNode>(Op), DAG, 1771 DAG.getTarget().getCodeModel()); 1772 } 1773 1774 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, 1775 SelectionDAG &DAG) const { 1776 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1777 assert(N->getOffset() == 0 && "unexpected offset in global node"); 1778 auto CM = DAG.getTarget().getCodeModel(); 1779 const GlobalValue *GV = N->getGlobal(); 1780 1781 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) { 1782 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel()) 1783 CM = *GCM; 1784 } 1785 1786 return getAddr(N, DAG, CM, GV->isDSOLocal()); 1787 } 1788 1789 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1790 SelectionDAG &DAG, 1791 unsigned Opc, bool UseGOT, 1792 bool Large) const { 1793 SDLoc DL(N); 1794 EVT Ty = getPointerTy(DAG.getDataLayout()); 1795 MVT GRLenVT = Subtarget.getGRLenVT(); 1796 1797 // This is not actually used, but is necessary for successfully matching the 1798 // PseudoLA_*_LARGE nodes. 1799 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1800 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 1801 SDValue Offset = Large 1802 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1803 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1804 if (UseGOT) { 1805 // Mark the load instruction as invariant to enable hoisting in MachineLICM. 
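    // As in getAddr() above, this is sound because the initial-exec GOT slot
    // holding the TLS offset is filled in by the dynamic linker before the
    // program runs and is never written again, so the loaded value behaves
    // like a constant.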
1806 MachineFunction &MF = DAG.getMachineFunction(); 1807 MachineMemOperand *MemOp = MF.getMachineMemOperand( 1808 MachinePointerInfo::getGOT(MF), 1809 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 1810 MachineMemOperand::MOInvariant, 1811 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 1812 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp}); 1813 } 1814 1815 // Add the thread pointer. 1816 return DAG.getNode(ISD::ADD, DL, Ty, Offset, 1817 DAG.getRegister(LoongArch::R2, GRLenVT)); 1818 } 1819 1820 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1821 SelectionDAG &DAG, 1822 unsigned Opc, 1823 bool Large) const { 1824 SDLoc DL(N); 1825 EVT Ty = getPointerTy(DAG.getDataLayout()); 1826 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1827 1828 // This is not actually used, but is necessary for successfully matching the 1829 // PseudoLA_*_LARGE nodes. 1830 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1831 1832 // Use a PC-relative addressing mode to access the dynamic GOT address. 1833 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 1834 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1835 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1836 1837 // Prepare argument list to generate call. 1838 ArgListTy Args; 1839 ArgListEntry Entry; 1840 Entry.Node = Load; 1841 Entry.Ty = CallTy; 1842 Args.push_back(Entry); 1843 1844 // Setup call to __tls_get_addr. 1845 TargetLowering::CallLoweringInfo CLI(DAG); 1846 CLI.setDebugLoc(DL) 1847 .setChain(DAG.getEntryNode()) 1848 .setLibCallee(CallingConv::C, CallTy, 1849 DAG.getExternalSymbol("__tls_get_addr", Ty), 1850 std::move(Args)); 1851 1852 return LowerCallTo(CLI).first; 1853 } 1854 1855 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, 1856 SelectionDAG &DAG, unsigned Opc, 1857 bool Large) const { 1858 SDLoc DL(N); 1859 EVT Ty = getPointerTy(DAG.getDataLayout()); 1860 const GlobalValue *GV = N->getGlobal(); 1861 1862 // This is not actually used, but is necessary for successfully matching the 1863 // PseudoLA_*_LARGE nodes. 1864 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1865 1866 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1867 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). 1868 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1869 return Large ? 
SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1870 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1871 } 1872 1873 SDValue 1874 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1875 SelectionDAG &DAG) const { 1876 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1877 CallingConv::GHC) 1878 report_fatal_error("In GHC calling convention TLS is not supported"); 1879 1880 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; 1881 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64"); 1882 1883 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1884 assert(N->getOffset() == 0 && "unexpected offset in global node"); 1885 1886 if (DAG.getTarget().useEmulatedTLS()) 1887 report_fatal_error("the emulated TLS is prohibited", 1888 /*GenCrashDiag=*/false); 1889 1890 bool IsDesc = DAG.getTarget().useTLSDESC(); 1891 1892 switch (getTargetMachine().getTLSModel(N->getGlobal())) { 1893 case TLSModel::GeneralDynamic: 1894 // In this model, application code calls the dynamic linker function 1895 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at 1896 // runtime. 1897 if (!IsDesc) 1898 return getDynamicTLSAddr(N, DAG, 1899 Large ? LoongArch::PseudoLA_TLS_GD_LARGE 1900 : LoongArch::PseudoLA_TLS_GD, 1901 Large); 1902 break; 1903 case TLSModel::LocalDynamic: 1904 // Same as GeneralDynamic, except for assembly modifiers and relocation 1905 // records. 1906 if (!IsDesc) 1907 return getDynamicTLSAddr(N, DAG, 1908 Large ? LoongArch::PseudoLA_TLS_LD_LARGE 1909 : LoongArch::PseudoLA_TLS_LD, 1910 Large); 1911 break; 1912 case TLSModel::InitialExec: 1913 // This model uses the GOT to resolve TLS offsets. 1914 return getStaticTLSAddr(N, DAG, 1915 Large ? LoongArch::PseudoLA_TLS_IE_LARGE 1916 : LoongArch::PseudoLA_TLS_IE, 1917 /*UseGOT=*/true, Large); 1918 case TLSModel::LocalExec: 1919 // This model is used when static linking as the TLS offsets are resolved 1920 // during program linking. 1921 // 1922 // This node doesn't need an extra argument for the large code model. 1923 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE, 1924 /*UseGOT=*/false); 1925 } 1926 1927 return getTLSDescAddr(N, DAG, 1928 Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE 1929 : LoongArch::PseudoLA_TLS_DESC_PC, 1930 Large); 1931 } 1932 1933 template <unsigned N> 1934 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, 1935 SelectionDAG &DAG, bool IsSigned = false) { 1936 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 1937 // Check the ImmArg. 1938 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 1939 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 1940 DAG.getContext()->emitError(Op->getOperationName(0) + 1941 ": argument out of range."); 1942 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); 1943 } 1944 return SDValue(); 1945 } 1946 1947 SDValue 1948 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1949 SelectionDAG &DAG) const { 1950 SDLoc DL(Op); 1951 switch (Op.getConstantOperandVal(0)) { 1952 default: 1953 return SDValue(); // Don't custom lower most intrinsics. 
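  // llvm.thread.pointer just reads the TLS base register, which on LoongArch
  // is $tp (R2), the same register the TLS lowerings above add to the
  // computed offsets.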
1954 case Intrinsic::thread_pointer: { 1955 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1956 return DAG.getRegister(LoongArch::R2, PtrVT); 1957 } 1958 case Intrinsic::loongarch_lsx_vpickve2gr_d: 1959 case Intrinsic::loongarch_lsx_vpickve2gr_du: 1960 case Intrinsic::loongarch_lsx_vreplvei_d: 1961 case Intrinsic::loongarch_lasx_xvrepl128vei_d: 1962 return checkIntrinsicImmArg<1>(Op, 2, DAG); 1963 case Intrinsic::loongarch_lsx_vreplvei_w: 1964 case Intrinsic::loongarch_lasx_xvrepl128vei_w: 1965 case Intrinsic::loongarch_lasx_xvpickve2gr_d: 1966 case Intrinsic::loongarch_lasx_xvpickve2gr_du: 1967 case Intrinsic::loongarch_lasx_xvpickve_d: 1968 case Intrinsic::loongarch_lasx_xvpickve_d_f: 1969 return checkIntrinsicImmArg<2>(Op, 2, DAG); 1970 case Intrinsic::loongarch_lasx_xvinsve0_d: 1971 return checkIntrinsicImmArg<2>(Op, 3, DAG); 1972 case Intrinsic::loongarch_lsx_vsat_b: 1973 case Intrinsic::loongarch_lsx_vsat_bu: 1974 case Intrinsic::loongarch_lsx_vrotri_b: 1975 case Intrinsic::loongarch_lsx_vsllwil_h_b: 1976 case Intrinsic::loongarch_lsx_vsllwil_hu_bu: 1977 case Intrinsic::loongarch_lsx_vsrlri_b: 1978 case Intrinsic::loongarch_lsx_vsrari_b: 1979 case Intrinsic::loongarch_lsx_vreplvei_h: 1980 case Intrinsic::loongarch_lasx_xvsat_b: 1981 case Intrinsic::loongarch_lasx_xvsat_bu: 1982 case Intrinsic::loongarch_lasx_xvrotri_b: 1983 case Intrinsic::loongarch_lasx_xvsllwil_h_b: 1984 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: 1985 case Intrinsic::loongarch_lasx_xvsrlri_b: 1986 case Intrinsic::loongarch_lasx_xvsrari_b: 1987 case Intrinsic::loongarch_lasx_xvrepl128vei_h: 1988 case Intrinsic::loongarch_lasx_xvpickve_w: 1989 case Intrinsic::loongarch_lasx_xvpickve_w_f: 1990 return checkIntrinsicImmArg<3>(Op, 2, DAG); 1991 case Intrinsic::loongarch_lasx_xvinsve0_w: 1992 return checkIntrinsicImmArg<3>(Op, 3, DAG); 1993 case Intrinsic::loongarch_lsx_vsat_h: 1994 case Intrinsic::loongarch_lsx_vsat_hu: 1995 case Intrinsic::loongarch_lsx_vrotri_h: 1996 case Intrinsic::loongarch_lsx_vsllwil_w_h: 1997 case Intrinsic::loongarch_lsx_vsllwil_wu_hu: 1998 case Intrinsic::loongarch_lsx_vsrlri_h: 1999 case Intrinsic::loongarch_lsx_vsrari_h: 2000 case Intrinsic::loongarch_lsx_vreplvei_b: 2001 case Intrinsic::loongarch_lasx_xvsat_h: 2002 case Intrinsic::loongarch_lasx_xvsat_hu: 2003 case Intrinsic::loongarch_lasx_xvrotri_h: 2004 case Intrinsic::loongarch_lasx_xvsllwil_w_h: 2005 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: 2006 case Intrinsic::loongarch_lasx_xvsrlri_h: 2007 case Intrinsic::loongarch_lasx_xvsrari_h: 2008 case Intrinsic::loongarch_lasx_xvrepl128vei_b: 2009 return checkIntrinsicImmArg<4>(Op, 2, DAG); 2010 case Intrinsic::loongarch_lsx_vsrlni_b_h: 2011 case Intrinsic::loongarch_lsx_vsrani_b_h: 2012 case Intrinsic::loongarch_lsx_vsrlrni_b_h: 2013 case Intrinsic::loongarch_lsx_vsrarni_b_h: 2014 case Intrinsic::loongarch_lsx_vssrlni_b_h: 2015 case Intrinsic::loongarch_lsx_vssrani_b_h: 2016 case Intrinsic::loongarch_lsx_vssrlni_bu_h: 2017 case Intrinsic::loongarch_lsx_vssrani_bu_h: 2018 case Intrinsic::loongarch_lsx_vssrlrni_b_h: 2019 case Intrinsic::loongarch_lsx_vssrarni_b_h: 2020 case Intrinsic::loongarch_lsx_vssrlrni_bu_h: 2021 case Intrinsic::loongarch_lsx_vssrarni_bu_h: 2022 case Intrinsic::loongarch_lasx_xvsrlni_b_h: 2023 case Intrinsic::loongarch_lasx_xvsrani_b_h: 2024 case Intrinsic::loongarch_lasx_xvsrlrni_b_h: 2025 case Intrinsic::loongarch_lasx_xvsrarni_b_h: 2026 case Intrinsic::loongarch_lasx_xvssrlni_b_h: 2027 case Intrinsic::loongarch_lasx_xvssrani_b_h: 2028 case 
Intrinsic::loongarch_lasx_xvssrlni_bu_h: 2029 case Intrinsic::loongarch_lasx_xvssrani_bu_h: 2030 case Intrinsic::loongarch_lasx_xvssrlrni_b_h: 2031 case Intrinsic::loongarch_lasx_xvssrarni_b_h: 2032 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: 2033 case Intrinsic::loongarch_lasx_xvssrarni_bu_h: 2034 return checkIntrinsicImmArg<4>(Op, 3, DAG); 2035 case Intrinsic::loongarch_lsx_vsat_w: 2036 case Intrinsic::loongarch_lsx_vsat_wu: 2037 case Intrinsic::loongarch_lsx_vrotri_w: 2038 case Intrinsic::loongarch_lsx_vsllwil_d_w: 2039 case Intrinsic::loongarch_lsx_vsllwil_du_wu: 2040 case Intrinsic::loongarch_lsx_vsrlri_w: 2041 case Intrinsic::loongarch_lsx_vsrari_w: 2042 case Intrinsic::loongarch_lsx_vslei_bu: 2043 case Intrinsic::loongarch_lsx_vslei_hu: 2044 case Intrinsic::loongarch_lsx_vslei_wu: 2045 case Intrinsic::loongarch_lsx_vslei_du: 2046 case Intrinsic::loongarch_lsx_vslti_bu: 2047 case Intrinsic::loongarch_lsx_vslti_hu: 2048 case Intrinsic::loongarch_lsx_vslti_wu: 2049 case Intrinsic::loongarch_lsx_vslti_du: 2050 case Intrinsic::loongarch_lsx_vbsll_v: 2051 case Intrinsic::loongarch_lsx_vbsrl_v: 2052 case Intrinsic::loongarch_lasx_xvsat_w: 2053 case Intrinsic::loongarch_lasx_xvsat_wu: 2054 case Intrinsic::loongarch_lasx_xvrotri_w: 2055 case Intrinsic::loongarch_lasx_xvsllwil_d_w: 2056 case Intrinsic::loongarch_lasx_xvsllwil_du_wu: 2057 case Intrinsic::loongarch_lasx_xvsrlri_w: 2058 case Intrinsic::loongarch_lasx_xvsrari_w: 2059 case Intrinsic::loongarch_lasx_xvslei_bu: 2060 case Intrinsic::loongarch_lasx_xvslei_hu: 2061 case Intrinsic::loongarch_lasx_xvslei_wu: 2062 case Intrinsic::loongarch_lasx_xvslei_du: 2063 case Intrinsic::loongarch_lasx_xvslti_bu: 2064 case Intrinsic::loongarch_lasx_xvslti_hu: 2065 case Intrinsic::loongarch_lasx_xvslti_wu: 2066 case Intrinsic::loongarch_lasx_xvslti_du: 2067 case Intrinsic::loongarch_lasx_xvbsll_v: 2068 case Intrinsic::loongarch_lasx_xvbsrl_v: 2069 return checkIntrinsicImmArg<5>(Op, 2, DAG); 2070 case Intrinsic::loongarch_lsx_vseqi_b: 2071 case Intrinsic::loongarch_lsx_vseqi_h: 2072 case Intrinsic::loongarch_lsx_vseqi_w: 2073 case Intrinsic::loongarch_lsx_vseqi_d: 2074 case Intrinsic::loongarch_lsx_vslei_b: 2075 case Intrinsic::loongarch_lsx_vslei_h: 2076 case Intrinsic::loongarch_lsx_vslei_w: 2077 case Intrinsic::loongarch_lsx_vslei_d: 2078 case Intrinsic::loongarch_lsx_vslti_b: 2079 case Intrinsic::loongarch_lsx_vslti_h: 2080 case Intrinsic::loongarch_lsx_vslti_w: 2081 case Intrinsic::loongarch_lsx_vslti_d: 2082 case Intrinsic::loongarch_lasx_xvseqi_b: 2083 case Intrinsic::loongarch_lasx_xvseqi_h: 2084 case Intrinsic::loongarch_lasx_xvseqi_w: 2085 case Intrinsic::loongarch_lasx_xvseqi_d: 2086 case Intrinsic::loongarch_lasx_xvslei_b: 2087 case Intrinsic::loongarch_lasx_xvslei_h: 2088 case Intrinsic::loongarch_lasx_xvslei_w: 2089 case Intrinsic::loongarch_lasx_xvslei_d: 2090 case Intrinsic::loongarch_lasx_xvslti_b: 2091 case Intrinsic::loongarch_lasx_xvslti_h: 2092 case Intrinsic::loongarch_lasx_xvslti_w: 2093 case Intrinsic::loongarch_lasx_xvslti_d: 2094 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); 2095 case Intrinsic::loongarch_lsx_vsrlni_h_w: 2096 case Intrinsic::loongarch_lsx_vsrani_h_w: 2097 case Intrinsic::loongarch_lsx_vsrlrni_h_w: 2098 case Intrinsic::loongarch_lsx_vsrarni_h_w: 2099 case Intrinsic::loongarch_lsx_vssrlni_h_w: 2100 case Intrinsic::loongarch_lsx_vssrani_h_w: 2101 case Intrinsic::loongarch_lsx_vssrlni_hu_w: 2102 case Intrinsic::loongarch_lsx_vssrani_hu_w: 2103 case Intrinsic::loongarch_lsx_vssrlrni_h_w: 2104 
case Intrinsic::loongarch_lsx_vssrarni_h_w: 2105 case Intrinsic::loongarch_lsx_vssrlrni_hu_w: 2106 case Intrinsic::loongarch_lsx_vssrarni_hu_w: 2107 case Intrinsic::loongarch_lsx_vfrstpi_b: 2108 case Intrinsic::loongarch_lsx_vfrstpi_h: 2109 case Intrinsic::loongarch_lasx_xvsrlni_h_w: 2110 case Intrinsic::loongarch_lasx_xvsrani_h_w: 2111 case Intrinsic::loongarch_lasx_xvsrlrni_h_w: 2112 case Intrinsic::loongarch_lasx_xvsrarni_h_w: 2113 case Intrinsic::loongarch_lasx_xvssrlni_h_w: 2114 case Intrinsic::loongarch_lasx_xvssrani_h_w: 2115 case Intrinsic::loongarch_lasx_xvssrlni_hu_w: 2116 case Intrinsic::loongarch_lasx_xvssrani_hu_w: 2117 case Intrinsic::loongarch_lasx_xvssrlrni_h_w: 2118 case Intrinsic::loongarch_lasx_xvssrarni_h_w: 2119 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: 2120 case Intrinsic::loongarch_lasx_xvssrarni_hu_w: 2121 case Intrinsic::loongarch_lasx_xvfrstpi_b: 2122 case Intrinsic::loongarch_lasx_xvfrstpi_h: 2123 return checkIntrinsicImmArg<5>(Op, 3, DAG); 2124 case Intrinsic::loongarch_lsx_vsat_d: 2125 case Intrinsic::loongarch_lsx_vsat_du: 2126 case Intrinsic::loongarch_lsx_vrotri_d: 2127 case Intrinsic::loongarch_lsx_vsrlri_d: 2128 case Intrinsic::loongarch_lsx_vsrari_d: 2129 case Intrinsic::loongarch_lasx_xvsat_d: 2130 case Intrinsic::loongarch_lasx_xvsat_du: 2131 case Intrinsic::loongarch_lasx_xvrotri_d: 2132 case Intrinsic::loongarch_lasx_xvsrlri_d: 2133 case Intrinsic::loongarch_lasx_xvsrari_d: 2134 return checkIntrinsicImmArg<6>(Op, 2, DAG); 2135 case Intrinsic::loongarch_lsx_vsrlni_w_d: 2136 case Intrinsic::loongarch_lsx_vsrani_w_d: 2137 case Intrinsic::loongarch_lsx_vsrlrni_w_d: 2138 case Intrinsic::loongarch_lsx_vsrarni_w_d: 2139 case Intrinsic::loongarch_lsx_vssrlni_w_d: 2140 case Intrinsic::loongarch_lsx_vssrani_w_d: 2141 case Intrinsic::loongarch_lsx_vssrlni_wu_d: 2142 case Intrinsic::loongarch_lsx_vssrani_wu_d: 2143 case Intrinsic::loongarch_lsx_vssrlrni_w_d: 2144 case Intrinsic::loongarch_lsx_vssrarni_w_d: 2145 case Intrinsic::loongarch_lsx_vssrlrni_wu_d: 2146 case Intrinsic::loongarch_lsx_vssrarni_wu_d: 2147 case Intrinsic::loongarch_lasx_xvsrlni_w_d: 2148 case Intrinsic::loongarch_lasx_xvsrani_w_d: 2149 case Intrinsic::loongarch_lasx_xvsrlrni_w_d: 2150 case Intrinsic::loongarch_lasx_xvsrarni_w_d: 2151 case Intrinsic::loongarch_lasx_xvssrlni_w_d: 2152 case Intrinsic::loongarch_lasx_xvssrani_w_d: 2153 case Intrinsic::loongarch_lasx_xvssrlni_wu_d: 2154 case Intrinsic::loongarch_lasx_xvssrani_wu_d: 2155 case Intrinsic::loongarch_lasx_xvssrlrni_w_d: 2156 case Intrinsic::loongarch_lasx_xvssrarni_w_d: 2157 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: 2158 case Intrinsic::loongarch_lasx_xvssrarni_wu_d: 2159 return checkIntrinsicImmArg<6>(Op, 3, DAG); 2160 case Intrinsic::loongarch_lsx_vsrlni_d_q: 2161 case Intrinsic::loongarch_lsx_vsrani_d_q: 2162 case Intrinsic::loongarch_lsx_vsrlrni_d_q: 2163 case Intrinsic::loongarch_lsx_vsrarni_d_q: 2164 case Intrinsic::loongarch_lsx_vssrlni_d_q: 2165 case Intrinsic::loongarch_lsx_vssrani_d_q: 2166 case Intrinsic::loongarch_lsx_vssrlni_du_q: 2167 case Intrinsic::loongarch_lsx_vssrani_du_q: 2168 case Intrinsic::loongarch_lsx_vssrlrni_d_q: 2169 case Intrinsic::loongarch_lsx_vssrarni_d_q: 2170 case Intrinsic::loongarch_lsx_vssrlrni_du_q: 2171 case Intrinsic::loongarch_lsx_vssrarni_du_q: 2172 case Intrinsic::loongarch_lasx_xvsrlni_d_q: 2173 case Intrinsic::loongarch_lasx_xvsrani_d_q: 2174 case Intrinsic::loongarch_lasx_xvsrlrni_d_q: 2175 case Intrinsic::loongarch_lasx_xvsrarni_d_q: 2176 case 
Intrinsic::loongarch_lasx_xvssrlni_d_q: 2177 case Intrinsic::loongarch_lasx_xvssrani_d_q: 2178 case Intrinsic::loongarch_lasx_xvssrlni_du_q: 2179 case Intrinsic::loongarch_lasx_xvssrani_du_q: 2180 case Intrinsic::loongarch_lasx_xvssrlrni_d_q: 2181 case Intrinsic::loongarch_lasx_xvssrarni_d_q: 2182 case Intrinsic::loongarch_lasx_xvssrlrni_du_q: 2183 case Intrinsic::loongarch_lasx_xvssrarni_du_q: 2184 return checkIntrinsicImmArg<7>(Op, 3, DAG); 2185 case Intrinsic::loongarch_lsx_vnori_b: 2186 case Intrinsic::loongarch_lsx_vshuf4i_b: 2187 case Intrinsic::loongarch_lsx_vshuf4i_h: 2188 case Intrinsic::loongarch_lsx_vshuf4i_w: 2189 case Intrinsic::loongarch_lasx_xvnori_b: 2190 case Intrinsic::loongarch_lasx_xvshuf4i_b: 2191 case Intrinsic::loongarch_lasx_xvshuf4i_h: 2192 case Intrinsic::loongarch_lasx_xvshuf4i_w: 2193 case Intrinsic::loongarch_lasx_xvpermi_d: 2194 return checkIntrinsicImmArg<8>(Op, 2, DAG); 2195 case Intrinsic::loongarch_lsx_vshuf4i_d: 2196 case Intrinsic::loongarch_lsx_vpermi_w: 2197 case Intrinsic::loongarch_lsx_vbitseli_b: 2198 case Intrinsic::loongarch_lsx_vextrins_b: 2199 case Intrinsic::loongarch_lsx_vextrins_h: 2200 case Intrinsic::loongarch_lsx_vextrins_w: 2201 case Intrinsic::loongarch_lsx_vextrins_d: 2202 case Intrinsic::loongarch_lasx_xvshuf4i_d: 2203 case Intrinsic::loongarch_lasx_xvpermi_w: 2204 case Intrinsic::loongarch_lasx_xvpermi_q: 2205 case Intrinsic::loongarch_lasx_xvbitseli_b: 2206 case Intrinsic::loongarch_lasx_xvextrins_b: 2207 case Intrinsic::loongarch_lasx_xvextrins_h: 2208 case Intrinsic::loongarch_lasx_xvextrins_w: 2209 case Intrinsic::loongarch_lasx_xvextrins_d: 2210 return checkIntrinsicImmArg<8>(Op, 3, DAG); 2211 case Intrinsic::loongarch_lsx_vrepli_b: 2212 case Intrinsic::loongarch_lsx_vrepli_h: 2213 case Intrinsic::loongarch_lsx_vrepli_w: 2214 case Intrinsic::loongarch_lsx_vrepli_d: 2215 case Intrinsic::loongarch_lasx_xvrepli_b: 2216 case Intrinsic::loongarch_lasx_xvrepli_h: 2217 case Intrinsic::loongarch_lasx_xvrepli_w: 2218 case Intrinsic::loongarch_lasx_xvrepli_d: 2219 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); 2220 case Intrinsic::loongarch_lsx_vldi: 2221 case Intrinsic::loongarch_lasx_xvldi: 2222 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); 2223 } 2224 } 2225 2226 // Helper function that emits error message for intrinsics with chain and return 2227 // merge values of a UNDEF and the chain. 
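// Returning the merge of (UNDEF, original chain) keeps both results of the
// intrinsic node well-formed, so lowering can continue after the diagnostic
// has been issued.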
2228 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, 2229 StringRef ErrorMsg, 2230 SelectionDAG &DAG) { 2231 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 2232 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, 2233 SDLoc(Op)); 2234 } 2235 2236 SDValue 2237 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2238 SelectionDAG &DAG) const { 2239 SDLoc DL(Op); 2240 MVT GRLenVT = Subtarget.getGRLenVT(); 2241 EVT VT = Op.getValueType(); 2242 SDValue Chain = Op.getOperand(0); 2243 const StringRef ErrorMsgOOR = "argument out of range"; 2244 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 2245 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 2246 2247 switch (Op.getConstantOperandVal(1)) { 2248 default: 2249 return Op; 2250 case Intrinsic::loongarch_crc_w_b_w: 2251 case Intrinsic::loongarch_crc_w_h_w: 2252 case Intrinsic::loongarch_crc_w_w_w: 2253 case Intrinsic::loongarch_crc_w_d_w: 2254 case Intrinsic::loongarch_crcc_w_b_w: 2255 case Intrinsic::loongarch_crcc_w_h_w: 2256 case Intrinsic::loongarch_crcc_w_w_w: 2257 case Intrinsic::loongarch_crcc_w_d_w: 2258 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); 2259 case Intrinsic::loongarch_csrrd_w: 2260 case Intrinsic::loongarch_csrrd_d: { 2261 unsigned Imm = Op.getConstantOperandVal(2); 2262 return !isUInt<14>(Imm) 2263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2264 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 2265 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2266 } 2267 case Intrinsic::loongarch_csrwr_w: 2268 case Intrinsic::loongarch_csrwr_d: { 2269 unsigned Imm = Op.getConstantOperandVal(3); 2270 return !isUInt<14>(Imm) 2271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2272 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 2273 {Chain, Op.getOperand(2), 2274 DAG.getConstant(Imm, DL, GRLenVT)}); 2275 } 2276 case Intrinsic::loongarch_csrxchg_w: 2277 case Intrinsic::loongarch_csrxchg_d: { 2278 unsigned Imm = Op.getConstantOperandVal(4); 2279 return !isUInt<14>(Imm) 2280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2281 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 2282 {Chain, Op.getOperand(2), Op.getOperand(3), 2283 DAG.getConstant(Imm, DL, GRLenVT)}); 2284 } 2285 case Intrinsic::loongarch_iocsrrd_d: { 2286 return DAG.getNode( 2287 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other}, 2288 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))}); 2289 } 2290 #define IOCSRRD_CASE(NAME, NODE) \ 2291 case Intrinsic::loongarch_##NAME: { \ 2292 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ 2293 {Chain, Op.getOperand(2)}); \ 2294 } 2295 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 2296 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 2297 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 2298 #undef IOCSRRD_CASE 2299 case Intrinsic::loongarch_cpucfg: { 2300 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 2301 {Chain, Op.getOperand(2)}); 2302 } 2303 case Intrinsic::loongarch_lddir_d: { 2304 unsigned Imm = Op.getConstantOperandVal(3); 2305 return !isUInt<8>(Imm) 2306 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2307 : Op; 2308 } 2309 case Intrinsic::loongarch_movfcsr2gr: { 2310 if (!Subtarget.hasBasicF()) 2311 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); 2312 unsigned Imm = Op.getConstantOperandVal(2); 2313 return !isUInt<2>(Imm) 2314 ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2315 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, 2316 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2317 } 2318 case Intrinsic::loongarch_lsx_vld: 2319 case Intrinsic::loongarch_lsx_vldrepl_b: 2320 case Intrinsic::loongarch_lasx_xvld: 2321 case Intrinsic::loongarch_lasx_xvldrepl_b: 2322 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2323 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2324 : SDValue(); 2325 case Intrinsic::loongarch_lsx_vldrepl_h: 2326 case Intrinsic::loongarch_lasx_xvldrepl_h: 2327 return !isShiftedInt<11, 1>( 2328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2329 ? emitIntrinsicWithChainErrorMessage( 2330 Op, "argument out of range or not a multiple of 2", DAG) 2331 : SDValue(); 2332 case Intrinsic::loongarch_lsx_vldrepl_w: 2333 case Intrinsic::loongarch_lasx_xvldrepl_w: 2334 return !isShiftedInt<10, 2>( 2335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2336 ? emitIntrinsicWithChainErrorMessage( 2337 Op, "argument out of range or not a multiple of 4", DAG) 2338 : SDValue(); 2339 case Intrinsic::loongarch_lsx_vldrepl_d: 2340 case Intrinsic::loongarch_lasx_xvldrepl_d: 2341 return !isShiftedInt<9, 3>( 2342 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2343 ? emitIntrinsicWithChainErrorMessage( 2344 Op, "argument out of range or not a multiple of 8", DAG) 2345 : SDValue(); 2346 } 2347 } 2348 2349 // Helper function that emits error message for intrinsics with void return 2350 // value and return the chain. 2351 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, 2352 SelectionDAG &DAG) { 2353 2354 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 2355 return Op.getOperand(0); 2356 } 2357 2358 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2359 SelectionDAG &DAG) const { 2360 SDLoc DL(Op); 2361 MVT GRLenVT = Subtarget.getGRLenVT(); 2362 SDValue Chain = Op.getOperand(0); 2363 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1); 2364 SDValue Op2 = Op.getOperand(2); 2365 const StringRef ErrorMsgOOR = "argument out of range"; 2366 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 2367 const StringRef ErrorMsgReqLA32 = "requires loongarch32"; 2368 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 2369 2370 switch (IntrinsicEnum) { 2371 default: 2372 // TODO: Add more Intrinsics. 2373 return SDValue(); 2374 case Intrinsic::loongarch_cacop_d: 2375 case Intrinsic::loongarch_cacop_w: { 2376 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) 2377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG); 2378 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) 2379 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); 2380 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) 2381 unsigned Imm1 = Op2->getAsZExtVal(); 2382 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue(); 2383 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) 2384 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); 2385 return Op; 2386 } 2387 case Intrinsic::loongarch_dbar: { 2388 unsigned Imm = Op2->getAsZExtVal(); 2389 return !isUInt<15>(Imm) 2390 ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2391 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, 2392 DAG.getConstant(Imm, DL, GRLenVT)); 2393 } 2394 case Intrinsic::loongarch_ibar: { 2395 unsigned Imm = Op2->getAsZExtVal(); 2396 return !isUInt<15>(Imm) 2397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2398 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, 2399 DAG.getConstant(Imm, DL, GRLenVT)); 2400 } 2401 case Intrinsic::loongarch_break: { 2402 unsigned Imm = Op2->getAsZExtVal(); 2403 return !isUInt<15>(Imm) 2404 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2405 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, 2406 DAG.getConstant(Imm, DL, GRLenVT)); 2407 } 2408 case Intrinsic::loongarch_movgr2fcsr: { 2409 if (!Subtarget.hasBasicF()) 2410 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); 2411 unsigned Imm = Op2->getAsZExtVal(); 2412 return !isUInt<2>(Imm) 2413 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2414 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, 2415 DAG.getConstant(Imm, DL, GRLenVT), 2416 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, 2417 Op.getOperand(3))); 2418 } 2419 case Intrinsic::loongarch_syscall: { 2420 unsigned Imm = Op2->getAsZExtVal(); 2421 return !isUInt<15>(Imm) 2422 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2423 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, 2424 DAG.getConstant(Imm, DL, GRLenVT)); 2425 } 2426 #define IOCSRWR_CASE(NAME, NODE) \ 2427 case Intrinsic::loongarch_##NAME: { \ 2428 SDValue Op3 = Op.getOperand(3); \ 2429 return Subtarget.is64Bit() \ 2430 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ 2431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 2432 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ 2433 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ 2434 Op3); \ 2435 } 2436 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); 2437 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); 2438 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); 2439 #undef IOCSRWR_CASE 2440 case Intrinsic::loongarch_iocsrwr_d: { 2441 return !Subtarget.is64Bit() 2442 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 2443 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, 2444 Op2, 2445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2446 Op.getOperand(3))); 2447 } 2448 #define ASRT_LE_GT_CASE(NAME) \ 2449 case Intrinsic::loongarch_##NAME: { \ 2450 return !Subtarget.is64Bit() \ 2451 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ 2452 : Op; \ 2453 } 2454 ASRT_LE_GT_CASE(asrtle_d) 2455 ASRT_LE_GT_CASE(asrtgt_d) 2456 #undef ASRT_LE_GT_CASE 2457 case Intrinsic::loongarch_ldpte_d: { 2458 unsigned Imm = Op.getConstantOperandVal(3); 2459 return !Subtarget.is64Bit() 2460 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 2461 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2462 : Op; 2463 } 2464 case Intrinsic::loongarch_lsx_vst: 2465 case Intrinsic::loongarch_lasx_xvst: 2466 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) 2467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2468 : SDValue(); 2469 case Intrinsic::loongarch_lasx_xvstelm_b: 2470 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2471 !isUInt<5>(Op.getConstantOperandVal(5))) 2472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2473 : SDValue(); 2474 case Intrinsic::loongarch_lsx_vstelm_b: 2475 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2476 !isUInt<4>(Op.getConstantOperandVal(5))) 2477 ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2478 : SDValue(); 2479 case Intrinsic::loongarch_lasx_xvstelm_h: 2480 return (!isShiftedInt<8, 1>( 2481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2482 !isUInt<4>(Op.getConstantOperandVal(5))) 2483 ? emitIntrinsicErrorMessage( 2484 Op, "argument out of range or not a multiple of 2", DAG) 2485 : SDValue(); 2486 case Intrinsic::loongarch_lsx_vstelm_h: 2487 return (!isShiftedInt<8, 1>( 2488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2489 !isUInt<3>(Op.getConstantOperandVal(5))) 2490 ? emitIntrinsicErrorMessage( 2491 Op, "argument out of range or not a multiple of 2", DAG) 2492 : SDValue(); 2493 case Intrinsic::loongarch_lasx_xvstelm_w: 2494 return (!isShiftedInt<8, 2>( 2495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2496 !isUInt<3>(Op.getConstantOperandVal(5))) 2497 ? emitIntrinsicErrorMessage( 2498 Op, "argument out of range or not a multiple of 4", DAG) 2499 : SDValue(); 2500 case Intrinsic::loongarch_lsx_vstelm_w: 2501 return (!isShiftedInt<8, 2>( 2502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2503 !isUInt<2>(Op.getConstantOperandVal(5))) 2504 ? emitIntrinsicErrorMessage( 2505 Op, "argument out of range or not a multiple of 4", DAG) 2506 : SDValue(); 2507 case Intrinsic::loongarch_lasx_xvstelm_d: 2508 return (!isShiftedInt<8, 3>( 2509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2510 !isUInt<2>(Op.getConstantOperandVal(5))) 2511 ? emitIntrinsicErrorMessage( 2512 Op, "argument out of range or not a multiple of 8", DAG) 2513 : SDValue(); 2514 case Intrinsic::loongarch_lsx_vstelm_d: 2515 return (!isShiftedInt<8, 3>( 2516 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2517 !isUInt<1>(Op.getConstantOperandVal(5))) 2518 ? 
emitIntrinsicErrorMessage( 2519 Op, "argument out of range or not a multiple of 8", DAG) 2520 : SDValue(); 2521 } 2522 } 2523 2524 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, 2525 SelectionDAG &DAG) const { 2526 SDLoc DL(Op); 2527 SDValue Lo = Op.getOperand(0); 2528 SDValue Hi = Op.getOperand(1); 2529 SDValue Shamt = Op.getOperand(2); 2530 EVT VT = Lo.getValueType(); 2531 2532 // if Shamt-GRLen < 0: // Shamt < GRLen 2533 // Lo = Lo << Shamt 2534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) 2535 // else: 2536 // Lo = 0 2537 // Hi = Lo << (Shamt-GRLen) 2538 2539 SDValue Zero = DAG.getConstant(0, DL, VT); 2540 SDValue One = DAG.getConstant(1, DL, VT); 2541 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 2542 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 2543 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 2544 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 2545 2546 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2547 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2548 SDValue ShiftRightLo = 2549 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); 2550 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2551 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2552 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); 2553 2554 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 2555 2556 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2557 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2558 2559 SDValue Parts[2] = {Lo, Hi}; 2560 return DAG.getMergeValues(Parts, DL); 2561 } 2562 2563 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, 2564 SelectionDAG &DAG, 2565 bool IsSRA) const { 2566 SDLoc DL(Op); 2567 SDValue Lo = Op.getOperand(0); 2568 SDValue Hi = Op.getOperand(1); 2569 SDValue Shamt = Op.getOperand(2); 2570 EVT VT = Lo.getValueType(); 2571 2572 // SRA expansion: 2573 // if Shamt-GRLen < 0: // Shamt < GRLen 2574 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 2575 // Hi = Hi >>s Shamt 2576 // else: 2577 // Lo = Hi >>s (Shamt-GRLen); 2578 // Hi = Hi >>s (GRLen-1) 2579 // 2580 // SRL expansion: 2581 // if Shamt-GRLen < 0: // Shamt < GRLen 2582 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 2583 // Hi = Hi >>u Shamt 2584 // else: 2585 // Lo = Hi >>u (Shamt-GRLen); 2586 // Hi = 0; 2587 2588 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2589 2590 SDValue Zero = DAG.getConstant(0, DL, VT); 2591 SDValue One = DAG.getConstant(1, DL, VT); 2592 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); 2593 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 2594 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 2595 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 2596 2597 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2598 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2599 SDValue ShiftLeftHi = 2600 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); 2601 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2602 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2603 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); 2604 SDValue HiFalse = 2605 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2606
2607 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2608
2609 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2610 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2611
2612 SDValue Parts[2] = {Lo, Hi};
2613 return DAG.getMergeValues(Parts, DL);
2614 }
2615
2616 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2617 // form of the given Opcode.
2618 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2619 switch (Opcode) {
2620 default:
2621 llvm_unreachable("Unexpected opcode");
2622 case ISD::UDIV:
2623 return LoongArchISD::DIV_WU;
2624 case ISD::UREM:
2625 return LoongArchISD::MOD_WU;
2626 case ISD::SHL:
2627 return LoongArchISD::SLL_W;
2628 case ISD::SRA:
2629 return LoongArchISD::SRA_W;
2630 case ISD::SRL:
2631 return LoongArchISD::SRL_W;
2632 case ISD::ROTL:
2633 case ISD::ROTR:
2634 return LoongArchISD::ROTR_W;
2635 case ISD::CTTZ:
2636 return LoongArchISD::CTZ_W;
2637 case ISD::CTLZ:
2638 return LoongArchISD::CLZ_W;
2639 }
2640 }
2641
2642 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2643 // node. Because i8/i16/i32 are not legal types for LA64, these operations
2644 // would otherwise be promoted to i64, making it difficult to select the
2645 // SLL_W/.../*W later on, because the fact that the operation was originally
2646 // of type i8/i16/i32 is lost.
2647 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2648 unsigned ExtOpc = ISD::ANY_EXTEND) {
2649 SDLoc DL(N);
2650 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2651 SDValue NewOp0, NewRes;
2652
2653 switch (NumOp) {
2654 default:
2655 llvm_unreachable("Unexpected NumOp");
2656 case 1: {
2657 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2658 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2659 break;
2660 }
2661 case 2: {
2662 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2663 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2664 if (N->getOpcode() == ISD::ROTL) {
2665 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2666 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2667 }
2668 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2669 break;
2670 }
2671 // TODO: Handle more NumOp.
2672 }
2673
2674 // ReplaceNodeResults requires we maintain the same type for the return
2675 // value.
2676 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2677 }
2678
2679 // Converts the given 32-bit operation to an i64 operation with sign-extension
2680 // semantics, in order to reduce the number of sign-extension instructions.
2681 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2682 SDLoc DL(N);
2683 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2684 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2685 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2686 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2687 DAG.getValueType(MVT::i32));
2688 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2689 }
2690
2691 // Helper function that emits an error message for intrinsics with or without
2692 // a chain and returns a UNDEF and, when present, the chain as the results.
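// The values pushed here must line up with the node's result list in
// ReplaceNodeResults: the UNDEF stands in for the value result and, when
// WithChain is set, the incoming chain is forwarded unchanged.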
2693 static void emitErrorAndReplaceIntrinsicResults( 2694 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, 2695 StringRef ErrorMsg, bool WithChain = true) { 2696 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); 2697 Results.push_back(DAG.getUNDEF(N->getValueType(0))); 2698 if (!WithChain) 2699 return; 2700 Results.push_back(N->getOperand(0)); 2701 } 2702 2703 template <unsigned N> 2704 static void 2705 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, 2706 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, 2707 unsigned ResOp) { 2708 const StringRef ErrorMsgOOR = "argument out of range"; 2709 unsigned Imm = Node->getConstantOperandVal(2); 2710 if (!isUInt<N>(Imm)) { 2711 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, 2712 /*WithChain=*/false); 2713 return; 2714 } 2715 SDLoc DL(Node); 2716 SDValue Vec = Node->getOperand(1); 2717 2718 SDValue PickElt = 2719 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, 2720 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), 2721 DAG.getValueType(Vec.getValueType().getVectorElementType())); 2722 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), 2723 PickElt.getValue(0))); 2724 } 2725 2726 static void replaceVecCondBranchResults(SDNode *N, 2727 SmallVectorImpl<SDValue> &Results, 2728 SelectionDAG &DAG, 2729 const LoongArchSubtarget &Subtarget, 2730 unsigned ResOp) { 2731 SDLoc DL(N); 2732 SDValue Vec = N->getOperand(1); 2733 2734 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); 2735 Results.push_back( 2736 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); 2737 } 2738 2739 static void 2740 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 2741 SelectionDAG &DAG, 2742 const LoongArchSubtarget &Subtarget) { 2743 switch (N->getConstantOperandVal(0)) { 2744 default: 2745 llvm_unreachable("Unexpected Intrinsic."); 2746 case Intrinsic::loongarch_lsx_vpickve2gr_b: 2747 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 2748 LoongArchISD::VPICK_SEXT_ELT); 2749 break; 2750 case Intrinsic::loongarch_lsx_vpickve2gr_h: 2751 case Intrinsic::loongarch_lasx_xvpickve2gr_w: 2752 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 2753 LoongArchISD::VPICK_SEXT_ELT); 2754 break; 2755 case Intrinsic::loongarch_lsx_vpickve2gr_w: 2756 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 2757 LoongArchISD::VPICK_SEXT_ELT); 2758 break; 2759 case Intrinsic::loongarch_lsx_vpickve2gr_bu: 2760 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 2761 LoongArchISD::VPICK_ZEXT_ELT); 2762 break; 2763 case Intrinsic::loongarch_lsx_vpickve2gr_hu: 2764 case Intrinsic::loongarch_lasx_xvpickve2gr_wu: 2765 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 2766 LoongArchISD::VPICK_ZEXT_ELT); 2767 break; 2768 case Intrinsic::loongarch_lsx_vpickve2gr_wu: 2769 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 2770 LoongArchISD::VPICK_ZEXT_ELT); 2771 break; 2772 case Intrinsic::loongarch_lsx_bz_b: 2773 case Intrinsic::loongarch_lsx_bz_h: 2774 case Intrinsic::loongarch_lsx_bz_w: 2775 case Intrinsic::loongarch_lsx_bz_d: 2776 case Intrinsic::loongarch_lasx_xbz_b: 2777 case Intrinsic::loongarch_lasx_xbz_h: 2778 case Intrinsic::loongarch_lasx_xbz_w: 2779 case Intrinsic::loongarch_lasx_xbz_d: 2780 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2781 LoongArchISD::VALL_ZERO); 2782 break; 2783 case Intrinsic::loongarch_lsx_bz_v: 2784 case Intrinsic::loongarch_lasx_xbz_v: 2785 
replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2786 LoongArchISD::VANY_ZERO); 2787 break; 2788 case Intrinsic::loongarch_lsx_bnz_b: 2789 case Intrinsic::loongarch_lsx_bnz_h: 2790 case Intrinsic::loongarch_lsx_bnz_w: 2791 case Intrinsic::loongarch_lsx_bnz_d: 2792 case Intrinsic::loongarch_lasx_xbnz_b: 2793 case Intrinsic::loongarch_lasx_xbnz_h: 2794 case Intrinsic::loongarch_lasx_xbnz_w: 2795 case Intrinsic::loongarch_lasx_xbnz_d: 2796 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2797 LoongArchISD::VALL_NONZERO); 2798 break; 2799 case Intrinsic::loongarch_lsx_bnz_v: 2800 case Intrinsic::loongarch_lasx_xbnz_v: 2801 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2802 LoongArchISD::VANY_NONZERO); 2803 break; 2804 } 2805 } 2806 2807 void LoongArchTargetLowering::ReplaceNodeResults( 2808 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2809 SDLoc DL(N); 2810 EVT VT = N->getValueType(0); 2811 switch (N->getOpcode()) { 2812 default: 2813 llvm_unreachable("Don't know how to legalize this operation"); 2814 case ISD::ADD: 2815 case ISD::SUB: 2816 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2817 "Unexpected custom legalisation"); 2818 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2819 break; 2820 case ISD::UDIV: 2821 case ISD::UREM: 2822 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2823 "Unexpected custom legalisation"); 2824 Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND)); 2825 break; 2826 case ISD::SHL: 2827 case ISD::SRA: 2828 case ISD::SRL: 2829 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2830 "Unexpected custom legalisation"); 2831 if (N->getOperand(1).getOpcode() != ISD::Constant) { 2832 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 2833 break; 2834 } 2835 break; 2836 case ISD::ROTL: 2837 case ISD::ROTR: 2838 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2839 "Unexpected custom legalisation"); 2840 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 2841 break; 2842 case ISD::FP_TO_SINT: { 2843 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2844 "Unexpected custom legalisation"); 2845 SDValue Src = N->getOperand(0); 2846 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); 2847 if (getTypeAction(*DAG.getContext(), Src.getValueType()) != 2848 TargetLowering::TypeSoftenFloat) { 2849 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); 2850 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); 2851 return; 2852 } 2853 // If the FP type needs to be softened, emit a library call using the 'si' 2854 // version. If we left it to default legalization we'd end up with 'di'. 
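    // For example, an f32 -> i32 fptosi on a soft-float configuration would
    // typically go through __fixsfsi here, whereas promoting the result to
    // i64 first would have picked __fixsfdi (illustrative only; the actual
    // libcall is whatever the RTLIB table returns for the types involved).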
2855 RTLIB::Libcall LC; 2856 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); 2857 MakeLibCallOptions CallOptions; 2858 EVT OpVT = Src.getValueType(); 2859 CallOptions.setTypeListBeforeSoften(OpVT, VT, true); 2860 SDValue Chain = SDValue(); 2861 SDValue Result; 2862 std::tie(Result, Chain) = 2863 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); 2864 Results.push_back(Result); 2865 break; 2866 } 2867 case ISD::BITCAST: { 2868 SDValue Src = N->getOperand(0); 2869 EVT SrcVT = Src.getValueType(); 2870 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && 2871 Subtarget.hasBasicF()) { 2872 SDValue Dst = 2873 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); 2874 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); 2875 } 2876 break; 2877 } 2878 case ISD::FP_TO_UINT: { 2879 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2880 "Unexpected custom legalisation"); 2881 auto &TLI = DAG.getTargetLoweringInfo(); 2882 SDValue Tmp1, Tmp2; 2883 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); 2884 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); 2885 break; 2886 } 2887 case ISD::BSWAP: { 2888 SDValue Src = N->getOperand(0); 2889 assert((VT == MVT::i16 || VT == MVT::i32) && 2890 "Unexpected custom legalization"); 2891 MVT GRLenVT = Subtarget.getGRLenVT(); 2892 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 2893 SDValue Tmp; 2894 switch (VT.getSizeInBits()) { 2895 default: 2896 llvm_unreachable("Unexpected operand width"); 2897 case 16: 2898 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc); 2899 break; 2900 case 32: 2901 // Only LA64 will get to here due to the size mismatch between VT and 2902 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. 2903 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc); 2904 break; 2905 } 2906 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 2907 break; 2908 } 2909 case ISD::BITREVERSE: { 2910 SDValue Src = N->getOperand(0); 2911 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && 2912 "Unexpected custom legalization"); 2913 MVT GRLenVT = Subtarget.getGRLenVT(); 2914 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 2915 SDValue Tmp; 2916 switch (VT.getSizeInBits()) { 2917 default: 2918 llvm_unreachable("Unexpected operand width"); 2919 case 8: 2920 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc); 2921 break; 2922 case 32: 2923 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc); 2924 break; 2925 } 2926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 2927 break; 2928 } 2929 case ISD::CTLZ: 2930 case ISD::CTTZ: { 2931 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2932 "Unexpected custom legalisation"); 2933 Results.push_back(customLegalizeToWOp(N, DAG, 1)); 2934 break; 2935 } 2936 case ISD::INTRINSIC_W_CHAIN: { 2937 SDValue Chain = N->getOperand(0); 2938 SDValue Op2 = N->getOperand(2); 2939 MVT GRLenVT = Subtarget.getGRLenVT(); 2940 const StringRef ErrorMsgOOR = "argument out of range"; 2941 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 2942 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 2943 2944 switch (N->getConstantOperandVal(1)) { 2945 default: 2946 llvm_unreachable("Unexpected Intrinsic."); 2947 case Intrinsic::loongarch_movfcsr2gr: { 2948 if (!Subtarget.hasBasicF()) { 2949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); 2950 return; 2951 } 2952 unsigned Imm = Op2->getAsZExtVal(); 2953 if (!isUInt<2>(Imm)) { 2954 
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
      Results.push_back(MOVFCSR2GRResults.getValue(1));
      break;
    }
#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue NODE = DAG.getNode( \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
    Results.push_back(NODE.getValue(1)); \
    break; \
  }
    CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue NODE = DAG.getNode( \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
        {Chain, Op2, \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
    Results.push_back(NODE.getValue(1)); \
    break; \
  }
    CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
    CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID) \
  case Intrinsic::loongarch_##ID: { \
    if (!Subtarget.is64Bit()) \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
    break; \
  }
    CSR_CASE(csrrd_d);
    CSR_CASE(csrwr_d);
    CSR_CASE(csrxchg_d);
    CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
      Results.push_back(CSRRDResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = N->getConstantOperandVal(3);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
      Results.push_back(CSRWRResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = N->getConstantOperandVal(4);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
      Results.push_back(CSRXCHGResults.getValue(1));
      break;
    }
#define IOCSRRD_CASE(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue IOCSRRDResults = \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back( \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
    Results.push_back(IOCSRRDResults.getValue(1)); \
    break; \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
      Results.push_back(CPUCFGResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          "On LA32, only 32-bit registers can be read.");
    Results.push_back(DAG.getUNDEF(VT));
    Results.push_back(N->getOperand(0));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  }
}

static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(0);
  SDValue SecondOperand = N->getOperand(1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  uint64_t lsb, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    //  $dst = and ((sra or srl) $src, lsb), (2**len - 1)
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where msb = lsb + len - 1

    // The second operand of the shift must be an immediate.
    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
      return SDValue();

    lsb = CN->getZExtValue();

    // Return if the shifted mask does not start at bit 0 or the sum of its
    // length and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(0);
  } else {
    // Pattern match BSTRPICK.
    //  $dst = and $src, (2**len - 1), if len > 12
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds the word size.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
                            DAG.getConstant(msb, DL, GRLenVT),
                            DAG.getConstant(lsb, DL, GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //   bstrpick $Rd, $Rs, msb, lsb
  //   slli     $Rd, $Rd, lsb
  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                     DAG.getConstant(lsb, DL, GRLenVT));
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
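  // As an illustration of the transform above: (srl (and $src, 0xff0), 4) has
  // MaskIdx = 4 and MaskLen = 8, and Shamt = 4 lies within [4, 11], so the
  // code below rewrites it to BSTRPICK $dst, $src, 11, 4.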
  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
                       FirstOperand->getOperand(0),
                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));

  return SDValue();
}

static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  //  R = or (and X, mask0), (and Y, mask1)
  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  //  =>
  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
                       DAG.getConstant(ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  //  R = or (and X, mask), (shl Y, shamt)
  //  where mask = (2**shamt - 1)
  //  =>
  //  R = BSTRINS X, Y, ValBits - 1, shamt
  //  where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0),
                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  //  R = or (and X, mask), const
  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
  //  =>
  //  R = BSTRINS X, (const >> lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
    return DAG.getNode(
        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
        DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                      : (MaskIdx0 + MaskLen0 - 1),
                        DL, GRLenVT),
        DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
  // pattern is more common than the 1st. So we put the 1st before the 6th in
  // order to match as many nodes as possible.
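  // Illustrative instance of the 6th pattern: a = b | ((c & 0xff) << 8) with
  // bits [15:8] of b known to be zero becomes BSTRINS b, c, 15, 8.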
  ConstantSDNode *CNMask, *CNShamt;
  unsigned MaskIdx, MaskLen;
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      CNShamt->getZExtValue() + MaskLen <= ValBits) {
    Shamt = CNShamt->getZExtValue();
    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(Shamt, DL, GRLenVT));
    }
  }

  // 7th pattern.
  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  // overwritten by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
  // before the 7th in order to match as many nodes as possible.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      CNShamt->getZExtValue() == MaskIdx) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }

  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry;
  }

  SwapAndRetried = false;
Retry2:
  // 8th pattern.
  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  // the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 8th pattern is more common than the 4th and 5th patterns.
  // So we put it here in order to match as many nodes as possible or generate
  // fewer instructions.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
                                     N1->getOperand(0),
                                     DAG.getConstant(MaskIdx, DL, GRLenVT)),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }
  // Swap N0/N1 and retry.
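  // (Pattern 8 only inspects N1, so after resetting the flag above we give
  // the swapped operands one more pass through Retry2.)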
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry2;
  }

  return SDValue();
}

static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
  ExtType = ISD::NON_EXTLOAD;

  switch (V.getNode()->getOpcode()) {
  case ISD::LOAD: {
    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
    if ((LoadNode->getMemoryVT() == MVT::i8) ||
        (LoadNode->getMemoryVT() == MVT::i16)) {
      ExtType = LoadNode->getExtensionType();
      return true;
    }
    return false;
  }
  case ISD::AssertSext: {
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
      ExtType = ISD::SEXTLOAD;
      return true;
    }
    return false;
  }
  case ISD::AssertZext: {
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
      ExtType = ISD::ZEXTLOAD;
      return true;
    }
    return false;
  }
  default:
    return false;
  }

  return false;
}

// Eliminate redundant truncation and zero-extension nodes.
// * Case 1 (both AND operands are zero-extended truncations, and the
//   zero-extension of Input2 also feeds the compare):
//     (CMP (AND (ZERO_EXT (TRUNCATE Input1)),
//               (ZERO_EXT (TRUNCATE Input2))),
//          (ZERO_EXT (TRUNCATE Input2)), CC)
// * Case 2 (Input1 is first XORed with Constant -1 and the AND result is
//   compared against Constant 0):
//     (CMP (AND (ZERO_EXT (TRUNCATE (XOR Input1, Constant -1))),
//               (ZERO_EXT (TRUNCATE Input2))),
//          Constant 0, CC)
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

  SDNode *AndNode = N->getOperand(0).getNode();
  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  SDValue AndInputValue2 = AndNode->getOperand(1);
  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue CmpInputValue = N->getOperand(1);
  SDValue AndInputValue1 = AndNode->getOperand(0);
  if (AndInputValue1.getOpcode() == ISD::XOR) {
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();
    ConstantSDNode *CN =
        dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
    if (!CN || CN->getSExtValue() != -1)
      return SDValue();
    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
    if (!CN || CN->getSExtValue() != 0)
      return SDValue();
    AndInputValue1 = AndInputValue1.getOperand(0);
    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
    if (AndInputValue2 != CmpInputValue)
      return SDValue();
  } else {
    return SDValue();
  }

  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
  ISD::LoadExtType ExtType1;
  ISD::LoadExtType ExtType2;

  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
      !checkValueWidth(TruncInputValue2, ExtType2))
    return SDValue();

  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
    return SDValue();

  if ((ExtType2 != ISD::ZEXTLOAD) &&
      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
    return SDValue();

  // These truncation and zero-extension nodes are not necessary, remove them.
  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
                               TruncInputValue1, TruncInputValue2);
  SDValue NewSetCC =
      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
  return SDValue(N, 0);
}

// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != LoongArchISD::REVB_2W)
    return SDValue();

  return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
                     Src.getOperand(0));
}

template <unsigned N>
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
                                       SelectionDAG &DAG,
                                       const LoongArchSubtarget &Subtarget,
                                       bool IsSigned = false) {
  SDLoc DL(Node);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
  // Check the ImmArg.
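  // N here is the bit width of the intrinsic's immediate field; for example
  // the vinsgr2vr lowerings further down instantiate this with N = 4, 3, 2
  // or 1 depending on the element count.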
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
  }
  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
}

template <unsigned N>
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
                                   SelectionDAG &DAG, bool IsSigned = false) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));

  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }
  return DAG.getConstant(
      APInt(ResTy.getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      DL, ResTy);
}

static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue Vec = Node->getOperand(2);
  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
}

static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit =
      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

template <unsigned N>
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
}

template <unsigned N>
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
}

template <unsigned N>
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
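  // On success this lowers to an XOR with a splat of (1 << CImm), flipping a
  // single bit in every vector element.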
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}

static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  switch (N->getConstantOperandVal(0)) {
  default:
    break;
  case Intrinsic::loongarch_lsx_vadd_b:
  case Intrinsic::loongarch_lsx_vadd_h:
  case Intrinsic::loongarch_lsx_vadd_w:
  case Intrinsic::loongarch_lsx_vadd_d:
  case Intrinsic::loongarch_lasx_xvadd_b:
  case Intrinsic::loongarch_lasx_xvadd_h:
  case Intrinsic::loongarch_lasx_xvadd_w:
  case Intrinsic::loongarch_lasx_xvadd_d:
    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vaddi_bu:
  case Intrinsic::loongarch_lsx_vaddi_hu:
  case Intrinsic::loongarch_lsx_vaddi_wu:
  case Intrinsic::loongarch_lsx_vaddi_du:
  case Intrinsic::loongarch_lasx_xvaddi_bu:
  case Intrinsic::loongarch_lasx_xvaddi_hu:
  case Intrinsic::loongarch_lasx_xvaddi_wu:
  case Intrinsic::loongarch_lasx_xvaddi_du:
    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsub_b:
  case Intrinsic::loongarch_lsx_vsub_h:
  case Intrinsic::loongarch_lsx_vsub_w:
  case Intrinsic::loongarch_lsx_vsub_d:
  case Intrinsic::loongarch_lasx_xvsub_b:
  case Intrinsic::loongarch_lasx_xvsub_h:
  case Intrinsic::loongarch_lasx_xvsub_w:
  case Intrinsic::loongarch_lasx_xvsub_d:
    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vsubi_bu:
  case Intrinsic::loongarch_lsx_vsubi_hu:
  case Intrinsic::loongarch_lsx_vsubi_wu:
  case Intrinsic::loongarch_lsx_vsubi_du:
  case Intrinsic::loongarch_lasx_xvsubi_bu:
  case Intrinsic::loongarch_lasx_xvsubi_hu:
  case Intrinsic::loongarch_lasx_xvsubi_wu:
  case Intrinsic::loongarch_lasx_xvsubi_du:
    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vneg_b:
  case Intrinsic::loongarch_lsx_vneg_h:
  case Intrinsic::loongarch_lsx_vneg_w:
  case Intrinsic::loongarch_lsx_vneg_d:
  case Intrinsic::loongarch_lasx_xvneg_b:
  case Intrinsic::loongarch_lasx_xvneg_h:
  case Intrinsic::loongarch_lasx_xvneg_w:
  case Intrinsic::loongarch_lasx_xvneg_d:
    return DAG.getNode(
        ISD::SUB, DL, N->getValueType(0),
        DAG.getConstant(
            APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
                  /*isSigned=*/true),
            SDLoc(N), N->getValueType(0)),
        N->getOperand(1));
  case Intrinsic::loongarch_lsx_vmax_b:
  case Intrinsic::loongarch_lsx_vmax_h:
  case Intrinsic::loongarch_lsx_vmax_w:
  case Intrinsic::loongarch_lsx_vmax_d:
  case Intrinsic::loongarch_lasx_xvmax_b:
  case Intrinsic::loongarch_lasx_xvmax_h:
  case Intrinsic::loongarch_lasx_xvmax_w:
  case Intrinsic::loongarch_lasx_xvmax_d:
    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmax_bu:
  case Intrinsic::loongarch_lsx_vmax_hu:
  case Intrinsic::loongarch_lsx_vmax_wu:
  case Intrinsic::loongarch_lsx_vmax_du:
  case Intrinsic::loongarch_lasx_xvmax_bu:
  case Intrinsic::loongarch_lasx_xvmax_hu:
  case Intrinsic::loongarch_lasx_xvmax_wu:
  case Intrinsic::loongarch_lasx_xvmax_du:
    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmaxi_b:
  case Intrinsic::loongarch_lsx_vmaxi_h:
  case Intrinsic::loongarch_lsx_vmaxi_w:
  case Intrinsic::loongarch_lsx_vmaxi_d:
  case Intrinsic::loongarch_lasx_xvmaxi_b:
  case Intrinsic::loongarch_lasx_xvmaxi_h:
  case Intrinsic::loongarch_lasx_xvmaxi_w:
  case Intrinsic::loongarch_lasx_xvmaxi_d:
    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
  case Intrinsic::loongarch_lsx_vmaxi_bu:
  case Intrinsic::loongarch_lsx_vmaxi_hu:
  case Intrinsic::loongarch_lsx_vmaxi_wu:
  case Intrinsic::loongarch_lsx_vmaxi_du:
  case Intrinsic::loongarch_lasx_xvmaxi_bu:
  case Intrinsic::loongarch_lasx_xvmaxi_hu:
  case Intrinsic::loongarch_lasx_xvmaxi_wu:
  case Intrinsic::loongarch_lasx_xvmaxi_du:
    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vmin_b:
  case Intrinsic::loongarch_lsx_vmin_h:
  case Intrinsic::loongarch_lsx_vmin_w:
  case Intrinsic::loongarch_lsx_vmin_d:
  case Intrinsic::loongarch_lasx_xvmin_b:
  case Intrinsic::loongarch_lasx_xvmin_h:
  case Intrinsic::loongarch_lasx_xvmin_w:
  case Intrinsic::loongarch_lasx_xvmin_d:
    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmin_bu:
  case Intrinsic::loongarch_lsx_vmin_hu:
  case Intrinsic::loongarch_lsx_vmin_wu:
  case Intrinsic::loongarch_lsx_vmin_du:
  case Intrinsic::loongarch_lasx_xvmin_bu:
  case Intrinsic::loongarch_lasx_xvmin_hu:
  case Intrinsic::loongarch_lasx_xvmin_wu:
  case Intrinsic::loongarch_lasx_xvmin_du:
    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmini_b:
  case Intrinsic::loongarch_lsx_vmini_h:
  case Intrinsic::loongarch_lsx_vmini_w:
  case Intrinsic::loongarch_lsx_vmini_d:
  case Intrinsic::loongarch_lasx_xvmini_b:
  case Intrinsic::loongarch_lasx_xvmini_h:
  case Intrinsic::loongarch_lasx_xvmini_w:
  case Intrinsic::loongarch_lasx_xvmini_d:
    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
  case Intrinsic::loongarch_lsx_vmini_bu:
  case Intrinsic::loongarch_lsx_vmini_hu:
  case Intrinsic::loongarch_lsx_vmini_wu:
  case Intrinsic::loongarch_lsx_vmini_du:
  case Intrinsic::loongarch_lasx_xvmini_bu:
  case Intrinsic::loongarch_lasx_xvmini_hu:
  case Intrinsic::loongarch_lasx_xvmini_wu:
  case Intrinsic::loongarch_lasx_xvmini_du:
    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vmul_b:
  case Intrinsic::loongarch_lsx_vmul_h:
  case Intrinsic::loongarch_lsx_vmul_w:
  case Intrinsic::loongarch_lsx_vmul_d:
  case Intrinsic::loongarch_lasx_xvmul_b:
  case Intrinsic::loongarch_lasx_xvmul_h:
  case Intrinsic::loongarch_lasx_xvmul_w:
  case Intrinsic::loongarch_lasx_xvmul_d:
    return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmadd_b:
  case Intrinsic::loongarch_lsx_vmadd_h:
  case Intrinsic::loongarch_lsx_vmadd_w:
  case Intrinsic::loongarch_lsx_vmadd_d:
  case Intrinsic::loongarch_lasx_xvmadd_b:
  case Intrinsic::loongarch_lasx_xvmadd_h:
  case Intrinsic::loongarch_lasx_xvmadd_w:
  case Intrinsic::loongarch_lasx_xvmadd_d: {
    EVT ResTy = N->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
                                   N->getOperand(3)));
  }
  case Intrinsic::loongarch_lsx_vmsub_b:
  case Intrinsic::loongarch_lsx_vmsub_h:
  case Intrinsic::loongarch_lsx_vmsub_w:
  case Intrinsic::loongarch_lsx_vmsub_d:
  case Intrinsic::loongarch_lasx_xvmsub_b:
  case Intrinsic::loongarch_lasx_xvmsub_h:
  case Intrinsic::loongarch_lasx_xvmsub_w:
  case Intrinsic::loongarch_lasx_xvmsub_d: {
    EVT ResTy = N->getValueType(0);
    return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
                                   N->getOperand(3)));
  }
  case Intrinsic::loongarch_lsx_vdiv_b:
  case Intrinsic::loongarch_lsx_vdiv_h:
  case Intrinsic::loongarch_lsx_vdiv_w:
  case Intrinsic::loongarch_lsx_vdiv_d:
  case Intrinsic::loongarch_lasx_xvdiv_b:
  case Intrinsic::loongarch_lasx_xvdiv_h:
  case Intrinsic::loongarch_lasx_xvdiv_w:
  case Intrinsic::loongarch_lasx_xvdiv_d:
    return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vdiv_bu:
  case Intrinsic::loongarch_lsx_vdiv_hu:
  case Intrinsic::loongarch_lsx_vdiv_wu:
  case Intrinsic::loongarch_lsx_vdiv_du:
  case Intrinsic::loongarch_lasx_xvdiv_bu:
  case Intrinsic::loongarch_lasx_xvdiv_hu:
  case Intrinsic::loongarch_lasx_xvdiv_wu:
  case Intrinsic::loongarch_lasx_xvdiv_du:
    return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmod_b:
  case Intrinsic::loongarch_lsx_vmod_h:
  case Intrinsic::loongarch_lsx_vmod_w:
  case Intrinsic::loongarch_lsx_vmod_d:
  case Intrinsic::loongarch_lasx_xvmod_b:
  case Intrinsic::loongarch_lasx_xvmod_h:
  case Intrinsic::loongarch_lasx_xvmod_w:
  case Intrinsic::loongarch_lasx_xvmod_d:
    return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmod_bu:
  case Intrinsic::loongarch_lsx_vmod_hu:
  case Intrinsic::loongarch_lsx_vmod_wu:
  case Intrinsic::loongarch_lsx_vmod_du:
  case Intrinsic::loongarch_lasx_xvmod_bu:
  case Intrinsic::loongarch_lasx_xvmod_hu:
  case Intrinsic::loongarch_lasx_xvmod_wu:
  case Intrinsic::loongarch_lasx_xvmod_du:
    return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vand_v:
  case Intrinsic::loongarch_lasx_xvand_v:
    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vor_v:
  case Intrinsic::loongarch_lasx_xvor_v:
    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vxor_v:
  case Intrinsic::loongarch_lasx_xvxor_v:
    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vnor_v:
  case Intrinsic::loongarch_lasx_xvnor_v: {
    SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0),
                              N->getOperand(1), N->getOperand(2));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::loongarch_lsx_vandi_b:
  case Intrinsic::loongarch_lasx_xvandi_b:
    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vori_b:
  case Intrinsic::loongarch_lasx_xvori_b:
    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vxori_b:
  case Intrinsic::loongarch_lasx_xvxori_b:
    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsll_b:
  case Intrinsic::loongarch_lsx_vsll_h:
  case Intrinsic::loongarch_lsx_vsll_w:
  case Intrinsic::loongarch_lsx_vsll_d:
  case Intrinsic::loongarch_lasx_xvsll_b:
  case Intrinsic::loongarch_lasx_xvsll_h:
  case Intrinsic::loongarch_lasx_xvsll_w:
  case Intrinsic::loongarch_lasx_xvsll_d:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vslli_b:
  case Intrinsic::loongarch_lasx_xvslli_b:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_h:
  case Intrinsic::loongarch_lasx_xvslli_h:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_w:
  case Intrinsic::loongarch_lasx_xvslli_w:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_d:
  case Intrinsic::loongarch_lasx_xvslli_d:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrl_b:
  case Intrinsic::loongarch_lsx_vsrl_h:
  case Intrinsic::loongarch_lsx_vsrl_w:
  case Intrinsic::loongarch_lsx_vsrl_d:
  case Intrinsic::loongarch_lasx_xvsrl_b:
  case Intrinsic::loongarch_lasx_xvsrl_h:
  case Intrinsic::loongarch_lasx_xvsrl_w:
  case Intrinsic::loongarch_lasx_xvsrl_d:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vsrli_b:
  case Intrinsic::loongarch_lasx_xvsrli_b:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_h:
  case Intrinsic::loongarch_lasx_xvsrli_h:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_w:
  case Intrinsic::loongarch_lasx_xvsrli_w:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_d:
  case Intrinsic::loongarch_lasx_xvsrli_d:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsra_b:
  case Intrinsic::loongarch_lsx_vsra_h:
  case Intrinsic::loongarch_lsx_vsra_w:
  case Intrinsic::loongarch_lsx_vsra_d:
  case Intrinsic::loongarch_lasx_xvsra_b:
  case Intrinsic::loongarch_lasx_xvsra_h:
  case Intrinsic::loongarch_lasx_xvsra_w:
  case Intrinsic::loongarch_lasx_xvsra_d:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vsrai_b:
  case Intrinsic::loongarch_lasx_xvsrai_b:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_h:
  case Intrinsic::loongarch_lasx_xvsrai_h:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_w:
  case Intrinsic::loongarch_lasx_xvsrai_w:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_d:
  case Intrinsic::loongarch_lasx_xvsrai_d:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vclz_b:
  case Intrinsic::loongarch_lsx_vclz_h:
  case Intrinsic::loongarch_lsx_vclz_w:
  case Intrinsic::loongarch_lsx_vclz_d:
  case Intrinsic::loongarch_lasx_xvclz_b:
  case Intrinsic::loongarch_lasx_xvclz_h:
  case Intrinsic::loongarch_lasx_xvclz_w:
  case Intrinsic::loongarch_lasx_xvclz_d:
    return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
  case Intrinsic::loongarch_lsx_vpcnt_b:
  case Intrinsic::loongarch_lsx_vpcnt_h:
  case Intrinsic::loongarch_lsx_vpcnt_w:
  case Intrinsic::loongarch_lsx_vpcnt_d:
  case Intrinsic::loongarch_lasx_xvpcnt_b:
  case Intrinsic::loongarch_lasx_xvpcnt_h:
  case Intrinsic::loongarch_lasx_xvpcnt_w:
  case Intrinsic::loongarch_lasx_xvpcnt_d:
    return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
  case Intrinsic::loongarch_lsx_vbitclr_b:
  case Intrinsic::loongarch_lsx_vbitclr_h:
  case Intrinsic::loongarch_lsx_vbitclr_w:
  case Intrinsic::loongarch_lsx_vbitclr_d:
  case Intrinsic::loongarch_lasx_xvbitclr_b:
  case Intrinsic::loongarch_lasx_xvbitclr_h:
  case Intrinsic::loongarch_lasx_xvbitclr_w:
  case Intrinsic::loongarch_lasx_xvbitclr_d:
    return lowerVectorBitClear(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_b:
  case Intrinsic::loongarch_lasx_xvbitclri_b:
    return lowerVectorBitClearImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_h:
  case Intrinsic::loongarch_lasx_xvbitclri_h:
    return lowerVectorBitClearImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_w:
  case Intrinsic::loongarch_lasx_xvbitclri_w:
    return lowerVectorBitClearImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_d:
  case Intrinsic::loongarch_lasx_xvbitclri_d:
    return lowerVectorBitClearImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitset_b:
  case Intrinsic::loongarch_lsx_vbitset_h:
  case Intrinsic::loongarch_lsx_vbitset_w:
  case Intrinsic::loongarch_lsx_vbitset_d:
  case Intrinsic::loongarch_lasx_xvbitset_b:
  case Intrinsic::loongarch_lasx_xvbitset_h:
  case Intrinsic::loongarch_lasx_xvbitset_w:
  case Intrinsic::loongarch_lasx_xvbitset_d: {
    EVT VecTy = N->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);
    return DAG.getNode(
        ISD::OR, DL, VecTy, N->getOperand(1),
        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitseti_b:
  case Intrinsic::loongarch_lasx_xvbitseti_b:
    return lowerVectorBitSetImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_h:
  case Intrinsic::loongarch_lasx_xvbitseti_h:
    return lowerVectorBitSetImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_w:
  case Intrinsic::loongarch_lasx_xvbitseti_w:
    return lowerVectorBitSetImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_d:
  case Intrinsic::loongarch_lasx_xvbitseti_d:
    return lowerVectorBitSetImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrev_b:
  case Intrinsic::loongarch_lsx_vbitrev_h:
  case Intrinsic::loongarch_lsx_vbitrev_w:
  case Intrinsic::loongarch_lsx_vbitrev_d:
  case Intrinsic::loongarch_lasx_xvbitrev_b:
  case Intrinsic::loongarch_lasx_xvbitrev_h:
  case Intrinsic::loongarch_lasx_xvbitrev_w:
  case Intrinsic::loongarch_lasx_xvbitrev_d: {
    EVT VecTy = N->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);
    return DAG.getNode(
        ISD::XOR, DL, VecTy, N->getOperand(1),
        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitrevi_b:
  case Intrinsic::loongarch_lasx_xvbitrevi_b:
    return lowerVectorBitRevImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_h:
  case Intrinsic::loongarch_lasx_xvbitrevi_h:
    return lowerVectorBitRevImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_w:
  case Intrinsic::loongarch_lasx_xvbitrevi_w:
    return lowerVectorBitRevImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_d:
  case Intrinsic::loongarch_lasx_xvbitrevi_d:
    return lowerVectorBitRevImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vfadd_s:
  case Intrinsic::loongarch_lsx_vfadd_d:
  case Intrinsic::loongarch_lasx_xvfadd_s:
  case Intrinsic::loongarch_lasx_xvfadd_d:
    return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfsub_s:
  case Intrinsic::loongarch_lsx_vfsub_d:
  case Intrinsic::loongarch_lasx_xvfsub_s:
  case Intrinsic::loongarch_lasx_xvfsub_d:
    return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfmul_s:
  case Intrinsic::loongarch_lsx_vfmul_d:
  case Intrinsic::loongarch_lasx_xvfmul_s:
  case Intrinsic::loongarch_lasx_xvfmul_d:
    return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfdiv_s:
  case Intrinsic::loongarch_lsx_vfdiv_d:
  case Intrinsic::loongarch_lasx_xvfdiv_s:
  case Intrinsic::loongarch_lasx_xvfdiv_d:
    return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfmadd_s:
  case Intrinsic::loongarch_lsx_vfmadd_d:
  case Intrinsic::loongarch_lasx_xvfmadd_s:
  case Intrinsic::loongarch_lasx_xvfmadd_d:
    return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3));
  case Intrinsic::loongarch_lsx_vinsgr2vr_b:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_h:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_w:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_d:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vreplgr2vr_b:
  case Intrinsic::loongarch_lsx_vreplgr2vr_h:
  case Intrinsic::loongarch_lsx_vreplgr2vr_w:
  case Intrinsic::loongarch_lsx_vreplgr2vr_d:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
    EVT ResTy = N->getValueType(0);
    SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
    return DAG.getBuildVector(ResTy, DL, Ops);
  }
  case Intrinsic::loongarch_lsx_vreplve_b:
  case Intrinsic::loongarch_lsx_vreplve_h:
  case Intrinsic::loongarch_lsx_vreplve_w:
  case Intrinsic::loongarch_lsx_vreplve_d:
  case Intrinsic::loongarch_lasx_xvreplve_b:
  case Intrinsic::loongarch_lasx_xvreplve_h:
  case Intrinsic::loongarch_lasx_xvreplve_w:
  case Intrinsic::loongarch_lasx_xvreplve_d:
    return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
                       N->getOperand(1),
                       DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                   N->getOperand(2)));
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::AND:
    return performANDCombine(N, DAG, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DAG, DCI, Subtarget);
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::BITREV_W:
    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN:
    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
  }
  return SDValue();
}

static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
                                              MachineBasicBlock *MBB) {
  if (!ZeroDivCheck)
    return MBB;

  // Build instructions:
  // MBB:
  //   div(or mod)   $dst, $dividend, $divisor
  //   bnez          $divisor, SinkMBB
  // BreakMBB:
  //   break         7 // BRK_DIVZERO
  // SinkMBB:
  //   fallthrough
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *MF = MBB->getParent();
  auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, BreakMBB);
  MF->insert(It, SinkMBB);

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Divisor = MI.getOperand(2);
  Register DivisorReg = Divisor.getReg();

  // MBB:
  BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
      .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
      .addMBB(SinkMBB);
  MBB->addSuccessor(BreakMBB);
  MBB->addSuccessor(SinkMBB);

  // BreakMBB:
  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
  // definition of BRK_DIVZERO.
  BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
  BreakMBB->addSuccessor(SinkMBB);

  // Clear Divisor's kill flag.
  Divisor.setIsKill(false);

  return SinkMBB;
}

static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
                        const LoongArchSubtarget &Subtarget) {
  unsigned CondOpc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVBZ:
    CondOpc = LoongArch::VSETEQZ_V;
    break;
  case LoongArch::PseudoVBZ_B:
    CondOpc = LoongArch::VSETANYEQZ_B;
    break;
  case LoongArch::PseudoVBZ_H:
    CondOpc = LoongArch::VSETANYEQZ_H;
    break;
  case LoongArch::PseudoVBZ_W:
    CondOpc = LoongArch::VSETANYEQZ_W;
    break;
  case LoongArch::PseudoVBZ_D:
    CondOpc = LoongArch::VSETANYEQZ_D;
    break;
  case LoongArch::PseudoVBNZ:
    CondOpc = LoongArch::VSETNEZ_V;
    break;
  case LoongArch::PseudoVBNZ_B:
    CondOpc = LoongArch::VSETALLNEZ_B;
    break;
  case LoongArch::PseudoVBNZ_H:
    CondOpc = LoongArch::VSETALLNEZ_H;
    break;
  case LoongArch::PseudoVBNZ_W:
    CondOpc = LoongArch::VSETALLNEZ_W;
    break;
  case LoongArch::PseudoVBNZ_D:
    CondOpc = LoongArch::VSETALLNEZ_D;
    break;
  case LoongArch::PseudoXVBZ:
    CondOpc = LoongArch::XVSETEQZ_V;
    break;
  case LoongArch::PseudoXVBZ_B:
    CondOpc = LoongArch::XVSETANYEQZ_B;
    break;
  case LoongArch::PseudoXVBZ_H:
    CondOpc = LoongArch::XVSETANYEQZ_H;
    break;
  case LoongArch::PseudoXVBZ_W:
    CondOpc = LoongArch::XVSETANYEQZ_W;
    break;
  case LoongArch::PseudoXVBZ_D:
    CondOpc = LoongArch::XVSETANYEQZ_D;
    break;
  case LoongArch::PseudoXVBNZ:
    CondOpc = LoongArch::XVSETNEZ_V;
    break;
  case LoongArch::PseudoXVBNZ_B:
    CondOpc = LoongArch::XVSETALLNEZ_B;
    break;
  case LoongArch::PseudoXVBNZ_H:
    CondOpc = LoongArch::XVSETALLNEZ_H;
    break;
  case LoongArch::PseudoXVBNZ_W:
    CondOpc = LoongArch::XVSETALLNEZ_W;
    break;
  case LoongArch::PseudoXVBNZ_D:
    CondOpc = LoongArch::XVSETALLNEZ_D;
    break;
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
F->CreateMachineBasicBlock(LLVM_BB); 4379 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); 4380 4381 F->insert(It, FalseBB); 4382 F->insert(It, TrueBB); 4383 F->insert(It, SinkBB); 4384 4385 // Transfer the remainder of MBB and its successor edges to Sink. 4386 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); 4387 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 4388 4389 // Insert the real instruction to BB. 4390 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); 4391 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); 4392 4393 // Insert branch. 4394 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); 4395 BB->addSuccessor(FalseBB); 4396 BB->addSuccessor(TrueBB); 4397 4398 // FalseBB. 4399 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 4400 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) 4401 .addReg(LoongArch::R0) 4402 .addImm(0); 4403 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); 4404 FalseBB->addSuccessor(SinkBB); 4405 4406 // TrueBB. 4407 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 4408 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) 4409 .addReg(LoongArch::R0) 4410 .addImm(1); 4411 TrueBB->addSuccessor(SinkBB); 4412 4413 // SinkBB: merge the results. 4414 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), 4415 MI.getOperand(0).getReg()) 4416 .addReg(RD1) 4417 .addMBB(FalseBB) 4418 .addReg(RD2) 4419 .addMBB(TrueBB); 4420 4421 // The pseudo instruction is gone now. 4422 MI.eraseFromParent(); 4423 return SinkBB; 4424 } 4425 4426 static MachineBasicBlock * 4427 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, 4428 const LoongArchSubtarget &Subtarget) { 4429 unsigned InsOp; 4430 unsigned HalfSize; 4431 switch (MI.getOpcode()) { 4432 default: 4433 llvm_unreachable("Unexpected opcode"); 4434 case LoongArch::PseudoXVINSGR2VR_B: 4435 HalfSize = 16; 4436 InsOp = LoongArch::VINSGR2VR_B; 4437 break; 4438 case LoongArch::PseudoXVINSGR2VR_H: 4439 HalfSize = 8; 4440 InsOp = LoongArch::VINSGR2VR_H; 4441 break; 4442 } 4443 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4444 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; 4445 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; 4446 DebugLoc DL = MI.getDebugLoc(); 4447 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 4448 // XDst = vector_insert XSrc, Elt, Idx 4449 Register XDst = MI.getOperand(0).getReg(); 4450 Register XSrc = MI.getOperand(1).getReg(); 4451 Register Elt = MI.getOperand(2).getReg(); 4452 unsigned Idx = MI.getOperand(3).getImm(); 4453 4454 Register ScratchReg1 = XSrc; 4455 if (Idx >= HalfSize) { 4456 ScratchReg1 = MRI.createVirtualRegister(RC); 4457 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) 4458 .addReg(XSrc) 4459 .addReg(XSrc) 4460 .addImm(1); 4461 } 4462 4463 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); 4464 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); 4465 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) 4466 .addReg(ScratchReg1, 0, LoongArch::sub_128); 4467 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) 4468 .addReg(ScratchSubReg1) 4469 .addReg(Elt) 4470 .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); 4471 4472 Register ScratchReg2 = XDst; 4473 if (Idx >= HalfSize) 4474 ScratchReg2 = MRI.createVirtualRegister(RC); 4475 4476 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) 4477 .addImm(0) 4478 .addReg(ScratchSubReg2) 4479 .addImm(LoongArch::sub_128); 4480 4481 if (Idx >= HalfSize) 4482 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) 4483 .addReg(XSrc) 4484 .addReg(ScratchReg2) 4485 .addImm(2); 4486 4487 MI.eraseFromParent(); 4488 return BB; 4489 } 4490 4491 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 4492 MachineInstr &MI, MachineBasicBlock *BB) const { 4493 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4494 DebugLoc DL = MI.getDebugLoc(); 4495 4496 switch (MI.getOpcode()) { 4497 default: 4498 llvm_unreachable("Unexpected instr type to insert"); 4499 case LoongArch::DIV_W: 4500 case LoongArch::DIV_WU: 4501 case LoongArch::MOD_W: 4502 case LoongArch::MOD_WU: 4503 case LoongArch::DIV_D: 4504 case LoongArch::DIV_DU: 4505 case LoongArch::MOD_D: 4506 case LoongArch::MOD_DU: 4507 return insertDivByZeroTrap(MI, BB); 4508 break; 4509 case LoongArch::WRFCSR: { 4510 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 4511 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 4512 .addReg(MI.getOperand(1).getReg()); 4513 MI.eraseFromParent(); 4514 return BB; 4515 } 4516 case LoongArch::RDFCSR: { 4517 MachineInstr *ReadFCSR = 4518 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 4519 MI.getOperand(0).getReg()) 4520 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 4521 ReadFCSR->getOperand(1).setIsUndef(); 4522 MI.eraseFromParent(); 4523 return BB; 4524 } 4525 case LoongArch::PseudoVBZ: 4526 case LoongArch::PseudoVBZ_B: 4527 case LoongArch::PseudoVBZ_H: 4528 case LoongArch::PseudoVBZ_W: 4529 case LoongArch::PseudoVBZ_D: 4530 case LoongArch::PseudoVBNZ: 4531 case LoongArch::PseudoVBNZ_B: 4532 case LoongArch::PseudoVBNZ_H: 4533 case LoongArch::PseudoVBNZ_W: 4534 case LoongArch::PseudoVBNZ_D: 4535 case LoongArch::PseudoXVBZ: 4536 case LoongArch::PseudoXVBZ_B: 4537 case LoongArch::PseudoXVBZ_H: 4538 case LoongArch::PseudoXVBZ_W: 4539 case LoongArch::PseudoXVBZ_D: 4540 case LoongArch::PseudoXVBNZ: 4541 case LoongArch::PseudoXVBNZ_B: 4542 case LoongArch::PseudoXVBNZ_H: 4543 case LoongArch::PseudoXVBNZ_W: 4544 case LoongArch::PseudoXVBNZ_D: 4545 return emitVecCondBranchPseudo(MI, BB, Subtarget); 4546 case LoongArch::PseudoXVINSGR2VR_B: 4547 case LoongArch::PseudoXVINSGR2VR_H: 4548 return emitPseudoXVINSGR2VR(MI, BB, Subtarget); 4549 } 4550 } 4551 4552 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( 4553 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 4554 unsigned *Fast) const { 4555 if (!Subtarget.hasUAL()) 4556 return false; 4557 4558 // TODO: set reasonable speed number. 4559 if (Fast) 4560 *Fast = 1; 4561 return true; 4562 } 4563 4564 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 4565 switch ((LoongArchISD::NodeType)Opcode) { 4566 case LoongArchISD::FIRST_NUMBER: 4567 break; 4568 4569 #define NODE_NAME_CASE(node) \ 4570 case LoongArchISD::node: \ 4571 return "LoongArchISD::" #node; 4572 4573 // TODO: Add more target-dependent nodes later. 
4574 NODE_NAME_CASE(CALL) 4575 NODE_NAME_CASE(CALL_MEDIUM) 4576 NODE_NAME_CASE(CALL_LARGE) 4577 NODE_NAME_CASE(RET) 4578 NODE_NAME_CASE(TAIL) 4579 NODE_NAME_CASE(TAIL_MEDIUM) 4580 NODE_NAME_CASE(TAIL_LARGE) 4581 NODE_NAME_CASE(SLL_W) 4582 NODE_NAME_CASE(SRA_W) 4583 NODE_NAME_CASE(SRL_W) 4584 NODE_NAME_CASE(BSTRINS) 4585 NODE_NAME_CASE(BSTRPICK) 4586 NODE_NAME_CASE(MOVGR2FR_W_LA64) 4587 NODE_NAME_CASE(MOVFR2GR_S_LA64) 4588 NODE_NAME_CASE(FTINT) 4589 NODE_NAME_CASE(REVB_2H) 4590 NODE_NAME_CASE(REVB_2W) 4591 NODE_NAME_CASE(BITREV_4B) 4592 NODE_NAME_CASE(BITREV_W) 4593 NODE_NAME_CASE(ROTR_W) 4594 NODE_NAME_CASE(ROTL_W) 4595 NODE_NAME_CASE(DIV_WU) 4596 NODE_NAME_CASE(MOD_WU) 4597 NODE_NAME_CASE(CLZ_W) 4598 NODE_NAME_CASE(CTZ_W) 4599 NODE_NAME_CASE(DBAR) 4600 NODE_NAME_CASE(IBAR) 4601 NODE_NAME_CASE(BREAK) 4602 NODE_NAME_CASE(SYSCALL) 4603 NODE_NAME_CASE(CRC_W_B_W) 4604 NODE_NAME_CASE(CRC_W_H_W) 4605 NODE_NAME_CASE(CRC_W_W_W) 4606 NODE_NAME_CASE(CRC_W_D_W) 4607 NODE_NAME_CASE(CRCC_W_B_W) 4608 NODE_NAME_CASE(CRCC_W_H_W) 4609 NODE_NAME_CASE(CRCC_W_W_W) 4610 NODE_NAME_CASE(CRCC_W_D_W) 4611 NODE_NAME_CASE(CSRRD) 4612 NODE_NAME_CASE(CSRWR) 4613 NODE_NAME_CASE(CSRXCHG) 4614 NODE_NAME_CASE(IOCSRRD_B) 4615 NODE_NAME_CASE(IOCSRRD_H) 4616 NODE_NAME_CASE(IOCSRRD_W) 4617 NODE_NAME_CASE(IOCSRRD_D) 4618 NODE_NAME_CASE(IOCSRWR_B) 4619 NODE_NAME_CASE(IOCSRWR_H) 4620 NODE_NAME_CASE(IOCSRWR_W) 4621 NODE_NAME_CASE(IOCSRWR_D) 4622 NODE_NAME_CASE(CPUCFG) 4623 NODE_NAME_CASE(MOVGR2FCSR) 4624 NODE_NAME_CASE(MOVFCSR2GR) 4625 NODE_NAME_CASE(CACOP_D) 4626 NODE_NAME_CASE(CACOP_W) 4627 NODE_NAME_CASE(VSHUF) 4628 NODE_NAME_CASE(VPICKEV) 4629 NODE_NAME_CASE(VPICKOD) 4630 NODE_NAME_CASE(VPACKEV) 4631 NODE_NAME_CASE(VPACKOD) 4632 NODE_NAME_CASE(VILVL) 4633 NODE_NAME_CASE(VILVH) 4634 NODE_NAME_CASE(VSHUF4I) 4635 NODE_NAME_CASE(VREPLVEI) 4636 NODE_NAME_CASE(XVPERMI) 4637 NODE_NAME_CASE(VPICK_SEXT_ELT) 4638 NODE_NAME_CASE(VPICK_ZEXT_ELT) 4639 NODE_NAME_CASE(VREPLVE) 4640 NODE_NAME_CASE(VALL_ZERO) 4641 NODE_NAME_CASE(VANY_ZERO) 4642 NODE_NAME_CASE(VALL_NONZERO) 4643 NODE_NAME_CASE(VANY_NONZERO) 4644 } 4645 #undef NODE_NAME_CASE 4646 return nullptr; 4647 } 4648 4649 //===----------------------------------------------------------------------===// 4650 // Calling Convention Implementation 4651 //===----------------------------------------------------------------------===// 4652 4653 // Eight general-purpose registers a0-a7 used for passing integer arguments, 4654 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 4655 // fixed-point arguments, and floating-point arguments when no FPR is available 4656 // or with soft float ABI. 4657 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, 4658 LoongArch::R7, LoongArch::R8, LoongArch::R9, 4659 LoongArch::R10, LoongArch::R11}; 4660 // Eight floating-point registers fa0-fa7 used for passing floating-point 4661 // arguments, and fa0-fa1 are also used to return values. 4662 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, 4663 LoongArch::F3, LoongArch::F4, LoongArch::F5, 4664 LoongArch::F6, LoongArch::F7}; 4665 // FPR32 and FPR64 alias each other. 
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};

// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(GRLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}

// Implements the LoongArch calling convention. Returns true upon failure.
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsFixed, bool IsRet,
                         Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // Pass floating-point values in GPRs when this is a variadic argument or
  // when no FPR is available.
  bool UseGPRForFloat = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
    break;
  case LoongArchABI::ABI_ILP32F:
  case LoongArchABI::ABI_LP64F:
  case LoongArchABI::ABI_ILP32D:
  case LoongArchABI::ABI_LP64D:
    UseGPRForFloat = !IsFixed;
    break;
  case LoongArchABI::ABI_ILP32S:
  case LoongArchABI::ABI_LP64S:
    break;
  }

  // FPR32 and FPR64 alias each other.
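  // In other words, once fa0-fa7 have all been handed out for f32 values there
  // are no f64 argument registers left either, so the single check against the
  // FPR32 list below is sufficient and any further floating-point arguments
  // fall back to GPRs or the stack.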
4750 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) 4751 UseGPRForFloat = true; 4752 4753 if (UseGPRForFloat && ValVT == MVT::f32) { 4754 LocVT = GRLenVT; 4755 LocInfo = CCValAssign::BCvt; 4756 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { 4757 LocVT = MVT::i64; 4758 LocInfo = CCValAssign::BCvt; 4759 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { 4760 // TODO: Handle passing f64 on LA32 with D feature. 4761 report_fatal_error("Passing f64 with GPR on LA32 is undefined"); 4762 } 4763 4764 // If this is a variadic argument, the LoongArch calling convention requires 4765 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 4766 // byte alignment. An aligned register should be used regardless of whether 4767 // the original argument was split during legalisation or not. The argument 4768 // will not be passed by registers if the original type is larger than 4769 // 2*GRLen, so the register alignment rule does not apply. 4770 unsigned TwoGRLenInBytes = (2 * GRLen) / 8; 4771 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && 4772 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) { 4773 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 4774 // Skip 'odd' register if necessary. 4775 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 4776 State.AllocateReg(ArgGPRs); 4777 } 4778 4779 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 4780 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 4781 State.getPendingArgFlags(); 4782 4783 assert(PendingLocs.size() == PendingArgFlags.size() && 4784 "PendingLocs and PendingArgFlags out of sync"); 4785 4786 // Split arguments might be passed indirectly, so keep track of the pending 4787 // values. 4788 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 4789 LocVT = GRLenVT; 4790 LocInfo = CCValAssign::Indirect; 4791 PendingLocs.push_back( 4792 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 4793 PendingArgFlags.push_back(ArgFlags); 4794 if (!ArgFlags.isSplitEnd()) { 4795 return false; 4796 } 4797 } 4798 4799 // If the split argument only had two elements, it should be passed directly 4800 // in registers or on the stack. 4801 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 4802 PendingLocs.size() <= 2) { 4803 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 4804 // Apply the normal calling convention rules to the first half of the 4805 // split argument. 4806 CCValAssign VA = PendingLocs[0]; 4807 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 4808 PendingLocs.clear(); 4809 PendingArgFlags.clear(); 4810 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, 4811 ArgFlags); 4812 } 4813 4814 // Allocate to a register if possible, or else a stack slot. 4815 Register Reg; 4816 unsigned StoreSizeBytes = GRLen / 8; 4817 Align StackAlign = Align(GRLen / 8); 4818 4819 if (ValVT == MVT::f32 && !UseGPRForFloat) 4820 Reg = State.AllocateReg(ArgFPR32s); 4821 else if (ValVT == MVT::f64 && !UseGPRForFloat) 4822 Reg = State.AllocateReg(ArgFPR64s); 4823 else if (ValVT.is128BitVector()) 4824 Reg = State.AllocateReg(ArgVRs); 4825 else if (ValVT.is256BitVector()) 4826 Reg = State.AllocateReg(ArgXRs); 4827 else 4828 Reg = State.AllocateReg(ArgGPRs); 4829 4830 unsigned StackOffset = 4831 Reg ? 
0 : State.AllocateStack(StoreSizeBytes, StackAlign); 4832 4833 // If we reach this point and PendingLocs is non-empty, we must be at the 4834 // end of a split argument that must be passed indirectly. 4835 if (!PendingLocs.empty()) { 4836 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 4837 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 4838 for (auto &It : PendingLocs) { 4839 if (Reg) 4840 It.convertToReg(Reg); 4841 else 4842 It.convertToMem(StackOffset); 4843 State.addLoc(It); 4844 } 4845 PendingLocs.clear(); 4846 PendingArgFlags.clear(); 4847 return false; 4848 } 4849 assert((!UseGPRForFloat || LocVT == GRLenVT) && 4850 "Expected an GRLenVT at this stage"); 4851 4852 if (Reg) { 4853 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4854 return false; 4855 } 4856 4857 // When a floating-point value is passed on the stack, no bit-cast is needed. 4858 if (ValVT.isFloatingPoint()) { 4859 LocVT = ValVT; 4860 LocInfo = CCValAssign::Full; 4861 } 4862 4863 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4864 return false; 4865 } 4866 4867 void LoongArchTargetLowering::analyzeInputArgs( 4868 MachineFunction &MF, CCState &CCInfo, 4869 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 4870 LoongArchCCAssignFn Fn) const { 4871 FunctionType *FType = MF.getFunction().getFunctionType(); 4872 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 4873 MVT ArgVT = Ins[i].VT; 4874 Type *ArgTy = nullptr; 4875 if (IsRet) 4876 ArgTy = FType->getReturnType(); 4877 else if (Ins[i].isOrigArg()) 4878 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 4879 LoongArchABI::ABI ABI = 4880 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 4881 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, 4882 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 4883 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT 4884 << '\n'); 4885 llvm_unreachable(""); 4886 } 4887 } 4888 } 4889 4890 void LoongArchTargetLowering::analyzeOutputArgs( 4891 MachineFunction &MF, CCState &CCInfo, 4892 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 4893 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { 4894 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4895 MVT ArgVT = Outs[i].VT; 4896 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 4897 LoongArchABI::ABI ABI = 4898 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 4899 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, 4900 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 4901 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT 4902 << "\n"); 4903 llvm_unreachable(""); 4904 } 4905 } 4906 } 4907 4908 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 4909 // values. 
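// For example, on LA64 an f32 value that was passed or returned in a GPR
// (LocVT i64, LocInfo BCvt) is moved back into an FPR via MOVGR2FR_W_LA64;
// every other BCvt case is a plain bitcast.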
4910 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 4911 const CCValAssign &VA, const SDLoc &DL) { 4912 switch (VA.getLocInfo()) { 4913 default: 4914 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4915 case CCValAssign::Full: 4916 case CCValAssign::Indirect: 4917 break; 4918 case CCValAssign::BCvt: 4919 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 4920 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 4921 else 4922 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 4923 break; 4924 } 4925 return Val; 4926 } 4927 4928 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 4929 const CCValAssign &VA, const SDLoc &DL, 4930 const ISD::InputArg &In, 4931 const LoongArchTargetLowering &TLI) { 4932 MachineFunction &MF = DAG.getMachineFunction(); 4933 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4934 EVT LocVT = VA.getLocVT(); 4935 SDValue Val; 4936 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 4937 Register VReg = RegInfo.createVirtualRegister(RC); 4938 RegInfo.addLiveIn(VA.getLocReg(), VReg); 4939 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 4940 4941 // If input is sign extended from 32 bits, note it for the OptW pass. 4942 if (In.isOrigArg()) { 4943 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 4944 if (OrigArg->getType()->isIntegerTy()) { 4945 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 4946 // An input zero extended from i31 can also be considered sign extended. 4947 if ((BitWidth <= 32 && In.Flags.isSExt()) || 4948 (BitWidth < 32 && In.Flags.isZExt())) { 4949 LoongArchMachineFunctionInfo *LAFI = 4950 MF.getInfo<LoongArchMachineFunctionInfo>(); 4951 LAFI->addSExt32Register(VReg); 4952 } 4953 } 4954 } 4955 4956 return convertLocVTToValVT(DAG, Val, VA, DL); 4957 } 4958 4959 // The caller is responsible for loading the full value if the argument is 4960 // passed with CCValAssign::Indirect. 
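// That is, for an Indirect location the load built below only yields the
// GRLen-sized pointer; LowerFormalArguments then loads the actual argument
// parts through that pointer.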
4961 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 4962 const CCValAssign &VA, const SDLoc &DL) { 4963 MachineFunction &MF = DAG.getMachineFunction(); 4964 MachineFrameInfo &MFI = MF.getFrameInfo(); 4965 EVT ValVT = VA.getValVT(); 4966 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 4967 /*IsImmutable=*/true); 4968 SDValue FIN = DAG.getFrameIndex( 4969 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 4970 4971 ISD::LoadExtType ExtType; 4972 switch (VA.getLocInfo()) { 4973 default: 4974 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4975 case CCValAssign::Full: 4976 case CCValAssign::Indirect: 4977 case CCValAssign::BCvt: 4978 ExtType = ISD::NON_EXTLOAD; 4979 break; 4980 } 4981 return DAG.getExtLoad( 4982 ExtType, DL, VA.getLocVT(), Chain, FIN, 4983 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 4984 } 4985 4986 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 4987 const CCValAssign &VA, const SDLoc &DL) { 4988 EVT LocVT = VA.getLocVT(); 4989 4990 switch (VA.getLocInfo()) { 4991 default: 4992 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4993 case CCValAssign::Full: 4994 break; 4995 case CCValAssign::BCvt: 4996 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 4997 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 4998 else 4999 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5000 break; 5001 } 5002 return Val; 5003 } 5004 5005 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 5006 CCValAssign::LocInfo LocInfo, 5007 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5008 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5009 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 5010 // s0 s1 s2 s3 s4 s5 s6 s7 s8 5011 static const MCPhysReg GPRList[] = { 5012 LoongArch::R23, LoongArch::R24, LoongArch::R25, 5013 LoongArch::R26, LoongArch::R27, LoongArch::R28, 5014 LoongArch::R29, LoongArch::R30, LoongArch::R31}; 5015 if (unsigned Reg = State.AllocateReg(GPRList)) { 5016 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5017 return false; 5018 } 5019 } 5020 5021 if (LocVT == MVT::f32) { 5022 // Pass in STG registers: F1, F2, F3, F4 5023 // fs0,fs1,fs2,fs3 5024 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, 5025 LoongArch::F26, LoongArch::F27}; 5026 if (unsigned Reg = State.AllocateReg(FPR32List)) { 5027 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5028 return false; 5029 } 5030 } 5031 5032 if (LocVT == MVT::f64) { 5033 // Pass in STG registers: D1, D2, D3, D4 5034 // fs4,fs5,fs6,fs7 5035 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, 5036 LoongArch::F30_64, LoongArch::F31_64}; 5037 if (unsigned Reg = State.AllocateReg(FPR64List)) { 5038 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5039 return false; 5040 } 5041 } 5042 5043 report_fatal_error("No registers left in GHC calling convention"); 5044 return true; 5045 } 5046 5047 // Transform physical registers into virtual registers. 
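// For illustration (LP64D, details elided): for a callee such as
// `void f(int a, double b, __int128 c)` the assignments produced by
// CC_LoongArch above are roughly a -> $a0, b -> $fa0 and the two halves of
// c -> $a1/$a2; the loop below then copies each location out of its physical
// register or stack slot.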
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      report_fatal_error(
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(ArgPartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
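    // Worked example (LA64): a callee that fixes only $a0 leaves Idx == 1, so
    // VarArgsSaveSize starts at 8 * 7 = 56 and VaArgOffset at -56; because Idx
    // is odd, an extra 8-byte slot is created further down, growing the save
    // area to 64 bytes and keeping it 2*GRLen-aligned.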
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument, which is needed
    // for VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Check if the return value is used only as a return value, as otherwise
// we can't perform a tail-call.
bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
                                                 SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

// Check whether the call is eligible for tail call optimization.
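// In short, a call is rejected below if any argument lives on the stack or is
// passed indirectly, if the caller or callee uses struct-return semantics, if
// any argument is passed byval, or if the callee's preserved-register set does
// not cover the caller's.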
5216 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 5217 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 5218 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 5219 5220 auto CalleeCC = CLI.CallConv; 5221 auto &Outs = CLI.Outs; 5222 auto &Caller = MF.getFunction(); 5223 auto CallerCC = Caller.getCallingConv(); 5224 5225 // Do not tail call opt if the stack is used to pass parameters. 5226 if (CCInfo.getStackSize() != 0) 5227 return false; 5228 5229 // Do not tail call opt if any parameters need to be passed indirectly. 5230 for (auto &VA : ArgLocs) 5231 if (VA.getLocInfo() == CCValAssign::Indirect) 5232 return false; 5233 5234 // Do not tail call opt if either caller or callee uses struct return 5235 // semantics. 5236 auto IsCallerStructRet = Caller.hasStructRetAttr(); 5237 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 5238 if (IsCallerStructRet || IsCalleeStructRet) 5239 return false; 5240 5241 // Do not tail call opt if either the callee or caller has a byval argument. 5242 for (auto &Arg : Outs) 5243 if (Arg.Flags.isByVal()) 5244 return false; 5245 5246 // The callee has to preserve all registers the caller needs to preserve. 5247 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5248 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 5249 if (CalleeCC != CallerCC) { 5250 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 5251 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 5252 return false; 5253 } 5254 return true; 5255 } 5256 5257 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 5258 return DAG.getDataLayout().getPrefTypeAlign( 5259 VT.getTypeForEVT(*DAG.getContext())); 5260 } 5261 5262 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 5263 // and output parameter nodes. 5264 SDValue 5265 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 5266 SmallVectorImpl<SDValue> &InVals) const { 5267 SelectionDAG &DAG = CLI.DAG; 5268 SDLoc &DL = CLI.DL; 5269 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5270 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5271 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5272 SDValue Chain = CLI.Chain; 5273 SDValue Callee = CLI.Callee; 5274 CallingConv::ID CallConv = CLI.CallConv; 5275 bool IsVarArg = CLI.IsVarArg; 5276 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5277 MVT GRLenVT = Subtarget.getGRLenVT(); 5278 bool &IsTailCall = CLI.IsTailCall; 5279 5280 MachineFunction &MF = DAG.getMachineFunction(); 5281 5282 // Analyze the operands of the call, assigning locations to each operand. 5283 SmallVector<CCValAssign> ArgLocs; 5284 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5285 5286 if (CallConv == CallingConv::GHC) 5287 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 5288 else 5289 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 5290 5291 // Check if it's really possible to do a tail call. 5292 if (IsTailCall) 5293 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 5294 5295 if (IsTailCall) 5296 ++NumTailCalls; 5297 else if (CLI.CB && CLI.CB->isMustTailCall()) 5298 report_fatal_error("failed to perform tail call elimination on a call " 5299 "site marked musttail"); 5300 5301 // Get a count of how many bytes are to be pushed on the stack. 5302 unsigned NumBytes = ArgCCInfo.getStackSize(); 5303 5304 // Create local copies for byval args. 
5305 SmallVector<SDValue> ByValArgs; 5306 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5307 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5308 if (!Flags.isByVal()) 5309 continue; 5310 5311 SDValue Arg = OutVals[i]; 5312 unsigned Size = Flags.getByValSize(); 5313 Align Alignment = Flags.getNonZeroByValAlign(); 5314 5315 int FI = 5316 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 5317 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5318 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 5319 5320 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 5321 /*IsVolatile=*/false, 5322 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt, 5323 MachinePointerInfo(), MachinePointerInfo()); 5324 ByValArgs.push_back(FIPtr); 5325 } 5326 5327 if (!IsTailCall) 5328 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 5329 5330 // Copy argument values to their designated locations. 5331 SmallVector<std::pair<Register, SDValue>> RegsToPass; 5332 SmallVector<SDValue> MemOpChains; 5333 SDValue StackPtr; 5334 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 5335 CCValAssign &VA = ArgLocs[i]; 5336 SDValue ArgValue = OutVals[i]; 5337 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5338 5339 // Promote the value if needed. 5340 // For now, only handle fully promoted and indirect arguments. 5341 if (VA.getLocInfo() == CCValAssign::Indirect) { 5342 // Store the argument in a stack slot and pass its address. 5343 Align StackAlign = 5344 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 5345 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 5346 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 5347 // If the original argument was split and passed by reference, we need to 5348 // store the required parts of it here (and pass just one address). 5349 unsigned ArgIndex = Outs[i].OrigArgIndex; 5350 unsigned ArgPartOffset = Outs[i].PartOffset; 5351 assert(ArgPartOffset == 0); 5352 // Calculate the total size to store. We don't have access to what we're 5353 // actually storing other than performing the loop and collecting the 5354 // info. 5355 SmallVector<std::pair<SDValue, SDValue>> Parts; 5356 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 5357 SDValue PartValue = OutVals[i + 1]; 5358 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 5359 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 5360 EVT PartVT = PartValue.getValueType(); 5361 5362 StoredSize += PartVT.getStoreSize(); 5363 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 5364 Parts.push_back(std::make_pair(PartValue, Offset)); 5365 ++i; 5366 } 5367 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 5368 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 5369 MemOpChains.push_back( 5370 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 5371 MachinePointerInfo::getFixedStack(MF, FI))); 5372 for (const auto &Part : Parts) { 5373 SDValue PartValue = Part.first; 5374 SDValue PartOffset = Part.second; 5375 SDValue Address = 5376 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 5377 MemOpChains.push_back( 5378 DAG.getStore(Chain, DL, PartValue, Address, 5379 MachinePointerInfo::getFixedStack(MF, FI))); 5380 } 5381 ArgValue = SpillSlot; 5382 } else { 5383 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 5384 } 5385 5386 // Use local copy if it is a byval arg. 
5387 if (Flags.isByVal()) 5388 ArgValue = ByValArgs[j++]; 5389 5390 if (VA.isRegLoc()) { 5391 // Queue up the argument copies and emit them at the end. 5392 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 5393 } else { 5394 assert(VA.isMemLoc() && "Argument not register or memory"); 5395 assert(!IsTailCall && "Tail call not allowed if stack is used " 5396 "for passing parameters"); 5397 5398 // Work out the address of the stack slot. 5399 if (!StackPtr.getNode()) 5400 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 5401 SDValue Address = 5402 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 5403 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 5404 5405 // Emit the store. 5406 MemOpChains.push_back( 5407 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 5408 } 5409 } 5410 5411 // Join the stores, which are independent of one another. 5412 if (!MemOpChains.empty()) 5413 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 5414 5415 SDValue Glue; 5416 5417 // Build a sequence of copy-to-reg nodes, chained and glued together. 5418 for (auto &Reg : RegsToPass) { 5419 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 5420 Glue = Chain.getValue(1); 5421 } 5422 5423 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 5424 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 5425 // split it and then direct call can be matched by PseudoCALL. 5426 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 5427 const GlobalValue *GV = S->getGlobal(); 5428 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) 5429 ? LoongArchII::MO_CALL 5430 : LoongArchII::MO_CALL_PLT; 5431 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 5432 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 5433 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr) 5434 ? LoongArchII::MO_CALL 5435 : LoongArchII::MO_CALL_PLT; 5436 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 5437 } 5438 5439 // The first call operand is the chain and the second is the target address. 5440 SmallVector<SDValue> Ops; 5441 Ops.push_back(Chain); 5442 Ops.push_back(Callee); 5443 5444 // Add argument registers to the end of the list so that they are 5445 // known live into the call. 5446 for (auto &Reg : RegsToPass) 5447 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 5448 5449 if (!IsTailCall) { 5450 // Add a register mask operand representing the call-preserved registers. 5451 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5452 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 5453 assert(Mask && "Missing call preserved mask for calling convention"); 5454 Ops.push_back(DAG.getRegisterMask(Mask)); 5455 } 5456 5457 // Glue the call to the argument copies, if any. 5458 if (Glue.getNode()) 5459 Ops.push_back(Glue); 5460 5461 // Emit the call. 5462 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 5463 unsigned Op; 5464 switch (DAG.getTarget().getCodeModel()) { 5465 default: 5466 report_fatal_error("Unsupported code model"); 5467 case CodeModel::Small: 5468 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; 5469 break; 5470 case CodeModel::Medium: 5471 assert(Subtarget.is64Bit() && "Medium code model requires LA64"); 5472 Op = IsTailCall ? 
LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; 5473 break; 5474 case CodeModel::Large: 5475 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 5476 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; 5477 break; 5478 } 5479 5480 if (IsTailCall) { 5481 MF.getFrameInfo().setHasTailCall(); 5482 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); 5483 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 5484 return Ret; 5485 } 5486 5487 Chain = DAG.getNode(Op, DL, NodeTys, Ops); 5488 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 5489 Glue = Chain.getValue(1); 5490 5491 // Mark the end of the call, which is glued to the call itself. 5492 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 5493 Glue = Chain.getValue(1); 5494 5495 // Assign locations to each value returned by this call. 5496 SmallVector<CCValAssign> RVLocs; 5497 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 5498 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 5499 5500 // Copy all of the result registers out of their specified physreg. 5501 for (auto &VA : RVLocs) { 5502 // Copy the value out. 5503 SDValue RetValue = 5504 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 5505 // Glue the RetValue to the end of the call sequence. 5506 Chain = RetValue.getValue(1); 5507 Glue = RetValue.getValue(2); 5508 5509 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 5510 5511 InVals.push_back(RetValue); 5512 } 5513 5514 return Chain; 5515 } 5516 5517 bool LoongArchTargetLowering::CanLowerReturn( 5518 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 5519 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 5520 SmallVector<CCValAssign> RVLocs; 5521 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 5522 5523 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5524 LoongArchABI::ABI ABI = 5525 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 5526 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 5527 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 5528 nullptr)) 5529 return false; 5530 } 5531 return true; 5532 } 5533 5534 SDValue LoongArchTargetLowering::LowerReturn( 5535 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 5536 const SmallVectorImpl<ISD::OutputArg> &Outs, 5537 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 5538 SelectionDAG &DAG) const { 5539 // Stores the assignment of the return value to a location. 5540 SmallVector<CCValAssign> RVLocs; 5541 5542 // Info about the registers and stack slot. 5543 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 5544 *DAG.getContext()); 5545 5546 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 5547 nullptr, CC_LoongArch); 5548 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 5549 report_fatal_error("GHC functions return void only"); 5550 SDValue Glue; 5551 SmallVector<SDValue, 4> RetOps(1, Chain); 5552 5553 // Copy the result values into the output registers. 5554 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 5555 CCValAssign &VA = RVLocs[i]; 5556 assert(VA.isRegLoc() && "Can only return in registers!"); 5557 5558 // Handle a 'normal' return. 5559 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); 5560 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 5561 5562 // Guarantee that all emitted copies are stuck together. 
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}

bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                           bool ForCodeSize) const {
  // TODO: Maybe need more checks here after vector extension is supported.
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return false;
  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
}

bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  Type *Ty = I->getOperand(0)->getType();
  if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
    unsigned Size = Ty->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}

EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst kinds that need to be expanded.

  // Since floating-point operations require a non-trivial set of data
  // operations, use CmpXChg to expand.
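  // E.g. `atomicrmw fadd` (and the uinc/udec wrap operations) become a
  // compare-exchange loop, while i8/i16 operations are routed through the
  // masked intrinsics and end up as LL/SC loops on the containing aligned
  // 32-bit word.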
5655 if (AI->isFloatingPointOperation() || 5656 AI->getOperation() == AtomicRMWInst::UIncWrap || 5657 AI->getOperation() == AtomicRMWInst::UDecWrap) 5658 return AtomicExpansionKind::CmpXChg; 5659 5660 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 5661 if (Size == 8 || Size == 16) 5662 return AtomicExpansionKind::MaskedIntrinsic; 5663 return AtomicExpansionKind::None; 5664 } 5665 5666 static Intrinsic::ID 5667 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, 5668 AtomicRMWInst::BinOp BinOp) { 5669 if (GRLen == 64) { 5670 switch (BinOp) { 5671 default: 5672 llvm_unreachable("Unexpected AtomicRMW BinOp"); 5673 case AtomicRMWInst::Xchg: 5674 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; 5675 case AtomicRMWInst::Add: 5676 return Intrinsic::loongarch_masked_atomicrmw_add_i64; 5677 case AtomicRMWInst::Sub: 5678 return Intrinsic::loongarch_masked_atomicrmw_sub_i64; 5679 case AtomicRMWInst::Nand: 5680 return Intrinsic::loongarch_masked_atomicrmw_nand_i64; 5681 case AtomicRMWInst::UMax: 5682 return Intrinsic::loongarch_masked_atomicrmw_umax_i64; 5683 case AtomicRMWInst::UMin: 5684 return Intrinsic::loongarch_masked_atomicrmw_umin_i64; 5685 case AtomicRMWInst::Max: 5686 return Intrinsic::loongarch_masked_atomicrmw_max_i64; 5687 case AtomicRMWInst::Min: 5688 return Intrinsic::loongarch_masked_atomicrmw_min_i64; 5689 // TODO: support other AtomicRMWInst. 5690 } 5691 } 5692 5693 if (GRLen == 32) { 5694 switch (BinOp) { 5695 default: 5696 llvm_unreachable("Unexpected AtomicRMW BinOp"); 5697 case AtomicRMWInst::Xchg: 5698 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; 5699 case AtomicRMWInst::Add: 5700 return Intrinsic::loongarch_masked_atomicrmw_add_i32; 5701 case AtomicRMWInst::Sub: 5702 return Intrinsic::loongarch_masked_atomicrmw_sub_i32; 5703 case AtomicRMWInst::Nand: 5704 return Intrinsic::loongarch_masked_atomicrmw_nand_i32; 5705 // TODO: support other AtomicRMWInst. 5706 } 5707 } 5708 5709 llvm_unreachable("Unexpected GRLen\n"); 5710 } 5711 5712 TargetLowering::AtomicExpansionKind 5713 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( 5714 AtomicCmpXchgInst *CI) const { 5715 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 5716 if (Size == 8 || Size == 16) 5717 return AtomicExpansionKind::MaskedIntrinsic; 5718 return AtomicExpansionKind::None; 5719 } 5720 5721 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 5722 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 5723 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 5724 AtomicOrdering FailOrd = CI->getFailureOrdering(); 5725 Value *FailureOrdering = 5726 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd)); 5727 5728 // TODO: Support cmpxchg on LA32. 
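  // The call built below has roughly this IR shape (value names are only
  // illustrative):
  //   %res = call i64 @llvm.loongarch.masked.cmpxchg.i64(
  //              ptr %aligned, i64 %cmp, i64 %new, i64 %mask, i64 %fail_ord)
  // with the i32 inputs sign-extended to i64 and the result truncated back.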
5729 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; 5730 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 5731 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 5732 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 5733 Type *Tys[] = {AlignedAddr->getType()}; 5734 Function *MaskedCmpXchg = 5735 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 5736 Value *Result = Builder.CreateCall( 5737 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); 5738 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 5739 return Result; 5740 } 5741 5742 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( 5743 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 5744 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 5745 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 5746 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 5747 // mask, as this produces better code than the LL/SC loop emitted by 5748 // int_loongarch_masked_atomicrmw_xchg. 5749 if (AI->getOperation() == AtomicRMWInst::Xchg && 5750 isa<ConstantInt>(AI->getValOperand())) { 5751 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 5752 if (CVal->isZero()) 5753 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 5754 Builder.CreateNot(Mask, "Inv_Mask"), 5755 AI->getAlign(), Ord); 5756 if (CVal->isMinusOne()) 5757 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 5758 AI->getAlign(), Ord); 5759 } 5760 5761 unsigned GRLen = Subtarget.getGRLen(); 5762 Value *Ordering = 5763 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering())); 5764 Type *Tys[] = {AlignedAddr->getType()}; 5765 Function *LlwOpScwLoop = Intrinsic::getDeclaration( 5766 AI->getModule(), 5767 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); 5768 5769 if (GRLen == 64) { 5770 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 5771 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 5772 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 5773 } 5774 5775 Value *Result; 5776 5777 // Must pass the shift amount needed to sign extend the loaded value prior 5778 // to performing a signed comparison for min/max. ShiftAmt is the number of 5779 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which 5780 // is the number of bits to left+right shift the value in order to 5781 // sign-extend. 
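  // Worked example: an i8 `atomicrmw max` whose byte sits at bit offset 16 of
  // the aligned word on LA64 has ShiftAmt = 16 and ValWidth = 8, so the
  // computation below yields SextShamt = (64 - 8) - 16 = 40.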
5782 if (AI->getOperation() == AtomicRMWInst::Min || 5783 AI->getOperation() == AtomicRMWInst::Max) { 5784 const DataLayout &DL = AI->getDataLayout(); 5785 unsigned ValWidth = 5786 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 5787 Value *SextShamt = 5788 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt); 5789 Result = Builder.CreateCall(LlwOpScwLoop, 5790 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 5791 } else { 5792 Result = 5793 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 5794 } 5795 5796 if (GRLen == 64) 5797 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 5798 return Result; 5799 } 5800 5801 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( 5802 const MachineFunction &MF, EVT VT) const { 5803 VT = VT.getScalarType(); 5804 5805 if (!VT.isSimple()) 5806 return false; 5807 5808 switch (VT.getSimpleVT().SimpleTy) { 5809 case MVT::f32: 5810 case MVT::f64: 5811 return true; 5812 default: 5813 break; 5814 } 5815 5816 return false; 5817 } 5818 5819 Register LoongArchTargetLowering::getExceptionPointerRegister( 5820 const Constant *PersonalityFn) const { 5821 return LoongArch::R4; 5822 } 5823 5824 Register LoongArchTargetLowering::getExceptionSelectorRegister( 5825 const Constant *PersonalityFn) const { 5826 return LoongArch::R5; 5827 } 5828 5829 //===----------------------------------------------------------------------===// 5830 // LoongArch Inline Assembly Support 5831 //===----------------------------------------------------------------------===// 5832 5833 LoongArchTargetLowering::ConstraintType 5834 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { 5835 // LoongArch specific constraints in GCC: config/loongarch/constraints.md 5836 // 5837 // 'f': A floating-point register (if available). 5838 // 'k': A memory operand whose address is formed by a base register and 5839 // (optionally scaled) index register. 5840 // 'l': A signed 16-bit constant. 5841 // 'm': A memory operand whose address is formed by a base register and 5842 // offset that is suitable for use in instructions with the same 5843 // addressing mode as st.w and ld.w. 5844 // 'I': A signed 12-bit constant (for arithmetic instructions). 5845 // 'J': Integer zero. 5846 // 'K': An unsigned 12-bit constant (for logic instructions). 5847 // "ZB": An address that is held in a general-purpose register. The offset is 5848 // zero. 5849 // "ZC": A memory operand whose address is formed by a base register and 5850 // offset that is suitable for use in instructions with the same 5851 // addressing mode as ll.w and sc.w. 5852 if (Constraint.size() == 1) { 5853 switch (Constraint[0]) { 5854 default: 5855 break; 5856 case 'f': 5857 return C_RegisterClass; 5858 case 'l': 5859 case 'I': 5860 case 'J': 5861 case 'K': 5862 return C_Immediate; 5863 case 'k': 5864 return C_Memory; 5865 } 5866 } 5867 5868 if (Constraint == "ZC" || Constraint == "ZB") 5869 return C_Memory; 5870 5871 // 'm' is handled here. 
5872 return TargetLowering::getConstraintType(Constraint); 5873 } 5874 5875 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( 5876 StringRef ConstraintCode) const { 5877 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) 5878 .Case("k", InlineAsm::ConstraintCode::k) 5879 .Case("ZB", InlineAsm::ConstraintCode::ZB) 5880 .Case("ZC", InlineAsm::ConstraintCode::ZC) 5881 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); 5882 } 5883 5884 std::pair<unsigned, const TargetRegisterClass *> 5885 LoongArchTargetLowering::getRegForInlineAsmConstraint( 5886 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 5887 // First, see if this is a constraint that directly corresponds to a LoongArch 5888 // register class. 5889 if (Constraint.size() == 1) { 5890 switch (Constraint[0]) { 5891 case 'r': 5892 // TODO: Support fixed vectors up to GRLen? 5893 if (VT.isVector()) 5894 break; 5895 return std::make_pair(0U, &LoongArch::GPRRegClass); 5896 case 'f': 5897 if (Subtarget.hasBasicF() && VT == MVT::f32) 5898 return std::make_pair(0U, &LoongArch::FPR32RegClass); 5899 if (Subtarget.hasBasicD() && VT == MVT::f64) 5900 return std::make_pair(0U, &LoongArch::FPR64RegClass); 5901 if (Subtarget.hasExtLSX() && 5902 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT)) 5903 return std::make_pair(0U, &LoongArch::LSX128RegClass); 5904 if (Subtarget.hasExtLASX() && 5905 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT)) 5906 return std::make_pair(0U, &LoongArch::LASX256RegClass); 5907 break; 5908 default: 5909 break; 5910 } 5911 } 5912 5913 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen 5914 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm 5915 // constraints while the official register name is prefixed with a '$'. So we 5916 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) 5917 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is 5918 // case insensitive, so no need to convert the constraint to upper case here. 5919 // 5920 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly 5921 // decode the usage of register name aliases into their official names. And 5922 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use 5923 // official register names. 5924 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") || 5925 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) { 5926 bool IsFP = Constraint[2] == 'f'; 5927 std::pair<StringRef, StringRef> Temp = Constraint.split('$'); 5928 std::pair<unsigned, const TargetRegisterClass *> R; 5929 R = TargetLowering::getRegForInlineAsmConstraint( 5930 TRI, join_items("", Temp.first, Temp.second), VT); 5931 // Match those names to the widest floating point register type available. 5932 if (IsFP) { 5933 unsigned RegNo = R.first; 5934 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { 5935 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { 5936 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; 5937 return std::make_pair(DReg, &LoongArch::FPR64RegClass); 5938 } 5939 } 5940 } 5941 return R; 5942 } 5943 5944 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 5945 } 5946 5947 void LoongArchTargetLowering::LowerAsmOperandForConstraint( 5948 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 5949 SelectionDAG &DAG) const { 5950 // Currently only support length 1 constraints. 
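  // Illustrative use from C (operand values are only an example):
  //   asm volatile("addi.w %0, %1, %2" : "=r"(Res) : "r"(A), "I"(12));
  // The 'I' operand is validated and turned into a target constant by the
  // matching case below.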
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits, or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
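      // Worked example (illustrative): Imm == 0x5000 (5 << 12) has twelve
      // trailing zeros, so a single lu12i.w materializes it and the plain MUL
      // is kept, whereas Imm == 0x1040 (== (1 << 12) + (1 << 6)) survives the
      // checks below and is decomposed into (ADD (SLLI x, 12), (SLLI x, 6)).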
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after supporting the vector extension.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
  // 2 with the `UAL` feature.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}

ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // TODO: LAMCAS will use amcas{_DB,}.[bhwd], which does not require
  // extension.
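  // Until then, sign-extension matches how 32-bit values are kept in 64-bit
  // GPRs on LA64: ll.w sign-extends the loaded word, so the comparison value
  // of the ll.w/sc.w cmpxchg loop must be sign-extended as well for the
  // equality check in the expanded loop to compare like-extended bit patterns.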
  return ISD::SIGN_EXTEND;
}

bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions if a libcall argument or
  // return value is a float narrower than GRLen on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}
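// Illustrative note: on a soft-float ABI such as lp64s, the hook above keeps
// the f32 operands of a float libcall (e.g. __addsf3) free of redundant
// sign/zero-extension attributes, while i32 integer libcall arguments are
// still sign-extended to 64 bits per shouldSignExtendTypeInLibCall.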