//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRVE())
    report_fatal_error("Codegen not yet implemented for RVE");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhOrZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxOrZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
  }

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
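      // The VR/VRM2/VRM4/VRM8 register classes chosen below correspond to
      // vector register groups of LMUL = 1, 2, 4 and 8 respectively; a type
      // whose known-minimum size spans more than one RVV block needs a
      // register group of matching width.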
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
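
  // A quick reminder of the legalization actions used throughout this file:
  // Legal means the node is selected directly, Custom routes it through this
  // target's LowerOperation/ReplaceNodeResults hooks, Expand lets the generic
  // legalizer rewrite it in terms of other nodes (or a libcall), and Promote
  // performs the operation in a wider (or otherwise different) legal type.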
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction(ISD::LOAD, MVT::i32, Custom);

    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                       MVT::i32, Custom);

    setOperationAction(ISD::SADDO, MVT::i32, Custom);
    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                       MVT::i32, Custom);
  } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
  else if (Subtarget.is64Bit())
    setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
  else
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (!Subtarget.hasStdExtM())
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
  else if (Subtarget.is64Bit())
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);
  // Zbkb can use rev8+brev8 to implement bitreverse.
  setOperationAction(ISD::BITREVERSE, XLenVT,
                     Subtarget.hasStdExtZbkb() ? Custom : Expand);

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);

    if (Subtarget.is64Bit())
      setOperationAction(
          {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
          MVT::i32, Custom);
  } else {
    setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
  }

  if (Subtarget.hasVendorXTHeadBb()) {
    setOperationAction(ISD::CTLZ, XLenVT, Legal);

    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit())
      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
  }

  if (Subtarget.is64Bit())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  if (!Subtarget.hasVendorXTHeadCondMov())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
      ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
      ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
  }

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    } else {
      static const unsigned ZfhminPromoteOps[] = {
          ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
          ISD::FSUB, ISD::FMUL, ISD::FMA,
          ISD::FDIV, ISD::FSQRT, ISD::FABS,
          ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
          ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
          ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
          ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
          ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
          ISD::FROUNDEVEN, ISD::SELECT};

      setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                         MVT::f16, Legal);
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    else
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  }

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
        ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
        ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
        ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
        ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
        ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
        ISD::VP_ABS};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
        ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
        ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
        ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
        ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT, ISD::VP_FNEARBYINT};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};
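
    // The tables above collect the vector-predicated (llvm.vp.*) and
    // vector-reduction opcodes; they are applied per vector type below,
    // mostly as Custom so they can be lowered to the corresponding
    // VL-carrying RISCVISD nodes.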

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      setOperationAction(
          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      // Splice
      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
        // the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }

        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
                           OtherVT, VT, Expand);
        }
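
        // Fixed-length vectors have no native operations of their own;
        // anything re-enabled as Custom below is lowered by moving the
        // operands into an equivalent scalable "container" type, performing
        // the operation there, and extracting the fixed-length result back
        // out.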

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {
                ISD::SINT_TO_FP,
                ISD::UINT_TO_FP,
                ISD::FP_TO_SINT,
                ISD::FP_TO_UINT,
                ISD::STRICT_SINT_TO_FP,
                ISD::STRICT_UINT_TO_FP,
                ISD::STRICT_FP_TO_SINT,
                ISD::STRICT_FP_TO_UINT,
            },
            VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below are different between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

        setOperationAction(
            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
            Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(
            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},
                           VT, Custom);

        setOperationAction(IntegerVPOps, VT, Custom);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
        // the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT))
          setOperationAction(
              {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
              Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        // There are no extending loads or truncating stores.
        for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
          setTruncStoreAction(VT, InnerVT, Expand);
        }

        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},
                           VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
                            ISD::IS_FPCLASS},
                           VT, Custom);

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
                           VT, Custom);

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction(ISD::SETCC, VT, Custom);
        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);

        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                           Custom);
        setOperationAction(
            {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
             ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
             ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
             ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
             ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
            VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
                         Custom);
      if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtFOrZfinx())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtDOrZdinx())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  if (Subtarget.hasForcedAtomics()) {
    // Set atomic rmw/cas operations to expand to force __sync libcalls.
    setOperationAction(
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
        XLenVT, Expand);
  }

  if (Subtarget.hasVendorXTHeadMemIdx()) {
    for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
         ++im) {
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);

      if (Subtarget.is64Bit()) {
        setIndexedLoadAction(im, MVT::i64, Legal);
        setIndexedStoreAction(im, MVT::i64, Legal);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
                       ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::CONCAT_VECTORS});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
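
  // __extendhfsf2 and __truncsfhf2 are the compiler-rt/libgcc helpers used
  // whenever an f16<->f32 conversion has to be emitted as a libcall.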

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      unsigned VF,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())
    return true;

  if (!IsScalable)
    return true;

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
    return true;

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
    return true;

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    return true;

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
  return VF > MaxVF || !isPowerOf2_32(VF);
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {
    Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(PtrOp);
    Type *MemTy;
    if (IsStore) {
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
    } else {
      // Use return type. If it's segment load, return type is a struct.
      MemTy = I.getType();
      if (MemTy->isStructTy())
        MemTy = MemTy->getStructElementType(0);
    }
    if (!IsUnitStrided)
      MemTy = MemTy->getScalarType();

    Info.memVT = getValueType(DL, MemTy);
    Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |=
        IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
    return true;
  };

  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    Info.flags |= MachineMemOperand::MONonTemporal;

  Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
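
// Both of the immediate hooks above reflect the 12-bit sign-extended
// immediate field of the I-type ALU and compare instructions (e.g.
// addi/slti), so values outside [-2048, 2047] must be materialized into a
// register first.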
1562 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1563 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1564 return false; 1565 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1566 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1567 return (SrcBits == 64 && DestBits == 32); 1568 } 1569 1570 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1571 // We consider i64->i32 free on RV64 since we have good selection of W 1572 // instructions that make promoting operations back to i64 free in many cases. 1573 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || 1574 !DstVT.isInteger()) 1575 return false; 1576 unsigned SrcBits = SrcVT.getSizeInBits(); 1577 unsigned DestBits = DstVT.getSizeInBits(); 1578 return (SrcBits == 64 && DestBits == 32); 1579 } 1580 1581 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1582 // Zexts are free if they can be combined with a load. 1583 // Don't advertise i32->i64 zextload as being free for RV64. It interacts 1584 // poorly with type legalization of compares preferring sext. 1585 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 1586 EVT MemVT = LD->getMemoryVT(); 1587 if ((MemVT == MVT::i8 || MemVT == MVT::i16) && 1588 (LD->getExtensionType() == ISD::NON_EXTLOAD || 1589 LD->getExtensionType() == ISD::ZEXTLOAD)) 1590 return true; 1591 } 1592 1593 return TargetLowering::isZExtFree(Val, VT2); 1594 } 1595 1596 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 1597 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 1598 } 1599 1600 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { 1601 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32); 1602 } 1603 1604 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { 1605 return Subtarget.hasStdExtZbb(); 1606 } 1607 1608 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { 1609 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb(); 1610 } 1611 1612 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( 1613 const Instruction &AndI) const { 1614 // We expect to be able to match a bit extraction instruction if the Zbs 1615 // extension is supported and the mask is a power of two. However, we 1616 // conservatively return false if the mask would fit in an ANDI instruction, 1617 // on the basis that it's possible the sinking+duplication of the AND in 1618 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction 1619 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). 1620 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) 1621 return false; 1622 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); 1623 if (!Mask) 1624 return false; 1625 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2(); 1626 } 1627 1628 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { 1629 EVT VT = Y.getValueType(); 1630 1631 // FIXME: Support vectors once we have tests. 1632 if (VT.isVector()) 1633 return false; 1634 1635 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 1636 !isa<ConstantSDNode>(Y); 1637 } 1638 1639 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { 1640 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. 
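  // For example, a single-bit test such as ((x >> 17) & 1) can be selected as
  //   bexti rd, rs, 17
  //   seqz/snez rd, rd
  // instead of materialising a 1 << 17 mask for an AND.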
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant.
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);
}

bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
                                                               EVT VT) const {
  // Only enable for rvv.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?

  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}

bool RISCVTargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing an endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
1715 return !XC; 1716 } 1717 1718 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { 1719 switch (Opcode) { 1720 case Instruction::Add: 1721 case Instruction::Sub: 1722 case Instruction::Mul: 1723 case Instruction::And: 1724 case Instruction::Or: 1725 case Instruction::Xor: 1726 case Instruction::FAdd: 1727 case Instruction::FSub: 1728 case Instruction::FMul: 1729 case Instruction::FDiv: 1730 case Instruction::ICmp: 1731 case Instruction::FCmp: 1732 return true; 1733 case Instruction::Shl: 1734 case Instruction::LShr: 1735 case Instruction::AShr: 1736 case Instruction::UDiv: 1737 case Instruction::SDiv: 1738 case Instruction::URem: 1739 case Instruction::SRem: 1740 return Operand == 1; 1741 default: 1742 return false; 1743 } 1744 } 1745 1746 1747 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { 1748 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1749 return false; 1750 1751 if (canSplatOperand(I->getOpcode(), Operand)) 1752 return true; 1753 1754 auto *II = dyn_cast<IntrinsicInst>(I); 1755 if (!II) 1756 return false; 1757 1758 switch (II->getIntrinsicID()) { 1759 case Intrinsic::fma: 1760 case Intrinsic::vp_fma: 1761 return Operand == 0 || Operand == 1; 1762 case Intrinsic::vp_shl: 1763 case Intrinsic::vp_lshr: 1764 case Intrinsic::vp_ashr: 1765 case Intrinsic::vp_udiv: 1766 case Intrinsic::vp_sdiv: 1767 case Intrinsic::vp_urem: 1768 case Intrinsic::vp_srem: 1769 return Operand == 1; 1770 // These intrinsics are commutative. 1771 case Intrinsic::vp_add: 1772 case Intrinsic::vp_mul: 1773 case Intrinsic::vp_and: 1774 case Intrinsic::vp_or: 1775 case Intrinsic::vp_xor: 1776 case Intrinsic::vp_fadd: 1777 case Intrinsic::vp_fmul: 1778 case Intrinsic::vp_icmp: 1779 case Intrinsic::vp_fcmp: 1780 // These intrinsics have 'vr' versions. 1781 case Intrinsic::vp_sub: 1782 case Intrinsic::vp_fsub: 1783 case Intrinsic::vp_fdiv: 1784 return Operand == 0 || Operand == 1; 1785 default: 1786 return false; 1787 } 1788 } 1789 1790 /// Check if sinking \p I's operands to I's basic block is profitable, because 1791 /// the operands can be folded into a target instruction, e.g. 1792 /// splats of scalars can fold into vector instructions. 1793 bool RISCVTargetLowering::shouldSinkOperands( 1794 Instruction *I, SmallVectorImpl<Use *> &Ops) const { 1795 using namespace llvm::PatternMatch; 1796 1797 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1798 return false; 1799 1800 for (auto OpIdx : enumerate(I->operands())) { 1801 if (!canSplatOperand(I, OpIdx.index())) 1802 continue; 1803 1804 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); 1805 // Make sure we are not already sinking this operand 1806 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) 1807 continue; 1808 1809 // We are looking for a splat that can be sunk. 1810 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 1811 m_Undef(), m_ZeroMask()))) 1812 continue; 1813 1814 // Don't sink i1 splats. 
1815 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1)) 1816 continue; 1817 1818 // All uses of the shuffle should be sunk to avoid duplicating it across gpr 1819 // and vector registers 1820 for (Use &U : Op->uses()) { 1821 Instruction *Insn = cast<Instruction>(U.getUser()); 1822 if (!canSplatOperand(Insn, U.getOperandNo())) 1823 return false; 1824 } 1825 1826 Ops.push_back(&Op->getOperandUse(0)); 1827 Ops.push_back(&OpIdx.value()); 1828 } 1829 return true; 1830 } 1831 1832 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { 1833 unsigned Opc = VecOp.getOpcode(); 1834 1835 // Assume target opcodes can't be scalarized. 1836 // TODO - do we have any exceptions? 1837 if (Opc >= ISD::BUILTIN_OP_END) 1838 return false; 1839 1840 // If the vector op is not supported, try to convert to scalar. 1841 EVT VecVT = VecOp.getValueType(); 1842 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) 1843 return true; 1844 1845 // If the vector op is supported, but the scalar op is not, the transform may 1846 // not be worthwhile. 1847 EVT ScalarVT = VecVT.getScalarType(); 1848 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); 1849 } 1850 1851 bool RISCVTargetLowering::isOffsetFoldingLegal( 1852 const GlobalAddressSDNode *GA) const { 1853 // In order to maximise the opportunity for common subexpression elimination, 1854 // keep a separate ADD node for the global address offset instead of folding 1855 // it in the global address node. Later peephole optimisations may choose to 1856 // fold it back in when profitable. 1857 return false; 1858 } 1859 1860 // Returns 0-31 if the fli instruction is available for the type and this is 1861 // legal FP immediate for the type. Returns -1 otherwise. 1862 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const { 1863 if (!Subtarget.hasStdExtZfa()) 1864 return -1; 1865 1866 bool IsSupportedVT = false; 1867 if (VT == MVT::f16) { 1868 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); 1869 } else if (VT == MVT::f32) { 1870 IsSupportedVT = true; 1871 } else if (VT == MVT::f64) { 1872 assert(Subtarget.hasStdExtD() && "Expect D extension"); 1873 IsSupportedVT = true; 1874 } 1875 1876 if (!IsSupportedVT) 1877 return -1; 1878 1879 return RISCVLoadFPImm::getLoadFPImm(Imm); 1880 } 1881 1882 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 1883 bool ForCodeSize) const { 1884 bool IsLegalVT = false; 1885 if (VT == MVT::f16) 1886 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin(); 1887 else if (VT == MVT::f32) 1888 IsLegalVT = Subtarget.hasStdExtFOrZfinx(); 1889 else if (VT == MVT::f64) 1890 IsLegalVT = Subtarget.hasStdExtDOrZdinx(); 1891 1892 if (!IsLegalVT) 1893 return false; 1894 1895 if (getLegalZfaFPImm(Imm, VT) >= 0) 1896 return true; 1897 1898 // Cannot create a 64 bit floating-point immediate value for rv32. 1899 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { 1900 // td can handle +0.0 or -0.0 already. 1901 // -0.0 can be created by fmv + fneg. 1902 return Imm.isZero(); 1903 } 1904 // Special case: the cost for -0.0 is 1. 1905 int Cost = Imm.isNegZero() 1906 ? 1 1907 : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), 1908 Subtarget.getXLen(), 1909 Subtarget.getFeatureBits()); 1910 // If the constantpool data is already in cache, only Cost 1 is cheaper. 1911 return Cost < FPImmCost; 1912 } 1913 1914 // TODO: This is very conservative. 
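// As a sketch of the cheap cases accepted below: extracting the high v4i32
// half of a v8i32 (Index == ResElts) maps to a single vslidedown.vi, and
// extracting the low half (Index == 0) needs no slide at all. The Index < 32
// restriction exists because vslidedown.vi only takes a 5-bit immediate.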
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Only support extracting a fixed vector from a fixed vector for now.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  // Conservatively only handle extracting half of a vector.
  // TODO: Relax this.
  if ((ResElts * 2) != SrcElts)
    return false;

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (ResVT.getVectorElementType() == MVT::i1)
    return false;

  // Slide can support arbitrary index, but we only treat vslidedown.vi as
  // cheap.
  if (Index >= 32)
    return false;

  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  return Index == 0 || Index == ResElts;
}

MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    return MVT::f32;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ?
ISD::SETGE : ISD::SETLT; 1987 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); 1988 } else { 1989 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); 1990 } 1991 1992 LHS = LHS.getOperand(0); 1993 if (ShAmt != 0) 1994 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, 1995 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 1996 return; 1997 } 1998 } 1999 2000 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 2001 int64_t C = RHSC->getSExtValue(); 2002 switch (CC) { 2003 default: break; 2004 case ISD::SETGT: 2005 // Convert X > -1 to X >= 0. 2006 if (C == -1) { 2007 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 2008 CC = ISD::SETGE; 2009 return; 2010 } 2011 break; 2012 case ISD::SETLT: 2013 // Convert X < 1 to 0 <= X. 2014 if (C == 1) { 2015 RHS = LHS; 2016 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 2017 CC = ISD::SETGE; 2018 return; 2019 } 2020 break; 2021 } 2022 } 2023 2024 switch (CC) { 2025 default: 2026 break; 2027 case ISD::SETGT: 2028 case ISD::SETLE: 2029 case ISD::SETUGT: 2030 case ISD::SETULE: 2031 CC = ISD::getSetCCSwappedOperands(CC); 2032 std::swap(LHS, RHS); 2033 break; 2034 } 2035 } 2036 2037 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 2038 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 2039 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 2040 if (VT.getVectorElementType() == MVT::i1) 2041 KnownSize *= 8; 2042 2043 switch (KnownSize) { 2044 default: 2045 llvm_unreachable("Invalid LMUL."); 2046 case 8: 2047 return RISCVII::VLMUL::LMUL_F8; 2048 case 16: 2049 return RISCVII::VLMUL::LMUL_F4; 2050 case 32: 2051 return RISCVII::VLMUL::LMUL_F2; 2052 case 64: 2053 return RISCVII::VLMUL::LMUL_1; 2054 case 128: 2055 return RISCVII::VLMUL::LMUL_2; 2056 case 256: 2057 return RISCVII::VLMUL::LMUL_4; 2058 case 512: 2059 return RISCVII::VLMUL::LMUL_8; 2060 } 2061 } 2062 2063 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 2064 switch (LMul) { 2065 default: 2066 llvm_unreachable("Invalid LMUL."); 2067 case RISCVII::VLMUL::LMUL_F8: 2068 case RISCVII::VLMUL::LMUL_F4: 2069 case RISCVII::VLMUL::LMUL_F2: 2070 case RISCVII::VLMUL::LMUL_1: 2071 return RISCV::VRRegClassID; 2072 case RISCVII::VLMUL::LMUL_2: 2073 return RISCV::VRM2RegClassID; 2074 case RISCVII::VLMUL::LMUL_4: 2075 return RISCV::VRM4RegClassID; 2076 case RISCVII::VLMUL::LMUL_8: 2077 return RISCV::VRM8RegClassID; 2078 } 2079 } 2080 2081 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 2082 RISCVII::VLMUL LMUL = getLMUL(VT); 2083 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 2084 LMUL == RISCVII::VLMUL::LMUL_F4 || 2085 LMUL == RISCVII::VLMUL::LMUL_F2 || 2086 LMUL == RISCVII::VLMUL::LMUL_1) { 2087 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 2088 "Unexpected subreg numbering"); 2089 return RISCV::sub_vrm1_0 + Index; 2090 } 2091 if (LMUL == RISCVII::VLMUL::LMUL_2) { 2092 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 2093 "Unexpected subreg numbering"); 2094 return RISCV::sub_vrm2_0 + Index; 2095 } 2096 if (LMUL == RISCVII::VLMUL::LMUL_4) { 2097 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 2098 "Unexpected subreg numbering"); 2099 return RISCV::sub_vrm4_0 + Index; 2100 } 2101 llvm_unreachable("Invalid vector type."); 2102 } 2103 2104 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 2105 if (VT.getVectorElementType() == MVT::i1) 2106 return RISCV::VRRegClassID; 2107 return getRegClassIDForLMUL(getLMUL(VT)); 2108 } 2109 2110 // Attempt to decompose a subvector insert/extract 
between VecVT and 2111 // SubVecVT via subregister indices. Returns the subregister index that 2112 // can perform the subvector insert/extract with the given element index, as 2113 // well as the index corresponding to any leftover subvectors that must be 2114 // further inserted/extracted within the register class for SubVecVT. 2115 std::pair<unsigned, unsigned> 2116 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2117 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 2118 const RISCVRegisterInfo *TRI) { 2119 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 2120 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 2121 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 2122 "Register classes not ordered"); 2123 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 2124 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 2125 // Try to compose a subregister index that takes us from the incoming 2126 // LMUL>1 register class down to the outgoing one. At each step we half 2127 // the LMUL: 2128 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 2129 // Note that this is not guaranteed to find a subregister index, such as 2130 // when we are extracting from one VR type to another. 2131 unsigned SubRegIdx = RISCV::NoSubRegister; 2132 for (const unsigned RCID : 2133 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 2134 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 2135 VecVT = VecVT.getHalfNumVectorElementsVT(); 2136 bool IsHi = 2137 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 2138 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 2139 getSubregIndexByMVT(VecVT, IsHi)); 2140 if (IsHi) 2141 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 2142 } 2143 return {SubRegIdx, InsertExtractIdx}; 2144 } 2145 2146 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 2147 // stores for those types. 2148 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 2149 return !Subtarget.useRVVForFixedLengthVectors() || 2150 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 2151 } 2152 2153 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { 2154 if (!ScalarTy.isSimple()) 2155 return false; 2156 switch (ScalarTy.getSimpleVT().SimpleTy) { 2157 case MVT::iPTR: 2158 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; 2159 case MVT::i8: 2160 case MVT::i16: 2161 case MVT::i32: 2162 return true; 2163 case MVT::i64: 2164 return Subtarget.hasVInstructionsI64(); 2165 case MVT::f16: 2166 return Subtarget.hasVInstructionsF16(); 2167 case MVT::f32: 2168 return Subtarget.hasVInstructionsF32(); 2169 case MVT::f64: 2170 return Subtarget.hasVInstructionsF64(); 2171 default: 2172 return false; 2173 } 2174 } 2175 2176 2177 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { 2178 return NumRepeatedDivisors; 2179 } 2180 2181 static SDValue getVLOperand(SDValue Op) { 2182 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2183 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2184 "Unexpected opcode"); 2185 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2186 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
1 : 0); 2187 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2188 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2189 if (!II) 2190 return SDValue(); 2191 return Op.getOperand(II->VLOperand + 1 + HasChain); 2192 } 2193 2194 static bool useRVVForFixedLengthVectorVT(MVT VT, 2195 const RISCVSubtarget &Subtarget) { 2196 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 2197 if (!Subtarget.useRVVForFixedLengthVectors()) 2198 return false; 2199 2200 // We only support a set of vector types with a consistent maximum fixed size 2201 // across all supported vector element types to avoid legalization issues. 2202 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 2203 // fixed-length vector type we support is 1024 bytes. 2204 if (VT.getFixedSizeInBits() > 1024 * 8) 2205 return false; 2206 2207 unsigned MinVLen = Subtarget.getRealMinVLen(); 2208 2209 MVT EltVT = VT.getVectorElementType(); 2210 2211 // Don't use RVV for vectors we cannot scalarize if required. 2212 switch (EltVT.SimpleTy) { 2213 // i1 is supported but has different rules. 2214 default: 2215 return false; 2216 case MVT::i1: 2217 // Masks can only use a single register. 2218 if (VT.getVectorNumElements() > MinVLen) 2219 return false; 2220 MinVLen /= 8; 2221 break; 2222 case MVT::i8: 2223 case MVT::i16: 2224 case MVT::i32: 2225 break; 2226 case MVT::i64: 2227 if (!Subtarget.hasVInstructionsI64()) 2228 return false; 2229 break; 2230 case MVT::f16: 2231 if (!Subtarget.hasVInstructionsF16()) 2232 return false; 2233 break; 2234 case MVT::f32: 2235 if (!Subtarget.hasVInstructionsF32()) 2236 return false; 2237 break; 2238 case MVT::f64: 2239 if (!Subtarget.hasVInstructionsF64()) 2240 return false; 2241 break; 2242 } 2243 2244 // Reject elements larger than ELEN. 2245 if (EltVT.getSizeInBits() > Subtarget.getELEN()) 2246 return false; 2247 2248 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 2249 // Don't use RVV for types that don't fit. 2250 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 2251 return false; 2252 2253 // TODO: Perhaps an artificial restriction, but worth having whilst getting 2254 // the base fixed length RVV support in place. 2255 if (!VT.isPow2VectorType()) 2256 return false; 2257 2258 return true; 2259 } 2260 2261 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 2262 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 2263 } 2264 2265 // Return the largest legal scalable vector type that matches VT's element type. 2266 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 2267 const RISCVSubtarget &Subtarget) { 2268 // This may be called before legal types are setup. 2269 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 2270 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 2271 "Expected legal fixed length vector!"); 2272 2273 unsigned MinVLen = Subtarget.getRealMinVLen(); 2274 unsigned MaxELen = Subtarget.getELEN(); 2275 2276 MVT EltVT = VT.getVectorElementType(); 2277 switch (EltVT.SimpleTy) { 2278 default: 2279 llvm_unreachable("unexpected element type for RVV container"); 2280 case MVT::i1: 2281 case MVT::i8: 2282 case MVT::i16: 2283 case MVT::i32: 2284 case MVT::i64: 2285 case MVT::f16: 2286 case MVT::f32: 2287 case MVT::f64: { 2288 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 2289 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within 2290 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 
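    // As a worked example (assuming RVVBitsPerBlock == 64): with MinVLen=128,
    // a fixed v4i32 maps to nxv2i32 (LMUL=1 at VLEN=128), while v4i16 maps to
    // nxv2i16, i.e. a fractional LMUL of 1/2.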
2291 unsigned NumElts = 2292 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; 2293 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); 2294 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); 2295 return MVT::getScalableVectorVT(EltVT, NumElts); 2296 } 2297 } 2298 } 2299 2300 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 2301 const RISCVSubtarget &Subtarget) { 2302 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 2303 Subtarget); 2304 } 2305 2306 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 2307 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); 2308 } 2309 2310 // Grow V to consume an entire RVV register. 2311 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2312 const RISCVSubtarget &Subtarget) { 2313 assert(VT.isScalableVector() && 2314 "Expected to convert into a scalable vector!"); 2315 assert(V.getValueType().isFixedLengthVector() && 2316 "Expected a fixed length vector operand!"); 2317 SDLoc DL(V); 2318 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2319 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 2320 } 2321 2322 // Shrink V so it's just big enough to maintain a VT's worth of data. 2323 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2324 const RISCVSubtarget &Subtarget) { 2325 assert(VT.isFixedLengthVector() && 2326 "Expected to convert into a fixed length vector!"); 2327 assert(V.getValueType().isScalableVector() && 2328 "Expected a scalable vector operand!"); 2329 SDLoc DL(V); 2330 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2331 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 2332 } 2333 2334 /// Return the type of the mask type suitable for masking the provided 2335 /// vector type. This is simply an i1 element type vector of the same 2336 /// (possibly scalable) length. 2337 static MVT getMaskTypeFor(MVT VecVT) { 2338 assert(VecVT.isVector()); 2339 ElementCount EC = VecVT.getVectorElementCount(); 2340 return MVT::getVectorVT(MVT::i1, EC); 2341 } 2342 2343 /// Creates an all ones mask suitable for masking a vector of type VecTy with 2344 /// vector length VL. . 2345 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, 2346 SelectionDAG &DAG) { 2347 MVT MaskVT = getMaskTypeFor(VecVT); 2348 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2349 } 2350 2351 static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG, 2352 const RISCVSubtarget &Subtarget) { 2353 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); 2354 } 2355 2356 static std::pair<SDValue, SDValue> 2357 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2358 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2359 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2360 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget); 2361 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); 2362 return {Mask, VL}; 2363 } 2364 2365 // Gets the two common "VL" operands: an all-ones mask and the vector length. 2366 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 2367 // the vector type that the fixed-length vector is contained in. Otherwise if 2368 // VecVT is scalable, then ContainerVT should be the same as VecVT. 
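// For example, a fixed-length v4i32 lowered in an nxv2i32 container gets
// VL = constant 4, whereas a scalable request gets VL = X0 (VLMAX); in both
// cases the mask is an all-ones VMSET_VL.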
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  if (VecVT.isFixedLengthVector())
    return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                           Subtarget);
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
                                          SelectionDAG &DAG) const {
  assert(VecVT.isScalableVector() && "Expected scalable vector");
  return DAG.getElementCount(DL, Subtarget.getXLenVT(),
                             VecVT.getVectorElementCount());
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either are (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // RISC-V FP-to-int conversions saturate to the destination register size,
  // but don't produce 0 for nan. We can use a conversion instruction and fix
  // the nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);

  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;

  if (!DstVT.isVector()) {
    // In the absence of Zfh, promote f16 to f32, then saturate the result.
    if (Src.getSimpleValueType() == MVT::f16 &&
        !Subtarget.hasStdExtZfhOrZhinx()) {
      Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
    }

    unsigned Opc;
    if (SatVT == DstVT)
      Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
    else if (DstVT == MVT::i64 && SatVT == MVT::i32)
      Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
    else
      return SDValue();
    // FIXME: Support other SatVTs by clamping before or after the conversion.

    SDLoc DL(Op);
    SDValue FpToInt = DAG.getNode(
        Opc, DL, DstVT, Src,
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));

    if (Opc == RISCVISD::FCVT_WU_RV64)
      FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

    SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
    return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  // Vectors.
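  // The vector strategy mirrors the scalar one: convert with a truncating
  // vfcvt, then use a mask of NaN lanes (computed by comparing Src with
  // itself, which only holds for NaN) to select 0 in those lanes.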
2452 2453 MVT DstEltVT = DstVT.getVectorElementType(); 2454 MVT SrcVT = Src.getSimpleValueType(); 2455 MVT SrcEltVT = SrcVT.getVectorElementType(); 2456 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 2457 unsigned DstEltSize = DstEltVT.getSizeInBits(); 2458 2459 // Only handle saturating to the destination type. 2460 if (SatVT != DstEltVT) 2461 return SDValue(); 2462 2463 // FIXME: Don't support narrowing by more than 1 steps for now. 2464 if (SrcEltSize > (2 * DstEltSize)) 2465 return SDValue(); 2466 2467 MVT DstContainerVT = DstVT; 2468 MVT SrcContainerVT = SrcVT; 2469 if (DstVT.isFixedLengthVector()) { 2470 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget); 2471 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 2472 assert(DstContainerVT.getVectorElementCount() == 2473 SrcContainerVT.getVectorElementCount() && 2474 "Expected same element count"); 2475 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2476 } 2477 2478 SDLoc DL(Op); 2479 2480 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget); 2481 2482 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 2483 {Src, Src, DAG.getCondCode(ISD::SETNE), 2484 DAG.getUNDEF(Mask.getValueType()), Mask, VL}); 2485 2486 // Need to widen by more than 1 step, promote the FP type, then do a widening 2487 // convert. 2488 if (DstEltSize > (2 * SrcEltSize)) { 2489 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!"); 2490 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); 2491 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL); 2492 } 2493 2494 unsigned RVVOpc = 2495 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 2496 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL); 2497 2498 SDValue SplatZero = DAG.getNode( 2499 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), 2500 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 2501 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero, 2502 Res, VL); 2503 2504 if (DstVT.isFixedLengthVector()) 2505 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); 2506 2507 return Res; 2508 } 2509 2510 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { 2511 switch (Opc) { 2512 case ISD::FROUNDEVEN: 2513 case ISD::STRICT_FROUNDEVEN: 2514 case ISD::VP_FROUNDEVEN: 2515 return RISCVFPRndMode::RNE; 2516 case ISD::FTRUNC: 2517 case ISD::STRICT_FTRUNC: 2518 case ISD::VP_FROUNDTOZERO: 2519 return RISCVFPRndMode::RTZ; 2520 case ISD::FFLOOR: 2521 case ISD::STRICT_FFLOOR: 2522 case ISD::VP_FFLOOR: 2523 return RISCVFPRndMode::RDN; 2524 case ISD::FCEIL: 2525 case ISD::STRICT_FCEIL: 2526 case ISD::VP_FCEIL: 2527 return RISCVFPRndMode::RUP; 2528 case ISD::FROUND: 2529 case ISD::STRICT_FROUND: 2530 case ISD::VP_FROUND: 2531 return RISCVFPRndMode::RMM; 2532 case ISD::FRINT: 2533 return RISCVFPRndMode::DYN; 2534 } 2535 2536 return RISCVFPRndMode::Invalid; 2537 } 2538 2539 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND 2540 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to 2541 // the integer domain and back. Taking care to avoid converting values that are 2542 // nan or already correct. 
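// Roughly: compute |Src|, keep only the lanes whose magnitude is below
// 2^(precision-1) -- anything at least that large, and any NaN, has no
// rounding left to do and is left untouched -- convert those lanes to integer
// and back with the requested rounding mode, then copy the original sign back
// so that -0.0 survives.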
2543 static SDValue 2544 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2545 const RISCVSubtarget &Subtarget) { 2546 MVT VT = Op.getSimpleValueType(); 2547 assert(VT.isVector() && "Unexpected type"); 2548 2549 SDLoc DL(Op); 2550 2551 SDValue Src = Op.getOperand(0); 2552 2553 MVT ContainerVT = VT; 2554 if (VT.isFixedLengthVector()) { 2555 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2556 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2557 } 2558 2559 SDValue Mask, VL; 2560 if (Op->isVPOpcode()) { 2561 Mask = Op.getOperand(1); 2562 if (VT.isFixedLengthVector()) 2563 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 2564 Subtarget); 2565 VL = Op.getOperand(2); 2566 } else { 2567 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2568 } 2569 2570 // Freeze the source since we are increasing the number of uses. 2571 Src = DAG.getFreeze(Src); 2572 2573 // We do the conversion on the absolute value and fix the sign at the end. 2574 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 2575 2576 // Determine the largest integer that can be represented exactly. This and 2577 // values larger than it don't have any fractional bits so don't need to 2578 // be converted. 2579 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 2580 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2581 APFloat MaxVal = APFloat(FltSem); 2582 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2583 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2584 SDValue MaxValNode = 2585 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 2586 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 2587 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 2588 2589 // If abs(Src) was larger than MaxVal or nan, keep it. 2590 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2591 Mask = 2592 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, 2593 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), 2594 Mask, Mask, VL}); 2595 2596 // Truncate to integer and convert back to FP. 2597 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 2598 MVT XLenVT = Subtarget.getXLenVT(); 2599 SDValue Truncated; 2600 2601 switch (Op.getOpcode()) { 2602 default: 2603 llvm_unreachable("Unexpected opcode"); 2604 case ISD::FCEIL: 2605 case ISD::VP_FCEIL: 2606 case ISD::FFLOOR: 2607 case ISD::VP_FFLOOR: 2608 case ISD::FROUND: 2609 case ISD::FROUNDEVEN: 2610 case ISD::VP_FROUND: 2611 case ISD::VP_FROUNDEVEN: 2612 case ISD::VP_FROUNDTOZERO: { 2613 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2614 assert(FRM != RISCVFPRndMode::Invalid); 2615 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, 2616 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 2617 break; 2618 } 2619 case ISD::FTRUNC: 2620 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, 2621 Mask, VL); 2622 break; 2623 case ISD::FRINT: 2624 case ISD::VP_FRINT: 2625 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); 2626 break; 2627 case ISD::FNEARBYINT: 2628 case ISD::VP_FNEARBYINT: 2629 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, 2630 Mask, VL); 2631 break; 2632 } 2633 2634 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 
  if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
    Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
                            Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Truncated;

  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}

// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
// qNan and converting the new source to integer and back to FP.
static SDValue
lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Freeze the source since we are increasing the number of uses.
  Src = DAG.getFreeze(Src);

  // Convert sNan to qNan by executing x + x for all unordered elements x in
  // Src.
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
                                DAG.getVTList(MaskVT, MVT::Other),
                                {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                                 DAG.getUNDEF(MaskVT), Mask, VL});
  Chain = Unorder.getValue(1);
  Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
                    DAG.getVTList(ContainerVT, MVT::Other),
                    {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  Mask = DAG.getNode(
      RISCVISD::SETCC_VL, DL, MaskVT,
      {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
2705 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 2706 MVT XLenVT = Subtarget.getXLenVT(); 2707 SDValue Truncated; 2708 2709 switch (Op.getOpcode()) { 2710 default: 2711 llvm_unreachable("Unexpected opcode"); 2712 case ISD::STRICT_FCEIL: 2713 case ISD::STRICT_FFLOOR: 2714 case ISD::STRICT_FROUND: 2715 case ISD::STRICT_FROUNDEVEN: { 2716 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2717 assert(FRM != RISCVFPRndMode::Invalid); 2718 Truncated = DAG.getNode( 2719 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), 2720 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL}); 2721 break; 2722 } 2723 case ISD::STRICT_FTRUNC: 2724 Truncated = 2725 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, 2726 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); 2727 break; 2728 case ISD::STRICT_FNEARBYINT: 2729 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, 2730 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, 2731 Mask, VL); 2732 break; 2733 } 2734 Chain = Truncated.getValue(1); 2735 2736 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 2737 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { 2738 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, 2739 DAG.getVTList(ContainerVT, MVT::Other), Chain, 2740 Truncated, Mask, VL); 2741 Chain = Truncated.getValue(1); 2742 } 2743 2744 // Restore the original sign so that -0.0 is preserved. 2745 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 2746 Src, Src, Mask, VL); 2747 2748 if (VT.isFixedLengthVector()) 2749 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget); 2750 return DAG.getMergeValues({Truncated, Chain}, DL); 2751 } 2752 2753 static SDValue 2754 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2755 const RISCVSubtarget &Subtarget) { 2756 MVT VT = Op.getSimpleValueType(); 2757 if (VT.isVector()) 2758 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 2759 2760 if (DAG.shouldOptForSize()) 2761 return SDValue(); 2762 2763 SDLoc DL(Op); 2764 SDValue Src = Op.getOperand(0); 2765 2766 // Create an integer the size of the mantissa with the MSB set. This and all 2767 // values larger than it don't have any fractional bits so don't need to be 2768 // converted. 
2769 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 2770 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2771 APFloat MaxVal = APFloat(FltSem); 2772 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2773 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2774 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 2775 2776 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2777 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, 2778 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); 2779 } 2780 2781 static SDValue 2782 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, 2783 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, 2784 SDValue Offset, SDValue Mask, SDValue VL, 2785 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 2786 if (Merge.isUndef()) 2787 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 2788 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 2789 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 2790 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); 2791 } 2792 2793 static SDValue 2794 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, 2795 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 2796 SDValue VL, 2797 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 2798 if (Merge.isUndef()) 2799 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 2800 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 2801 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 2802 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); 2803 } 2804 2805 struct VIDSequence { 2806 int64_t StepNumerator; 2807 unsigned StepDenominator; 2808 int64_t Addend; 2809 }; 2810 2811 static std::optional<uint64_t> getExactInteger(const APFloat &APF, 2812 uint32_t BitWidth) { 2813 APSInt ValInt(BitWidth, !APF.isNegative()); 2814 // We use an arbitrary rounding mode here. If a floating-point is an exact 2815 // integer (e.g., 1.0), the rounding mode does not affect the output value. If 2816 // the rounding mode changes the output value, then it is not an exact 2817 // integer. 2818 RoundingMode ArbitraryRM = RoundingMode::TowardZero; 2819 bool IsExact; 2820 // If it is out of signed integer range, it will return an invalid operation. 2821 // If it is not an exact integer, IsExact is false. 2822 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == 2823 APFloatBase::opInvalidOp) || 2824 !IsExact) 2825 return std::nullopt; 2826 return ValInt.extractBitsAsZExtValue(BitWidth, 0); 2827 } 2828 2829 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 2830 // to the (non-zero) step S and start value X. This can be then lowered as the 2831 // RVV sequence (VID * S) + X, for example. 2832 // The step S is represented as an integer numerator divided by a positive 2833 // denominator. Note that the implementation currently only identifies 2834 // sequences in which either the numerator is +/- 1 or the denominator is 1. It 2835 // cannot detect 2/3, for example. 2836 // Note that this method will also match potentially unappealing index 2837 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to 2838 // determine whether this is worth generating code for. 
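// A couple of illustrative sequences this is expected to match:
//   <0, 2, 4, 6>       -> StepNumerator=2, StepDenominator=1, Addend=0
//   <1, 1, 2, 2, 3, 3>  -> StepNumerator=1, StepDenominator=2, Addend=1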
2839 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) { 2840 unsigned NumElts = Op.getNumOperands(); 2841 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); 2842 bool IsInteger = Op.getValueType().isInteger(); 2843 2844 std::optional<unsigned> SeqStepDenom; 2845 std::optional<int64_t> SeqStepNum, SeqAddend; 2846 std::optional<std::pair<uint64_t, unsigned>> PrevElt; 2847 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); 2848 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 2849 // Assume undef elements match the sequence; we just have to be careful 2850 // when interpolating across them. 2851 if (Op.getOperand(Idx).isUndef()) 2852 continue; 2853 2854 uint64_t Val; 2855 if (IsInteger) { 2856 // The BUILD_VECTOR must be all constants. 2857 if (!isa<ConstantSDNode>(Op.getOperand(Idx))) 2858 return std::nullopt; 2859 Val = Op.getConstantOperandVal(Idx) & 2860 maskTrailingOnes<uint64_t>(EltSizeInBits); 2861 } else { 2862 // The BUILD_VECTOR must be all constants. 2863 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx))) 2864 return std::nullopt; 2865 if (auto ExactInteger = getExactInteger( 2866 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 2867 EltSizeInBits)) 2868 Val = *ExactInteger; 2869 else 2870 return std::nullopt; 2871 } 2872 2873 if (PrevElt) { 2874 // Calculate the step since the last non-undef element, and ensure 2875 // it's consistent across the entire sequence. 2876 unsigned IdxDiff = Idx - PrevElt->second; 2877 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); 2878 2879 // A zero-value value difference means that we're somewhere in the middle 2880 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a 2881 // step change before evaluating the sequence. 2882 if (ValDiff == 0) 2883 continue; 2884 2885 int64_t Remainder = ValDiff % IdxDiff; 2886 // Normalize the step if it's greater than 1. 2887 if (Remainder != ValDiff) { 2888 // The difference must cleanly divide the element span. 2889 if (Remainder != 0) 2890 return std::nullopt; 2891 ValDiff /= IdxDiff; 2892 IdxDiff = 1; 2893 } 2894 2895 if (!SeqStepNum) 2896 SeqStepNum = ValDiff; 2897 else if (ValDiff != SeqStepNum) 2898 return std::nullopt; 2899 2900 if (!SeqStepDenom) 2901 SeqStepDenom = IdxDiff; 2902 else if (IdxDiff != *SeqStepDenom) 2903 return std::nullopt; 2904 } 2905 2906 // Record this non-undef element for later. 2907 if (!PrevElt || PrevElt->first != Val) 2908 PrevElt = std::make_pair(Val, Idx); 2909 } 2910 2911 // We need to have logged a step for this to count as a legal index sequence. 2912 if (!SeqStepNum || !SeqStepDenom) 2913 return std::nullopt; 2914 2915 // Loop back through the sequence and validate elements we might have skipped 2916 // while waiting for a valid step. While doing this, log any sequence addend. 
2917 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 2918 if (Op.getOperand(Idx).isUndef()) 2919 continue; 2920 uint64_t Val; 2921 if (IsInteger) { 2922 Val = Op.getConstantOperandVal(Idx) & 2923 maskTrailingOnes<uint64_t>(EltSizeInBits); 2924 } else { 2925 Val = *getExactInteger( 2926 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 2927 EltSizeInBits); 2928 } 2929 uint64_t ExpectedVal = 2930 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 2931 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); 2932 if (!SeqAddend) 2933 SeqAddend = Addend; 2934 else if (Addend != SeqAddend) 2935 return std::nullopt; 2936 } 2937 2938 assert(SeqAddend && "Must have an addend if we have a step"); 2939 2940 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 2941 } 2942 2943 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT 2944 // and lower it as a VRGATHER_VX_VL from the source vector. 2945 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, 2946 SelectionDAG &DAG, 2947 const RISCVSubtarget &Subtarget) { 2948 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2949 return SDValue(); 2950 SDValue Vec = SplatVal.getOperand(0); 2951 // Only perform this optimization on vectors of the same size for simplicity. 2952 // Don't perform this optimization for i1 vectors. 2953 // FIXME: Support i1 vectors, maybe by promoting to i8? 2954 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) 2955 return SDValue(); 2956 SDValue Idx = SplatVal.getOperand(1); 2957 // The index must be a legal type. 2958 if (Idx.getValueType() != Subtarget.getXLenVT()) 2959 return SDValue(); 2960 2961 MVT ContainerVT = VT; 2962 if (VT.isFixedLengthVector()) { 2963 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2964 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2965 } 2966 2967 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2968 2969 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, 2970 Idx, DAG.getUNDEF(ContainerVT), Mask, VL); 2971 2972 if (!VT.isFixedLengthVector()) 2973 return Gather; 2974 2975 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 2976 } 2977 2978 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 2979 const RISCVSubtarget &Subtarget) { 2980 MVT VT = Op.getSimpleValueType(); 2981 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 2982 2983 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2984 2985 SDLoc DL(Op); 2986 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2987 2988 MVT XLenVT = Subtarget.getXLenVT(); 2989 unsigned NumElts = Op.getNumOperands(); 2990 2991 if (VT.getVectorElementType() == MVT::i1) { 2992 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 2993 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 2994 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 2995 } 2996 2997 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 2998 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 2999 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 3000 } 3001 3002 // Lower constant mask BUILD_VECTORs via an integer vector type, in 3003 // scalar integer chunks whose bit-width depends on the number of mask 3004 // bits and XLEN. 3005 // First, determine the most appropriate scalar integer type to use. 
    // This is at most XLenVT, but may be shrunk to a smaller vector element
    // type according to the size of the final vector - use i8 chunks rather
    // than XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
      // If we have to use more than one INSERT_VECTOR_ELT then this
      // optimization is likely to increase code size; avoid performing it in
      // such a case. We can use a load from a constant pool in this case.
      if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
        return SDValue();
      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
      MVT IntegerViaVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                           IntegerViaVecElts);

      uint64_t Bits = 0;
      unsigned BitPos = 0, IntegerEltIdx = 0;
      SmallVector<SDValue, 8> Elts(IntegerViaVecElts);

      for (unsigned I = 0; I < NumElts;) {
        SDValue V = Op.getOperand(I);
        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);
        ++BitPos;
        ++I;

        // Once we accumulate enough bits to fill our scalar type or process the
        // last element, insert into our vector and clear our accumulated data.
        if (I % NumViaIntegerBits == 0 || I == NumElts) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);
          SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
          Elts[IntegerEltIdx] = Elt;
          Bits = 0;
          BitPos = 0;
          IntegerEltIdx++;
        }
      }

      SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        // our final mask.
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
        Vec = DAG.getBitcast(MVT::v8i1, Vec);
        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                          DAG.getConstant(0, DL, XLenVT));
      } else {
        // Else we must have produced an integer type with the same size as the
        // mask type; bitcast for the final result.
        assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
        Vec = DAG.getBitcast(VT, Vec);
      }

      return Vec;
    }

    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
    // vector type, we have a legal equivalently-sized i8 type, so we can use
    // that.
    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);

    SDValue WideVec;
    if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
      // For a splat, perform a scalar truncate before creating the wider
      // vector.
3079 assert(Splat.getValueType() == XLenVT &&
3080 "Unexpected type for i1 splat value");
3081 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
3082 DAG.getConstant(1, DL, XLenVT));
3083 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3084 } else {
3085 SmallVector<SDValue, 8> Ops(Op->op_values());
3086 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3087 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3088 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3089 }
3090
3091 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3092 }
3093
3094 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3095 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3096 return Gather;
3097 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3098 : RISCVISD::VMV_V_X_VL;
3099 Splat =
3100 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3101 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3102 }
3103
3104 // Try and match index sequences, which we can lower to the vid instruction
3105 // with optional modifications. An all-undef vector is matched by
3106 // getSplatValue, above.
3107 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3108 int64_t StepNumerator = SimpleVID->StepNumerator;
3109 unsigned StepDenominator = SimpleVID->StepDenominator;
3110 int64_t Addend = SimpleVID->Addend;
3111
3112 assert(StepNumerator != 0 && "Invalid step");
3113 bool Negate = false;
3114 int64_t SplatStepVal = StepNumerator;
3115 unsigned StepOpcode = ISD::MUL;
3116 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3117 // anyway as the shift of 63 won't fit in uimm5.
3118 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3119 isPowerOf2_64(std::abs(StepNumerator))) {
3120 Negate = StepNumerator < 0;
3121 StepOpcode = ISD::SHL;
3122 SplatStepVal = Log2_64(std::abs(StepNumerator));
3123 }
3124
3125 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3126 // threshold since it's the immediate value many RVV instructions accept.
3127 // There is no vmul.vi instruction so ensure the multiply constant can fit
3128 // in a single addi instruction.
3129 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3130 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3131 isPowerOf2_32(StepDenominator) &&
3132 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3133 MVT VIDVT =
3134 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3135 MVT VIDContainerVT =
3136 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3137 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3138 // Convert right out of the scalable type so we can use standard ISD
3139 // nodes for the rest of the computation. If we used scalable types with
3140 // these, we'd lose the fixed-length vector info and generate worse
3141 // vsetvli code.
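// The steps below apply the optional scale, divide and addend; for example
// (illustrative), <0, 2, 4, 6> is matched with StepNumerator == 2,
// StepDenominator == 1, Addend == 0 and becomes roughly:
//   vid.v   v8
//   vsll.vi v8, v8, 1
// while <1, 2, 3, 4> (step 1, addend 1) becomes vid.v followed by vadd.vi.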
3142 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3143 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3144 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3145 SDValue SplatStep = DAG.getSplatBuildVector(
3146 VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
3147 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3148 }
3149 if (StepDenominator != 1) {
3150 SDValue SplatStep = DAG.getSplatBuildVector(
3151 VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
3152 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3153 }
3154 if (Addend != 0 || Negate) {
3155 SDValue SplatAddend = DAG.getSplatBuildVector(
3156 VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
3157 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3158 VID);
3159 }
3160 if (VT.isFloatingPoint()) {
3161 // TODO: Use vfwcvt to reduce register pressure.
3162 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3163 }
3164 return VID;
3165 }
3166 }
3167
3168 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3169 // when re-interpreted as a vector with a larger element type. For example,
3170 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3171 // could instead be splatted as
3172 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3173 // TODO: This optimization could also work on non-constant splats, but it
3174 // would require bit-manipulation instructions to construct the splat value.
3175 SmallVector<SDValue> Sequence;
3176 unsigned EltBitSize = VT.getScalarSizeInBits();
3177 const auto *BV = cast<BuildVectorSDNode>(Op);
3178 if (VT.isInteger() && EltBitSize < 64 &&
3179 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3180 BV->getRepeatedSequence(Sequence) &&
3181 (Sequence.size() * EltBitSize) <= 64) {
3182 unsigned SeqLen = Sequence.size();
3183 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3184 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
3185 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3186 ViaIntVT == MVT::i64) &&
3187 "Unexpected sequence type");
3188
3189 unsigned EltIdx = 0;
3190 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3191 uint64_t SplatValue = 0;
3192 // Construct the amalgamated value which can be splatted as this larger
3193 // vector type.
3194 for (const auto &SeqV : Sequence) {
3195 if (!SeqV.isUndef())
3196 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3197 << (EltIdx * EltBitSize));
3198 EltIdx++;
3199 }
3200
3201 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3202 // achieve better constant materialization.
3203 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3204 SplatValue = SignExtend64<32>(SplatValue);
3205
3206 // Since we can't introduce illegal i64 types at this stage, we can only
3207 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3208 // way we can use RVV instructions to splat.
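// For instance (illustrative), on RV32 a v8i16 constant repeating the
// pattern <1, 0, 0, 0> amalgamates to the i64 value 1; that value is its
// own 32-bit sign extension, so it can still be splatted with a single
// SEW=64 vmv.v.x (which sign-extends the X register source), whereas a
// value such as 0x100000000 could not and would fall through to the paths
// below.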
3209 assert((ViaIntVT.bitsLE(XLenVT) ||
3210 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3211 "Unexpected bitcast sequence");
3212 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3213 SDValue ViaVL =
3214 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3215 MVT ViaContainerVT =
3216 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3217 SDValue Splat =
3218 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3219 DAG.getUNDEF(ViaContainerVT),
3220 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3221 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3222 return DAG.getBitcast(VT, Splat);
3223 }
3224 }
3225
3226 // Try and optimize BUILD_VECTORs with "dominant values" - these are values
3227 // which constitute a large proportion of the elements. In such cases we can
3228 // splat a vector with the dominant element and make up the shortfall with
3229 // INSERT_VECTOR_ELTs.
3230 // Note that this includes vectors of 2 elements by association. The
3231 // upper-most element is the "dominant" one, allowing us to use a splat to
3232 // "insert" the upper element, and an insert of the lower element at position
3233 // 0, which improves codegen.
3234 SDValue DominantValue;
3235 unsigned MostCommonCount = 0;
3236 DenseMap<SDValue, unsigned> ValueCounts;
3237 unsigned NumUndefElts =
3238 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3239
3240 // Track the number of scalar loads we know we'd be inserting, estimated as
3241 // any non-zero floating-point constant. Other kinds of element are either
3242 // already in registers or are materialized on demand. The threshold at which
3243 // a vector load is more desirable than several scalar materialization and
3244 // vector-insertion instructions is not known.
3245 unsigned NumScalarLoads = 0;
3246
3247 for (SDValue V : Op->op_values()) {
3248 if (V.isUndef())
3249 continue;
3250
3251 ValueCounts.insert(std::make_pair(V, 0));
3252 unsigned &Count = ValueCounts[V];
3253 if (0 == Count)
3254 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3255 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3256
3257 // Is this value dominant? In case of a tie, prefer the highest element as
3258 // it's cheaper to insert near the beginning of a vector than it is at the
3259 // end.
3260 if (++Count >= MostCommonCount) {
3261 DominantValue = V;
3262 MostCommonCount = Count;
3263 }
3264 }
3265
3266 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3267 unsigned NumDefElts = NumElts - NumUndefElts;
3268 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3269
3270 // Don't perform this optimization when optimizing for size, since
3271 // materializing elements and inserting them tends to cause code bloat.
3272 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3273 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3274 ((MostCommonCount > DominantValueCountThreshold) ||
3275 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3276 // Start by splatting the most common element.
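// Illustrative example: v4i32 <%x, %x, %y, %x> has dominant value %x
// (count 3, above the threshold of 2), so it is lowered roughly as
//   %v = splat %x
//   %v = insert_vector_elt %v, %y, 2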
3277 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); 3278 3279 DenseSet<SDValue> Processed{DominantValue}; 3280 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); 3281 for (const auto &OpIdx : enumerate(Op->ops())) { 3282 const SDValue &V = OpIdx.value(); 3283 if (V.isUndef() || !Processed.insert(V).second) 3284 continue; 3285 if (ValueCounts[V] == 1) { 3286 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, 3287 DAG.getConstant(OpIdx.index(), DL, XLenVT)); 3288 } else { 3289 // Blend in all instances of this value using a VSELECT, using a 3290 // mask where each bit signals whether that element is the one 3291 // we're after. 3292 SmallVector<SDValue> Ops; 3293 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { 3294 return DAG.getConstant(V == V1, DL, XLenVT); 3295 }); 3296 Vec = DAG.getNode(ISD::VSELECT, DL, VT, 3297 DAG.getBuildVector(SelMaskTy, DL, Ops), 3298 DAG.getSplatBuildVector(VT, DL, V), Vec); 3299 } 3300 } 3301 3302 return Vec; 3303 } 3304 3305 // For constant vectors, use generic constant pool lowering. Otherwise, 3306 // we'd have to materialize constants in GPRs just to move them into the 3307 // vector. 3308 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 3309 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 3310 return SDValue(); 3311 3312 assert((!VT.isFloatingPoint() || 3313 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && 3314 "Illegal type which will result in reserved encoding"); 3315 3316 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3317 3318 SDValue Vec = DAG.getUNDEF(ContainerVT); 3319 unsigned UndefCount = 0; 3320 for (const SDValue &V : Op->ops()) { 3321 if (V.isUndef()) { 3322 UndefCount++; 3323 continue; 3324 } 3325 if (UndefCount) { 3326 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3327 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3328 Vec, Offset, Mask, VL, Policy); 3329 UndefCount = 0; 3330 } 3331 auto OpCode = 3332 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3333 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3334 V, Mask, VL); 3335 } 3336 if (UndefCount) { 3337 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3338 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3339 Vec, Offset, Mask, VL, Policy); 3340 } 3341 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3342 } 3343 3344 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3345 SDValue Lo, SDValue Hi, SDValue VL, 3346 SelectionDAG &DAG) { 3347 if (!Passthru) 3348 Passthru = DAG.getUNDEF(VT); 3349 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 3350 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 3351 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 3352 // If Hi constant is all the same sign bit as Lo, lower this as a custom 3353 // node in order to try and match RVV vector/scalar instructions. 3354 if ((LoC >> 31) == HiC) 3355 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3356 3357 // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use 3358 // vmv.v.x whose EEW = 32 to lower it. 3359 if (LoC == HiC && isAllOnesConstant(VL)) { 3360 MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 3361 // TODO: if vl <= min(VLMAX), we can also do this. But we could not 3362 // access the subtarget here now. 
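// Illustrative example: splatting the i64 constant 0x1234567812345678 with
// VL equal to VLMAX can instead be done as an SEW=32 vmv.v.x of 0x12345678
// into a vector with twice as many elements, followed by a bitcast back to
// the SEW=64 type.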
3363 auto InterVec = DAG.getNode( 3364 RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo, 3365 DAG.getRegister(RISCV::X0, MVT::i32)); 3366 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); 3367 } 3368 } 3369 3370 // Fall back to a stack store and stride x0 vector load. 3371 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, 3372 Hi, VL); 3373 } 3374 3375 // Called by type legalization to handle splat of i64 on RV32. 3376 // FIXME: We can optimize this when the type has sign or zero bits in one 3377 // of the halves. 3378 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3379 SDValue Scalar, SDValue VL, 3380 SelectionDAG &DAG) { 3381 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); 3382 SDValue Lo, Hi; 3383 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); 3384 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); 3385 } 3386 3387 // This function lowers a splat of a scalar operand Splat with the vector 3388 // length VL. It ensures the final sequence is type legal, which is useful when 3389 // lowering a splat after type legalization. 3390 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, 3391 MVT VT, const SDLoc &DL, SelectionDAG &DAG, 3392 const RISCVSubtarget &Subtarget) { 3393 bool HasPassthru = Passthru && !Passthru.isUndef(); 3394 if (!HasPassthru && !Passthru) 3395 Passthru = DAG.getUNDEF(VT); 3396 if (VT.isFloatingPoint()) { 3397 // If VL is 1, we could use vfmv.s.f. 3398 if (isOneConstant(VL)) 3399 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); 3400 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); 3401 } 3402 3403 MVT XLenVT = Subtarget.getXLenVT(); 3404 3405 // Simplest case is that the operand needs to be promoted to XLenVT. 3406 if (Scalar.getValueType().bitsLE(XLenVT)) { 3407 // If the operand is a constant, sign extend to increase our chances 3408 // of being able to use a .vi instruction. ANY_EXTEND would become a 3409 // a zero extend and the simm5 check in isel would fail. 3410 // FIXME: Should we ignore the upper bits in isel instead? 3411 unsigned ExtOpc = 3412 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 3413 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 3414 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 3415 // If VL is 1 and the scalar value won't benefit from immediate, we could 3416 // use vmv.s.x. 3417 if (isOneConstant(VL) && 3418 (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue()))) 3419 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); 3420 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 3421 } 3422 3423 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && 3424 "Unexpected scalar for splat lowering!"); 3425 3426 if (isOneConstant(VL) && isNullConstant(Scalar)) 3427 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, 3428 DAG.getConstant(0, DL, XLenVT), VL); 3429 3430 // Otherwise use the more complicated splatting algorithm. 
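// splatSplitI64WithVL breaks the scalar into two i32 halves; if they do not
// match one of the cheap patterns handled in splatPartsI64WithVL, the
// resulting SPLAT_VECTOR_SPLIT_I64_VL node is expected to end up as a stack
// store of the two halves plus a stride-x0 vector load.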
3431 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3432 }
3433
3434 static MVT getLMUL1VT(MVT VT) {
3435 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3436 "Unexpected vector MVT");
3437 return MVT::getScalableVectorVT(
3438 VT.getVectorElementType(),
3439 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3440 }
3441
3442 // This function lowers an insert of a scalar operand Scalar into lane
3443 // 0 of the vector regardless of the value of VL. The contents of the
3444 // remaining lanes of the result vector are unspecified. VL is assumed
3445 // to be non-zero.
3446 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3447 const SDLoc &DL, SelectionDAG &DAG,
3448 const RISCVSubtarget &Subtarget) {
3449 const MVT XLenVT = Subtarget.getXLenVT();
3450
3451 SDValue Passthru = DAG.getUNDEF(VT);
3452 if (VT.isFloatingPoint()) {
3453 // TODO: Use vmv.v.i for appropriate constants
3454 // Use M1 or smaller to avoid over constraining register allocation
3455 const MVT M1VT = getLMUL1VT(VT);
3456 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3457 SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
3458 DAG.getUNDEF(InnerVT), Scalar, VL);
3459 if (VT != InnerVT)
3460 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3461 DAG.getUNDEF(VT),
3462 Result, DAG.getConstant(0, DL, XLenVT));
3463 return Result;
3464 }
3465
3466
3467 // Avoid the tricky legalization cases by falling back to using the
3468 // splat code which already handles it gracefully.
3469 if (!Scalar.getValueType().bitsLE(XLenVT))
3470 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3471 DAG.getConstant(1, DL, XLenVT),
3472 VT, DL, DAG, Subtarget);
3473
3474 // If the operand is a constant, sign extend to increase our chances
3475 // of being able to use a .vi instruction. ANY_EXTEND would become a
3476 // zero extend and the simm5 check in isel would fail.
3477 // FIXME: Should we ignore the upper bits in isel instead?
3478 unsigned ExtOpc =
3479 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3480 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3481 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
3482 // higher would involve overly constraining the register allocator for
3483 // no purpose.
3484 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
3485 if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
3486 VT.bitsLE(getLMUL1VT(VT)))
3487 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3488 }
3489 // Use M1 or smaller to avoid over constraining register allocation
3490 const MVT M1VT = getLMUL1VT(VT);
3491 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3492 SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
3493 DAG.getUNDEF(InnerVT), Scalar, VL);
3494 if (VT != InnerVT)
3495 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3496 DAG.getUNDEF(VT),
3497 Result, DAG.getConstant(0, DL, XLenVT));
3498 return Result;
3499 }
3500
3501 // Is this a shuffle that extracts either the even or odd elements of a vector?
3502 // That is, specifically, either (a) or (b) below.
3503 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
3504 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
3505 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3506 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3507 // Returns {Src Vector, Even Elements} on success
3508 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3509 SDValue V2, ArrayRef<int> Mask,
3510 const RISCVSubtarget &Subtarget) {
3511 // Need to be able to widen the vector.
3512 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3513 return false;
3514
3515 // Both inputs must be extracts.
3516 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3517 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3518 return false;
3519
3520 // Extracting from the same source.
3521 SDValue Src = V1.getOperand(0);
3522 if (Src != V2.getOperand(0))
3523 return false;
3524
3525 // Src needs to have twice the number of elements.
3526 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3527 return false;
3528
3529 // The extracts must extract the two halves of the source.
3530 if (V1.getConstantOperandVal(1) != 0 ||
3531 V2.getConstantOperandVal(1) != Mask.size())
3532 return false;
3533
3534 // First index must be the first even or odd element from V1.
3535 if (Mask[0] != 0 && Mask[0] != 1)
3536 return false;
3537
3538 // The others must increase by 2 each time.
3539 // TODO: Support undef elements?
3540 for (unsigned i = 1; i != Mask.size(); ++i)
3541 if (Mask[i] != Mask[i - 1] + 2)
3542 return false;
3543
3544 return true;
3545 }
3546
3547 /// Is this shuffle interleaving contiguous elements from one vector into the
3548 /// even elements and contiguous elements from another vector into the odd
3549 /// elements? \p EvenSrc will contain the element that should be in the first
3550 /// even element. \p OddSrc will contain the element that should be in the first
3551 /// odd element. These can be the first element in a source or the element half
3552 /// way through the source.
3553 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3554 int &OddSrc, const RISCVSubtarget &Subtarget) {
3555 // We need to be able to widen elements to the next larger integer type.
3556 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3557 return false;
3558
3559 int Size = Mask.size();
3560 int NumElts = VT.getVectorNumElements();
3561 assert(Size == (int)NumElts && "Unexpected mask size");
3562
3563 SmallVector<unsigned, 2> StartIndexes;
3564 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3565 return false;
3566
3567 EvenSrc = StartIndexes[0];
3568 OddSrc = StartIndexes[1];
3569
3570 // One source should be the low half of the first vector.
3571 if (EvenSrc != 0 && OddSrc != 0)
3572 return false;
3573
3574 // Subvectors will be extracted from either the start of the two input
3575 // vectors, or the start and middle of the first vector if it's a unary
3576 // interleave.
3577 // In both cases, HalfNumElts will be extracted.
3578 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
3579 // we'll create an illegal extract_subvector.
3580 // FIXME: We could support other values using a slidedown first.
3581 int HalfNumElts = NumElts / 2;
3582 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
3583 }
3584
3585 /// Match shuffles that concatenate two vectors, rotate the concatenation,
3586 /// and then extract the original number of elements from the rotated result.
3587 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3588 /// returned rotation amount is for a rotate right, where elements move from
3589 /// higher elements to lower elements. \p LoSrc indicates the first source
3590 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3591 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3592 /// 0 or 1 if a rotation is found.
3593 ///
3594 /// NOTE: We talk about rotate to the right which matches how bit shift and
3595 /// rotate instructions are described where LSBs are on the right, but LLVM IR
3596 /// and the table below write vectors with the lowest elements on the left.
3597 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3598 int Size = Mask.size();
3599
3600 // We need to detect various ways of spelling a rotation:
3601 // [11, 12, 13, 14, 15, 0, 1, 2]
3602 // [-1, 12, 13, 14, -1, -1, 1, -1]
3603 // [-1, -1, -1, -1, -1, -1, 1, 2]
3604 // [ 3, 4, 5, 6, 7, 8, 9, 10]
3605 // [-1, 4, 5, 6, -1, -1, 9, -1]
3606 // [-1, 4, 5, 6, -1, -1, -1, -1]
3607 int Rotation = 0;
3608 LoSrc = -1;
3609 HiSrc = -1;
3610 for (int i = 0; i != Size; ++i) {
3611 int M = Mask[i];
3612 if (M < 0)
3613 continue;
3614
3615 // Determine where a rotate vector would have started.
3616 int StartIdx = i - (M % Size);
3617 // The identity rotation isn't interesting, stop.
3618 if (StartIdx == 0)
3619 return -1;
3620
3621 // If we found the tail of a vector the rotation must be the missing
3622 // front. If we found the head of a vector, it must be how much of the
3623 // head.
3624 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3625
3626 if (Rotation == 0)
3627 Rotation = CandidateRotation;
3628 else if (Rotation != CandidateRotation)
3629 // The rotations don't match, so we can't match this mask.
3630 return -1;
3631
3632 // Compute which value this mask is pointing at.
3633 int MaskSrc = M < Size ? 0 : 1;
3634
3635 // Compute which of the two target values this index should be assigned to.
3636 // This reflects whether the high elements are remaining or the low elements
3637 // are remaining.
3638 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3639
3640 // Either set up this value if we've not encountered it before, or check
3641 // that it remains consistent.
3642 if (TargetSrc < 0)
3643 TargetSrc = MaskSrc;
3644 else if (TargetSrc != MaskSrc)
3645 // This may be a rotation, but it pulls from the inputs in some
3646 // unsupported interleaving.
3647 return -1;
3648 }
3649
3650 // Check that we successfully analyzed the mask, and normalize the results.
3651 assert(Rotation != 0 && "Failed to locate a viable rotation!");
3652 assert((LoSrc >= 0 || HiSrc >= 0) &&
3653 "Failed to find a rotated input vector!");
3654
3655 return Rotation;
3656 }
3657
3658 // Lower a deinterleave shuffle to vnsrl.
3659 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) 3660 // -> [p, q, r, s] (EvenElts == false) 3661 // VT is the type of the vector to return, <[vscale x ]n x ty> 3662 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> 3663 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, 3664 bool EvenElts, 3665 const RISCVSubtarget &Subtarget, 3666 SelectionDAG &DAG) { 3667 // The result is a vector of type <m x n x ty> 3668 MVT ContainerVT = VT; 3669 // Convert fixed vectors to scalable if needed 3670 if (ContainerVT.isFixedLengthVector()) { 3671 assert(Src.getSimpleValueType().isFixedLengthVector()); 3672 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 3673 3674 // The source is a vector of type <m x n*2 x ty> 3675 MVT SrcContainerVT = 3676 MVT::getVectorVT(ContainerVT.getVectorElementType(), 3677 ContainerVT.getVectorElementCount() * 2); 3678 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 3679 } 3680 3681 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3682 3683 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> 3684 // This also converts FP to int. 3685 unsigned EltBits = ContainerVT.getScalarSizeInBits(); 3686 MVT WideSrcContainerVT = MVT::getVectorVT( 3687 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount()); 3688 Src = DAG.getBitcast(WideSrcContainerVT, Src); 3689 3690 // The integer version of the container type. 3691 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); 3692 3693 // If we want even elements, then the shift amount is 0. Otherwise, shift by 3694 // the original element size. 3695 unsigned Shift = EvenElts ? 0 : EltBits; 3696 SDValue SplatShift = DAG.getNode( 3697 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), 3698 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); 3699 SDValue Res = 3700 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, 3701 DAG.getUNDEF(IntContainerVT), TrueMask, VL); 3702 // Cast back to FP if needed. 3703 Res = DAG.getBitcast(ContainerVT, Res); 3704 3705 if (VT.isFixedLengthVector()) 3706 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 3707 return Res; 3708 } 3709 3710 // Lower the following shuffle to vslidedown. 3711 // a) 3712 // t49: v8i8 = extract_subvector t13, Constant:i64<0> 3713 // t109: v8i8 = extract_subvector t13, Constant:i64<8> 3714 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 3715 // b) 3716 // t69: v16i16 = extract_subvector t68, Constant:i64<0> 3717 // t23: v8i16 = extract_subvector t69, Constant:i64<0> 3718 // t29: v4i16 = extract_subvector t23, Constant:i64<4> 3719 // t26: v8i16 = extract_subvector t69, Constant:i64<8> 3720 // t30: v4i16 = extract_subvector t26, Constant:i64<0> 3721 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 3722 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, 3723 SDValue V1, SDValue V2, 3724 ArrayRef<int> Mask, 3725 const RISCVSubtarget &Subtarget, 3726 SelectionDAG &DAG) { 3727 auto findNonEXTRACT_SUBVECTORParent = 3728 [](SDValue Parent) -> std::pair<SDValue, uint64_t> { 3729 uint64_t Offset = 0; 3730 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && 3731 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 3732 // a scalable vector. But we don't want to match the case. 
3733 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
3734 Offset += Parent.getConstantOperandVal(1);
3735 Parent = Parent.getOperand(0);
3736 }
3737 return std::make_pair(Parent, Offset);
3738 };
3739
3740 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
3741 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
3742
3743 // Extracting from the same source.
3744 SDValue Src = V1Src;
3745 if (Src != V2Src)
3746 return SDValue();
3747
3748 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
3749 SmallVector<int, 16> NewMask(Mask);
3750 for (size_t i = 0; i != NewMask.size(); ++i) {
3751 if (NewMask[i] == -1)
3752 continue;
3753
3754 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3755 NewMask[i] = NewMask[i] + V1IndexOffset;
3756 } else {
3757 // Minus NewMask.size() is needed. Otherwise, the b case would be
3758 // <5,6,7,12> instead of <5,6,7,8>.
3759 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3760 }
3761 }
3762
3763 // First index must be known and non-zero. It will be used as the slidedown
3764 // amount.
3765 if (NewMask[0] <= 0)
3766 return SDValue();
3767
3768 // NewMask must also be contiguous.
3769 for (unsigned i = 1; i != NewMask.size(); ++i)
3770 if (NewMask[i - 1] + 1 != NewMask[i])
3771 return SDValue();
3772
3773 MVT XLenVT = Subtarget.getXLenVT();
3774 MVT SrcVT = Src.getSimpleValueType();
3775 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3776 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3777 SDValue Slidedown =
3778 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3779 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3780 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3781 return DAG.getNode(
3782 ISD::EXTRACT_SUBVECTOR, DL, VT,
3783 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3784 DAG.getConstant(0, DL, XLenVT));
3785 }
3786
3787 // Because vslideup leaves the destination elements at the start intact, we can
3788 // use it to perform shuffles that insert subvectors:
3789 //
3790 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
3791 // ->
3792 // vsetvli zero, 8, e8, mf2, ta, ma
3793 // vslideup.vi v8, v9, 4
3794 //
3795 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
3796 // ->
3797 // vsetvli zero, 5, e8, mf2, tu, ma
3798 // vslideup.vi v8, v9, 2
3799 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
3800 SDValue V1, SDValue V2,
3801 ArrayRef<int> Mask,
3802 const RISCVSubtarget &Subtarget,
3803 SelectionDAG &DAG) {
3804 unsigned NumElts = VT.getVectorNumElements();
3805 int NumSubElts, Index;
3806 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
3807 Index))
3808 return SDValue();
3809
3810 bool OpsSwapped = Mask[Index] < (int)NumElts;
3811 SDValue InPlace = OpsSwapped ? V2 : V1;
3812 SDValue ToInsert = OpsSwapped ? V1 : V2;
3813
3814 MVT XLenVT = Subtarget.getXLenVT();
3815 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3816 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
3817 // We slide up by the index that the subvector is being inserted at, and set
3818 // VL to the index + the number of elements being inserted.
3819 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
3820 // If we're adding a suffix to the in-place vector, i.e.
inserting right 3821 // up to the very end of it, then we don't actually care about the tail. 3822 if (NumSubElts + Index >= (int)NumElts) 3823 Policy |= RISCVII::TAIL_AGNOSTIC; 3824 3825 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget); 3826 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget); 3827 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT); 3828 3829 SDValue Res; 3830 // If we're inserting into the lowest elements, use a tail undisturbed 3831 // vmv.v.v. 3832 if (Index == 0) 3833 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert, 3834 VL); 3835 else 3836 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert, 3837 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy); 3838 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3839 } 3840 3841 /// Match v(f)slide1up/down idioms. These operations involve sliding 3842 /// N-1 elements to make room for an inserted scalar at one end. 3843 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, 3844 SDValue V1, SDValue V2, 3845 ArrayRef<int> Mask, 3846 const RISCVSubtarget &Subtarget, 3847 SelectionDAG &DAG) { 3848 bool OpsSwapped = false; 3849 if (!isa<BuildVectorSDNode>(V1)) { 3850 if (!isa<BuildVectorSDNode>(V2)) 3851 return SDValue(); 3852 std::swap(V1, V2); 3853 OpsSwapped = true; 3854 } 3855 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue(); 3856 if (!Splat) 3857 return SDValue(); 3858 3859 // Return true if the mask could describe a slide of Mask.size() - 1 3860 // elements from concat_vector(V1, V2)[Base:] to [Offset:]. 3861 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { 3862 const unsigned S = (Offset > 0) ? 0 : -Offset; 3863 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); 3864 for (unsigned i = S; i != E; ++i) 3865 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) 3866 return false; 3867 return true; 3868 }; 3869 3870 const unsigned NumElts = VT.getVectorNumElements(); 3871 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); 3872 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) 3873 return SDValue(); 3874 3875 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; 3876 // Inserted lane must come from splat, undef scalar is legal but not profitable. 3877 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) 3878 return SDValue(); 3879 3880 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3881 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3882 auto OpCode = IsVSlidedown ? 3883 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : 3884 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); 3885 auto Vec = DAG.getNode(OpCode, DL, ContainerVT, 3886 DAG.getUNDEF(ContainerVT), 3887 convertToScalableVector(ContainerVT, V2, DAG, Subtarget), 3888 Splat, TrueMask, VL); 3889 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3890 } 3891 3892 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 3893 // to create an interleaved vector of <[vscale x] n*2 x ty>. 3894 // This requires that the size of ty is less than the subtarget's maximum ELEN. 
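// Illustrative sketch for SEW=8: with EvenV = <a, b> and OddV = <p, q>,
//   vwaddu.vv gives 16-bit lanes <a + p, b + q>
//   vwmaccu.vx (odd * 0xff) adds <p * 255, q * 255>
// so each 16-bit lane holds (odd << 8) + even, which re-read as bytes is
// <a, p, b, q>.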
3895 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
3896 const SDLoc &DL, SelectionDAG &DAG,
3897 const RISCVSubtarget &Subtarget) {
3898 MVT VecVT = EvenV.getSimpleValueType();
3899 MVT VecContainerVT = VecVT; // <vscale x n x ty>
3900 // Convert fixed vectors to scalable if needed
3901 if (VecContainerVT.isFixedLengthVector()) {
3902 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
3903 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
3904 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
3905 }
3906
3907 assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
3908
3909 // We're working with a vector of the same size as the resulting
3910 // interleaved vector, but with half the number of elements and
3911 // twice the SEW (Hence the restriction on not using the maximum
3912 // ELEN)
3913 MVT WideVT =
3914 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
3915 VecVT.getVectorElementCount());
3916 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
3917 if (WideContainerVT.isFixedLengthVector())
3918 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
3919
3920 // Bitcast the input vectors to integers in case they are FP
3921 VecContainerVT = VecContainerVT.changeTypeToInteger();
3922 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
3923 OddV = DAG.getBitcast(VecContainerVT, OddV);
3924
3925 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
3926 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
3927
3928 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
3929 // vwaddu.vv
3930 SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
3931 EvenV, OddV, Passthru, Mask, VL);
3932
3933 // Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. all-ones
3934 SDValue AllOnesVec = DAG.getSplatVector(
3935 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
3936 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
3937 AllOnesVec, Passthru, Mask, VL);
3938
3939 // Add the two together so we get
3940 // (OddV * 0xff...ff) + (OddV + EvenV)
3941 // = (OddV * 0x100...00) + EvenV
3942 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
3943 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
3944 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
3945 OddsMul, Passthru, Mask, VL);
3946
3947 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
3948 MVT ResultContainerVT = MVT::getVectorVT(
3949 VecVT.getVectorElementType(), // Make sure to use original type
3950 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
3951 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
3952
3953 // Convert back to a fixed vector if needed
3954 MVT ResultVT =
3955 MVT::getVectorVT(VecVT.getVectorElementType(),
3956 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
3957 if (ResultVT.isFixedLengthVector())
3958 Interleaved =
3959 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
3960
3961 return Interleaved;
3962 }
3963
3964 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3965 const RISCVSubtarget &Subtarget) {
3966 SDValue V1 = Op.getOperand(0);
3967 SDValue V2 = Op.getOperand(1);
3968 SDLoc DL(Op);
3969 MVT XLenVT = Subtarget.getXLenVT();
3970 MVT VT = Op.getSimpleValueType();
3971 unsigned NumElts = VT.getVectorNumElements();
3972 ShuffleVectorSDNode *SVN =
cast<ShuffleVectorSDNode>(Op.getNode()); 3973 3974 // Promote i1 shuffle to i8 shuffle. 3975 if (VT.getVectorElementType() == MVT::i1) { 3976 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); 3977 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); 3978 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) 3979 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2); 3980 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask()); 3981 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT), 3982 ISD::SETNE); 3983 } 3984 3985 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3986 3987 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3988 3989 if (SVN->isSplat()) { 3990 const int Lane = SVN->getSplatIndex(); 3991 if (Lane >= 0) { 3992 MVT SVT = VT.getVectorElementType(); 3993 3994 // Turn splatted vector load into a strided load with an X0 stride. 3995 SDValue V = V1; 3996 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 3997 // with undef. 3998 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 3999 int Offset = Lane; 4000 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 4001 int OpElements = 4002 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 4003 V = V.getOperand(Offset / OpElements); 4004 Offset %= OpElements; 4005 } 4006 4007 // We need to ensure the load isn't atomic or volatile. 4008 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 4009 auto *Ld = cast<LoadSDNode>(V); 4010 Offset *= SVT.getStoreSize(); 4011 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), 4012 TypeSize::Fixed(Offset), DL); 4013 4014 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 4015 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 4016 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 4017 SDValue IntID = 4018 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 4019 SDValue Ops[] = {Ld->getChain(), 4020 IntID, 4021 DAG.getUNDEF(ContainerVT), 4022 NewAddr, 4023 DAG.getRegister(RISCV::X0, XLenVT), 4024 VL}; 4025 SDValue NewLoad = DAG.getMemIntrinsicNode( 4026 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 4027 DAG.getMachineFunction().getMachineMemOperand( 4028 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 4029 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 4030 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 4031 } 4032 4033 // Otherwise use a scalar load and splat. This will give the best 4034 // opportunity to fold a splat into the operation. ISel can turn it into 4035 // the x0 strided load if we aren't able to fold away the select. 4036 if (SVT.isFloatingPoint()) 4037 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 4038 Ld->getPointerInfo().getWithOffset(Offset), 4039 Ld->getOriginalAlign(), 4040 Ld->getMemOperand()->getFlags()); 4041 else 4042 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 4043 Ld->getPointerInfo().getWithOffset(Offset), SVT, 4044 Ld->getOriginalAlign(), 4045 Ld->getMemOperand()->getFlags()); 4046 DAG.makeEquivalentMemoryOrdering(Ld, V); 4047 4048 unsigned Opc = 4049 VT.isFloatingPoint() ? 
RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 4050 SDValue Splat = 4051 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); 4052 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 4053 } 4054 4055 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4056 assert(Lane < (int)NumElts && "Unexpected lane!"); 4057 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, 4058 V1, DAG.getConstant(Lane, DL, XLenVT), 4059 DAG.getUNDEF(ContainerVT), TrueMask, VL); 4060 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 4061 } 4062 } 4063 4064 ArrayRef<int> Mask = SVN->getMask(); 4065 4066 if (SDValue V = 4067 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4068 return V; 4069 4070 if (SDValue V = 4071 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4072 return V; 4073 4074 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 4075 // be undef which can be handled with a single SLIDEDOWN/UP. 4076 int LoSrc, HiSrc; 4077 int Rotation = isElementRotate(LoSrc, HiSrc, Mask); 4078 if (Rotation > 0) { 4079 SDValue LoV, HiV; 4080 if (LoSrc >= 0) { 4081 LoV = LoSrc == 0 ? V1 : V2; 4082 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); 4083 } 4084 if (HiSrc >= 0) { 4085 HiV = HiSrc == 0 ? V1 : V2; 4086 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); 4087 } 4088 4089 // We found a rotation. We need to slide HiV down by Rotation. Then we need 4090 // to slide LoV up by (NumElts - Rotation). 4091 unsigned InvRotate = NumElts - Rotation; 4092 4093 SDValue Res = DAG.getUNDEF(ContainerVT); 4094 if (HiV) { 4095 // Even though we could use a smaller VL, don't to avoid a vsetivli 4096 // toggle. 4097 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, 4098 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL); 4099 } 4100 if (LoV) 4101 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, 4102 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, 4103 RISCVII::TAIL_AGNOSTIC); 4104 4105 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4106 } 4107 4108 // If this is a deinterleave and we can widen the vector, then we can use 4109 // vnsrl to deinterleave. 4110 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { 4111 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, 4112 Subtarget, DAG); 4113 } 4114 4115 if (SDValue V = 4116 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4117 return V; 4118 4119 // Detect an interleave shuffle and lower to 4120 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 4121 int EvenSrc, OddSrc; 4122 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { 4123 // Extract the halves of the vectors. 4124 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 4125 4126 int Size = Mask.size(); 4127 SDValue EvenV, OddV; 4128 assert(EvenSrc >= 0 && "Undef source?"); 4129 EvenV = (EvenSrc / Size) == 0 ? V1 : V2; 4130 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV, 4131 DAG.getConstant(EvenSrc % Size, DL, XLenVT)); 4132 4133 assert(OddSrc >= 0 && "Undef source?"); 4134 OddV = (OddSrc / Size) == 0 ? 
V1 : V2; 4135 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV, 4136 DAG.getConstant(OddSrc % Size, DL, XLenVT)); 4137 4138 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); 4139 } 4140 4141 // Detect shuffles which can be re-expressed as vector selects; these are 4142 // shuffles in which each element in the destination is taken from an element 4143 // at the corresponding index in either source vectors. 4144 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { 4145 int MaskIndex = MaskIdx.value(); 4146 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; 4147 }); 4148 4149 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 4150 4151 SmallVector<SDValue> MaskVals; 4152 // As a backup, shuffles can be lowered via a vrgather instruction, possibly 4153 // merged with a second vrgather. 4154 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; 4155 4156 // By default we preserve the original operand order, and use a mask to 4157 // select LHS as true and RHS as false. However, since RVV vector selects may 4158 // feature splats but only on the LHS, we may choose to invert our mask and 4159 // instead select between RHS and LHS. 4160 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 4161 bool InvertMask = IsSelect == SwapOps; 4162 4163 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle 4164 // half. 4165 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; 4166 4167 // Now construct the mask that will be used by the vselect or blended 4168 // vrgather operation. For vrgathers, construct the appropriate indices into 4169 // each vector. 4170 for (int MaskIndex : Mask) { 4171 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; 4172 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 4173 if (!IsSelect) { 4174 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; 4175 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 4176 ? DAG.getConstant(MaskIndex, DL, XLenVT) 4177 : DAG.getUNDEF(XLenVT)); 4178 GatherIndicesRHS.push_back( 4179 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) 4180 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); 4181 if (IsLHSOrUndefIndex && MaskIndex >= 0) 4182 ++LHSIndexCounts[MaskIndex]; 4183 if (!IsLHSOrUndefIndex) 4184 ++RHSIndexCounts[MaskIndex - NumElts]; 4185 } 4186 } 4187 4188 if (SwapOps) { 4189 std::swap(V1, V2); 4190 std::swap(GatherIndicesLHS, GatherIndicesRHS); 4191 } 4192 4193 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 4194 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 4195 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 4196 4197 if (IsSelect) 4198 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 4199 4200 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { 4201 // On such a large vector we're unable to use i8 as the index type. 4202 // FIXME: We could promote the index to i16 and use vrgatherei16, but that 4203 // may involve vector splitting if we're already at LMUL=8, or our 4204 // user-supplied maximum fixed-length LMUL. 4205 return SDValue(); 4206 } 4207 4208 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; 4209 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; 4210 MVT IndexVT = VT.changeTypeToInteger(); 4211 // Since we can't introduce illegal index types at this stage, use i16 and 4212 // vrgatherei16 if the corresponding index type for plain vrgather is greater 4213 // than XLenVT. 
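// For example (illustrative), a v4i64 shuffle on RV32 would otherwise need
// v4i64 gather indices, which exceed XLEN there; using i16 indices with
// vrgatherei16 keeps the index type legal while still addressing up to
// 65536 elements.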
4214 if (IndexVT.getScalarType().bitsGT(XLenVT)) { 4215 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 4216 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 4217 } 4218 4219 MVT IndexContainerVT = 4220 ContainerVT.changeVectorElementType(IndexVT.getScalarType()); 4221 4222 SDValue Gather; 4223 // TODO: This doesn't trigger for i64 vectors on RV32, since there we 4224 // encounter a bitcasted BUILD_VECTOR with low/high i32 values. 4225 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { 4226 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, 4227 Subtarget); 4228 } else { 4229 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4230 // If only one index is used, we can use a "splat" vrgather. 4231 // TODO: We can splat the most-common index and fix-up any stragglers, if 4232 // that's beneficial. 4233 if (LHSIndexCounts.size() == 1) { 4234 int SplatIndex = LHSIndexCounts.begin()->getFirst(); 4235 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, 4236 DAG.getConstant(SplatIndex, DL, XLenVT), 4237 DAG.getUNDEF(ContainerVT), TrueMask, VL); 4238 } else { 4239 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 4240 LHSIndices = 4241 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); 4242 4243 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 4244 DAG.getUNDEF(ContainerVT), TrueMask, VL); 4245 } 4246 } 4247 4248 // If a second vector operand is used by this shuffle, blend it in with an 4249 // additional vrgather. 4250 if (!V2.isUndef()) { 4251 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 4252 4253 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 4254 SelectMask = 4255 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); 4256 4257 // If only one index is used, we can use a "splat" vrgather. 4258 // TODO: We can splat the most-common index and fix-up any stragglers, if 4259 // that's beneficial. 4260 if (RHSIndexCounts.size() == 1) { 4261 int SplatIndex = RHSIndexCounts.begin()->getFirst(); 4262 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, 4263 DAG.getConstant(SplatIndex, DL, XLenVT), Gather, 4264 SelectMask, VL); 4265 } else { 4266 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); 4267 RHSIndices = 4268 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); 4269 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, 4270 SelectMask, VL); 4271 } 4272 } 4273 4274 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 4275 } 4276 4277 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 4278 // Support splats for any type. These should type legalize well. 4279 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 4280 return true; 4281 4282 // Only support legal VTs for other shuffles for now. 4283 if (!isTypeLegal(VT)) 4284 return false; 4285 4286 MVT SVT = VT.getSimpleVT(); 4287 4288 // Not for i1 vectors. 4289 if (SVT.getScalarType() == MVT::i1) 4290 return false; 4291 4292 int Dummy1, Dummy2; 4293 return (isElementRotate(Dummy1, Dummy2, M) > 0) || 4294 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); 4295 } 4296 4297 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 4298 // the exponent. 
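// Illustrative example: cttz_zero_undef(i32 40). 40 & -40 isolates the
// lowest set bit, 8; converting 8 to f32 gives a biased exponent of 130
// (127 + 3), so shifting the exponent down and subtracting the bias 127
// recovers the trailing zero count, 3.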
4299 SDValue 4300 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, 4301 SelectionDAG &DAG) const { 4302 MVT VT = Op.getSimpleValueType(); 4303 unsigned EltSize = VT.getScalarSizeInBits(); 4304 SDValue Src = Op.getOperand(0); 4305 SDLoc DL(Op); 4306 MVT ContainerVT = VT; 4307 4308 SDValue Mask, VL; 4309 if (Op->isVPOpcode()) { 4310 Mask = Op.getOperand(1); 4311 if (VT.isFixedLengthVector()) 4312 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 4313 Subtarget); 4314 VL = Op.getOperand(2); 4315 } 4316 4317 // We choose FP type that can represent the value if possible. Otherwise, we 4318 // use rounding to zero conversion for correct exponent of the result. 4319 // TODO: Use f16 for i8 when possible? 4320 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; 4321 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) 4322 FloatEltVT = MVT::f32; 4323 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 4324 4325 // Legal types should have been checked in the RISCVTargetLowering 4326 // constructor. 4327 // TODO: Splitting may make sense in some cases. 4328 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 4329 "Expected legal float type!"); 4330 4331 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 4332 // The trailing zero count is equal to log2 of this single bit value. 4333 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 4334 SDValue Neg = DAG.getNegative(Src, DL, VT); 4335 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 4336 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { 4337 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), 4338 Src, Mask, VL); 4339 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL); 4340 } 4341 4342 // We have a legal FP type, convert to it. 4343 SDValue FloatVal; 4344 if (FloatVT.bitsGT(VT)) { 4345 if (Op->isVPOpcode()) 4346 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL); 4347 else 4348 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 4349 } else { 4350 // Use RTZ to avoid rounding influencing exponent of FloatVal. 4351 if (VT.isFixedLengthVector()) { 4352 ContainerVT = getContainerForFixedLengthVector(VT); 4353 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 4354 } 4355 if (!Op->isVPOpcode()) 4356 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4357 SDValue RTZRM = 4358 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); 4359 MVT ContainerFloatVT = 4360 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); 4361 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, 4362 Src, Mask, RTZRM, VL); 4363 if (VT.isFixedLengthVector()) 4364 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); 4365 } 4366 // Bitcast to integer and shift the exponent to the LSB. 4367 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 4368 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 4369 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 4370 4371 SDValue Exp; 4372 // Restore back to original type. Truncation after SRL is to generate vnsrl. 
4373 if (Op->isVPOpcode()) { 4374 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast, 4375 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL); 4376 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL); 4377 } else { 4378 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 4379 DAG.getConstant(ShiftAmt, DL, IntVT)); 4380 if (IntVT.bitsLT(VT)) 4381 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); 4382 else if (IntVT.bitsGT(VT)) 4383 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); 4384 } 4385 4386 // The exponent contains log2 of the value in biased form. 4387 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 4388 // For trailing zeros, we just need to subtract the bias. 4389 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 4390 return DAG.getNode(ISD::SUB, DL, VT, Exp, 4391 DAG.getConstant(ExponentBias, DL, VT)); 4392 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) 4393 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp, 4394 DAG.getConstant(ExponentBias, DL, VT), Mask, VL); 4395 4396 // For leading zeros, we need to remove the bias and convert from log2 to 4397 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 4398 unsigned Adjust = ExponentBias + (EltSize - 1); 4399 SDValue Res; 4400 if (Op->isVPOpcode()) 4401 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp, 4402 Mask, VL); 4403 else 4404 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); 4405 4406 // The above result with zero input equals to Adjust which is greater than 4407 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 4408 if (Op.getOpcode() == ISD::CTLZ) 4409 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); 4410 else if (Op.getOpcode() == ISD::VP_CTLZ) 4411 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, 4412 DAG.getConstant(EltSize, DL, VT), Mask, VL); 4413 return Res; 4414 } 4415 4416 // While RVV has alignment restrictions, we should always be able to load as a 4417 // legal equivalently-sized byte-typed vector instead. This method is 4418 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 4419 // the load is already correctly-aligned, it returns SDValue(). 4420 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 4421 SelectionDAG &DAG) const { 4422 auto *Load = cast<LoadSDNode>(Op); 4423 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 4424 4425 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 4426 Load->getMemoryVT(), 4427 *Load->getMemOperand())) 4428 return SDValue(); 4429 4430 SDLoc DL(Op); 4431 MVT VT = Op.getSimpleValueType(); 4432 unsigned EltSizeBits = VT.getScalarSizeInBits(); 4433 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 4434 "Unexpected unaligned RVV load type"); 4435 MVT NewVT = 4436 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 4437 assert(NewVT.isValid() && 4438 "Expecting equally-sized RVV vector types to be legal"); 4439 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 4440 Load->getPointerInfo(), Load->getOriginalAlign(), 4441 Load->getMemOperand()->getFlags()); 4442 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 4443 } 4444 4445 // While RVV has alignment restrictions, we should always be able to store as a 4446 // legal equivalently-sized byte-typed vector instead. This method is 4447 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. 
It 4448 // returns SDValue() if the store is already correctly aligned. 4449 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 4450 SelectionDAG &DAG) const { 4451 auto *Store = cast<StoreSDNode>(Op); 4452 assert(Store && Store->getValue().getValueType().isVector() && 4453 "Expected vector store"); 4454 4455 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 4456 Store->getMemoryVT(), 4457 *Store->getMemOperand())) 4458 return SDValue(); 4459 4460 SDLoc DL(Op); 4461 SDValue StoredVal = Store->getValue(); 4462 MVT VT = StoredVal.getSimpleValueType(); 4463 unsigned EltSizeBits = VT.getScalarSizeInBits(); 4464 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 4465 "Unexpected unaligned RVV store type"); 4466 MVT NewVT = 4467 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 4468 assert(NewVT.isValid() && 4469 "Expecting equally-sized RVV vector types to be legal"); 4470 StoredVal = DAG.getBitcast(NewVT, StoredVal); 4471 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 4472 Store->getPointerInfo(), Store->getOriginalAlign(), 4473 Store->getMemOperand()->getFlags()); 4474 } 4475 4476 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, 4477 const RISCVSubtarget &Subtarget) { 4478 assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); 4479 4480 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); 4481 4482 // All simm32 constants should be handled by isel. 4483 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 4484 // this check redundant, but small immediates are common so this check 4485 // should have better compile time. 4486 if (isInt<32>(Imm)) 4487 return Op; 4488 4489 // We only need to cost the immediate, if constant pool lowering is enabled. 4490 if (!Subtarget.useConstantPoolForLargeInts()) 4491 return Op; 4492 4493 RISCVMatInt::InstSeq Seq = 4494 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 4495 if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) 4496 return Op; 4497 4498 // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do 4499 // that if it will avoid a constant pool. 4500 // It will require an extra temporary register though. 4501 if (!DAG.shouldOptForSize()) { 4502 int64_t LoVal = SignExtend64<32>(Imm); 4503 int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32); 4504 if (LoVal == HiVal) { 4505 RISCVMatInt::InstSeq SeqLo = 4506 RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); 4507 if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) 4508 return Op; 4509 } 4510 } 4511 4512 // Expand to a constant pool using the default expansion code. 4513 return SDValue(); 4514 } 4515 4516 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 4517 const RISCVSubtarget &Subtarget) { 4518 SDLoc dl(Op); 4519 AtomicOrdering FenceOrdering = 4520 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 4521 SyncScope::ID FenceSSID = 4522 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 4523 4524 if (Subtarget.hasStdExtZtso()) { 4525 // The only fence that needs an instruction is a sequentially-consistent 4526 // cross-thread fence. 4527 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 4528 FenceSSID == SyncScope::System) 4529 return Op; 4530 4531 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
4532 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 4533 } 4534 4535 // singlethread fences only synchronize with signal handlers on the same 4536 // thread and thus only need to preserve instruction order, not actually 4537 // enforce memory ordering. 4538 if (FenceSSID == SyncScope::SingleThread) 4539 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 4540 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 4541 4542 return Op; 4543 } 4544 4545 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, 4546 SelectionDAG &DAG) const { 4547 SDLoc DL(Op); 4548 MVT VT = Op.getSimpleValueType(); 4549 MVT XLenVT = Subtarget.getXLenVT(); 4550 auto CNode = cast<ConstantSDNode>(Op.getOperand(1)); 4551 unsigned Check = CNode->getZExtValue(); 4552 unsigned TDCMask = 0; 4553 if (Check & fcSNan) 4554 TDCMask |= RISCV::FPMASK_Signaling_NaN; 4555 if (Check & fcQNan) 4556 TDCMask |= RISCV::FPMASK_Quiet_NaN; 4557 if (Check & fcPosInf) 4558 TDCMask |= RISCV::FPMASK_Positive_Infinity; 4559 if (Check & fcNegInf) 4560 TDCMask |= RISCV::FPMASK_Negative_Infinity; 4561 if (Check & fcPosNormal) 4562 TDCMask |= RISCV::FPMASK_Positive_Normal; 4563 if (Check & fcNegNormal) 4564 TDCMask |= RISCV::FPMASK_Negative_Normal; 4565 if (Check & fcPosSubnormal) 4566 TDCMask |= RISCV::FPMASK_Positive_Subnormal; 4567 if (Check & fcNegSubnormal) 4568 TDCMask |= RISCV::FPMASK_Negative_Subnormal; 4569 if (Check & fcPosZero) 4570 TDCMask |= RISCV::FPMASK_Positive_Zero; 4571 if (Check & fcNegZero) 4572 TDCMask |= RISCV::FPMASK_Negative_Zero; 4573 4574 bool IsOneBitMask = isPowerOf2_32(TDCMask); 4575 4576 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT); 4577 4578 if (VT.isVector()) { 4579 SDValue Op0 = Op.getOperand(0); 4580 MVT VT0 = Op.getOperand(0).getSimpleValueType(); 4581 4582 if (VT.isScalableVector()) { 4583 MVT DstVT = VT0.changeVectorElementTypeToInteger(); 4584 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); 4585 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, 4586 VL, Op->getFlags()); 4587 if (IsOneBitMask) 4588 return DAG.getSetCC(DL, VT, FPCLASS, 4589 DAG.getConstant(TDCMask, DL, DstVT), 4590 ISD::CondCode::SETEQ); 4591 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS, 4592 DAG.getConstant(TDCMask, DL, DstVT)); 4593 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT), 4594 ISD::SETNE); 4595 } 4596 4597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0); 4598 MVT ContainerVT = getContainerForFixedLengthVector(VT); 4599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); 4600 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); 4601 4602 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); 4603 4604 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, 4605 Mask, VL, Op->getFlags()); 4606 4607 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 4608 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL); 4609 if (IsOneBitMask) { 4610 SDValue VMSEQ = 4611 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 4612 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ), 4613 DAG.getUNDEF(ContainerVT), Mask, VL}); 4614 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget); 4615 } 4616 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, 4617 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); 4618 4619 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 4620 SplatZero = 
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 4621 DAG.getUNDEF(ContainerDstVT), SplatZero, VL); 4622 4623 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 4624 {AND, SplatZero, DAG.getCondCode(ISD::SETNE), 4625 DAG.getUNDEF(ContainerVT), Mask, VL}); 4626 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); 4627 } 4628 4629 SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0)); 4630 SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV); 4631 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT), 4632 ISD::CondCode::SETNE); 4633 } 4634 4635 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 4636 // operations propagate nans. 4637 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, 4638 const RISCVSubtarget &Subtarget) { 4639 SDLoc DL(Op); 4640 EVT VT = Op.getValueType(); 4641 4642 SDValue X = Op.getOperand(0); 4643 SDValue Y = Op.getOperand(1); 4644 4645 MVT XLenVT = Subtarget.getXLenVT(); 4646 4647 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 4648 // ensures that when one input is a nan, the other will also be a nan allowing 4649 // the nan to propagate. If both inputs are nan, this will swap the inputs 4650 // which is harmless. 4651 // FIXME: Handle nonans FMF and use isKnownNeverNaN. 4652 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); 4653 SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); 4654 4655 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); 4656 SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); 4657 4658 unsigned Opc = 4659 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; 4660 return DAG.getNode(Opc, DL, VT, NewX, NewY); 4661 } 4662 4663 /// Get a RISCV target specified VL op for a given SDNode. 4664 static unsigned getRISCVVLOp(SDValue Op) { 4665 #define OP_CASE(NODE) \ 4666 case ISD::NODE: \ 4667 return RISCVISD::NODE##_VL; 4668 switch (Op.getOpcode()) { 4669 default: 4670 llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); 4671 // clang-format off 4672 OP_CASE(ADD) 4673 OP_CASE(SUB) 4674 OP_CASE(MUL) 4675 OP_CASE(MULHS) 4676 OP_CASE(MULHU) 4677 OP_CASE(SDIV) 4678 OP_CASE(SREM) 4679 OP_CASE(UDIV) 4680 OP_CASE(UREM) 4681 OP_CASE(SHL) 4682 OP_CASE(SRA) 4683 OP_CASE(SRL) 4684 OP_CASE(SADDSAT) 4685 OP_CASE(UADDSAT) 4686 OP_CASE(SSUBSAT) 4687 OP_CASE(USUBSAT) 4688 OP_CASE(FADD) 4689 OP_CASE(FSUB) 4690 OP_CASE(FMUL) 4691 OP_CASE(FDIV) 4692 OP_CASE(FNEG) 4693 OP_CASE(FABS) 4694 OP_CASE(FSQRT) 4695 OP_CASE(SMIN) 4696 OP_CASE(SMAX) 4697 OP_CASE(UMIN) 4698 OP_CASE(UMAX) 4699 OP_CASE(FMINNUM) 4700 OP_CASE(FMAXNUM) 4701 OP_CASE(STRICT_FADD) 4702 OP_CASE(STRICT_FSUB) 4703 OP_CASE(STRICT_FMUL) 4704 OP_CASE(STRICT_FDIV) 4705 OP_CASE(STRICT_FSQRT) 4706 // clang-format on 4707 #undef OP_CASE 4708 case ISD::FMA: 4709 return RISCVISD::VFMADD_VL; 4710 case ISD::STRICT_FMA: 4711 return RISCVISD::STRICT_VFMADD_VL; 4712 case ISD::AND: 4713 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 4714 return RISCVISD::VMAND_VL; 4715 return RISCVISD::AND_VL; 4716 case ISD::OR: 4717 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 4718 return RISCVISD::VMOR_VL; 4719 return RISCVISD::OR_VL; 4720 case ISD::XOR: 4721 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 4722 return RISCVISD::VMXOR_VL; 4723 return RISCVISD::XOR_VL; 4724 } 4725 } 4726 4727 /// Return true if a RISC-V target specified op has a merge operand. 
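/// (Loosely speaking, the merge operand acts as a passthru: it supplies the
/// result value for lanes that the mask disables instead of leaving them
/// undefined.)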
4728 static bool hasMergeOp(unsigned Opcode) { 4729 assert(Opcode > RISCVISD::FIRST_NUMBER && 4730 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL && 4731 "not a RISC-V target specific op"); 4732 assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 && 4733 "adding target specific op should update this function"); 4734 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL) 4735 return true; 4736 if (Opcode == RISCVISD::FCOPYSIGN_VL) 4737 return true; 4738 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) 4739 return true; 4740 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) 4741 return true; 4742 return false; 4743 } 4744 4745 /// Return true if a RISC-V target specified op has a mask operand. 4746 static bool hasMaskOp(unsigned Opcode) { 4747 assert(Opcode > RISCVISD::FIRST_NUMBER && 4748 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL && 4749 "not a RISC-V target specific op"); 4750 assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 && 4751 "adding target specific op should update this function"); 4752 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) 4753 return true; 4754 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) 4755 return true; 4756 if (Opcode >= RISCVISD::STRICT_FADD_VL && 4757 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) 4758 return true; 4759 return false; 4760 } 4761 4762 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 4763 SelectionDAG &DAG) const { 4764 switch (Op.getOpcode()) { 4765 default: 4766 report_fatal_error("unimplemented operand"); 4767 case ISD::ATOMIC_FENCE: 4768 return LowerATOMIC_FENCE(Op, DAG, Subtarget); 4769 case ISD::GlobalAddress: 4770 return lowerGlobalAddress(Op, DAG); 4771 case ISD::BlockAddress: 4772 return lowerBlockAddress(Op, DAG); 4773 case ISD::ConstantPool: 4774 return lowerConstantPool(Op, DAG); 4775 case ISD::JumpTable: 4776 return lowerJumpTable(Op, DAG); 4777 case ISD::GlobalTLSAddress: 4778 return lowerGlobalTLSAddress(Op, DAG); 4779 case ISD::Constant: 4780 return lowerConstant(Op, DAG, Subtarget); 4781 case ISD::SELECT: 4782 return lowerSELECT(Op, DAG); 4783 case ISD::BRCOND: 4784 return lowerBRCOND(Op, DAG); 4785 case ISD::VASTART: 4786 return lowerVASTART(Op, DAG); 4787 case ISD::FRAMEADDR: 4788 return lowerFRAMEADDR(Op, DAG); 4789 case ISD::RETURNADDR: 4790 return lowerRETURNADDR(Op, DAG); 4791 case ISD::SHL_PARTS: 4792 return lowerShiftLeftParts(Op, DAG); 4793 case ISD::SRA_PARTS: 4794 return lowerShiftRightParts(Op, DAG, true); 4795 case ISD::SRL_PARTS: 4796 return lowerShiftRightParts(Op, DAG, false); 4797 case ISD::ROTL: 4798 case ISD::ROTR: 4799 assert(Subtarget.hasVendorXTHeadBb() && 4800 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 4801 "Unexpected custom legalization"); 4802 // XTHeadBb only supports rotate by constant. 
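    // Returning SDValue() here lets the generic legalizer fall back to its
    // default expansion (shifts plus an OR) for variable rotate amounts.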
4803 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4804 return SDValue(); 4805 return Op; 4806 case ISD::BITCAST: { 4807 SDLoc DL(Op); 4808 EVT VT = Op.getValueType(); 4809 SDValue Op0 = Op.getOperand(0); 4810 EVT Op0VT = Op0.getValueType(); 4811 MVT XLenVT = Subtarget.getXLenVT(); 4812 if (VT == MVT::f16 && Op0VT == MVT::i16 && 4813 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { 4814 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 4815 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 4816 return FPConv; 4817 } 4818 if (VT == MVT::bf16 && Op0VT == MVT::i16 && 4819 Subtarget.hasStdExtZfbfmin()) { 4820 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 4821 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); 4822 return FPConv; 4823 } 4824 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 4825 Subtarget.hasStdExtFOrZfinx()) { 4826 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 4827 SDValue FPConv = 4828 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 4829 return FPConv; 4830 } 4831 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 && 4832 Subtarget.hasStdExtZfa()) { 4833 SDValue Lo, Hi; 4834 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); 4835 SDValue RetReg = 4836 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 4837 return RetReg; 4838 } 4839 4840 // Consider other scalar<->scalar casts as legal if the types are legal. 4841 // Otherwise expand them. 4842 if (!VT.isVector() && !Op0VT.isVector()) { 4843 if (isTypeLegal(VT) && isTypeLegal(Op0VT)) 4844 return Op; 4845 return SDValue(); 4846 } 4847 4848 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && 4849 "Unexpected types"); 4850 4851 if (VT.isFixedLengthVector()) { 4852 // We can handle fixed length vector bitcasts with a simple replacement 4853 // in isel. 4854 if (Op0VT.isFixedLengthVector()) 4855 return Op; 4856 // When bitcasting from scalar to fixed-length vector, insert the scalar 4857 // into a one-element vector of the result type, and perform a vector 4858 // bitcast. 4859 if (!Op0VT.isVector()) { 4860 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 4861 if (!isTypeLegal(BVT)) 4862 return SDValue(); 4863 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 4864 DAG.getUNDEF(BVT), Op0, 4865 DAG.getConstant(0, DL, XLenVT))); 4866 } 4867 return SDValue(); 4868 } 4869 // Custom-legalize bitcasts from fixed-length vector types to scalar types 4870 // thus: bitcast the vector to a one-element vector type whose element type 4871 // is the same as the result type, and extract the first element. 
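    // e.g. (i32 (bitcast (v4i8 X))) is lowered as
    // (extract_vector_elt (v1i32 (bitcast (v4i8 X))), 0).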
4872 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 4873 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 4874 if (!isTypeLegal(BVT)) 4875 return SDValue(); 4876 SDValue BVec = DAG.getBitcast(BVT, Op0); 4877 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 4878 DAG.getConstant(0, DL, XLenVT)); 4879 } 4880 return SDValue(); 4881 } 4882 case ISD::INTRINSIC_WO_CHAIN: 4883 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4884 case ISD::INTRINSIC_W_CHAIN: 4885 return LowerINTRINSIC_W_CHAIN(Op, DAG); 4886 case ISD::INTRINSIC_VOID: 4887 return LowerINTRINSIC_VOID(Op, DAG); 4888 case ISD::IS_FPCLASS: 4889 return LowerIS_FPCLASS(Op, DAG); 4890 case ISD::BITREVERSE: { 4891 MVT VT = Op.getSimpleValueType(); 4892 SDLoc DL(Op); 4893 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); 4894 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); 4895 // Expand bitreverse to a bswap(rev8) followed by brev8. 4896 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); 4897 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); 4898 } 4899 case ISD::TRUNCATE: 4900 // Only custom-lower vector truncates 4901 if (!Op.getSimpleValueType().isVector()) 4902 return Op; 4903 return lowerVectorTruncLike(Op, DAG); 4904 case ISD::ANY_EXTEND: 4905 case ISD::ZERO_EXTEND: 4906 if (Op.getOperand(0).getValueType().isVector() && 4907 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 4908 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 4909 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 4910 case ISD::SIGN_EXTEND: 4911 if (Op.getOperand(0).getValueType().isVector() && 4912 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 4913 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 4914 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 4915 case ISD::SPLAT_VECTOR_PARTS: 4916 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 4917 case ISD::INSERT_VECTOR_ELT: 4918 return lowerINSERT_VECTOR_ELT(Op, DAG); 4919 case ISD::EXTRACT_VECTOR_ELT: 4920 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 4921 case ISD::SCALAR_TO_VECTOR: { 4922 MVT VT = Op.getSimpleValueType(); 4923 SDLoc DL(Op); 4924 SDValue Scalar = Op.getOperand(0); 4925 if (VT.getVectorElementType() == MVT::i1) { 4926 MVT WideVT = VT.changeVectorElementType(MVT::i8); 4927 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); 4928 return DAG.getNode(ISD::TRUNCATE, DL, VT, V); 4929 } 4930 MVT ContainerVT = VT; 4931 if (VT.isFixedLengthVector()) 4932 ContainerVT = getContainerForFixedLengthVector(VT); 4933 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 4934 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, 4935 DAG.getUNDEF(ContainerVT), Scalar, VL); 4936 if (VT.isFixedLengthVector()) 4937 V = convertFromScalableVector(VT, V, DAG, Subtarget); 4938 return V; 4939 } 4940 case ISD::VSCALE: { 4941 MVT VT = Op.getSimpleValueType(); 4942 SDLoc DL(Op); 4943 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 4944 // We define our scalable vector types for lmul=1 to use a 64 bit known 4945 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 4946 // vscale as VLENB / 8. 4947 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); 4948 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 4949 report_fatal_error("Support for VLEN==32 is incomplete."); 4950 // We assume VLENB is a multiple of 8. 
We manually choose the best shift 4951 // here because SimplifyDemandedBits isn't always able to simplify it. 4952 uint64_t Val = Op.getConstantOperandVal(0); 4953 if (isPowerOf2_64(Val)) { 4954 uint64_t Log2 = Log2_64(Val); 4955 if (Log2 < 3) 4956 return DAG.getNode(ISD::SRL, DL, VT, VLENB, 4957 DAG.getConstant(3 - Log2, DL, VT)); 4958 if (Log2 > 3) 4959 return DAG.getNode(ISD::SHL, DL, VT, VLENB, 4960 DAG.getConstant(Log2 - 3, DL, VT)); 4961 return VLENB; 4962 } 4963 // If the multiplier is a multiple of 8, scale it down to avoid needing 4964 // to shift the VLENB value. 4965 if ((Val % 8) == 0) 4966 return DAG.getNode(ISD::MUL, DL, VT, VLENB, 4967 DAG.getConstant(Val / 8, DL, VT)); 4968 4969 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 4970 DAG.getConstant(3, DL, VT)); 4971 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 4972 } 4973 case ISD::FPOWI: { 4974 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 4975 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 4976 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 4977 Op.getOperand(1).getValueType() == MVT::i32) { 4978 SDLoc DL(Op); 4979 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 4980 SDValue Powi = 4981 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 4982 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 4983 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 4984 } 4985 return SDValue(); 4986 } 4987 case ISD::FMAXIMUM: 4988 case ISD::FMINIMUM: 4989 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 4990 case ISD::FP_EXTEND: { 4991 SDLoc DL(Op); 4992 EVT VT = Op.getValueType(); 4993 SDValue Op0 = Op.getOperand(0); 4994 EVT Op0VT = Op0.getValueType(); 4995 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) 4996 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 4997 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { 4998 SDValue FloatVal = 4999 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 5000 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); 5001 } 5002 5003 if (!Op.getValueType().isVector()) 5004 return Op; 5005 return lowerVectorFPExtendOrRoundLike(Op, DAG); 5006 } 5007 case ISD::FP_ROUND: { 5008 SDLoc DL(Op); 5009 EVT VT = Op.getValueType(); 5010 SDValue Op0 = Op.getOperand(0); 5011 EVT Op0VT = Op0.getValueType(); 5012 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) 5013 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); 5014 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && 5015 Subtarget.hasStdExtDOrZdinx()) { 5016 SDValue FloatVal = 5017 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, 5018 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 5019 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); 5020 } 5021 5022 if (!Op.getValueType().isVector()) 5023 return Op; 5024 return lowerVectorFPExtendOrRoundLike(Op, DAG); 5025 } 5026 case ISD::STRICT_FP_ROUND: 5027 case ISD::STRICT_FP_EXTEND: 5028 return lowerStrictFPExtendOrRoundLike(Op, DAG); 5029 case ISD::FP_TO_SINT: 5030 case ISD::FP_TO_UINT: 5031 case ISD::SINT_TO_FP: 5032 case ISD::UINT_TO_FP: 5033 case ISD::STRICT_FP_TO_SINT: 5034 case ISD::STRICT_FP_TO_UINT: 5035 case ISD::STRICT_SINT_TO_FP: 5036 case ISD::STRICT_UINT_TO_FP: { 5037 // RVV can only do fp<->int conversions to types half/double the size as 5038 // the source. 
We custom-lower any conversions that do two hops into 5039 // sequences. 5040 MVT VT = Op.getSimpleValueType(); 5041 if (!VT.isVector()) 5042 return Op; 5043 SDLoc DL(Op); 5044 bool IsStrict = Op->isStrictFPOpcode(); 5045 SDValue Src = Op.getOperand(0 + IsStrict); 5046 MVT EltVT = VT.getVectorElementType(); 5047 MVT SrcVT = Src.getSimpleValueType(); 5048 MVT SrcEltVT = SrcVT.getVectorElementType(); 5049 unsigned EltSize = EltVT.getSizeInBits(); 5050 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 5051 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 5052 "Unexpected vector element types"); 5053 5054 bool IsInt2FP = SrcEltVT.isInteger(); 5055 // Widening conversions 5056 if (EltSize > (2 * SrcEltSize)) { 5057 if (IsInt2FP) { 5058 // Do a regular integer sign/zero extension then convert to float. 5059 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), 5060 VT.getVectorElementCount()); 5061 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || 5062 Op.getOpcode() == ISD::STRICT_UINT_TO_FP) 5063 ? ISD::ZERO_EXTEND 5064 : ISD::SIGN_EXTEND; 5065 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 5066 if (IsStrict) 5067 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), 5068 Op.getOperand(0), Ext); 5069 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 5070 } 5071 // FP2Int 5072 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 5073 // Do one doubling fp_extend then complete the operation by converting 5074 // to int. 5075 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 5076 if (IsStrict) { 5077 auto [FExt, Chain] = 5078 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT); 5079 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt); 5080 } 5081 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 5082 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 5083 } 5084 5085 // Narrowing conversions 5086 if (SrcEltSize > (2 * EltSize)) { 5087 if (IsInt2FP) { 5088 // One narrowing int_to_fp, then an fp_round. 5089 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 5090 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 5091 if (IsStrict) { 5092 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, 5093 DAG.getVTList(InterimFVT, MVT::Other), 5094 Op.getOperand(0), Src); 5095 SDValue Chain = Int2FP.getValue(1); 5096 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first; 5097 } 5098 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 5099 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 5100 } 5101 // FP2Int 5102 // One narrowing fp_to_int, then truncate the integer. If the float isn't 5103 // representable by the integer, the result is poison. 5104 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 5105 VT.getVectorElementCount()); 5106 if (IsStrict) { 5107 SDValue FP2Int = 5108 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), 5109 Op.getOperand(0), Src); 5110 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 5111 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL); 5112 } 5113 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 5114 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 5115 } 5116 5117 // Scalable vectors can exit here. Patterns will handle equally-sized 5118 // conversions halving/doubling ones. 5119 if (!VT.isFixedLengthVector()) 5120 return Op; 5121 5122 // For fixed-length vectors we lower to a custom "VL" node. 
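    // Each "VL" node carries an explicit mask and VL (obtained below from
    // getDefaultVLOps for the fixed-length type) and operates on the scalable
    // container type; the result is converted back to the fixed-length type.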
5123 unsigned RVVOpc = 0; 5124 switch (Op.getOpcode()) { 5125 default: 5126 llvm_unreachable("Impossible opcode"); 5127 case ISD::FP_TO_SINT: 5128 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; 5129 break; 5130 case ISD::FP_TO_UINT: 5131 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; 5132 break; 5133 case ISD::SINT_TO_FP: 5134 RVVOpc = RISCVISD::SINT_TO_FP_VL; 5135 break; 5136 case ISD::UINT_TO_FP: 5137 RVVOpc = RISCVISD::UINT_TO_FP_VL; 5138 break; 5139 case ISD::STRICT_FP_TO_SINT: 5140 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; 5141 break; 5142 case ISD::STRICT_FP_TO_UINT: 5143 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; 5144 break; 5145 case ISD::STRICT_SINT_TO_FP: 5146 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; 5147 break; 5148 case ISD::STRICT_UINT_TO_FP: 5149 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; 5150 break; 5151 } 5152 5153 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5154 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 5155 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && 5156 "Expected same element count"); 5157 5158 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5159 5160 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 5161 if (IsStrict) { 5162 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 5163 Op.getOperand(0), Src, Mask, VL); 5164 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget); 5165 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL); 5166 } 5167 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 5168 return convertFromScalableVector(VT, Src, DAG, Subtarget); 5169 } 5170 case ISD::FP_TO_SINT_SAT: 5171 case ISD::FP_TO_UINT_SAT: 5172 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); 5173 case ISD::FP_TO_BF16: { 5174 // Custom lower to ensure the libcall return is passed in an FPR on hard 5175 // float ABIs. 5176 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); 5177 SDLoc DL(Op); 5178 MakeLibCallOptions CallOptions; 5179 RTLIB::Libcall LC = 5180 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); 5181 SDValue Res = 5182 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 5183 if (Subtarget.is64Bit()) 5184 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 5185 return DAG.getBitcast(MVT::i32, Res); 5186 } 5187 case ISD::BF16_TO_FP: { 5188 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); 5189 MVT VT = Op.getSimpleValueType(); 5190 SDLoc DL(Op); 5191 Op = DAG.getNode( 5192 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), 5193 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); 5194 SDValue Res = Subtarget.is64Bit() 5195 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) 5196 : DAG.getBitcast(MVT::f32, Op); 5197 // fp_extend if the target VT is bigger than f32. 5198 if (VT != MVT::f32) 5199 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); 5200 return Res; 5201 } 5202 case ISD::FP_TO_FP16: { 5203 // Custom lower to ensure the libcall return is passed in an FPR on hard 5204 // float ABIs. 
5205 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 5206 SDLoc DL(Op); 5207 MakeLibCallOptions CallOptions; 5208 RTLIB::Libcall LC = 5209 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); 5210 SDValue Res = 5211 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 5212 if (Subtarget.is64Bit()) 5213 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 5214 return DAG.getBitcast(MVT::i32, Res); 5215 } 5216 case ISD::FP16_TO_FP: { 5217 // Custom lower to ensure the libcall argument is passed in an FPR on hard 5218 // float ABIs. 5219 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 5220 SDLoc DL(Op); 5221 MakeLibCallOptions CallOptions; 5222 SDValue Arg = Subtarget.is64Bit() 5223 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, 5224 Op.getOperand(0)) 5225 : DAG.getBitcast(MVT::f32, Op.getOperand(0)); 5226 SDValue Res = 5227 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) 5228 .first; 5229 return Res; 5230 } 5231 case ISD::FTRUNC: 5232 case ISD::FCEIL: 5233 case ISD::FFLOOR: 5234 case ISD::FNEARBYINT: 5235 case ISD::FRINT: 5236 case ISD::FROUND: 5237 case ISD::FROUNDEVEN: 5238 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 5239 case ISD::VECREDUCE_ADD: 5240 case ISD::VECREDUCE_UMAX: 5241 case ISD::VECREDUCE_SMAX: 5242 case ISD::VECREDUCE_UMIN: 5243 case ISD::VECREDUCE_SMIN: 5244 return lowerVECREDUCE(Op, DAG); 5245 case ISD::VECREDUCE_AND: 5246 case ISD::VECREDUCE_OR: 5247 case ISD::VECREDUCE_XOR: 5248 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 5249 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 5250 return lowerVECREDUCE(Op, DAG); 5251 case ISD::VECREDUCE_FADD: 5252 case ISD::VECREDUCE_SEQ_FADD: 5253 case ISD::VECREDUCE_FMIN: 5254 case ISD::VECREDUCE_FMAX: 5255 return lowerFPVECREDUCE(Op, DAG); 5256 case ISD::VP_REDUCE_ADD: 5257 case ISD::VP_REDUCE_UMAX: 5258 case ISD::VP_REDUCE_SMAX: 5259 case ISD::VP_REDUCE_UMIN: 5260 case ISD::VP_REDUCE_SMIN: 5261 case ISD::VP_REDUCE_FADD: 5262 case ISD::VP_REDUCE_SEQ_FADD: 5263 case ISD::VP_REDUCE_FMIN: 5264 case ISD::VP_REDUCE_FMAX: 5265 return lowerVPREDUCE(Op, DAG); 5266 case ISD::VP_REDUCE_AND: 5267 case ISD::VP_REDUCE_OR: 5268 case ISD::VP_REDUCE_XOR: 5269 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 5270 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 5271 return lowerVPREDUCE(Op, DAG); 5272 case ISD::UNDEF: { 5273 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType()); 5274 return convertFromScalableVector(Op.getSimpleValueType(), 5275 DAG.getUNDEF(ContainerVT), DAG, Subtarget); 5276 } 5277 case ISD::INSERT_SUBVECTOR: 5278 return lowerINSERT_SUBVECTOR(Op, DAG); 5279 case ISD::EXTRACT_SUBVECTOR: 5280 return lowerEXTRACT_SUBVECTOR(Op, DAG); 5281 case ISD::VECTOR_DEINTERLEAVE: 5282 return lowerVECTOR_DEINTERLEAVE(Op, DAG); 5283 case ISD::VECTOR_INTERLEAVE: 5284 return lowerVECTOR_INTERLEAVE(Op, DAG); 5285 case ISD::STEP_VECTOR: 5286 return lowerSTEP_VECTOR(Op, DAG); 5287 case ISD::VECTOR_REVERSE: 5288 return lowerVECTOR_REVERSE(Op, DAG); 5289 case ISD::VECTOR_SPLICE: 5290 return lowerVECTOR_SPLICE(Op, DAG); 5291 case ISD::BUILD_VECTOR: 5292 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 5293 case ISD::SPLAT_VECTOR: 5294 if (Op.getValueType().getVectorElementType() == MVT::i1) 5295 return lowerVectorMaskSplat(Op, DAG); 5296 return SDValue(); 5297 case ISD::VECTOR_SHUFFLE: 5298 return lowerVECTOR_SHUFFLE(Op, DAG, 
Subtarget); 5299 case ISD::CONCAT_VECTORS: { 5300 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 5301 // better than going through the stack, as the default expansion does. 5302 SDLoc DL(Op); 5303 MVT VT = Op.getSimpleValueType(); 5304 unsigned NumOpElts = 5305 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 5306 SDValue Vec = DAG.getUNDEF(VT); 5307 for (const auto &OpIdx : enumerate(Op->ops())) { 5308 SDValue SubVec = OpIdx.value(); 5309 // Don't insert undef subvectors. 5310 if (SubVec.isUndef()) 5311 continue; 5312 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, 5313 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 5314 } 5315 return Vec; 5316 } 5317 case ISD::LOAD: 5318 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 5319 return V; 5320 if (Op.getValueType().isFixedLengthVector()) 5321 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 5322 return Op; 5323 case ISD::STORE: 5324 if (auto V = expandUnalignedRVVStore(Op, DAG)) 5325 return V; 5326 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 5327 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 5328 return Op; 5329 case ISD::MLOAD: 5330 case ISD::VP_LOAD: 5331 return lowerMaskedLoad(Op, DAG); 5332 case ISD::MSTORE: 5333 case ISD::VP_STORE: 5334 return lowerMaskedStore(Op, DAG); 5335 case ISD::SELECT_CC: { 5336 // This occurs because we custom legalize SETGT and SETUGT for setcc. That 5337 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand 5338 // into separate SETCC+SELECT just like LegalizeDAG. 5339 SDValue Tmp1 = Op.getOperand(0); 5340 SDValue Tmp2 = Op.getOperand(1); 5341 SDValue True = Op.getOperand(2); 5342 SDValue False = Op.getOperand(3); 5343 EVT VT = Op.getValueType(); 5344 SDValue CC = Op.getOperand(4); 5345 EVT CmpVT = Tmp1.getValueType(); 5346 EVT CCVT = 5347 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); 5348 SDLoc DL(Op); 5349 SDValue Cond = 5350 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); 5351 return DAG.getSelect(DL, VT, Cond, True, False); 5352 } 5353 case ISD::SETCC: { 5354 MVT OpVT = Op.getOperand(0).getSimpleValueType(); 5355 if (OpVT.isScalarInteger()) { 5356 MVT VT = Op.getSimpleValueType(); 5357 SDValue LHS = Op.getOperand(0); 5358 SDValue RHS = Op.getOperand(1); 5359 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 5360 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && 5361 "Unexpected CondCode"); 5362 5363 SDLoc DL(Op); 5364 5365 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can 5366 // convert this to the equivalent of (set(u)ge X, C+1) by using 5367 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant 5368 // in a register. 5369 if (isa<ConstantSDNode>(RHS)) { 5370 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); 5371 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { 5372 // If this is an unsigned compare and the constant is -1, incrementing 5373 // the constant would change behavior. The result should be false. 5374 if (CCVal == ISD::SETUGT && Imm == -1) 5375 return DAG.getConstant(0, DL, VT); 5376 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. 5377 CCVal = ISD::getSetCCSwappedOperands(CCVal); 5378 SDValue SetCC = DAG.getSetCC( 5379 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); 5380 return DAG.getLogicalNOT(DL, SetCC, VT); 5381 } 5382 } 5383 5384 // Not a constant we could handle, swap the operands and condition code to 5385 // SETLT/SETULT. 
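      // e.g. (setgt X, Y) becomes (setlt Y, X), which maps directly onto
      // slt/sltu.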
5386 CCVal = ISD::getSetCCSwappedOperands(CCVal); 5387 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); 5388 } 5389 5390 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 5391 } 5392 case ISD::ADD: 5393 case ISD::SUB: 5394 case ISD::MUL: 5395 case ISD::MULHS: 5396 case ISD::MULHU: 5397 case ISD::AND: 5398 case ISD::OR: 5399 case ISD::XOR: 5400 case ISD::SDIV: 5401 case ISD::SREM: 5402 case ISD::UDIV: 5403 case ISD::UREM: 5404 return lowerToScalableOp(Op, DAG); 5405 case ISD::SHL: 5406 case ISD::SRA: 5407 case ISD::SRL: 5408 if (Op.getSimpleValueType().isFixedLengthVector()) 5409 return lowerToScalableOp(Op, DAG); 5410 // This can be called for an i32 shift amount that needs to be promoted. 5411 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 5412 "Unexpected custom legalisation"); 5413 return SDValue(); 5414 case ISD::SADDSAT: 5415 case ISD::UADDSAT: 5416 case ISD::SSUBSAT: 5417 case ISD::USUBSAT: 5418 case ISD::FADD: 5419 case ISD::FSUB: 5420 case ISD::FMUL: 5421 case ISD::FDIV: 5422 case ISD::FNEG: 5423 case ISD::FABS: 5424 case ISD::FSQRT: 5425 case ISD::FMA: 5426 case ISD::SMIN: 5427 case ISD::SMAX: 5428 case ISD::UMIN: 5429 case ISD::UMAX: 5430 case ISD::FMINNUM: 5431 case ISD::FMAXNUM: 5432 return lowerToScalableOp(Op, DAG); 5433 case ISD::ABS: 5434 case ISD::VP_ABS: 5435 return lowerABS(Op, DAG); 5436 case ISD::CTLZ: 5437 case ISD::CTLZ_ZERO_UNDEF: 5438 case ISD::CTTZ_ZERO_UNDEF: 5439 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 5440 case ISD::VSELECT: 5441 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 5442 case ISD::FCOPYSIGN: 5443 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 5444 case ISD::STRICT_FADD: 5445 case ISD::STRICT_FSUB: 5446 case ISD::STRICT_FMUL: 5447 case ISD::STRICT_FDIV: 5448 case ISD::STRICT_FSQRT: 5449 case ISD::STRICT_FMA: 5450 return lowerToScalableOp(Op, DAG); 5451 case ISD::STRICT_FSETCC: 5452 case ISD::STRICT_FSETCCS: 5453 return lowerVectorStrictFSetcc(Op, DAG); 5454 case ISD::STRICT_FCEIL: 5455 case ISD::STRICT_FRINT: 5456 case ISD::STRICT_FFLOOR: 5457 case ISD::STRICT_FTRUNC: 5458 case ISD::STRICT_FNEARBYINT: 5459 case ISD::STRICT_FROUND: 5460 case ISD::STRICT_FROUNDEVEN: 5461 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 5462 case ISD::MGATHER: 5463 case ISD::VP_GATHER: 5464 return lowerMaskedGather(Op, DAG); 5465 case ISD::MSCATTER: 5466 case ISD::VP_SCATTER: 5467 return lowerMaskedScatter(Op, DAG); 5468 case ISD::GET_ROUNDING: 5469 return lowerGET_ROUNDING(Op, DAG); 5470 case ISD::SET_ROUNDING: 5471 return lowerSET_ROUNDING(Op, DAG); 5472 case ISD::EH_DWARF_CFA: 5473 return lowerEH_DWARF_CFA(Op, DAG); 5474 case ISD::VP_SELECT: 5475 return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); 5476 case ISD::VP_MERGE: 5477 return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); 5478 case ISD::VP_ADD: 5479 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true); 5480 case ISD::VP_SUB: 5481 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true); 5482 case ISD::VP_MUL: 5483 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true); 5484 case ISD::VP_SDIV: 5485 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true); 5486 case ISD::VP_UDIV: 5487 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true); 5488 case ISD::VP_SREM: 5489 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true); 5490 case ISD::VP_UREM: 5491 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true); 5492 case ISD::VP_AND: 5493 return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, 
RISCVISD::AND_VL); 5494 case ISD::VP_OR: 5495 return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); 5496 case ISD::VP_XOR: 5497 return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); 5498 case ISD::VP_ASHR: 5499 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true); 5500 case ISD::VP_LSHR: 5501 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true); 5502 case ISD::VP_SHL: 5503 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true); 5504 case ISD::VP_FADD: 5505 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true); 5506 case ISD::VP_FSUB: 5507 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true); 5508 case ISD::VP_FMUL: 5509 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true); 5510 case ISD::VP_FDIV: 5511 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true); 5512 case ISD::VP_FNEG: 5513 return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL); 5514 case ISD::VP_FABS: 5515 return lowerVPOp(Op, DAG, RISCVISD::FABS_VL); 5516 case ISD::VP_SQRT: 5517 return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL); 5518 case ISD::VP_FMA: 5519 return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL); 5520 case ISD::VP_FMINNUM: 5521 return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true); 5522 case ISD::VP_FMAXNUM: 5523 return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true); 5524 case ISD::VP_FCOPYSIGN: 5525 return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true); 5526 case ISD::VP_SIGN_EXTEND: 5527 case ISD::VP_ZERO_EXTEND: 5528 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 5529 return lowerVPExtMaskOp(Op, DAG); 5530 return lowerVPOp(Op, DAG, 5531 Op.getOpcode() == ISD::VP_SIGN_EXTEND 5532 ? RISCVISD::VSEXT_VL 5533 : RISCVISD::VZEXT_VL); 5534 case ISD::VP_TRUNCATE: 5535 return lowerVectorTruncLike(Op, DAG); 5536 case ISD::VP_FP_EXTEND: 5537 case ISD::VP_FP_ROUND: 5538 return lowerVectorFPExtendOrRoundLike(Op, DAG); 5539 case ISD::VP_FP_TO_SINT: 5540 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL); 5541 case ISD::VP_FP_TO_UINT: 5542 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL); 5543 case ISD::VP_SINT_TO_FP: 5544 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL); 5545 case ISD::VP_UINT_TO_FP: 5546 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL); 5547 case ISD::VP_SETCC: 5548 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 5549 return lowerVPSetCCMaskOp(Op, DAG); 5550 return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true); 5551 case ISD::VP_SMIN: 5552 return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true); 5553 case ISD::VP_SMAX: 5554 return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true); 5555 case ISD::VP_UMIN: 5556 return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true); 5557 case ISD::VP_UMAX: 5558 return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true); 5559 case ISD::VP_BITREVERSE: 5560 return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true); 5561 case ISD::VP_BSWAP: 5562 return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true); 5563 case ISD::VP_CTLZ: 5564 case ISD::VP_CTLZ_ZERO_UNDEF: 5565 if (Subtarget.hasStdExtZvbb()) 5566 return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true); 5567 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 5568 case ISD::VP_CTTZ: 5569 case ISD::VP_CTTZ_ZERO_UNDEF: 5570 if (Subtarget.hasStdExtZvbb()) 5571 return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, 
/*HasMergeOp*/ true); 5572 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 5573 case ISD::VP_CTPOP: 5574 return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true); 5575 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 5576 return lowerVPStridedLoad(Op, DAG); 5577 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 5578 return lowerVPStridedStore(Op, DAG); 5579 case ISD::VP_FCEIL: 5580 case ISD::VP_FFLOOR: 5581 case ISD::VP_FRINT: 5582 case ISD::VP_FNEARBYINT: 5583 case ISD::VP_FROUND: 5584 case ISD::VP_FROUNDEVEN: 5585 case ISD::VP_FROUNDTOZERO: 5586 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 5587 } 5588 } 5589 5590 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, 5591 SelectionDAG &DAG, unsigned Flags) { 5592 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 5593 } 5594 5595 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, 5596 SelectionDAG &DAG, unsigned Flags) { 5597 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 5598 Flags); 5599 } 5600 5601 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, 5602 SelectionDAG &DAG, unsigned Flags) { 5603 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 5604 N->getOffset(), Flags); 5605 } 5606 5607 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, 5608 SelectionDAG &DAG, unsigned Flags) { 5609 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 5610 } 5611 5612 template <class NodeTy> 5613 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 5614 bool IsLocal, bool IsExternWeak) const { 5615 SDLoc DL(N); 5616 EVT Ty = getPointerTy(DAG.getDataLayout()); 5617 5618 // When HWASAN is used and tagging of global variables is enabled 5619 // they should be accessed via the GOT, since the tagged address of a global 5620 // is incompatible with existing code models. This also applies to non-pic 5621 // mode. 5622 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { 5623 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 5624 if (IsLocal && !Subtarget.allowTaggedGlobals()) 5625 // Use PC-relative addressing to access the symbol. This generates the 5626 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 5627 // %pcrel_lo(auipc)). 5628 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 5629 5630 // Use PC-relative addressing to access the GOT for this symbol, then load 5631 // the address from the GOT. This generates the pattern (PseudoLGA sym), 5632 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 5633 MachineFunction &MF = DAG.getMachineFunction(); 5634 MachineMemOperand *MemOp = MF.getMachineMemOperand( 5635 MachinePointerInfo::getGOT(MF), 5636 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 5637 MachineMemOperand::MOInvariant, 5638 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 5639 SDValue Load = 5640 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other), 5641 {DAG.getEntryNode(), Addr}, Ty, MemOp); 5642 return Load; 5643 } 5644 5645 switch (getTargetMachine().getCodeModel()) { 5646 default: 5647 report_fatal_error("Unsupported code model for lowering"); 5648 case CodeModel::Small: { 5649 // Generate a sequence for accessing addresses within the first 2 GiB of 5650 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
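    // For a symbol sym this is, roughly (register choice illustrative):
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)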
5651 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 5652 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 5653 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 5654 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); 5655 } 5656 case CodeModel::Medium: { 5657 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 5658 if (IsExternWeak) { 5659 // An extern weak symbol may be undefined, i.e. have value 0, which may 5660 // not be within 2GiB of PC, so use GOT-indirect addressing to access the 5661 // symbol. This generates the pattern (PseudoLGA sym), which expands to 5662 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 5663 MachineFunction &MF = DAG.getMachineFunction(); 5664 MachineMemOperand *MemOp = MF.getMachineMemOperand( 5665 MachinePointerInfo::getGOT(MF), 5666 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 5667 MachineMemOperand::MOInvariant, 5668 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 5669 SDValue Load = 5670 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, 5671 DAG.getVTList(Ty, MVT::Other), 5672 {DAG.getEntryNode(), Addr}, Ty, MemOp); 5673 return Load; 5674 } 5675 5676 // Generate a sequence for accessing addresses within any 2GiB range within 5677 // the address space. This generates the pattern (PseudoLLA sym), which 5678 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 5679 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 5680 } 5681 } 5682 } 5683 5684 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 5685 SelectionDAG &DAG) const { 5686 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 5687 assert(N->getOffset() == 0 && "unexpected offset in global node"); 5688 const GlobalValue *GV = N->getGlobal(); 5689 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage()); 5690 } 5691 5692 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 5693 SelectionDAG &DAG) const { 5694 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 5695 5696 return getAddr(N, DAG); 5697 } 5698 5699 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 5700 SelectionDAG &DAG) const { 5701 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 5702 5703 return getAddr(N, DAG); 5704 } 5705 5706 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 5707 SelectionDAG &DAG) const { 5708 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 5709 5710 return getAddr(N, DAG); 5711 } 5712 5713 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 5714 SelectionDAG &DAG, 5715 bool UseGOT) const { 5716 SDLoc DL(N); 5717 EVT Ty = getPointerTy(DAG.getDataLayout()); 5718 const GlobalValue *GV = N->getGlobal(); 5719 MVT XLenVT = Subtarget.getXLenVT(); 5720 5721 if (UseGOT) { 5722 // Use PC-relative addressing to access the GOT for this TLS symbol, then 5723 // load the address from the GOT and add the thread pointer. This generates 5724 // the pattern (PseudoLA_TLS_IE sym), which expands to 5725 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 
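    // i.e., roughly (label and register choice illustrative):
    //   .Ltls: auipc a0, %tls_ie_pcrel_hi(sym)
    //          ld    a0, %pcrel_lo(.Ltls)(a0)   (lw on RV32)
    //          add   a0, a0, tp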
5726 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 5727 MachineFunction &MF = DAG.getMachineFunction(); 5728 MachineMemOperand *MemOp = MF.getMachineMemOperand( 5729 MachinePointerInfo::getGOT(MF), 5730 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 5731 MachineMemOperand::MOInvariant, 5732 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 5733 SDValue Load = DAG.getMemIntrinsicNode( 5734 RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other), 5735 {DAG.getEntryNode(), Addr}, Ty, MemOp); 5736 5737 // Add the thread pointer. 5738 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 5739 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 5740 } 5741 5742 // Generate a sequence for accessing the address relative to the thread 5743 // pointer, with the appropriate adjustment for the thread pointer offset. 5744 // This generates the pattern 5745 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 5746 SDValue AddrHi = 5747 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 5748 SDValue AddrAdd = 5749 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 5750 SDValue AddrLo = 5751 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 5752 5753 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 5754 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 5755 SDValue MNAdd = 5756 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); 5757 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); 5758 } 5759 5760 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 5761 SelectionDAG &DAG) const { 5762 SDLoc DL(N); 5763 EVT Ty = getPointerTy(DAG.getDataLayout()); 5764 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 5765 const GlobalValue *GV = N->getGlobal(); 5766 5767 // Use a PC-relative addressing mode to access the global dynamic GOT address. 5768 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 5769 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 5770 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 5771 SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr); 5772 5773 // Prepare argument list to generate call. 5774 ArgListTy Args; 5775 ArgListEntry Entry; 5776 Entry.Node = Load; 5777 Entry.Ty = CallTy; 5778 Args.push_back(Entry); 5779 5780 // Setup call to __tls_get_addr. 
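  // The GOT address computed above (PseudoLA_TLS_GD) is passed as the single
  // argument; __tls_get_addr returns the address of the thread-local symbol.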
5781 TargetLowering::CallLoweringInfo CLI(DAG); 5782 CLI.setDebugLoc(DL) 5783 .setChain(DAG.getEntryNode()) 5784 .setLibCallee(CallingConv::C, CallTy, 5785 DAG.getExternalSymbol("__tls_get_addr", Ty), 5786 std::move(Args)); 5787 5788 return LowerCallTo(CLI).first; 5789 } 5790 5791 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 5792 SelectionDAG &DAG) const { 5793 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 5794 assert(N->getOffset() == 0 && "unexpected offset in global node"); 5795 5796 if (DAG.getTarget().useEmulatedTLS()) 5797 return LowerToTLSEmulatedModel(N, DAG); 5798 5799 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 5800 5801 if (DAG.getMachineFunction().getFunction().getCallingConv() == 5802 CallingConv::GHC) 5803 report_fatal_error("In GHC calling convention TLS is not supported"); 5804 5805 SDValue Addr; 5806 switch (Model) { 5807 case TLSModel::LocalExec: 5808 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 5809 break; 5810 case TLSModel::InitialExec: 5811 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 5812 break; 5813 case TLSModel::LocalDynamic: 5814 case TLSModel::GeneralDynamic: 5815 Addr = getDynamicTLSAddr(N, DAG); 5816 break; 5817 } 5818 5819 return Addr; 5820 } 5821 5822 // Return true if Val is equal to (setcc LHS, RHS, CC). 5823 // Return false if Val is the inverse of (setcc LHS, RHS, CC). 5824 // Otherwise, return std::nullopt. 5825 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, 5826 ISD::CondCode CC, SDValue Val) { 5827 assert(Val->getOpcode() == ISD::SETCC); 5828 SDValue LHS2 = Val.getOperand(0); 5829 SDValue RHS2 = Val.getOperand(1); 5830 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get(); 5831 5832 if (LHS == LHS2 && RHS == RHS2) { 5833 if (CC == CC2) 5834 return true; 5835 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 5836 return false; 5837 } else if (LHS == RHS2 && RHS == LHS2) { 5838 CC2 = ISD::getSetCCSwappedOperands(CC2); 5839 if (CC == CC2) 5840 return true; 5841 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 5842 return false; 5843 } 5844 5845 return std::nullopt; 5846 } 5847 5848 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, 5849 const RISCVSubtarget &Subtarget) { 5850 SDValue CondV = N->getOperand(0); 5851 SDValue TrueV = N->getOperand(1); 5852 SDValue FalseV = N->getOperand(2); 5853 MVT VT = N->getSimpleValueType(0); 5854 SDLoc DL(N); 5855 5856 if (!Subtarget.hasShortForwardBranchOpt()) { 5857 // (select c, -1, y) -> -c | y 5858 if (isAllOnesConstant(TrueV)) { 5859 SDValue Neg = DAG.getNegative(CondV, DL, VT); 5860 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 5861 } 5862 // (select c, y, -1) -> (c-1) | y 5863 if (isAllOnesConstant(FalseV)) { 5864 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 5865 DAG.getAllOnesConstant(DL, VT)); 5866 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 5867 } 5868 5869 // (select c, 0, y) -> (c-1) & y 5870 if (isNullConstant(TrueV)) { 5871 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 5872 DAG.getAllOnesConstant(DL, VT)); 5873 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 5874 } 5875 // (select c, y, 0) -> -c & y 5876 if (isNullConstant(FalseV)) { 5877 SDValue Neg = DAG.getNegative(CondV, DL, VT); 5878 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 5879 } 5880 } 5881 5882 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops 5883 // when both truev and falsev are also setcc. 
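  // e.g. (select (seteq a, b), (seteq a, b), (setlt c, d))
  //        -> (or (seteq a, b), (setlt c, d))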
5884 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && 5885 FalseV.getOpcode() == ISD::SETCC) { 5886 SDValue LHS = CondV.getOperand(0); 5887 SDValue RHS = CondV.getOperand(1); 5888 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 5889 5890 // (select x, x, y) -> x | y 5891 // (select !x, x, y) -> x & y 5892 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { 5893 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, 5894 FalseV); 5895 } 5896 // (select x, y, x) -> x & y 5897 // (select !x, y, x) -> x | y 5898 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { 5899 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, 5900 FalseV); 5901 } 5902 } 5903 5904 return SDValue(); 5905 } 5906 5907 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 5908 /// check for equality with 0. This function emits nodes that convert the 5909 /// seteq/setne into something that can be compared with 0. 5910 /// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce 5911 /// target-independent SelectionDAG nodes rather than machine nodes. 5912 static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 5913 SelectionDAG &DAG) { 5914 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 5915 "Unexpected condition code!"); 5916 5917 // We're looking for a setcc. 5918 if (N->getOpcode() != ISD::SETCC) 5919 return SDValue(); 5920 5921 // Must be an equality comparison. 5922 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 5923 if (CCVal != ExpectedCCVal) 5924 return SDValue(); 5925 5926 SDValue LHS = N->getOperand(0); 5927 SDValue RHS = N->getOperand(1); 5928 5929 if (!LHS.getValueType().isScalarInteger()) 5930 return SDValue(); 5931 5932 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 5933 if (isNullConstant(RHS)) 5934 return LHS; 5935 5936 SDLoc DL(N); 5937 5938 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 5939 int64_t CVal = C->getSExtValue(); 5940 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 5941 // non-zero otherwise. 5942 if (CVal == -2048) 5943 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, 5944 DAG.getConstant(CVal, DL, N->getValueType(0))); 5945 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 5946 // LHS is equal to the RHS and non-zero otherwise. 5947 if (isInt<12>(CVal) || CVal == 2048) 5948 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS, 5949 DAG.getConstant(-CVal, DL, N->getValueType(0))); 5950 } 5951 5952 // If nothing else we can XOR the LHS and RHS to produce zero if they are 5953 // equal and a non-zero value if they aren't. 5954 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS); 5955 } 5956 5957 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants 5958 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. 5959 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up 5960 // being `0` or `-1`. In such cases we can replace `select` with `and`. 5961 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize 5962 // than `c0`? 
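// For example, (add (select cond, x, -1), 1) becomes
// (select cond, (add x, 1), 0); with a false arm of 0 the select can then be
// turned into an AND by combineSelectToBinOp ((select c, y, 0) -> -c & y).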
5963 static SDValue 5964 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, 5965 const RISCVSubtarget &Subtarget) { 5966 if (Subtarget.hasShortForwardBranchOpt()) 5967 return SDValue(); 5968 5969 unsigned SelOpNo = 0; 5970 SDValue Sel = BO->getOperand(0); 5971 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { 5972 SelOpNo = 1; 5973 Sel = BO->getOperand(1); 5974 } 5975 5976 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) 5977 return SDValue(); 5978 5979 unsigned ConstSelOpNo = 1; 5980 unsigned OtherSelOpNo = 2; 5981 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) { 5982 ConstSelOpNo = 2; 5983 OtherSelOpNo = 1; 5984 } 5985 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo); 5986 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp); 5987 if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) 5988 return SDValue(); 5989 5990 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1); 5991 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp); 5992 if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) 5993 return SDValue(); 5994 5995 SDLoc DL(Sel); 5996 EVT VT = BO->getValueType(0); 5997 5998 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; 5999 if (SelOpNo == 1) 6000 std::swap(NewConstOps[0], NewConstOps[1]); 6001 6002 SDValue NewConstOp = 6003 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps); 6004 if (!NewConstOp) 6005 return SDValue(); 6006 6007 const APInt &NewConstAPInt = 6008 cast<ConstantSDNode>(NewConstOp)->getAPIntValue(); 6009 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) 6010 return SDValue(); 6011 6012 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo); 6013 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; 6014 if (SelOpNo == 1) 6015 std::swap(NewNonConstOps[0], NewNonConstOps[1]); 6016 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps); 6017 6018 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; 6019 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp; 6020 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF); 6021 } 6022 6023 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 6024 SDValue CondV = Op.getOperand(0); 6025 SDValue TrueV = Op.getOperand(1); 6026 SDValue FalseV = Op.getOperand(2); 6027 SDLoc DL(Op); 6028 MVT VT = Op.getSimpleValueType(); 6029 MVT XLenVT = Subtarget.getXLenVT(); 6030 6031 // Lower vector SELECTs to VSELECTs by splatting the condition. 6032 if (VT.isVector()) { 6033 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 6034 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); 6035 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 6036 } 6037 6038 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ 6039 // nodes to implement the SELECT. Performing the lowering here allows for 6040 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless 6041 // sequence or RISCVISD::SELECT_CC node (branch-based select). 
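// For example (roughly, with Zicond), (select c, a, b) can be emitted as:
//   czero.eqz t0, a, c    ; t0 = (c != 0) ? a : 0
//   czero.nez t1, b, c    ; t1 = (c == 0) ? b : 0
//   or        rd, t0, t1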
6042 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && 6043 VT.isScalarInteger()) { 6044 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) { 6045 // (select (riscv_setne c), t, 0) -> (czero_eqz t, c) 6046 if (isNullConstant(FalseV)) 6047 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV); 6048 // (select (riscv_setne c), 0, f) -> (czero_nez f, c) 6049 if (isNullConstant(TrueV)) 6050 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV); 6051 // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f, 6052 // c) 6053 return DAG.getNode( 6054 ISD::OR, DL, VT, 6055 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV), 6056 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV)); 6057 } 6058 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) { 6059 // (select (riscv_seteq c), t, 0) -> (czero_nez t, c) 6060 if (isNullConstant(FalseV)) 6061 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV); 6062 // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c) 6063 if (isNullConstant(TrueV)) 6064 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV); 6065 // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t, 6066 // c) 6067 return DAG.getNode( 6068 ISD::OR, DL, VT, 6069 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV), 6070 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV)); 6071 } 6072 6073 // (select c, t, 0) -> (czero_eqz t, c) 6074 if (isNullConstant(FalseV)) 6075 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); 6076 // (select c, 0, f) -> (czero_nez f, c) 6077 if (isNullConstant(TrueV)) 6078 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV); 6079 6080 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) 6081 if (TrueV.getOpcode() == ISD::AND && 6082 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) 6083 return DAG.getNode( 6084 ISD::OR, DL, VT, TrueV, 6085 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 6086 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) 6087 if (FalseV.getOpcode() == ISD::AND && 6088 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) 6089 return DAG.getNode( 6090 ISD::OR, DL, VT, FalseV, 6091 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); 6092 6093 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) 6094 return DAG.getNode(ISD::OR, DL, VT, 6095 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), 6096 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 6097 } 6098 6099 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 6100 return V; 6101 6102 if (Op.hasOneUse()) { 6103 unsigned UseOpc = Op->use_begin()->getOpcode(); 6104 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { 6105 SDNode *BinOp = *Op->use_begin(); 6106 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(), 6107 DAG, Subtarget)) { 6108 DAG.ReplaceAllUsesWith(BinOp, &NewSel); 6109 return lowerSELECT(NewSel, DAG); 6110 } 6111 } 6112 } 6113 6114 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) 6115 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) 6116 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV); 6117 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV); 6118 if (FPTV && FPFV) { 6119 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0)) 6120 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV); 6121 if (FPTV->isExactlyValue(0.0) && 
FPFV->isExactlyValue(1.0)) { 6122 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV, 6123 DAG.getConstant(1, DL, XLenVT)); 6124 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR); 6125 } 6126 } 6127 6128 // If the condition is not an integer SETCC which operates on XLenVT, we need 6129 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: 6130 // (select condv, truev, falsev) 6131 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 6132 if (CondV.getOpcode() != ISD::SETCC || 6133 CondV.getOperand(0).getSimpleValueType() != XLenVT) { 6134 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 6135 SDValue SetNE = DAG.getCondCode(ISD::SETNE); 6136 6137 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 6138 6139 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 6140 } 6141 6142 // If the CondV is the output of a SETCC node which operates on XLenVT inputs, 6143 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take 6144 // advantage of the integer compare+branch instructions. i.e.: 6145 // (select (setcc lhs, rhs, cc), truev, falsev) 6146 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 6147 SDValue LHS = CondV.getOperand(0); 6148 SDValue RHS = CondV.getOperand(1); 6149 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 6150 6151 // Special case for a select of 2 constants that have a diffence of 1. 6152 // Normally this is done by DAGCombine, but if the select is introduced by 6153 // type legalization or op legalization, we miss it. Restricting to SETLT 6154 // case for now because that is what signed saturating add/sub need. 6155 // FIXME: We don't need the condition to be SETLT or even a SETCC, 6156 // but we would probably want to swap the true/false values if the condition 6157 // is SETGE/SETLE to avoid an XORI. 6158 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && 6159 CCVal == ISD::SETLT) { 6160 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); 6161 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); 6162 if (TrueVal - 1 == FalseVal) 6163 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); 6164 if (TrueVal + 1 == FalseVal) 6165 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV); 6166 } 6167 6168 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 6169 // 1 < x ? x : 1 -> 0 < x ? x : 1 6170 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && 6171 RHS == TrueV && LHS == FalseV) { 6172 LHS = DAG.getConstant(0, DL, VT); 6173 // 0 <u x is the same as x != 0. 6174 if (CCVal == ISD::SETULT) { 6175 std::swap(LHS, RHS); 6176 CCVal = ISD::SETNE; 6177 } 6178 } 6179 6180 // x <s -1 ? x : -1 -> x <s 0 ? 
x : -1 6181 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && 6182 RHS == FalseV) { 6183 RHS = DAG.getConstant(0, DL, VT); 6184 } 6185 6186 SDValue TargetCC = DAG.getCondCode(CCVal); 6187 6188 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { 6189 // (select (setcc lhs, rhs, CC), constant, falsev) 6190 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) 6191 std::swap(TrueV, FalseV); 6192 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); 6193 } 6194 6195 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 6196 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 6197 } 6198 6199 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 6200 SDValue CondV = Op.getOperand(1); 6201 SDLoc DL(Op); 6202 MVT XLenVT = Subtarget.getXLenVT(); 6203 6204 if (CondV.getOpcode() == ISD::SETCC && 6205 CondV.getOperand(0).getValueType() == XLenVT) { 6206 SDValue LHS = CondV.getOperand(0); 6207 SDValue RHS = CondV.getOperand(1); 6208 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 6209 6210 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 6211 6212 SDValue TargetCC = DAG.getCondCode(CCVal); 6213 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 6214 LHS, RHS, TargetCC, Op.getOperand(2)); 6215 } 6216 6217 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 6218 CondV, DAG.getConstant(0, DL, XLenVT), 6219 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 6220 } 6221 6222 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 6223 MachineFunction &MF = DAG.getMachineFunction(); 6224 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 6225 6226 SDLoc DL(Op); 6227 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 6228 getPointerTy(MF.getDataLayout())); 6229 6230 // vastart just stores the address of the VarArgsFrameIndex slot into the 6231 // memory location argument. 
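// (Illustrative: for va_start(ap) this becomes a single XLEN-sized store of
// the frame-index address into the va_list object 'ap'.)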
6232 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 6233 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 6234 MachinePointerInfo(SV)); 6235 } 6236 6237 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 6238 SelectionDAG &DAG) const { 6239 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 6240 MachineFunction &MF = DAG.getMachineFunction(); 6241 MachineFrameInfo &MFI = MF.getFrameInfo(); 6242 MFI.setFrameAddressIsTaken(true); 6243 Register FrameReg = RI.getFrameRegister(MF); 6244 int XLenInBytes = Subtarget.getXLen() / 8; 6245 6246 EVT VT = Op.getValueType(); 6247 SDLoc DL(Op); 6248 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 6249 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 6250 while (Depth--) { 6251 int Offset = -(XLenInBytes * 2); 6252 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 6253 DAG.getIntPtrConstant(Offset, DL)); 6254 FrameAddr = 6255 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 6256 } 6257 return FrameAddr; 6258 } 6259 6260 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 6261 SelectionDAG &DAG) const { 6262 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 6263 MachineFunction &MF = DAG.getMachineFunction(); 6264 MachineFrameInfo &MFI = MF.getFrameInfo(); 6265 MFI.setReturnAddressIsTaken(true); 6266 MVT XLenVT = Subtarget.getXLenVT(); 6267 int XLenInBytes = Subtarget.getXLen() / 8; 6268 6269 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 6270 return SDValue(); 6271 6272 EVT VT = Op.getValueType(); 6273 SDLoc DL(Op); 6274 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 6275 if (Depth) { 6276 int Off = -XLenInBytes; 6277 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 6278 SDValue Offset = DAG.getConstant(Off, DL, VT); 6279 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 6280 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 6281 MachinePointerInfo()); 6282 } 6283 6284 // Return the value of the return address register, marking it an implicit 6285 // live-in. 
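// (On RISC-V the return address register is x1, i.e. 'ra'.)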
6286 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 6287 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 6288 } 6289 6290 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 6291 SelectionDAG &DAG) const { 6292 SDLoc DL(Op); 6293 SDValue Lo = Op.getOperand(0); 6294 SDValue Hi = Op.getOperand(1); 6295 SDValue Shamt = Op.getOperand(2); 6296 EVT VT = Lo.getValueType(); 6297 6298 // if Shamt-XLEN < 0: // Shamt < XLEN 6299 // Lo = Lo << Shamt 6300 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt)) 6301 // else: 6302 // Lo = 0 6303 // Hi = Lo << (Shamt-XLEN) 6304 6305 SDValue Zero = DAG.getConstant(0, DL, VT); 6306 SDValue One = DAG.getConstant(1, DL, VT); 6307 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 6308 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 6309 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 6310 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 6311 6312 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 6313 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 6314 SDValue ShiftRightLo = 6315 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 6316 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 6317 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 6318 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 6319 6320 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 6321 6322 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 6323 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 6324 6325 SDValue Parts[2] = {Lo, Hi}; 6326 return DAG.getMergeValues(Parts, DL); 6327 } 6328 6329 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 6330 bool IsSRA) const { 6331 SDLoc DL(Op); 6332 SDValue Lo = Op.getOperand(0); 6333 SDValue Hi = Op.getOperand(1); 6334 SDValue Shamt = Op.getOperand(2); 6335 EVT VT = Lo.getValueType(); 6336 6337 // SRA expansion: 6338 // if Shamt-XLEN < 0: // Shamt < XLEN 6339 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) 6340 // Hi = Hi >>s Shamt 6341 // else: 6342 // Lo = Hi >>s (Shamt-XLEN); 6343 // Hi = Hi >>s (XLEN-1) 6344 // 6345 // SRL expansion: 6346 // if Shamt-XLEN < 0: // Shamt < XLEN 6347 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) 6348 // Hi = Hi >>u Shamt 6349 // else: 6350 // Lo = Hi >>u (Shamt-XLEN); 6351 // Hi = 0; 6352 6353 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 6354 6355 SDValue Zero = DAG.getConstant(0, DL, VT); 6356 SDValue One = DAG.getConstant(1, DL, VT); 6357 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 6358 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 6359 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 6360 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 6361 6362 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 6363 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 6364 SDValue ShiftLeftHi = 6365 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 6366 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 6367 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 6368 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 6369 SDValue HiFalse = 6370 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 6371 6372 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 6373 6374 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 6375 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 6376 6377 SDValue Parts[2] = {Lo, Hi}; 6378 return DAG.getMergeValues(Parts, DL); 6379 } 6380 6381 // Lower splats of i1 types to SETCC. For each mask vector type, we have a 6382 // legal equivalently-sized i8 type, so we can use that as a go-between. 6383 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 6384 SelectionDAG &DAG) const { 6385 SDLoc DL(Op); 6386 MVT VT = Op.getSimpleValueType(); 6387 SDValue SplatVal = Op.getOperand(0); 6388 // All-zeros or all-ones splats are handled specially. 6389 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 6390 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 6391 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 6392 } 6393 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 6394 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 6395 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 6396 } 6397 MVT XLenVT = Subtarget.getXLenVT(); 6398 assert(SplatVal.getValueType() == XLenVT && 6399 "Unexpected type for i1 splat value"); 6400 MVT InterVT = VT.changeVectorElementType(MVT::i8); 6401 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal, 6402 DAG.getConstant(1, DL, XLenVT)); 6403 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 6404 SDValue Zero = DAG.getConstant(0, DL, InterVT); 6405 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 6406 } 6407 6408 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 6409 // illegal (currently only vXi64 RV32). 6410 // FIXME: We could also catch non-constant sign-extended i32 values and lower 6411 // them to VMV_V_X_VL. 6412 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 6413 SelectionDAG &DAG) const { 6414 SDLoc DL(Op); 6415 MVT VecVT = Op.getSimpleValueType(); 6416 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 6417 "Unexpected SPLAT_VECTOR_PARTS lowering"); 6418 6419 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 6420 SDValue Lo = Op.getOperand(0); 6421 SDValue Hi = Op.getOperand(1); 6422 6423 if (VecVT.isFixedLengthVector()) { 6424 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 6425 SDLoc DL(Op); 6426 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 6427 6428 SDValue Res = 6429 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); 6430 return convertFromScalableVector(VecVT, Res, DAG, Subtarget); 6431 } 6432 6433 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 6434 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 6435 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 6436 // If Hi constant is all the same sign bit as Lo, lower this as a custom 6437 // node in order to try and match RVV vector/scalar instructions. 6438 if ((LoC >> 31) == HiC) 6439 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 6440 Lo, DAG.getRegister(RISCV::X0, MVT::i32)); 6441 } 6442 6443 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 
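// Illustrative: a splat of a sign-extended i32 value x arrives here as
// (splat_vector_parts x, (sra x, 31)); only the low half is needed, so the
// vmv.v.x form below suffices.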
6444 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 6445 isa<ConstantSDNode>(Hi.getOperand(1)) && 6446 Hi.getConstantOperandVal(1) == 31) 6447 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo, 6448 DAG.getRegister(RISCV::X0, MVT::i32)); 6449 6450 // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. 6451 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, 6452 DAG.getUNDEF(VecVT), Lo, Hi, 6453 DAG.getRegister(RISCV::X0, MVT::i32)); 6454 } 6455 6456 // Custom-lower extensions from mask vectors by using a vselect either with 1 6457 // for zero/any-extension or -1 for sign-extension: 6458 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 6459 // Note that any-extension is lowered identically to zero-extension. 6460 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 6461 int64_t ExtTrueVal) const { 6462 SDLoc DL(Op); 6463 MVT VecVT = Op.getSimpleValueType(); 6464 SDValue Src = Op.getOperand(0); 6465 // Only custom-lower extensions from mask types 6466 assert(Src.getValueType().isVector() && 6467 Src.getValueType().getVectorElementType() == MVT::i1); 6468 6469 if (VecVT.isScalableVector()) { 6470 SDValue SplatZero = DAG.getConstant(0, DL, VecVT); 6471 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); 6472 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 6473 } 6474 6475 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 6476 MVT I1ContainerVT = 6477 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 6478 6479 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 6480 6481 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 6482 6483 MVT XLenVT = Subtarget.getXLenVT(); 6484 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 6485 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 6486 6487 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6488 DAG.getUNDEF(ContainerVT), SplatZero, VL); 6489 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6490 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); 6491 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 6492 SplatTrueVal, SplatZero, VL); 6493 6494 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 6495 } 6496 6497 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 6498 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 6499 MVT ExtVT = Op.getSimpleValueType(); 6500 // Only custom-lower extensions from fixed-length vector types. 6501 if (!ExtVT.isFixedLengthVector()) 6502 return Op; 6503 MVT VT = Op.getOperand(0).getSimpleValueType(); 6504 // Grab the canonical container type for the extended type. Infer the smaller 6505 // type from that to ensure the same number of vector elements, as we know 6506 // the LMUL will be sufficient to hold the smaller type. 6507 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 6508 // Get the extended container type manually to ensure the same number of 6509 // vector elements between source and dest. 
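// Hypothetical example: for a zero-extend from v8i16 to v8i32, whatever
// scalable container is used for v8i32, the i16 source is placed in a
// container with that same element count (rather than v8i16's default
// container), so the extend maps 1:1 onto elements.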
6510 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 6511 ContainerExtVT.getVectorElementCount()); 6512 6513 SDValue Op1 = 6514 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 6515 6516 SDLoc DL(Op); 6517 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6518 6519 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 6520 6521 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 6522 } 6523 6524 // Custom-lower truncations from vectors to mask vectors by using a mask and a 6525 // setcc operation: 6526 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 6527 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, 6528 SelectionDAG &DAG) const { 6529 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 6530 SDLoc DL(Op); 6531 EVT MaskVT = Op.getValueType(); 6532 // Only expect to custom-lower truncations to mask types 6533 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 6534 "Unexpected type for vector mask lowering"); 6535 SDValue Src = Op.getOperand(0); 6536 MVT VecVT = Src.getSimpleValueType(); 6537 SDValue Mask, VL; 6538 if (IsVPTrunc) { 6539 Mask = Op.getOperand(1); 6540 VL = Op.getOperand(2); 6541 } 6542 // If this is a fixed vector, we need to convert it to a scalable vector. 6543 MVT ContainerVT = VecVT; 6544 6545 if (VecVT.isFixedLengthVector()) { 6546 ContainerVT = getContainerForFixedLengthVector(VecVT); 6547 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 6548 if (IsVPTrunc) { 6549 MVT MaskContainerVT = 6550 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 6551 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 6552 } 6553 } 6554 6555 if (!IsVPTrunc) { 6556 std::tie(Mask, VL) = 6557 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 6558 } 6559 6560 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 6561 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 6562 6563 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6564 DAG.getUNDEF(ContainerVT), SplatOne, VL); 6565 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6566 DAG.getUNDEF(ContainerVT), SplatZero, VL); 6567 6568 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 6569 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, 6570 DAG.getUNDEF(ContainerVT), Mask, VL); 6571 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, 6572 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), 6573 DAG.getUNDEF(MaskContainerVT), Mask, VL}); 6574 if (MaskVT.isFixedLengthVector()) 6575 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 6576 return Trunc; 6577 } 6578 6579 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, 6580 SelectionDAG &DAG) const { 6581 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 6582 SDLoc DL(Op); 6583 6584 MVT VT = Op.getSimpleValueType(); 6585 // Only custom-lower vector truncates 6586 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 6587 6588 // Truncates to mask types are handled differently 6589 if (VT.getVectorElementType() == MVT::i1) 6590 return lowerVectorMaskTruncLike(Op, DAG); 6591 6592 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 6593 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 6594 // truncate by one power of two at a time. 
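// Illustrative: an i64 -> i8 element truncate is emitted as three such
// steps, i64 -> i32 -> i16 -> i8, one TRUNCATE_VECTOR_VL (vnsrl-style) node
// per halving.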
6595 MVT DstEltVT = VT.getVectorElementType(); 6596 6597 SDValue Src = Op.getOperand(0); 6598 MVT SrcVT = Src.getSimpleValueType(); 6599 MVT SrcEltVT = SrcVT.getVectorElementType(); 6600 6601 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && 6602 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 6603 "Unexpected vector truncate lowering"); 6604 6605 MVT ContainerVT = SrcVT; 6606 SDValue Mask, VL; 6607 if (IsVPTrunc) { 6608 Mask = Op.getOperand(1); 6609 VL = Op.getOperand(2); 6610 } 6611 if (SrcVT.isFixedLengthVector()) { 6612 ContainerVT = getContainerForFixedLengthVector(SrcVT); 6613 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 6614 if (IsVPTrunc) { 6615 MVT MaskVT = getMaskTypeFor(ContainerVT); 6616 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 6617 } 6618 } 6619 6620 SDValue Result = Src; 6621 if (!IsVPTrunc) { 6622 std::tie(Mask, VL) = 6623 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 6624 } 6625 6626 LLVMContext &Context = *DAG.getContext(); 6627 const ElementCount Count = ContainerVT.getVectorElementCount(); 6628 do { 6629 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 6630 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 6631 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 6632 Mask, VL); 6633 } while (SrcEltVT != DstEltVT); 6634 6635 if (SrcVT.isFixedLengthVector()) 6636 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 6637 6638 return Result; 6639 } 6640 6641 SDValue 6642 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, 6643 SelectionDAG &DAG) const { 6644 SDLoc DL(Op); 6645 SDValue Chain = Op.getOperand(0); 6646 SDValue Src = Op.getOperand(1); 6647 MVT VT = Op.getSimpleValueType(); 6648 MVT SrcVT = Src.getSimpleValueType(); 6649 MVT ContainerVT = VT; 6650 if (VT.isFixedLengthVector()) { 6651 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 6652 ContainerVT = 6653 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 6654 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 6655 } 6656 6657 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 6658 6659 // RVV can only widen/truncate fp to types double/half the size as the source. 6660 if ((VT.getVectorElementType() == MVT::f64 && 6661 SrcVT.getVectorElementType() == MVT::f16) || 6662 (VT.getVectorElementType() == MVT::f16 && 6663 SrcVT.getVectorElementType() == MVT::f64)) { 6664 // For double rounding, the intermediate rounding should be round-to-odd. 6665 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 6666 ? RISCVISD::STRICT_FP_EXTEND_VL 6667 : RISCVISD::STRICT_VFNCVT_ROD_VL; 6668 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 6669 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), 6670 Chain, Src, Mask, VL); 6671 Chain = Src.getValue(1); 6672 } 6673 6674 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 6675 ? RISCVISD::STRICT_FP_EXTEND_VL 6676 : RISCVISD::STRICT_FP_ROUND_VL; 6677 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 6678 Chain, Src, Mask, VL); 6679 if (VT.isFixedLengthVector()) { 6680 // StrictFP operations have two result values. Their lowered result should 6681 // have same result count. 
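// (That is, both the converted vector value and the output chain,
// Res.getValue(1), are returned via getMergeValues.)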
6682 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 6683 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 6684 } 6685 return Res; 6686 } 6687 6688 SDValue 6689 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, 6690 SelectionDAG &DAG) const { 6691 bool IsVP = 6692 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; 6693 bool IsExtend = 6694 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; 6695 // RVV can only do truncate fp to types half the size as the source. We 6696 // custom-lower f64->f16 rounds via RVV's round-to-odd float 6697 // conversion instruction. 6698 SDLoc DL(Op); 6699 MVT VT = Op.getSimpleValueType(); 6700 6701 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 6702 6703 SDValue Src = Op.getOperand(0); 6704 MVT SrcVT = Src.getSimpleValueType(); 6705 6706 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || 6707 SrcVT.getVectorElementType() != MVT::f16); 6708 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || 6709 SrcVT.getVectorElementType() != MVT::f64); 6710 6711 bool IsDirectConv = IsDirectExtend || IsDirectTrunc; 6712 6713 // Prepare any fixed-length vector operands. 6714 MVT ContainerVT = VT; 6715 SDValue Mask, VL; 6716 if (IsVP) { 6717 Mask = Op.getOperand(1); 6718 VL = Op.getOperand(2); 6719 } 6720 if (VT.isFixedLengthVector()) { 6721 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 6722 ContainerVT = 6723 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 6724 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 6725 if (IsVP) { 6726 MVT MaskVT = getMaskTypeFor(ContainerVT); 6727 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 6728 } 6729 } 6730 6731 if (!IsVP) 6732 std::tie(Mask, VL) = 6733 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 6734 6735 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; 6736 6737 if (IsDirectConv) { 6738 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); 6739 if (VT.isFixedLengthVector()) 6740 Src = convertFromScalableVector(VT, Src, DAG, Subtarget); 6741 return Src; 6742 } 6743 6744 unsigned InterConvOpc = 6745 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; 6746 6747 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 6748 SDValue IntermediateConv = 6749 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); 6750 SDValue Result = 6751 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); 6752 if (VT.isFixedLengthVector()) 6753 return convertFromScalableVector(VT, Result, DAG, Subtarget); 6754 return Result; 6755 } 6756 6757 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 6758 // first position of a vector, and that vector is slid up to the insert index. 6759 // By limiting the active vector length to index+1 and merging with the 6760 // original vector (with an undisturbed tail policy for elements >= VL), we 6761 // achieve the desired result of leaving all elements untouched except the one 6762 // at VL-1, which is replaced with the desired value. 
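// Illustrative sketch (constant index shown): inserting a value v into
// element 2 of a 4-element vector roughly becomes
//   vmv.s.x     vtmp, v          ; v into element 0 of a temporary
//   vsetivli    zero, 3, ..., tu, mu
//   vslideup.vi vdst, vtmp, 2    ; VL = idx+1, tail undisturbed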
6763 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 6764 SelectionDAG &DAG) const { 6765 SDLoc DL(Op); 6766 MVT VecVT = Op.getSimpleValueType(); 6767 SDValue Vec = Op.getOperand(0); 6768 SDValue Val = Op.getOperand(1); 6769 SDValue Idx = Op.getOperand(2); 6770 6771 if (VecVT.getVectorElementType() == MVT::i1) { 6772 // FIXME: For now we just promote to an i8 vector and insert into that, 6773 // but this is probably not optimal. 6774 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 6775 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 6776 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 6777 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 6778 } 6779 6780 MVT ContainerVT = VecVT; 6781 // If the operand is a fixed-length vector, convert to a scalable one. 6782 if (VecVT.isFixedLengthVector()) { 6783 ContainerVT = getContainerForFixedLengthVector(VecVT); 6784 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6785 } 6786 6787 MVT XLenVT = Subtarget.getXLenVT(); 6788 6789 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 6790 // Even i64-element vectors on RV32 can be lowered without scalar 6791 // legalization if the most-significant 32 bits of the value are not affected 6792 // by the sign-extension of the lower 32 bits. 6793 // TODO: We could also catch sign extensions of a 32-bit value. 6794 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 6795 const auto *CVal = cast<ConstantSDNode>(Val); 6796 if (isInt<32>(CVal->getSExtValue())) { 6797 IsLegalInsert = true; 6798 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 6799 } 6800 } 6801 6802 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 6803 6804 SDValue ValInVec; 6805 6806 if (IsLegalInsert) { 6807 unsigned Opc = 6808 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 6809 if (isNullConstant(Idx)) { 6810 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 6811 if (!VecVT.isFixedLengthVector()) 6812 return Vec; 6813 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 6814 } 6815 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); 6816 } else { 6817 // On RV32, i64-element vectors must be specially handled to place the 6818 // value at element 0, by using two vslide1down instructions in sequence on 6819 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 6820 // this. 6821 SDValue ValLo, ValHi; 6822 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); 6823 MVT I32ContainerVT = 6824 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 6825 SDValue I32Mask = 6826 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 6827 // Limit the active VL to two. 6828 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 6829 // If the Idx is 0 we can insert directly into the vector. 6830 if (isNullConstant(Idx)) { 6831 // First slide in the lo value, then the hi in above it. We use slide1down 6832 // to avoid the register group overlap constraint of vslide1up. 6833 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 6834 Vec, Vec, ValLo, I32Mask, InsertI64VL); 6835 // If the source vector is undef don't pass along the tail elements from 6836 // the previous slide1down. 6837 SDValue Tail = Vec.isUndef() ? 
Vec : ValInVec; 6838 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 6839 Tail, ValInVec, ValHi, I32Mask, InsertI64VL); 6840 // Bitcast back to the right container type. 6841 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 6842 6843 if (!VecVT.isFixedLengthVector()) 6844 return ValInVec; 6845 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); 6846 } 6847 6848 // First slide in the lo value, then the hi in above it. We use slide1down 6849 // to avoid the register group overlap constraint of vslide1up. 6850 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 6851 DAG.getUNDEF(I32ContainerVT), 6852 DAG.getUNDEF(I32ContainerVT), ValLo, 6853 I32Mask, InsertI64VL); 6854 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 6855 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, 6856 I32Mask, InsertI64VL); 6857 // Bitcast back to the right container type. 6858 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 6859 } 6860 6861 // Now that the value is in a vector, slide it into position. 6862 SDValue InsertVL = 6863 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 6864 6865 // Use tail agnostic policy if Idx is the last index of Vec. 6866 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 6867 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && 6868 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 == 6869 VecVT.getVectorNumElements()) 6870 Policy = RISCVII::TAIL_AGNOSTIC; 6871 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, 6872 Idx, Mask, InsertVL, Policy); 6873 if (!VecVT.isFixedLengthVector()) 6874 return Slideup; 6875 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 6876 } 6877 6878 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 6879 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 6880 // types this is done using VMV_X_S to allow us to glean information about the 6881 // sign bits of the result. 6882 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 6883 SelectionDAG &DAG) const { 6884 SDLoc DL(Op); 6885 SDValue Idx = Op.getOperand(1); 6886 SDValue Vec = Op.getOperand(0); 6887 EVT EltVT = Op.getValueType(); 6888 MVT VecVT = Vec.getSimpleValueType(); 6889 MVT XLenVT = Subtarget.getXLenVT(); 6890 6891 if (VecVT.getVectorElementType() == MVT::i1) { 6892 // Use vfirst.m to extract the first bit. 
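// (vfirst.m returns the index of the first set mask bit, or -1 if none, so
// it is 0 exactly when element 0 is set; hence the seteq against 0 below.)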
6893 if (isNullConstant(Idx)) { 6894 MVT ContainerVT = VecVT; 6895 if (VecVT.isFixedLengthVector()) { 6896 ContainerVT = getContainerForFixedLengthVector(VecVT); 6897 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6898 } 6899 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 6900 SDValue Vfirst = 6901 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); 6902 return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT), 6903 ISD::SETEQ); 6904 } 6905 if (VecVT.isFixedLengthVector()) { 6906 unsigned NumElts = VecVT.getVectorNumElements(); 6907 if (NumElts >= 8) { 6908 MVT WideEltVT; 6909 unsigned WidenVecLen; 6910 SDValue ExtractElementIdx; 6911 SDValue ExtractBitIdx; 6912 unsigned MaxEEW = Subtarget.getELEN(); 6913 MVT LargestEltVT = MVT::getIntegerVT( 6914 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); 6915 if (NumElts <= LargestEltVT.getSizeInBits()) { 6916 assert(isPowerOf2_32(NumElts) && 6917 "the number of elements should be power of 2"); 6918 WideEltVT = MVT::getIntegerVT(NumElts); 6919 WidenVecLen = 1; 6920 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); 6921 ExtractBitIdx = Idx; 6922 } else { 6923 WideEltVT = LargestEltVT; 6924 WidenVecLen = NumElts / WideEltVT.getSizeInBits(); 6925 // extract element index = index / element width 6926 ExtractElementIdx = DAG.getNode( 6927 ISD::SRL, DL, XLenVT, Idx, 6928 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); 6929 // mask bit index = index % element width 6930 ExtractBitIdx = DAG.getNode( 6931 ISD::AND, DL, XLenVT, Idx, 6932 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); 6933 } 6934 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); 6935 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); 6936 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, 6937 Vec, ExtractElementIdx); 6938 // Extract the bit from GPR. 6939 SDValue ShiftRight = 6940 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); 6941 return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, 6942 DAG.getConstant(1, DL, XLenVT)); 6943 } 6944 } 6945 // Otherwise, promote to an i8 vector and extract from that. 6946 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 6947 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 6948 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 6949 } 6950 6951 // If this is a fixed vector, we need to convert it to a scalable vector. 6952 MVT ContainerVT = VecVT; 6953 if (VecVT.isFixedLengthVector()) { 6954 ContainerVT = getContainerForFixedLengthVector(VecVT); 6955 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6956 } 6957 6958 // If the index is 0, the vector is already in the right position. 6959 if (!isNullConstant(Idx)) { 6960 // Use a VL of 1 to avoid processing more elements than we need. 6961 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 6962 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 6963 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 6964 } 6965 6966 if (!EltVT.isInteger()) { 6967 // Floating-point extracts are handled in TableGen. 6968 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 6969 DAG.getConstant(0, DL, XLenVT)); 6970 } 6971 6972 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 6973 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 6974 } 6975 6976 // Some RVV intrinsics may claim that they want an integer operand to be 6977 // promoted or expanded. 
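// For example (illustrative): on RV32 an i64 scalar operand is either
// truncated to i32 when it is known sign-extended, or split into two i32
// halves and materialised with SEW=32 slides or a split splat, as handled
// below.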
6978 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, 6979 const RISCVSubtarget &Subtarget) { 6980 assert((Op.getOpcode() == ISD::INTRINSIC_VOID || 6981 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 6982 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 6983 "Unexpected opcode"); 6984 6985 if (!Subtarget.hasVInstructions()) 6986 return SDValue(); 6987 6988 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || 6989 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 6990 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 6991 6992 SDLoc DL(Op); 6993 6994 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 6995 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 6996 if (!II || !II->hasScalarOperand()) 6997 return SDValue(); 6998 6999 unsigned SplatOp = II->ScalarOperand + 1 + HasChain; 7000 assert(SplatOp < Op.getNumOperands()); 7001 7002 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 7003 SDValue &ScalarOp = Operands[SplatOp]; 7004 MVT OpVT = ScalarOp.getSimpleValueType(); 7005 MVT XLenVT = Subtarget.getXLenVT(); 7006 7007 // If this isn't a scalar, or its type is XLenVT we're done. 7008 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 7009 return SDValue(); 7010 7011 // Simplest case is that the operand needs to be promoted to XLenVT. 7012 if (OpVT.bitsLT(XLenVT)) { 7013 // If the operand is a constant, sign extend to increase our chances 7014 // of being able to use a .vi instruction. ANY_EXTEND would become a 7015 // a zero extend and the simm5 check in isel would fail. 7016 // FIXME: Should we ignore the upper bits in isel instead? 7017 unsigned ExtOpc = 7018 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 7019 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 7020 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 7021 } 7022 7023 // Use the previous operand to get the vXi64 VT. The result might be a mask 7024 // VT for compares. Using the previous operand assumes that the previous 7025 // operand will never have a smaller element size than a scalar operand and 7026 // that a widening operation never uses SEW=64. 7027 // NOTE: If this fails the below assert, we can probably just find the 7028 // element count from any operand or result and use it to construct the VT. 7029 assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); 7030 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); 7031 7032 // The more complex case is when the scalar is larger than XLenVT. 7033 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 7034 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 7035 7036 // If this is a sign-extended 32-bit value, we can truncate it and rely on the 7037 // instruction to sign-extend since SEW>XLEN. 7038 if (DAG.ComputeNumSignBits(ScalarOp) > 32) { 7039 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); 7040 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 7041 } 7042 7043 switch (IntNo) { 7044 case Intrinsic::riscv_vslide1up: 7045 case Intrinsic::riscv_vslide1down: 7046 case Intrinsic::riscv_vslide1up_mask: 7047 case Intrinsic::riscv_vslide1down_mask: { 7048 // We need to special case these when the scalar is larger than XLen. 7049 unsigned NumOps = Op.getNumOperands(); 7050 bool IsMasked = NumOps == 7; 7051 7052 // Convert the vector source to the equivalent nxvXi32 vector. 
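// e.g. (illustrative) nxv2i64 is reinterpreted as nxv4i32 and the i64 scalar
// is shifted in as two i32 halves, with the VL doubled to match.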
7053 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 7054 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); 7055 SDValue ScalarLo, ScalarHi; 7056 std::tie(ScalarLo, ScalarHi) = 7057 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32); 7058 7059 // Double the VL since we halved SEW. 7060 SDValue AVL = getVLOperand(Op); 7061 SDValue I32VL; 7062 7063 // Optimize for constant AVL 7064 if (isa<ConstantSDNode>(AVL)) { 7065 unsigned EltSize = VT.getScalarSizeInBits(); 7066 unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); 7067 7068 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 7069 unsigned MaxVLMAX = 7070 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 7071 7072 unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 7073 unsigned MinVLMAX = 7074 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); 7075 7076 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); 7077 if (AVLInt <= MinVLMAX) { 7078 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); 7079 } else if (AVLInt >= 2 * MaxVLMAX) { 7080 // Just set vl to VLMAX in this situation 7081 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); 7082 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 7083 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); 7084 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 7085 SDValue SETVLMAX = DAG.getTargetConstant( 7086 Intrinsic::riscv_vsetvlimax, DL, MVT::i32); 7087 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, 7088 LMUL); 7089 } else { 7090 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl 7091 // is related to the hardware implementation. 7092 // So let the following code handle 7093 } 7094 } 7095 if (!I32VL) { 7096 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); 7097 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 7098 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); 7099 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 7100 SDValue SETVL = 7101 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32); 7102 // Using vsetvli instruction to get actually used length which related to 7103 // the hardware implementation 7104 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, 7105 SEW, LMUL); 7106 I32VL = 7107 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); 7108 } 7109 7110 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); 7111 7112 // Shift the two scalar parts in using SEW=32 slide1up/slide1down 7113 // instructions. 7114 SDValue Passthru; 7115 if (IsMasked) 7116 Passthru = DAG.getUNDEF(I32VT); 7117 else 7118 Passthru = DAG.getBitcast(I32VT, Operands[1]); 7119 7120 if (IntNo == Intrinsic::riscv_vslide1up || 7121 IntNo == Intrinsic::riscv_vslide1up_mask) { 7122 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 7123 ScalarHi, I32Mask, I32VL); 7124 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 7125 ScalarLo, I32Mask, I32VL); 7126 } else { 7127 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 7128 ScalarLo, I32Mask, I32VL); 7129 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 7130 ScalarHi, I32Mask, I32VL); 7131 } 7132 7133 // Convert back to nxvXi64. 7134 Vec = DAG.getBitcast(VT, Vec); 7135 7136 if (!IsMasked) 7137 return Vec; 7138 // Apply mask after the operation. 7139 SDValue Mask = Operands[NumOps - 3]; 7140 SDValue MaskedOff = Operands[1]; 7141 // Assume Policy operand is the last operand. 
7142 uint64_t Policy = 7143 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue(); 7144 // We don't need to select maskedoff if it's undef. 7145 if (MaskedOff.isUndef()) 7146 return Vec; 7147 // TAMU 7148 if (Policy == RISCVII::TAIL_AGNOSTIC) 7149 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, 7150 AVL); 7151 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. 7152 // It's fine because vmerge does not care mask policy. 7153 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, 7154 AVL); 7155 } 7156 } 7157 7158 // We need to convert the scalar to a splat vector. 7159 SDValue VL = getVLOperand(Op); 7160 assert(VL.getValueType() == XLenVT); 7161 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); 7162 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 7163 } 7164 7165 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support 7166 // scalable vector llvm.get.vector.length for now. 7167 // 7168 // We need to convert from a scalable VF to a vsetvli with VLMax equal to 7169 // (vscale * VF). The vscale and VF are independent of element width. We use 7170 // SEW=8 for the vsetvli because it is the only element width that supports all 7171 // fractional LMULs. The LMUL is choosen so that with SEW=8 the VLMax is 7172 // (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The 7173 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different 7174 // SEW and LMUL are better for the surrounding vector instructions. 7175 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, 7176 const RISCVSubtarget &Subtarget) { 7177 MVT XLenVT = Subtarget.getXLenVT(); 7178 7179 // The smallest LMUL is only valid for the smallest element width. 7180 const unsigned ElementWidth = 8; 7181 7182 // Determine the VF that corresponds to LMUL 1 for ElementWidth. 7183 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth; 7184 // We don't support VF==1 with ELEN==32. 7185 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN(); 7186 7187 unsigned VF = N->getConstantOperandVal(2); 7188 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) && 7189 "Unexpected VF"); 7190 (void)MinVF; 7191 7192 bool Fractional = VF < LMul1VF; 7193 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF; 7194 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); 7195 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); 7196 7197 SDLoc DL(N); 7198 7199 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); 7200 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); 7201 7202 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); 7203 7204 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); 7205 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); 7206 } 7207 7208 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7209 SelectionDAG &DAG) const { 7210 unsigned IntNo = Op.getConstantOperandVal(0); 7211 SDLoc DL(Op); 7212 MVT XLenVT = Subtarget.getXLenVT(); 7213 7214 switch (IntNo) { 7215 default: 7216 break; // Don't custom lower most intrinsics. 
7217 case Intrinsic::thread_pointer: { 7218 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7219 return DAG.getRegister(RISCV::X4, PtrVT); 7220 } 7221 case Intrinsic::riscv_orc_b: 7222 case Intrinsic::riscv_brev8: 7223 case Intrinsic::riscv_sha256sig0: 7224 case Intrinsic::riscv_sha256sig1: 7225 case Intrinsic::riscv_sha256sum0: 7226 case Intrinsic::riscv_sha256sum1: 7227 case Intrinsic::riscv_sm3p0: 7228 case Intrinsic::riscv_sm3p1: { 7229 unsigned Opc; 7230 switch (IntNo) { 7231 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 7232 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 7233 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 7234 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 7235 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 7236 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 7237 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 7238 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 7239 } 7240 7241 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 7242 } 7243 case Intrinsic::riscv_sm4ks: 7244 case Intrinsic::riscv_sm4ed: { 7245 unsigned Opc = 7246 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 7247 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), 7248 Op.getOperand(3)); 7249 } 7250 case Intrinsic::riscv_zip: 7251 case Intrinsic::riscv_unzip: { 7252 unsigned Opc = 7253 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; 7254 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 7255 } 7256 case Intrinsic::riscv_clmul: 7257 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), 7258 Op.getOperand(2)); 7259 case Intrinsic::riscv_clmulh: 7260 return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1), 7261 Op.getOperand(2)); 7262 case Intrinsic::riscv_clmulr: 7263 return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1), 7264 Op.getOperand(2)); 7265 case Intrinsic::experimental_get_vector_length: 7266 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); 7267 case Intrinsic::riscv_vmv_x_s: 7268 assert(Op.getValueType() == XLenVT && "Unexpected VT!"); 7269 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 7270 Op.getOperand(1)); 7271 case Intrinsic::riscv_vfmv_f_s: 7272 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 7273 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); 7274 case Intrinsic::riscv_vmv_v_x: 7275 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 7276 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, 7277 Subtarget); 7278 case Intrinsic::riscv_vfmv_v_f: 7279 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 7280 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 7281 case Intrinsic::riscv_vmv_s_x: { 7282 SDValue Scalar = Op.getOperand(2); 7283 7284 if (Scalar.getValueType().bitsLE(XLenVT)) { 7285 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 7286 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 7287 Op.getOperand(1), Scalar, Op.getOperand(3)); 7288 } 7289 7290 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 7291 7292 // This is an i64 value that lives in two scalar registers. We have to 7293 // insert this in a convoluted way. First we build vXi64 splat containing 7294 // the two values that we assemble using some bit math. Next we'll use 7295 // vid.v and vmseq to build a mask with bit 0 set. 
Then we'll use that mask 7296 // to merge element 0 from our splat into the source vector. 7297 // FIXME: This is probably not the best way to do this, but it is 7298 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 7299 // point. 7300 // sw lo, (a0) 7301 // sw hi, 4(a0) 7302 // vlse vX, (a0) 7303 // 7304 // vid.v vVid 7305 // vmseq.vx mMask, vVid, 0 7306 // vmerge.vvm vDest, vSrc, vVal, mMask 7307 MVT VT = Op.getSimpleValueType(); 7308 SDValue Vec = Op.getOperand(1); 7309 SDValue VL = getVLOperand(Op); 7310 7311 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); 7312 if (Op.getOperand(1).isUndef()) 7313 return SplattedVal; 7314 SDValue SplattedIdx = 7315 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 7316 DAG.getConstant(0, DL, MVT::i32), VL); 7317 7318 MVT MaskVT = getMaskTypeFor(VT); 7319 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); 7320 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 7321 SDValue SelectCond = 7322 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 7323 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), 7324 DAG.getUNDEF(MaskVT), Mask, VL}); 7325 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 7326 Vec, VL); 7327 } 7328 } 7329 7330 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 7331 } 7332 7333 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 7334 SelectionDAG &DAG) const { 7335 unsigned IntNo = Op.getConstantOperandVal(1); 7336 switch (IntNo) { 7337 default: 7338 break; 7339 case Intrinsic::riscv_masked_strided_load: { 7340 SDLoc DL(Op); 7341 MVT XLenVT = Subtarget.getXLenVT(); 7342 7343 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 7344 // the selection of the masked intrinsics doesn't do this for us. 7345 SDValue Mask = Op.getOperand(5); 7346 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7347 7348 MVT VT = Op->getSimpleValueType(0); 7349 MVT ContainerVT = VT; 7350 if (VT.isFixedLengthVector()) 7351 ContainerVT = getContainerForFixedLengthVector(VT); 7352 7353 SDValue PassThru = Op.getOperand(2); 7354 if (!IsUnmasked) { 7355 MVT MaskVT = getMaskTypeFor(ContainerVT); 7356 if (VT.isFixedLengthVector()) { 7357 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7358 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 7359 } 7360 } 7361 7362 auto *Load = cast<MemIntrinsicSDNode>(Op); 7363 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 7364 SDValue Ptr = Op.getOperand(3); 7365 SDValue Stride = Op.getOperand(4); 7366 SDValue Result, Chain; 7367 7368 // TODO: We restrict this to unmasked loads currently in consideration of 7369 // the complexity of hanlding all falses masks. 7370 if (IsUnmasked && isNullConstant(Stride)) { 7371 MVT ScalarVT = ContainerVT.getVectorElementType(); 7372 SDValue ScalarLoad = 7373 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr, 7374 ScalarVT, Load->getMemOperand()); 7375 Chain = ScalarLoad.getValue(1); 7376 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, 7377 Subtarget); 7378 } else { 7379 SDValue IntID = DAG.getTargetConstant( 7380 IsUnmasked ? 
Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 7381 XLenVT); 7382 7383 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 7384 if (IsUnmasked) 7385 Ops.push_back(DAG.getUNDEF(ContainerVT)); 7386 else 7387 Ops.push_back(PassThru); 7388 Ops.push_back(Ptr); 7389 Ops.push_back(Stride); 7390 if (!IsUnmasked) 7391 Ops.push_back(Mask); 7392 Ops.push_back(VL); 7393 if (!IsUnmasked) { 7394 SDValue Policy = 7395 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 7396 Ops.push_back(Policy); 7397 } 7398 7399 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 7400 Result = 7401 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 7402 Load->getMemoryVT(), Load->getMemOperand()); 7403 Chain = Result.getValue(1); 7404 } 7405 if (VT.isFixedLengthVector()) 7406 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 7407 return DAG.getMergeValues({Result, Chain}, DL); 7408 } 7409 case Intrinsic::riscv_seg2_load: 7410 case Intrinsic::riscv_seg3_load: 7411 case Intrinsic::riscv_seg4_load: 7412 case Intrinsic::riscv_seg5_load: 7413 case Intrinsic::riscv_seg6_load: 7414 case Intrinsic::riscv_seg7_load: 7415 case Intrinsic::riscv_seg8_load: { 7416 SDLoc DL(Op); 7417 static const Intrinsic::ID VlsegInts[7] = { 7418 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 7419 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 7420 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 7421 Intrinsic::riscv_vlseg8}; 7422 unsigned NF = Op->getNumValues() - 1; 7423 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 7424 MVT XLenVT = Subtarget.getXLenVT(); 7425 MVT VT = Op->getSimpleValueType(0); 7426 MVT ContainerVT = getContainerForFixedLengthVector(VT); 7427 7428 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 7429 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); 7430 auto *Load = cast<MemIntrinsicSDNode>(Op); 7431 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); 7432 ContainerVTs.push_back(MVT::Other); 7433 SDVTList VTs = DAG.getVTList(ContainerVTs); 7434 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; 7435 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); 7436 Ops.push_back(Op.getOperand(2)); 7437 Ops.push_back(VL); 7438 SDValue Result = 7439 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 7440 Load->getMemoryVT(), Load->getMemOperand()); 7441 SmallVector<SDValue, 9> Results; 7442 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) 7443 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), 7444 DAG, Subtarget)); 7445 Results.push_back(Result.getValue(NF)); 7446 return DAG.getMergeValues(Results, DL); 7447 } 7448 } 7449 7450 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 7451 } 7452 7453 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 7454 SelectionDAG &DAG) const { 7455 unsigned IntNo = Op.getConstantOperandVal(1); 7456 switch (IntNo) { 7457 default: 7458 break; 7459 case Intrinsic::riscv_masked_strided_store: { 7460 SDLoc DL(Op); 7461 MVT XLenVT = Subtarget.getXLenVT(); 7462 7463 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 7464 // the selection of the masked intrinsics doesn't do this for us. 
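// Illustrative note (assumption: typical codegen, exact registers and VTYPE
// settings vary): when the mask operand is a constant all-ones splat, the
// store below selects the plain strided form, e.g.
//   vsetvli zero, aVL, e32, m1, ta, ma
//   vsse32.v vVal, (aPtr), aStride
// instead of the masked "vsse32.v ..., v0.t" variant.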
7465 SDValue Mask = Op.getOperand(5); 7466 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7467 7468 SDValue Val = Op.getOperand(2); 7469 MVT VT = Val.getSimpleValueType(); 7470 MVT ContainerVT = VT; 7471 if (VT.isFixedLengthVector()) { 7472 ContainerVT = getContainerForFixedLengthVector(VT); 7473 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 7474 } 7475 if (!IsUnmasked) { 7476 MVT MaskVT = getMaskTypeFor(ContainerVT); 7477 if (VT.isFixedLengthVector()) 7478 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7479 } 7480 7481 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 7482 7483 SDValue IntID = DAG.getTargetConstant( 7484 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 7485 XLenVT); 7486 7487 auto *Store = cast<MemIntrinsicSDNode>(Op); 7488 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 7489 Ops.push_back(Val); 7490 Ops.push_back(Op.getOperand(3)); // Ptr 7491 Ops.push_back(Op.getOperand(4)); // Stride 7492 if (!IsUnmasked) 7493 Ops.push_back(Mask); 7494 Ops.push_back(VL); 7495 7496 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 7497 Ops, Store->getMemoryVT(), 7498 Store->getMemOperand()); 7499 } 7500 case Intrinsic::riscv_seg2_store: 7501 case Intrinsic::riscv_seg3_store: 7502 case Intrinsic::riscv_seg4_store: 7503 case Intrinsic::riscv_seg5_store: 7504 case Intrinsic::riscv_seg6_store: 7505 case Intrinsic::riscv_seg7_store: 7506 case Intrinsic::riscv_seg8_store: { 7507 SDLoc DL(Op); 7508 static const Intrinsic::ID VssegInts[] = { 7509 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 7510 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 7511 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 7512 Intrinsic::riscv_vsseg8}; 7513 // Operands are (chain, int_id, vec*, ptr, vl) 7514 unsigned NF = Op->getNumOperands() - 4; 7515 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 7516 MVT XLenVT = Subtarget.getXLenVT(); 7517 MVT VT = Op->getOperand(2).getSimpleValueType(); 7518 MVT ContainerVT = getContainerForFixedLengthVector(VT); 7519 7520 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 7521 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); 7522 SDValue Ptr = Op->getOperand(NF + 2); 7523 7524 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); 7525 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; 7526 for (unsigned i = 0; i < NF; i++) 7527 Ops.push_back(convertToScalableVector( 7528 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget)); 7529 Ops.append({Ptr, VL}); 7530 7531 return DAG.getMemIntrinsicNode( 7532 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, 7533 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); 7534 } 7535 } 7536 7537 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 7538 } 7539 7540 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 7541 switch (ISDOpcode) { 7542 default: 7543 llvm_unreachable("Unhandled reduction"); 7544 case ISD::VECREDUCE_ADD: 7545 return RISCVISD::VECREDUCE_ADD_VL; 7546 case ISD::VECREDUCE_UMAX: 7547 return RISCVISD::VECREDUCE_UMAX_VL; 7548 case ISD::VECREDUCE_SMAX: 7549 return RISCVISD::VECREDUCE_SMAX_VL; 7550 case ISD::VECREDUCE_UMIN: 7551 return RISCVISD::VECREDUCE_UMIN_VL; 7552 case ISD::VECREDUCE_SMIN: 7553 return RISCVISD::VECREDUCE_SMIN_VL; 7554 case ISD::VECREDUCE_AND: 7555 return RISCVISD::VECREDUCE_AND_VL; 7556 case ISD::VECREDUCE_OR: 7557 return RISCVISD::VECREDUCE_OR_VL; 7558 case 
ISD::VECREDUCE_XOR: 7559 return RISCVISD::VECREDUCE_XOR_VL; 7560 } 7561 } 7562 7563 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 7564 SelectionDAG &DAG, 7565 bool IsVP) const { 7566 SDLoc DL(Op); 7567 SDValue Vec = Op.getOperand(IsVP ? 1 : 0); 7568 MVT VecVT = Vec.getSimpleValueType(); 7569 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 7570 Op.getOpcode() == ISD::VECREDUCE_OR || 7571 Op.getOpcode() == ISD::VECREDUCE_XOR || 7572 Op.getOpcode() == ISD::VP_REDUCE_AND || 7573 Op.getOpcode() == ISD::VP_REDUCE_OR || 7574 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 7575 "Unexpected reduction lowering"); 7576 7577 MVT XLenVT = Subtarget.getXLenVT(); 7578 assert(Op.getValueType() == XLenVT && 7579 "Expected reduction output to be legalized to XLenVT"); 7580 7581 MVT ContainerVT = VecVT; 7582 if (VecVT.isFixedLengthVector()) { 7583 ContainerVT = getContainerForFixedLengthVector(VecVT); 7584 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7585 } 7586 7587 SDValue Mask, VL; 7588 if (IsVP) { 7589 Mask = Op.getOperand(2); 7590 VL = Op.getOperand(3); 7591 } else { 7592 std::tie(Mask, VL) = 7593 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7594 } 7595 7596 unsigned BaseOpc; 7597 ISD::CondCode CC; 7598 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 7599 7600 switch (Op.getOpcode()) { 7601 default: 7602 llvm_unreachable("Unhandled reduction"); 7603 case ISD::VECREDUCE_AND: 7604 case ISD::VP_REDUCE_AND: { 7605 // vcpop ~x == 0 7606 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 7607 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 7608 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 7609 CC = ISD::SETEQ; 7610 BaseOpc = ISD::AND; 7611 break; 7612 } 7613 case ISD::VECREDUCE_OR: 7614 case ISD::VP_REDUCE_OR: 7615 // vcpop x != 0 7616 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 7617 CC = ISD::SETNE; 7618 BaseOpc = ISD::OR; 7619 break; 7620 case ISD::VECREDUCE_XOR: 7621 case ISD::VP_REDUCE_XOR: { 7622 // ((vcpop x) & 1) != 0 7623 SDValue One = DAG.getConstant(1, DL, XLenVT); 7624 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 7625 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 7626 CC = ISD::SETNE; 7627 BaseOpc = ISD::XOR; 7628 break; 7629 } 7630 } 7631 7632 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 7633 7634 if (!IsVP) 7635 return SetCC; 7636 7637 // Now include the start value in the operation. 7638 // Note that we must return the start value when no elements are operated 7639 // upon. The vcpop instructions we've emitted in each case above will return 7640 // 0 for an inactive vector, and so we've already received the neutral value: 7641 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 7642 // can simply include the start value. 
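// Worked example (illustrative): for vp.reduce.or with start value %s this
// returns (or (vcpop != 0), %s). With EVL == 0 the vcpop result is 0, the
// compare yields 0, and the OR returns %s unchanged, matching the VP
// semantics described above.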
7643 return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0)); 7644 } 7645 7646 static bool isNonZeroAVL(SDValue AVL) { 7647 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); 7648 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); 7649 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || 7650 (ImmAVL && ImmAVL->getZExtValue() >= 1); 7651 } 7652 7653 /// Helper to lower a reduction sequence of the form: 7654 /// scalar = reduce_op vec, scalar_start 7655 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, 7656 SDValue StartValue, SDValue Vec, SDValue Mask, 7657 SDValue VL, const SDLoc &DL, SelectionDAG &DAG, 7658 const RISCVSubtarget &Subtarget) { 7659 const MVT VecVT = Vec.getSimpleValueType(); 7660 const MVT M1VT = getLMUL1VT(VecVT); 7661 const MVT XLenVT = Subtarget.getXLenVT(); 7662 const bool NonZeroAVL = isNonZeroAVL(VL); 7663 7664 // The reduction needs an LMUL1 input; do the splat at either LMUL1 7665 // or the original VT if fractional. 7666 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; 7667 // We reuse the VL of the reduction to reduce vsetvli toggles if we can 7668 // prove it is non-zero. For the AVL=0 case, we need the scalar to 7669 // be the result of the reduction operation. 7670 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); 7671 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, 7672 DAG, Subtarget); 7673 if (M1VT != InnerVT) 7674 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, 7675 DAG.getUNDEF(M1VT), 7676 InitialValue, DAG.getConstant(0, DL, XLenVT)); 7677 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; 7678 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 7679 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; 7680 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops); 7681 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 7682 DAG.getConstant(0, DL, XLenVT)); 7683 } 7684 7685 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 7686 SelectionDAG &DAG) const { 7687 SDLoc DL(Op); 7688 SDValue Vec = Op.getOperand(0); 7689 EVT VecEVT = Vec.getValueType(); 7690 7691 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 7692 7693 // Due to ordering in legalize types we may have a vector type that needs to 7694 // be split. Do that manually so we can get down to a legal type. 7695 while (getTypeAction(*DAG.getContext(), VecEVT) == 7696 TargetLowering::TypeSplitVector) { 7697 auto [Lo, Hi] = DAG.SplitVector(Vec, DL); 7698 VecEVT = Lo.getValueType(); 7699 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 7700 } 7701 7702 // TODO: The type may need to be widened rather than split. Or widened before 7703 // it can be split. 
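// For example (illustrative), if v16i64 is not legal but v8i64 is, the loop
// above rewrites vecreduce_add(v16i64 %v) as
//   vecreduce_add(add(v8i64 %lo, v8i64 %hi))
// so the reduction proceeds on the legal half-width type.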
7704 if (!isTypeLegal(VecEVT)) 7705 return SDValue(); 7706 7707 MVT VecVT = VecEVT.getSimpleVT(); 7708 MVT VecEltVT = VecVT.getVectorElementType(); 7709 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 7710 7711 MVT ContainerVT = VecVT; 7712 if (VecVT.isFixedLengthVector()) { 7713 ContainerVT = getContainerForFixedLengthVector(VecVT); 7714 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7715 } 7716 7717 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7718 7719 SDValue NeutralElem = 7720 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 7721 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec, 7722 Mask, VL, DL, DAG, Subtarget); 7723 } 7724 7725 // Given a reduction op, this function returns the matching reduction opcode, 7726 // the vector SDValue and the scalar SDValue required to lower this to a 7727 // RISCVISD node. 7728 static std::tuple<unsigned, SDValue, SDValue> 7729 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 7730 SDLoc DL(Op); 7731 auto Flags = Op->getFlags(); 7732 unsigned Opcode = Op.getOpcode(); 7733 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode); 7734 switch (Opcode) { 7735 default: 7736 llvm_unreachable("Unhandled reduction"); 7737 case ISD::VECREDUCE_FADD: { 7738 // Use positive zero if we can. It is cheaper to materialize. 7739 SDValue Zero = 7740 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); 7741 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); 7742 } 7743 case ISD::VECREDUCE_SEQ_FADD: 7744 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 7745 Op.getOperand(0)); 7746 case ISD::VECREDUCE_FMIN: 7747 return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0), 7748 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 7749 case ISD::VECREDUCE_FMAX: 7750 return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0), 7751 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 7752 } 7753 } 7754 7755 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 7756 SelectionDAG &DAG) const { 7757 SDLoc DL(Op); 7758 MVT VecEltVT = Op.getSimpleValueType(); 7759 7760 unsigned RVVOpcode; 7761 SDValue VectorVal, ScalarVal; 7762 std::tie(RVVOpcode, VectorVal, ScalarVal) = 7763 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 7764 MVT VecVT = VectorVal.getSimpleValueType(); 7765 7766 MVT ContainerVT = VecVT; 7767 if (VecVT.isFixedLengthVector()) { 7768 ContainerVT = getContainerForFixedLengthVector(VecVT); 7769 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 7770 } 7771 7772 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7773 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal, 7774 VectorVal, Mask, VL, DL, DAG, Subtarget); 7775 } 7776 7777 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) { 7778 switch (ISDOpcode) { 7779 default: 7780 llvm_unreachable("Unhandled reduction"); 7781 case ISD::VP_REDUCE_ADD: 7782 return RISCVISD::VECREDUCE_ADD_VL; 7783 case ISD::VP_REDUCE_UMAX: 7784 return RISCVISD::VECREDUCE_UMAX_VL; 7785 case ISD::VP_REDUCE_SMAX: 7786 return RISCVISD::VECREDUCE_SMAX_VL; 7787 case ISD::VP_REDUCE_UMIN: 7788 return RISCVISD::VECREDUCE_UMIN_VL; 7789 case ISD::VP_REDUCE_SMIN: 7790 return RISCVISD::VECREDUCE_SMIN_VL; 7791 case ISD::VP_REDUCE_AND: 7792 return RISCVISD::VECREDUCE_AND_VL; 7793 case ISD::VP_REDUCE_OR: 7794 return RISCVISD::VECREDUCE_OR_VL; 7795 case 
ISD::VP_REDUCE_XOR: 7796 return RISCVISD::VECREDUCE_XOR_VL; 7797 case ISD::VP_REDUCE_FADD: 7798 return RISCVISD::VECREDUCE_FADD_VL; 7799 case ISD::VP_REDUCE_SEQ_FADD: 7800 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 7801 case ISD::VP_REDUCE_FMAX: 7802 return RISCVISD::VECREDUCE_FMAX_VL; 7803 case ISD::VP_REDUCE_FMIN: 7804 return RISCVISD::VECREDUCE_FMIN_VL; 7805 } 7806 } 7807 7808 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 7809 SelectionDAG &DAG) const { 7810 SDLoc DL(Op); 7811 SDValue Vec = Op.getOperand(1); 7812 EVT VecEVT = Vec.getValueType(); 7813 7814 // TODO: The type may need to be widened rather than split. Or widened before 7815 // it can be split. 7816 if (!isTypeLegal(VecEVT)) 7817 return SDValue(); 7818 7819 MVT VecVT = VecEVT.getSimpleVT(); 7820 unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode()); 7821 7822 if (VecVT.isFixedLengthVector()) { 7823 auto ContainerVT = getContainerForFixedLengthVector(VecVT); 7824 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7825 } 7826 7827 SDValue VL = Op.getOperand(3); 7828 SDValue Mask = Op.getOperand(2); 7829 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0), 7830 Vec, Mask, VL, DL, DAG, Subtarget); 7831 } 7832 7833 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 7834 SelectionDAG &DAG) const { 7835 SDValue Vec = Op.getOperand(0); 7836 SDValue SubVec = Op.getOperand(1); 7837 MVT VecVT = Vec.getSimpleValueType(); 7838 MVT SubVecVT = SubVec.getSimpleValueType(); 7839 7840 SDLoc DL(Op); 7841 MVT XLenVT = Subtarget.getXLenVT(); 7842 unsigned OrigIdx = Op.getConstantOperandVal(2); 7843 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 7844 7845 // We don't have the ability to slide mask vectors up indexed by their i1 7846 // elements; the smallest we can do is i8. Often we are able to bitcast to 7847 // equivalent i8 vectors. Note that when inserting a fixed-length vector 7848 // into a scalable one, we might not necessarily have enough scalable 7849 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 7850 if (SubVecVT.getVectorElementType() == MVT::i1 && 7851 (OrigIdx != 0 || !Vec.isUndef())) { 7852 if (VecVT.getVectorMinNumElements() >= 8 && 7853 SubVecVT.getVectorMinNumElements() >= 8) { 7854 assert(OrigIdx % 8 == 0 && "Invalid index"); 7855 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 7856 SubVecVT.getVectorMinNumElements() % 8 == 0 && 7857 "Unexpected mask vector lowering"); 7858 OrigIdx /= 8; 7859 SubVecVT = 7860 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 7861 SubVecVT.isScalableVector()); 7862 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 7863 VecVT.isScalableVector()); 7864 Vec = DAG.getBitcast(VecVT, Vec); 7865 SubVec = DAG.getBitcast(SubVecVT, SubVec); 7866 } else { 7867 // We can't slide this mask vector up indexed by its i1 elements. 7868 // This poses a problem when we wish to insert a scalable vector which 7869 // can't be re-expressed as a larger type. Just choose the slow path and 7870 // extend to a larger type, then truncate back down. 
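// Sketch of the slow path (illustrative): for nxv1i1 = insert nxv1i1, v4i1
// both operands are zero-extended to i8 vectors, the insert is performed on
// the i8 types, and a setcc-ne-zero converts the result back to an i1 mask.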
7871 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 7872 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 7873 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 7874 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 7875 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 7876 Op.getOperand(2)); 7877 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 7878 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 7879 } 7880 } 7881 7882 // If the subvector vector is a fixed-length type, we cannot use subregister 7883 // manipulation to simplify the codegen; we don't know which register of a 7884 // LMUL group contains the specific subvector as we only know the minimum 7885 // register size. Therefore we must slide the vector group up the full 7886 // amount. 7887 if (SubVecVT.isFixedLengthVector()) { 7888 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) 7889 return Op; 7890 MVT ContainerVT = VecVT; 7891 if (VecVT.isFixedLengthVector()) { 7892 ContainerVT = getContainerForFixedLengthVector(VecVT); 7893 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7894 } 7895 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 7896 DAG.getUNDEF(ContainerVT), SubVec, 7897 DAG.getConstant(0, DL, XLenVT)); 7898 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { 7899 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 7900 return DAG.getBitcast(Op.getValueType(), SubVec); 7901 } 7902 SDValue Mask = 7903 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 7904 // Set the vector length to only the number of elements we care about. Note 7905 // that for slideup this includes the offset. 7906 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); 7907 SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget); 7908 7909 // Use tail agnostic policy if we're inserting over Vec's tail. 7910 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 7911 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) 7912 Policy = RISCVII::TAIL_AGNOSTIC; 7913 7914 // If we're inserting into the lowest elements, use a tail undisturbed 7915 // vmv.v.v. 7916 if (OrigIdx == 0) { 7917 SubVec = 7918 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL); 7919 } else { 7920 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 7921 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, 7922 SlideupAmt, Mask, VL, Policy); 7923 } 7924 7925 if (VecVT.isFixedLengthVector()) 7926 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 7927 return DAG.getBitcast(Op.getValueType(), SubVec); 7928 } 7929 7930 unsigned SubRegIdx, RemIdx; 7931 std::tie(SubRegIdx, RemIdx) = 7932 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 7933 VecVT, SubVecVT, OrigIdx, TRI); 7934 7935 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 7936 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 7937 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 7938 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 7939 7940 // 1. If the Idx has been completely eliminated and this subvector's size is 7941 // a vector register or a multiple thereof, or the surrounding elements are 7942 // undef, then this is a subvector insert which naturally aligns to a vector 7943 // register. These can easily be handled using subregister manipulation. 7944 // 2. 
If the subvector is smaller than a vector register, then the insertion 7945 // must preserve the undisturbed elements of the register. We do this by 7946 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 7947 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 7948 // subvector within the vector register, and an INSERT_SUBVECTOR of that 7949 // LMUL=1 type back into the larger vector (resolving to another subregister 7950 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 7951 // to avoid allocating a large register group to hold our subvector. 7952 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 7953 return Op; 7954 7955 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 7956 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 7957 // (in our case undisturbed). This means we can set up a subvector insertion 7958 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 7959 // size of the subvector. 7960 MVT InterSubVT = VecVT; 7961 SDValue AlignedExtract = Vec; 7962 unsigned AlignedIdx = OrigIdx - RemIdx; 7963 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 7964 InterSubVT = getLMUL1VT(VecVT); 7965 // Extract a subvector equal to the nearest full vector register type. This 7966 // should resolve to a EXTRACT_SUBREG instruction. 7967 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 7968 DAG.getConstant(AlignedIdx, DL, XLenVT)); 7969 } 7970 7971 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 7972 DAG.getUNDEF(InterSubVT), SubVec, 7973 DAG.getConstant(0, DL, XLenVT)); 7974 7975 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 7976 7977 VL = computeVLMax(SubVecVT, DL, DAG); 7978 7979 // If we're inserting into the lowest elements, use a tail undisturbed 7980 // vmv.v.v. 7981 if (RemIdx == 0) { 7982 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract, 7983 SubVec, VL); 7984 } else { 7985 SDValue SlideupAmt = 7986 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 7987 7988 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 7989 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 7990 7991 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec, 7992 SlideupAmt, Mask, VL); 7993 } 7994 7995 // If required, insert this subvector back into the correct vector register. 7996 // This should resolve to an INSERT_SUBREG instruction. 7997 if (VecVT.bitsGT(InterSubVT)) 7998 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec, 7999 DAG.getConstant(AlignedIdx, DL, XLenVT)); 8000 8001 // We might have bitcast from a mask type: cast back to the original type if 8002 // required. 8003 return DAG.getBitcast(Op.getSimpleValueType(), SubVec); 8004 } 8005 8006 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 8007 SelectionDAG &DAG) const { 8008 SDValue Vec = Op.getOperand(0); 8009 MVT SubVecVT = Op.getSimpleValueType(); 8010 MVT VecVT = Vec.getSimpleValueType(); 8011 8012 SDLoc DL(Op); 8013 MVT XLenVT = Subtarget.getXLenVT(); 8014 unsigned OrigIdx = Op.getConstantOperandVal(1); 8015 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8016 8017 // We don't have the ability to slide mask vectors down indexed by their i1 8018 // elements; the smallest we can do is i8. Often we are able to bitcast to 8019 // equivalent i8 vectors. 
Note that when extracting a fixed-length vector 8020 // from a scalable one, we might not necessarily have enough scalable 8021 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 8022 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 8023 if (VecVT.getVectorMinNumElements() >= 8 && 8024 SubVecVT.getVectorMinNumElements() >= 8) { 8025 assert(OrigIdx % 8 == 0 && "Invalid index"); 8026 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 8027 SubVecVT.getVectorMinNumElements() % 8 == 0 && 8028 "Unexpected mask vector lowering"); 8029 OrigIdx /= 8; 8030 SubVecVT = 8031 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 8032 SubVecVT.isScalableVector()); 8033 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 8034 VecVT.isScalableVector()); 8035 Vec = DAG.getBitcast(VecVT, Vec); 8036 } else { 8037 // We can't slide this mask vector down, indexed by its i1 elements. 8038 // This poses a problem when we wish to extract a scalable vector which 8039 // can't be re-expressed as a larger type. Just choose the slow path and 8040 // extend to a larger type, then truncate back down. 8041 // TODO: We could probably improve this when extracting certain fixed 8042 // from fixed, where we can extract as i8 and shift the correct element 8043 // right to reach the desired subvector? 8044 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 8045 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 8046 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 8047 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 8048 Op.getOperand(1)); 8049 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 8050 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 8051 } 8052 } 8053 8054 // If the subvector vector is a fixed-length type, we cannot use subregister 8055 // manipulation to simplify the codegen; we don't know which register of a 8056 // LMUL group contains the specific subvector as we only know the minimum 8057 // register size. Therefore we must slide the vector group down the full 8058 // amount. 8059 if (SubVecVT.isFixedLengthVector()) { 8060 // With an index of 0 this is a cast-like subvector, which can be performed 8061 // with subregister operations. 8062 if (OrigIdx == 0) 8063 return Op; 8064 MVT ContainerVT = VecVT; 8065 if (VecVT.isFixedLengthVector()) { 8066 ContainerVT = getContainerForFixedLengthVector(VecVT); 8067 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8068 } 8069 SDValue Mask = 8070 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 8071 // Set the vector length to only the number of elements we care about. This 8072 // avoids sliding down elements we're going to discard straight away. 8073 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget); 8074 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 8075 SDValue Slidedown = 8076 getVSlidedown(DAG, Subtarget, DL, ContainerVT, 8077 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 8078 // Now we can use a cast-like subvector extract to get the result. 
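// Illustrative codegen (assumption: a v2i32 subvector extracted at index 2
// from a v8i32 held in an m1 register; actual VL and LMUL depend on the
// container type):
//   vsetivli zero, 2, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2
// after which the low elements of the register hold the requested subvector.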
8079 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 8080 DAG.getConstant(0, DL, XLenVT)); 8081 return DAG.getBitcast(Op.getValueType(), Slidedown); 8082 } 8083 8084 unsigned SubRegIdx, RemIdx; 8085 std::tie(SubRegIdx, RemIdx) = 8086 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 8087 VecVT, SubVecVT, OrigIdx, TRI); 8088 8089 // If the Idx has been completely eliminated then this is a subvector extract 8090 // which naturally aligns to a vector register. These can easily be handled 8091 // using subregister manipulation. 8092 if (RemIdx == 0) 8093 return Op; 8094 8095 // Else we must shift our vector register directly to extract the subvector. 8096 // Do this using VSLIDEDOWN. 8097 8098 // If the vector type is an LMUL-group type, extract a subvector equal to the 8099 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG 8100 // instruction. 8101 MVT InterSubVT = VecVT; 8102 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 8103 InterSubVT = getLMUL1VT(VecVT); 8104 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 8105 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); 8106 } 8107 8108 // Slide this vector register down by the desired number of elements in order 8109 // to place the desired subvector starting at element 0. 8110 SDValue SlidedownAmt = 8111 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 8112 8113 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 8114 SDValue Slidedown = 8115 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), 8116 Vec, SlidedownAmt, Mask, VL); 8117 8118 // Now the vector is in the right position, extract our final subvector. This 8119 // should resolve to a COPY. 8120 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 8121 DAG.getConstant(0, DL, XLenVT)); 8122 8123 // We might have bitcast from a mask type: cast back to the original type if 8124 // required. 8125 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 8126 } 8127 8128 // Widen a vector's operands to i8, then truncate its results back to the 8129 // original type, typically i1. All operand and result types must be the same. 
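// For example (illustrative), a two-result node such as
//   vector_deinterleave(nxv16i1, nxv16i1)
// is widened to operate on nxv16i8; each i8 result is then compared against
// zero with setne to recover the original i1 vectors.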
8130 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, 8131 SelectionDAG &DAG) { 8132 MVT VT = N.getSimpleValueType(); 8133 MVT WideVT = VT.changeVectorElementType(MVT::i8); 8134 SmallVector<SDValue, 4> WideOps; 8135 for (SDValue Op : N->ops()) { 8136 assert(Op.getSimpleValueType() == VT && 8137 "Operands and result must be same type"); 8138 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op)); 8139 } 8140 8141 unsigned NumVals = N->getNumValues(); 8142 8143 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( 8144 NumVals, N.getValueType().changeVectorElementType(MVT::i8))); 8145 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps); 8146 SmallVector<SDValue, 4> TruncVals; 8147 for (unsigned I = 0; I < NumVals; I++) { 8148 TruncVals.push_back( 8149 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I), 8150 DAG.getConstant(0, DL, WideVT), ISD::SETNE)); 8151 } 8152 8153 if (TruncVals.size() > 1) 8154 return DAG.getMergeValues(TruncVals, DL); 8155 return TruncVals.front(); 8156 } 8157 8158 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, 8159 SelectionDAG &DAG) const { 8160 SDLoc DL(Op); 8161 MVT VecVT = Op.getSimpleValueType(); 8162 MVT XLenVT = Subtarget.getXLenVT(); 8163 8164 assert(VecVT.isScalableVector() && 8165 "vector_interleave on non-scalable vector!"); 8166 8167 // 1 bit element vectors need to be widened to e8 8168 if (VecVT.getVectorElementType() == MVT::i1) 8169 return widenVectorOpsToi8(Op, DL, DAG); 8170 8171 // If the VT is LMUL=8, we need to split and reassemble. 8172 if (VecVT.getSizeInBits().getKnownMinValue() == 8173 (8 * RISCV::RVVBitsPerBlock)) { 8174 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 8175 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 8176 EVT SplitVT = Op0Lo.getValueType(); 8177 8178 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 8179 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi); 8180 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 8181 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi); 8182 8183 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 8184 ResLo.getValue(0), ResHi.getValue(0)); 8185 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1), 8186 ResHi.getValue(1)); 8187 return DAG.getMergeValues({Even, Odd}, DL); 8188 } 8189 8190 // Concatenate the two vectors as one vector to deinterleave 8191 MVT ConcatVT = 8192 MVT::getVectorVT(VecVT.getVectorElementType(), 8193 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 8194 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 8195 Op.getOperand(0), Op.getOperand(1)); 8196 8197 // We want to operate on all lanes, so get the mask and VL and mask for it 8198 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); 8199 SDValue Passthru = DAG.getUNDEF(ConcatVT); 8200 8201 // We can deinterleave through vnsrl.wi if the element type is smaller than 8202 // ELEN 8203 if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) { 8204 SDValue Even = 8205 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); 8206 SDValue Odd = 8207 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG); 8208 return DAG.getMergeValues({Even, Odd}, DL); 8209 } 8210 8211 // For the indices, use the same SEW to avoid an extra vsetvli 8212 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); 8213 // Create a vector of even indices {0, 2, 4, ...} 8214 SDValue EvenIdx = 8215 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); 8216 
// Create a vector of odd indices {1, 3, 5, ... } 8217 SDValue OddIdx = 8218 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); 8219 8220 // Gather the even and odd elements into two separate vectors 8221 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 8222 Concat, EvenIdx, Passthru, Mask, VL); 8223 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 8224 Concat, OddIdx, Passthru, Mask, VL); 8225 8226 // Extract the result half of the gather for even and odd 8227 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, 8228 DAG.getConstant(0, DL, XLenVT)); 8229 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide, 8230 DAG.getConstant(0, DL, XLenVT)); 8231 8232 return DAG.getMergeValues({Even, Odd}, DL); 8233 } 8234 8235 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, 8236 SelectionDAG &DAG) const { 8237 SDLoc DL(Op); 8238 MVT VecVT = Op.getSimpleValueType(); 8239 8240 assert(VecVT.isScalableVector() && 8241 "vector_interleave on non-scalable vector!"); 8242 8243 // i1 vectors need to be widened to i8 8244 if (VecVT.getVectorElementType() == MVT::i1) 8245 return widenVectorOpsToi8(Op, DL, DAG); 8246 8247 MVT XLenVT = Subtarget.getXLenVT(); 8248 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); 8249 8250 // If the VT is LMUL=8, we need to split and reassemble. 8251 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) { 8252 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 8253 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 8254 EVT SplitVT = Op0Lo.getValueType(); 8255 8256 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 8257 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo); 8258 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 8259 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi); 8260 8261 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 8262 ResLo.getValue(0), ResLo.getValue(1)); 8263 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 8264 ResHi.getValue(0), ResHi.getValue(1)); 8265 return DAG.getMergeValues({Lo, Hi}, DL); 8266 } 8267 8268 SDValue Interleaved; 8269 8270 // If the element type is smaller than ELEN, then we can interleave with 8271 // vwaddu.vv and vwmaccu.vx 8272 if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) { 8273 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL, 8274 DAG, Subtarget); 8275 } else { 8276 // Otherwise, fallback to using vrgathere16.vv 8277 MVT ConcatVT = 8278 MVT::getVectorVT(VecVT.getVectorElementType(), 8279 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 8280 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 8281 Op.getOperand(0), Op.getOperand(1)); 8282 8283 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16); 8284 8285 // 0 1 2 3 4 5 6 7 ... 8286 SDValue StepVec = DAG.getStepVector(DL, IdxVT); 8287 8288 // 1 1 1 1 1 1 1 1 ... 8289 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT)); 8290 8291 // 1 0 1 0 1 0 1 0 ... 8292 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones); 8293 OddMask = DAG.getSetCC( 8294 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask, 8295 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)), 8296 ISD::CondCode::SETNE); 8297 8298 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG)); 8299 8300 // Build up the index vector for interleaving the concatenated vector 8301 // 0 0 1 1 2 2 3 3 ... 
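// The masked ADD_VL below adds VLMax only on the odd lanes (Idx is reused as
// the merge operand and OddMask as the mask), producing the 0 n 1 n+1 ...
// pattern shown next.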
8302 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones); 8303 // 0 n 1 n+1 2 n+2 3 n+3 ... 8304 Idx = 8305 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL); 8306 8307 // Then perform the interleave 8308 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ... 8309 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG); 8310 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT, 8311 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL); 8312 } 8313 8314 // Extract the two halves from the interleaved result 8315 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, 8316 DAG.getVectorIdxConstant(0, DL)); 8317 SDValue Hi = DAG.getNode( 8318 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, 8319 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL)); 8320 8321 return DAG.getMergeValues({Lo, Hi}, DL); 8322 } 8323 8324 // Lower step_vector to the vid instruction. Any non-identity step value must 8325 // be accounted for by manual expansion. 8326 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, 8327 SelectionDAG &DAG) const { 8328 SDLoc DL(Op); 8329 MVT VT = Op.getSimpleValueType(); 8330 assert(VT.isScalableVector() && "Expected scalable vector"); 8331 MVT XLenVT = Subtarget.getXLenVT(); 8332 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 8333 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 8334 uint64_t StepValImm = Op.getConstantOperandVal(0); 8335 if (StepValImm != 1) { 8336 if (isPowerOf2_64(StepValImm)) { 8337 SDValue StepVal = 8338 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 8339 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL); 8340 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); 8341 } else { 8342 SDValue StepVal = lowerScalarSplat( 8343 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), 8344 VL, VT, DL, DAG, Subtarget); 8345 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); 8346 } 8347 } 8348 return StepVec; 8349 } 8350 8351 // Implement vector_reverse using vrgather.vv with indices determined by 8352 // subtracting the id of each element from (VLMAX-1). This will convert 8353 // the indices like so: 8354 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 8355 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 8356 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, 8357 SelectionDAG &DAG) const { 8358 SDLoc DL(Op); 8359 MVT VecVT = Op.getSimpleValueType(); 8360 if (VecVT.getVectorElementType() == MVT::i1) { 8361 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 8362 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0)); 8363 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1); 8364 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2); 8365 } 8366 unsigned EltSize = VecVT.getScalarSizeInBits(); 8367 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 8368 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 8369 unsigned MaxVLMAX = 8370 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 8371 8372 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 8373 MVT IntVT = VecVT.changeVectorElementTypeToInteger(); 8374 8375 // If this is SEW=8 and VLMAX is potentially more than 256, we need 8376 // to use vrgatherei16.vv. 8377 // TODO: It's also possible to use vrgatherei16.vv for other types to 8378 // decrease register width for the index calculation.
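// Why 256: vrgather.vv indices have the same SEW as the data, so an SEW=8
// index element can only address 256 source elements. For example
// (illustrative), with VLEN=2048 and LMUL=8 an SEW=8 vector holds up to 2048
// elements, so indices up to 2047 require 16-bit index elements
// (vrgatherei16.vv).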
8379 if (MaxVLMAX > 256 && EltSize == 8) { 8380 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv. 8381 // Reverse each half, then reassemble them in reverse order. 8382 // NOTE: It's also possible that after splitting, VLMAX no longer 8383 // requires vrgatherei16.vv. 8384 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 8385 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 8386 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT); 8387 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 8388 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 8389 // Reassemble the low and high pieces reversed. 8390 // FIXME: This is a CONCAT_VECTORS. 8391 SDValue Res = 8392 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 8393 DAG.getIntPtrConstant(0, DL)); 8394 return DAG.getNode( 8395 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 8396 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 8397 } 8398 8399 // Just promote the int type to i16 which will double the LMUL. 8400 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 8401 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 8402 } 8403 8404 MVT XLenVT = Subtarget.getXLenVT(); 8405 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 8406 8407 // Calculate VLMAX-1 for the desired SEW. 8408 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT, 8409 computeVLMax(VecVT, DL, DAG), 8410 DAG.getConstant(1, DL, XLenVT)); 8411 8412 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 8413 bool IsRV32E64 = 8414 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 8415 SDValue SplatVL; 8416 if (!IsRV32E64) 8417 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 8418 else 8419 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), 8420 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); 8421 8422 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 8423 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, 8424 DAG.getUNDEF(IntVT), Mask, VL); 8425 8426 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, 8427 DAG.getUNDEF(VecVT), Mask, VL); 8428 } 8429 8430 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, 8431 SelectionDAG &DAG) const { 8432 SDLoc DL(Op); 8433 SDValue V1 = Op.getOperand(0); 8434 SDValue V2 = Op.getOperand(1); 8435 MVT XLenVT = Subtarget.getXLenVT(); 8436 MVT VecVT = Op.getSimpleValueType(); 8437 8438 SDValue VLMax = computeVLMax(VecVT, DL, DAG); 8439 8440 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 8441 SDValue DownOffset, UpOffset; 8442 if (ImmValue >= 0) { 8443 // The operand is a TargetConstant, we need to rebuild it as a regular 8444 // constant. 8445 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 8446 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); 8447 } else { 8448 // The operand is a TargetConstant, we need to rebuild it as a regular 8449 // constant rather than negating the original operand.
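// Worked example (illustrative): vector_splice(V1, V2, -2) sets UpOffset = 2
// and DownOffset = VLMAX - 2, i.e. the last two elements of V1 are slid down
// to positions 0..1 and V2 is slid up by two on top of that result.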
8450 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 8451 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset); 8452 } 8453 8454 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG); 8455 8456 SDValue SlideDown = 8457 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, 8458 DownOffset, TrueMask, UpOffset); 8459 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset, 8460 TrueMask, DAG.getRegister(RISCV::X0, XLenVT), 8461 RISCVII::TAIL_AGNOSTIC); 8462 } 8463 8464 SDValue 8465 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 8466 SelectionDAG &DAG) const { 8467 SDLoc DL(Op); 8468 auto *Load = cast<LoadSDNode>(Op); 8469 8470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 8471 Load->getMemoryVT(), 8472 *Load->getMemOperand()) && 8473 "Expecting a correctly-aligned load"); 8474 8475 MVT VT = Op.getSimpleValueType(); 8476 MVT XLenVT = Subtarget.getXLenVT(); 8477 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8478 8479 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 8480 8481 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 8482 SDValue IntID = DAG.getTargetConstant( 8483 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT); 8484 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; 8485 if (!IsMaskOp) 8486 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8487 Ops.push_back(Load->getBasePtr()); 8488 Ops.push_back(VL); 8489 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 8490 SDValue NewLoad = 8491 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 8492 Load->getMemoryVT(), Load->getMemOperand()); 8493 8494 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 8495 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); 8496 } 8497 8498 SDValue 8499 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 8500 SelectionDAG &DAG) const { 8501 SDLoc DL(Op); 8502 auto *Store = cast<StoreSDNode>(Op); 8503 8504 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 8505 Store->getMemoryVT(), 8506 *Store->getMemOperand()) && 8507 "Expecting a correctly-aligned store"); 8508 8509 SDValue StoreVal = Store->getValue(); 8510 MVT VT = StoreVal.getSimpleValueType(); 8511 MVT XLenVT = Subtarget.getXLenVT(); 8512 8513 // If the size is less than a byte, we need to pad with zeros to make a byte. 8514 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { 8515 VT = MVT::v8i1; 8516 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 8517 DAG.getConstant(0, DL, VT), StoreVal, 8518 DAG.getIntPtrConstant(0, DL)); 8519 } 8520 8521 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8522 8523 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 8524 8525 SDValue NewValue = 8526 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 8527 8528 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 8529 SDValue IntID = DAG.getTargetConstant( 8530 IsMaskOp ?
Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); 8531 return DAG.getMemIntrinsicNode( 8532 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 8533 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, 8534 Store->getMemoryVT(), Store->getMemOperand()); 8535 } 8536 8537 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, 8538 SelectionDAG &DAG) const { 8539 SDLoc DL(Op); 8540 MVT VT = Op.getSimpleValueType(); 8541 8542 const auto *MemSD = cast<MemSDNode>(Op); 8543 EVT MemVT = MemSD->getMemoryVT(); 8544 MachineMemOperand *MMO = MemSD->getMemOperand(); 8545 SDValue Chain = MemSD->getChain(); 8546 SDValue BasePtr = MemSD->getBasePtr(); 8547 8548 SDValue Mask, PassThru, VL; 8549 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { 8550 Mask = VPLoad->getMask(); 8551 PassThru = DAG.getUNDEF(VT); 8552 VL = VPLoad->getVectorLength(); 8553 } else { 8554 const auto *MLoad = cast<MaskedLoadSDNode>(Op); 8555 Mask = MLoad->getMask(); 8556 PassThru = MLoad->getPassThru(); 8557 } 8558 8559 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 8560 8561 MVT XLenVT = Subtarget.getXLenVT(); 8562 8563 MVT ContainerVT = VT; 8564 if (VT.isFixedLengthVector()) { 8565 ContainerVT = getContainerForFixedLengthVector(VT); 8566 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 8567 if (!IsUnmasked) { 8568 MVT MaskVT = getMaskTypeFor(ContainerVT); 8569 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8570 } 8571 } 8572 8573 if (!VL) 8574 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 8575 8576 unsigned IntID = 8577 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 8578 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 8579 if (IsUnmasked) 8580 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8581 else 8582 Ops.push_back(PassThru); 8583 Ops.push_back(BasePtr); 8584 if (!IsUnmasked) 8585 Ops.push_back(Mask); 8586 Ops.push_back(VL); 8587 if (!IsUnmasked) 8588 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 8589 8590 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 8591 8592 SDValue Result = 8593 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 8594 Chain = Result.getValue(1); 8595 8596 if (VT.isFixedLengthVector()) 8597 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 8598 8599 return DAG.getMergeValues({Result, Chain}, DL); 8600 } 8601 8602 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 8603 SelectionDAG &DAG) const { 8604 SDLoc DL(Op); 8605 8606 const auto *MemSD = cast<MemSDNode>(Op); 8607 EVT MemVT = MemSD->getMemoryVT(); 8608 MachineMemOperand *MMO = MemSD->getMemOperand(); 8609 SDValue Chain = MemSD->getChain(); 8610 SDValue BasePtr = MemSD->getBasePtr(); 8611 SDValue Val, Mask, VL; 8612 8613 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 8614 Val = VPStore->getValue(); 8615 Mask = VPStore->getMask(); 8616 VL = VPStore->getVectorLength(); 8617 } else { 8618 const auto *MStore = cast<MaskedStoreSDNode>(Op); 8619 Val = MStore->getValue(); 8620 Mask = MStore->getMask(); 8621 } 8622 8623 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 8624 8625 MVT VT = Val.getSimpleValueType(); 8626 MVT XLenVT = Subtarget.getXLenVT(); 8627 8628 MVT ContainerVT = VT; 8629 if (VT.isFixedLengthVector()) { 8630 ContainerVT = getContainerForFixedLengthVector(VT); 8631 8632 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 8633 if (!IsUnmasked) { 8634 MVT MaskVT = 
getMaskTypeFor(ContainerVT); 8635 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8636 } 8637 } 8638 8639 if (!VL) 8640 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 8641 8642 unsigned IntID = 8643 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; 8644 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 8645 Ops.push_back(Val); 8646 Ops.push_back(BasePtr); 8647 if (!IsUnmasked) 8648 Ops.push_back(Mask); 8649 Ops.push_back(VL); 8650 8651 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 8652 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 8653 } 8654 8655 SDValue 8656 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 8657 SelectionDAG &DAG) const { 8658 MVT InVT = Op.getOperand(0).getSimpleValueType(); 8659 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 8660 8661 MVT VT = Op.getSimpleValueType(); 8662 8663 SDValue Op1 = 8664 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 8665 SDValue Op2 = 8666 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 8667 8668 SDLoc DL(Op); 8669 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, 8670 DAG, Subtarget); 8671 MVT MaskVT = getMaskTypeFor(ContainerVT); 8672 8673 SDValue Cmp = 8674 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 8675 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); 8676 8677 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 8678 } 8679 8680 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, 8681 SelectionDAG &DAG) const { 8682 unsigned Opc = Op.getOpcode(); 8683 SDLoc DL(Op); 8684 SDValue Chain = Op.getOperand(0); 8685 SDValue Op1 = Op.getOperand(1); 8686 SDValue Op2 = Op.getOperand(2); 8687 SDValue CC = Op.getOperand(3); 8688 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 8689 MVT VT = Op.getSimpleValueType(); 8690 MVT InVT = Op1.getSimpleValueType(); 8691 8692 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE 8693 // condition code. 8694 if (Opc == ISD::STRICT_FSETCCS) { 8695 // Expand strict_fsetccs(x, oeq) to 8696 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole)) 8697 SDVTList VTList = Op->getVTList(); 8698 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { 8699 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE); 8700 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, 8701 Op2, OLECCVal); 8702 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2, 8703 Op1, OLECCVal); 8704 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 8705 Tmp1.getValue(1), Tmp2.getValue(1)); 8706 // Tmp1 and Tmp2 might be the same node. 
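// (CSE can fold them into one node when Op1 and Op2 are identical, in which
// case the single OLE compare already gives the OEQ result.)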
8707 if (Tmp1 != Tmp2) 8708 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2); 8709 return DAG.getMergeValues({Tmp1, OutChain}, DL); 8710 } 8711 8712 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)) 8713 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) { 8714 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ); 8715 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, 8716 Op2, OEQCCVal); 8717 SDValue Res = DAG.getNOT(DL, OEQ, VT); 8718 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL); 8719 } 8720 } 8721 8722 MVT ContainerInVT = InVT; 8723 if (InVT.isFixedLengthVector()) { 8724 ContainerInVT = getContainerForFixedLengthVector(InVT); 8725 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget); 8726 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget); 8727 } 8728 MVT MaskVT = getMaskTypeFor(ContainerInVT); 8729 8730 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget); 8731 8732 SDValue Res; 8733 if (Opc == ISD::STRICT_FSETCC && 8734 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE || 8735 CCVal == ISD::SETOLE)) { 8736 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNan. Generate a mask that 8737 // is only active when both input elements are ordered. 8738 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG); 8739 SDValue OrderMask1 = DAG.getNode( 8740 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), 8741 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), 8742 True, VL}); 8743 SDValue OrderMask2 = DAG.getNode( 8744 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), 8745 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), 8746 True, VL}); 8747 Mask = 8748 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL); 8749 // Use Mask as the merge operand to let the result be 0 if either of the 8750 // inputs is unordered. 8751 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL, 8752 DAG.getVTList(MaskVT, MVT::Other), 8753 {Chain, Op1, Op2, CC, Mask, Mask, VL}); 8754 } else { 8755 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL 8756 : RISCVISD::STRICT_FSETCCS_VL; 8757 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other), 8758 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL}); 8759 } 8760 8761 if (VT.isFixedLengthVector()) { 8762 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 8763 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 8764 } 8765 return Res; 8766 } 8767 8768 // Lower vector ABS to smax(X, sub(0, X)).
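// Illustrative codegen (assumption: typical selection; exact registers vary):
//   vrsub.vi v9, v8, 0
//   vmax.vv  v8, v8, v9
// matching the SUB_VL(zero-splat, X) / SMAX_VL(X, NegX) nodes built below.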
8769 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 8770 SDLoc DL(Op); 8771 MVT VT = Op.getSimpleValueType(); 8772 SDValue X = Op.getOperand(0); 8773 8774 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && 8775 "Unexpected type for ISD::ABS"); 8776 8777 MVT ContainerVT = VT; 8778 if (VT.isFixedLengthVector()) { 8779 ContainerVT = getContainerForFixedLengthVector(VT); 8780 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 8781 } 8782 8783 SDValue Mask, VL; 8784 if (Op->getOpcode() == ISD::VP_ABS) { 8785 Mask = Op->getOperand(1); 8786 if (VT.isFixedLengthVector()) 8787 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 8788 Subtarget); 8789 VL = Op->getOperand(2); 8790 } else 8791 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 8792 8793 SDValue SplatZero = DAG.getNode( 8794 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 8795 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 8796 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, 8797 DAG.getUNDEF(ContainerVT), Mask, VL); 8798 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, 8799 DAG.getUNDEF(ContainerVT), Mask, VL); 8800 8801 if (VT.isFixedLengthVector()) 8802 Max = convertFromScalableVector(VT, Max, DAG, Subtarget); 8803 return Max; 8804 } 8805 8806 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 8807 SDValue Op, SelectionDAG &DAG) const { 8808 SDLoc DL(Op); 8809 MVT VT = Op.getSimpleValueType(); 8810 SDValue Mag = Op.getOperand(0); 8811 SDValue Sign = Op.getOperand(1); 8812 assert(Mag.getValueType() == Sign.getValueType() && 8813 "Can only handle COPYSIGN with matching types."); 8814 8815 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8816 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 8817 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 8818 8819 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 8820 8821 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, 8822 Sign, DAG.getUNDEF(ContainerVT), Mask, VL); 8823 8824 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 8825 } 8826 8827 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 8828 SDValue Op, SelectionDAG &DAG) const { 8829 MVT VT = Op.getSimpleValueType(); 8830 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8831 8832 MVT I1ContainerVT = 8833 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 8834 8835 SDValue CC = 8836 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 8837 SDValue Op1 = 8838 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 8839 SDValue Op2 = 8840 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 8841 8842 SDLoc DL(Op); 8843 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 8844 8845 SDValue Select = 8846 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 8847 8848 return convertFromScalableVector(VT, Select, DAG, Subtarget); 8849 } 8850 8851 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, 8852 SelectionDAG &DAG) const { 8853 unsigned NewOpc = getRISCVVLOp(Op); 8854 bool HasMergeOp = hasMergeOp(NewOpc); 8855 bool HasMask = hasMaskOp(NewOpc); 8856 8857 MVT VT = Op.getSimpleValueType(); 8858 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8859 8860 // Create list of operands by converting existing ones to 
scalable types. 8861 SmallVector<SDValue, 6> Ops; 8862 for (const SDValue &V : Op->op_values()) { 8863 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 8864 8865 // Pass through non-vector operands. 8866 if (!V.getValueType().isVector()) { 8867 Ops.push_back(V); 8868 continue; 8869 } 8870 8871 // "cast" fixed length vector to a scalable vector. 8872 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 8873 "Only fixed length vectors are supported!"); 8874 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 8875 } 8876 8877 SDLoc DL(Op); 8878 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 8879 if (HasMergeOp) 8880 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8881 if (HasMask) 8882 Ops.push_back(Mask); 8883 Ops.push_back(VL); 8884 8885 // StrictFP operations have two result values. Their lowered result should 8886 // have same result count. 8887 if (Op->isStrictFPOpcode()) { 8888 SDValue ScalableRes = 8889 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, 8890 Op->getFlags()); 8891 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 8892 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL); 8893 } 8894 8895 SDValue ScalableRes = 8896 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); 8897 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 8898 } 8899 8900 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 8901 // * Operands of each node are assumed to be in the same order. 8902 // * The EVL operand is promoted from i32 to i64 on RV64. 8903 // * Fixed-length vectors are converted to their scalable-vector container 8904 // types. 8905 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, 8906 unsigned RISCVISDOpc, 8907 bool HasMergeOp) const { 8908 SDLoc DL(Op); 8909 MVT VT = Op.getSimpleValueType(); 8910 SmallVector<SDValue, 4> Ops; 8911 8912 MVT ContainerVT = VT; 8913 if (VT.isFixedLengthVector()) 8914 ContainerVT = getContainerForFixedLengthVector(VT); 8915 8916 for (const auto &OpIdx : enumerate(Op->ops())) { 8917 SDValue V = OpIdx.value(); 8918 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 8919 // Add dummy merge value before the mask. 8920 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index()) 8921 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8922 // Pass through operands which aren't fixed-length vectors. 8923 if (!V.getValueType().isFixedLengthVector()) { 8924 Ops.push_back(V); 8925 continue; 8926 } 8927 // "cast" fixed length vector to a scalable vector. 8928 MVT OpVT = V.getSimpleValueType(); 8929 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 8930 assert(useRVVForFixedLengthVectorVT(OpVT) && 8931 "Only fixed length vectors are supported!"); 8932 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 8933 } 8934 8935 if (!VT.isFixedLengthVector()) 8936 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); 8937 8938 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); 8939 8940 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 8941 } 8942 8943 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, 8944 SelectionDAG &DAG) const { 8945 SDLoc DL(Op); 8946 MVT VT = Op.getSimpleValueType(); 8947 8948 SDValue Src = Op.getOperand(0); 8949 // NOTE: Mask is dropped. 
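// Sketch of the lowering built below: an extend from i1 becomes a vector
// select between two splats, e.g.
//   vp.zext <vscale x 2 x i1> %m to <vscale x 2 x i32>
//     -> vselect_vl %m, splat(1), splat(0), evl
// with splat(-1) in place of splat(1) for vp.sext.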
8950 SDValue VL = Op.getOperand(2); 8951 8952 MVT ContainerVT = VT; 8953 if (VT.isFixedLengthVector()) { 8954 ContainerVT = getContainerForFixedLengthVector(VT); 8955 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 8956 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 8957 } 8958 8959 MVT XLenVT = Subtarget.getXLenVT(); 8960 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 8961 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8962 DAG.getUNDEF(ContainerVT), Zero, VL); 8963 8964 SDValue SplatValue = DAG.getConstant( 8965 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); 8966 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8967 DAG.getUNDEF(ContainerVT), SplatValue, VL); 8968 8969 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src, 8970 Splat, ZeroSplat, VL); 8971 if (!VT.isFixedLengthVector()) 8972 return Result; 8973 return convertFromScalableVector(VT, Result, DAG, Subtarget); 8974 } 8975 8976 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, 8977 SelectionDAG &DAG) const { 8978 SDLoc DL(Op); 8979 MVT VT = Op.getSimpleValueType(); 8980 8981 SDValue Op1 = Op.getOperand(0); 8982 SDValue Op2 = Op.getOperand(1); 8983 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 8984 // NOTE: Mask is dropped. 8985 SDValue VL = Op.getOperand(4); 8986 8987 MVT ContainerVT = VT; 8988 if (VT.isFixedLengthVector()) { 8989 ContainerVT = getContainerForFixedLengthVector(VT); 8990 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 8991 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 8992 } 8993 8994 SDValue Result; 8995 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 8996 8997 switch (Condition) { 8998 default: 8999 break; 9000 // X != Y --> (X^Y) 9001 case ISD::SETNE: 9002 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 9003 break; 9004 // X == Y --> ~(X^Y) 9005 case ISD::SETEQ: { 9006 SDValue Temp = 9007 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 9008 Result = 9009 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); 9010 break; 9011 } 9012 // X >s Y --> X == 0 & Y == 1 --> ~X & Y 9013 // X <u Y --> X == 0 & Y == 1 --> ~X & Y 9014 case ISD::SETGT: 9015 case ISD::SETULT: { 9016 SDValue Temp = 9017 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 9018 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); 9019 break; 9020 } 9021 // X <s Y --> X == 1 & Y == 0 --> ~Y & X 9022 // X >u Y --> X == 1 & Y == 0 --> ~Y & X 9023 case ISD::SETLT: 9024 case ISD::SETUGT: { 9025 SDValue Temp = 9026 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 9027 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); 9028 break; 9029 } 9030 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 9031 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 9032 case ISD::SETGE: 9033 case ISD::SETULE: { 9034 SDValue Temp = 9035 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 9036 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); 9037 break; 9038 } 9039 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 9040 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 9041 case ISD::SETLE: 9042 case ISD::SETUGE: { 9043 SDValue Temp = 9044 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 9045 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); 9046 break; 9047 } 9048 
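// Illustrative check of one mapping above: for i1 values, X <u Y holds only
// for X=0, Y=1, which is exactly ~X & Y:
//   X Y | X<uY ~X&Y
//   0 0 |  0    0
//   0 1 |  1    1
//   1 0 |  0    0
//   1 1 |  0    0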
} 9049 9050 if (!VT.isFixedLengthVector()) 9051 return Result; 9052 return convertFromScalableVector(VT, Result, DAG, Subtarget); 9053 } 9054 9055 // Lower Floating-Point/Integer Type-Convert VP SDNodes 9056 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, 9057 unsigned RISCVISDOpc) const { 9058 SDLoc DL(Op); 9059 9060 SDValue Src = Op.getOperand(0); 9061 SDValue Mask = Op.getOperand(1); 9062 SDValue VL = Op.getOperand(2); 9063 9064 MVT DstVT = Op.getSimpleValueType(); 9065 MVT SrcVT = Src.getSimpleValueType(); 9066 if (DstVT.isFixedLengthVector()) { 9067 DstVT = getContainerForFixedLengthVector(DstVT); 9068 SrcVT = getContainerForFixedLengthVector(SrcVT); 9069 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 9070 MVT MaskVT = getMaskTypeFor(DstVT); 9071 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9072 } 9073 9074 unsigned DstEltSize = DstVT.getScalarSizeInBits(); 9075 unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); 9076 9077 SDValue Result; 9078 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. 9079 if (SrcVT.isInteger()) { 9080 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 9081 9082 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL 9083 ? RISCVISD::VSEXT_VL 9084 : RISCVISD::VZEXT_VL; 9085 9086 // Do we need to do any pre-widening before converting? 9087 if (SrcEltSize == 1) { 9088 MVT IntVT = DstVT.changeVectorElementTypeToInteger(); 9089 MVT XLenVT = Subtarget.getXLenVT(); 9090 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 9091 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 9092 DAG.getUNDEF(IntVT), Zero, VL); 9093 SDValue One = DAG.getConstant( 9094 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); 9095 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 9096 DAG.getUNDEF(IntVT), One, VL); 9097 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat, 9098 ZeroSplat, VL); 9099 } else if (DstEltSize > (2 * SrcEltSize)) { 9100 // Widen before converting. 9101 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), 9102 DstVT.getVectorElementCount()); 9103 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); 9104 } 9105 9106 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 9107 } else { 9108 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 9109 "Wrong input/output vector types"); 9110 9111 // Convert f16 to f32 then convert f32 to i64. 9112 if (DstEltSize > (2 * SrcEltSize)) { 9113 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 9114 MVT InterimFVT = 9115 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 9116 Src = 9117 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); 9118 } 9119 9120 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 9121 } 9122 } else { // Narrowing + Conversion 9123 if (SrcVT.isInteger()) { 9124 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 9125 // First do a narrowing convert to an FP type half the size, then round 9126 // the FP type to a small FP type if needed. 
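// For example (sketch): nxv2i64 -> nxv2f16 first narrows i64 -> f32
// (typically vfncvt.f.x.w / vfncvt.f.xu.w), then the FP_ROUND_VL below
// narrows f32 -> f16 (vfncvt.f.f.w).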
9127 9128 MVT InterimFVT = DstVT; 9129 if (SrcEltSize > (2 * DstEltSize)) { 9130 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); 9131 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 9132 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 9133 } 9134 9135 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); 9136 9137 if (InterimFVT != DstVT) { 9138 Src = Result; 9139 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); 9140 } 9141 } else { 9142 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 9143 "Wrong input/output vector types"); 9144 // First do a narrowing conversion to an integer half the size, then 9145 // truncate if needed. 9146 9147 if (DstEltSize == 1) { 9148 // First convert to the same size integer, then convert to mask using 9149 // setcc. 9150 assert(SrcEltSize >= 16 && "Unexpected FP type!"); 9151 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), 9152 DstVT.getVectorElementCount()); 9153 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 9154 9155 // Compare the integer result to 0. The integer should be 0 or 1/-1, 9156 // otherwise the conversion was undefined. 9157 MVT XLenVT = Subtarget.getXLenVT(); 9158 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 9159 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, 9160 DAG.getUNDEF(InterimIVT), SplatZero, VL); 9161 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, 9162 {Result, SplatZero, DAG.getCondCode(ISD::SETNE), 9163 DAG.getUNDEF(DstVT), Mask, VL}); 9164 } else { 9165 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 9166 DstVT.getVectorElementCount()); 9167 9168 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 9169 9170 while (InterimIVT != DstVT) { 9171 SrcEltSize /= 2; 9172 Src = Result; 9173 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 9174 DstVT.getVectorElementCount()); 9175 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, 9176 Src, Mask, VL); 9177 } 9178 } 9179 } 9180 } 9181 9182 MVT VT = Op.getSimpleValueType(); 9183 if (!VT.isFixedLengthVector()) 9184 return Result; 9185 return convertFromScalableVector(VT, Result, DAG, Subtarget); 9186 } 9187 9188 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, 9189 unsigned MaskOpc, 9190 unsigned VecOpc) const { 9191 MVT VT = Op.getSimpleValueType(); 9192 if (VT.getVectorElementType() != MVT::i1) 9193 return lowerVPOp(Op, DAG, VecOpc, true); 9194 9195 // It is safe to drop mask parameter as masked-off elements are undef. 
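// For example, a masked vp.and on i1 vectors such as
//   call <vscale x 4 x i1> @llvm.vp.and.nxv4i1(%a, %b, %m, i32 %evl)
// only has defined results in lanes where %m is set, so lowering it as a
// plain mask-register AND (e.g. vmand.mm) over %evl elements is a valid
// refinement (sketch; the exact instruction depends on MaskOpc).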
9196 SDValue Op1 = Op->getOperand(0); 9197 SDValue Op2 = Op->getOperand(1); 9198 SDValue VL = Op->getOperand(3); 9199 9200 MVT ContainerVT = VT; 9201 const bool IsFixed = VT.isFixedLengthVector(); 9202 if (IsFixed) { 9203 ContainerVT = getContainerForFixedLengthVector(VT); 9204 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 9205 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 9206 } 9207 9208 SDLoc DL(Op); 9209 SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); 9210 if (!IsFixed) 9211 return Val; 9212 return convertFromScalableVector(VT, Val, DAG, Subtarget); 9213 } 9214 9215 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, 9216 SelectionDAG &DAG) const { 9217 SDLoc DL(Op); 9218 MVT XLenVT = Subtarget.getXLenVT(); 9219 MVT VT = Op.getSimpleValueType(); 9220 MVT ContainerVT = VT; 9221 if (VT.isFixedLengthVector()) 9222 ContainerVT = getContainerForFixedLengthVector(VT); 9223 9224 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 9225 9226 auto *VPNode = cast<VPStridedLoadSDNode>(Op); 9227 // Check if the mask is known to be all ones 9228 SDValue Mask = VPNode->getMask(); 9229 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9230 9231 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse 9232 : Intrinsic::riscv_vlse_mask, 9233 DL, XLenVT); 9234 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, 9235 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), 9236 VPNode->getStride()}; 9237 if (!IsUnmasked) { 9238 if (VT.isFixedLengthVector()) { 9239 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 9240 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9241 } 9242 Ops.push_back(Mask); 9243 } 9244 Ops.push_back(VPNode->getVectorLength()); 9245 if (!IsUnmasked) { 9246 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 9247 Ops.push_back(Policy); 9248 } 9249 9250 SDValue Result = 9251 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 9252 VPNode->getMemoryVT(), VPNode->getMemOperand()); 9253 SDValue Chain = Result.getValue(1); 9254 9255 if (VT.isFixedLengthVector()) 9256 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 9257 9258 return DAG.getMergeValues({Result, Chain}, DL); 9259 } 9260 9261 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, 9262 SelectionDAG &DAG) const { 9263 SDLoc DL(Op); 9264 MVT XLenVT = Subtarget.getXLenVT(); 9265 9266 auto *VPNode = cast<VPStridedStoreSDNode>(Op); 9267 SDValue StoreVal = VPNode->getValue(); 9268 MVT VT = StoreVal.getSimpleValueType(); 9269 MVT ContainerVT = VT; 9270 if (VT.isFixedLengthVector()) { 9271 ContainerVT = getContainerForFixedLengthVector(VT); 9272 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 9273 } 9274 9275 // Check if the mask is known to be all ones 9276 SDValue Mask = VPNode->getMask(); 9277 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9278 9279 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
Intrinsic::riscv_vsse 9280 : Intrinsic::riscv_vsse_mask, 9281 DL, XLenVT); 9282 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, 9283 VPNode->getBasePtr(), VPNode->getStride()}; 9284 if (!IsUnmasked) { 9285 if (VT.isFixedLengthVector()) { 9286 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 9287 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9288 } 9289 Ops.push_back(Mask); 9290 } 9291 Ops.push_back(VPNode->getVectorLength()); 9292 9293 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), 9294 Ops, VPNode->getMemoryVT(), 9295 VPNode->getMemOperand()); 9296 } 9297 9298 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 9299 // matched to a RVV indexed load. The RVV indexed load instructions only 9300 // support the "unsigned unscaled" addressing mode; indices are implicitly 9301 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 9302 // signed or scaled indexing is extended to the XLEN value type and scaled 9303 // accordingly. 9304 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 9305 SelectionDAG &DAG) const { 9306 SDLoc DL(Op); 9307 MVT VT = Op.getSimpleValueType(); 9308 9309 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 9310 EVT MemVT = MemSD->getMemoryVT(); 9311 MachineMemOperand *MMO = MemSD->getMemOperand(); 9312 SDValue Chain = MemSD->getChain(); 9313 SDValue BasePtr = MemSD->getBasePtr(); 9314 9315 ISD::LoadExtType LoadExtType; 9316 SDValue Index, Mask, PassThru, VL; 9317 9318 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 9319 Index = VPGN->getIndex(); 9320 Mask = VPGN->getMask(); 9321 PassThru = DAG.getUNDEF(VT); 9322 VL = VPGN->getVectorLength(); 9323 // VP doesn't support extending loads. 9324 LoadExtType = ISD::NON_EXTLOAD; 9325 } else { 9326 // Else it must be a MGATHER. 9327 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 9328 Index = MGN->getIndex(); 9329 Mask = MGN->getMask(); 9330 PassThru = MGN->getPassThru(); 9331 LoadExtType = MGN->getExtensionType(); 9332 } 9333 9334 MVT IndexVT = Index.getSimpleValueType(); 9335 MVT XLenVT = Subtarget.getXLenVT(); 9336 9337 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 9338 "Unexpected VTs!"); 9339 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 9340 // Targets have to explicitly opt-in for extending vector loads. 9341 assert(LoadExtType == ISD::NON_EXTLOAD && 9342 "Unexpected extending MGATHER/VP_GATHER"); 9343 (void)LoadExtType; 9344 9345 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 9346 // the selection of the masked intrinsics doesn't do this for us. 
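// For example, a vp.gather or masked.gather whose mask operand is known to
// be all ones takes the riscv_vluxei path below rather than
// riscv_vluxei_mask, so no v0 mask has to be materialized (the index
// handling is the same either way: byte offsets, zero-extended or truncated
// to XLEN as described above).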
9347 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9348 9349 MVT ContainerVT = VT; 9350 if (VT.isFixedLengthVector()) { 9351 ContainerVT = getContainerForFixedLengthVector(VT); 9352 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 9353 ContainerVT.getVectorElementCount()); 9354 9355 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 9356 9357 if (!IsUnmasked) { 9358 MVT MaskVT = getMaskTypeFor(ContainerVT); 9359 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9360 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 9361 } 9362 } 9363 9364 if (!VL) 9365 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 9366 9367 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 9368 IndexVT = IndexVT.changeVectorElementType(XLenVT); 9369 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), 9370 VL); 9371 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, 9372 TrueMask, VL); 9373 } 9374 9375 unsigned IntID = 9376 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 9377 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 9378 if (IsUnmasked) 9379 Ops.push_back(DAG.getUNDEF(ContainerVT)); 9380 else 9381 Ops.push_back(PassThru); 9382 Ops.push_back(BasePtr); 9383 Ops.push_back(Index); 9384 if (!IsUnmasked) 9385 Ops.push_back(Mask); 9386 Ops.push_back(VL); 9387 if (!IsUnmasked) 9388 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 9389 9390 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 9391 SDValue Result = 9392 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 9393 Chain = Result.getValue(1); 9394 9395 if (VT.isFixedLengthVector()) 9396 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 9397 9398 return DAG.getMergeValues({Result, Chain}, DL); 9399 } 9400 9401 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 9402 // matched to a RVV indexed store. The RVV indexed store instructions only 9403 // support the "unsigned unscaled" addressing mode; indices are implicitly 9404 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 9405 // signed or scaled indexing is extended to the XLEN value type and scaled 9406 // accordingly. 9407 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 9408 SelectionDAG &DAG) const { 9409 SDLoc DL(Op); 9410 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 9411 EVT MemVT = MemSD->getMemoryVT(); 9412 MachineMemOperand *MMO = MemSD->getMemOperand(); 9413 SDValue Chain = MemSD->getChain(); 9414 SDValue BasePtr = MemSD->getBasePtr(); 9415 9416 bool IsTruncatingStore = false; 9417 SDValue Index, Mask, Val, VL; 9418 9419 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 9420 Index = VPSN->getIndex(); 9421 Mask = VPSN->getMask(); 9422 Val = VPSN->getValue(); 9423 VL = VPSN->getVectorLength(); 9424 // VP doesn't support truncating stores. 9425 IsTruncatingStore = false; 9426 } else { 9427 // Else it must be a MSCATTER. 
9428 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 9429 Index = MSN->getIndex(); 9430 Mask = MSN->getMask(); 9431 Val = MSN->getValue(); 9432 IsTruncatingStore = MSN->isTruncatingStore(); 9433 } 9434 9435 MVT VT = Val.getSimpleValueType(); 9436 MVT IndexVT = Index.getSimpleValueType(); 9437 MVT XLenVT = Subtarget.getXLenVT(); 9438 9439 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 9440 "Unexpected VTs!"); 9441 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 9442 // Targets have to explicitly opt-in for extending vector loads and 9443 // truncating vector stores. 9444 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 9445 (void)IsTruncatingStore; 9446 9447 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 9448 // the selection of the masked intrinsics doesn't do this for us. 9449 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9450 9451 MVT ContainerVT = VT; 9452 if (VT.isFixedLengthVector()) { 9453 ContainerVT = getContainerForFixedLengthVector(VT); 9454 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 9455 ContainerVT.getVectorElementCount()); 9456 9457 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 9458 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 9459 9460 if (!IsUnmasked) { 9461 MVT MaskVT = getMaskTypeFor(ContainerVT); 9462 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9463 } 9464 } 9465 9466 if (!VL) 9467 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 9468 9469 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 9470 IndexVT = IndexVT.changeVectorElementType(XLenVT); 9471 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), 9472 VL); 9473 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, 9474 TrueMask, VL); 9475 } 9476 9477 unsigned IntID = 9478 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 9479 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 9480 Ops.push_back(Val); 9481 Ops.push_back(BasePtr); 9482 Ops.push_back(Index); 9483 if (!IsUnmasked) 9484 Ops.push_back(Mask); 9485 Ops.push_back(VL); 9486 9487 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 9488 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 9489 } 9490 9491 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 9492 SelectionDAG &DAG) const { 9493 const MVT XLenVT = Subtarget.getXLenVT(); 9494 SDLoc DL(Op); 9495 SDValue Chain = Op->getOperand(0); 9496 SDValue SysRegNo = DAG.getTargetConstant( 9497 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 9498 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 9499 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 9500 9501 // Encoding used for rounding mode in RISC-V differs from that used in 9502 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 9503 // table, which consists of a sequence of 4-bit fields, each representing 9504 // corresponding FLT_ROUNDS mode. 
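// Worked example, following directly from the table layout below: if FRM
// currently holds RISCVFPRndMode::RTZ, then
//   (Table >> (4 * RISCVFPRndMode::RTZ)) & 7 == int(RoundingMode::TowardZero)
// which is the value GET_ROUNDING is expected to produce for that mode.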
9505 static const int Table = 9506 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 9507 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 9508 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 9509 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 9510 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 9511 9512 SDValue Shift = 9513 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 9514 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 9515 DAG.getConstant(Table, DL, XLenVT), Shift); 9516 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 9517 DAG.getConstant(7, DL, XLenVT)); 9518 9519 return DAG.getMergeValues({Masked, Chain}, DL); 9520 } 9521 9522 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 9523 SelectionDAG &DAG) const { 9524 const MVT XLenVT = Subtarget.getXLenVT(); 9525 SDLoc DL(Op); 9526 SDValue Chain = Op->getOperand(0); 9527 SDValue RMValue = Op->getOperand(1); 9528 SDValue SysRegNo = DAG.getTargetConstant( 9529 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 9530 9531 // Encoding used for rounding mode in RISC-V differs from that used in 9532 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in 9533 // a table, which consists of a sequence of 4-bit fields, each representing 9534 // corresponding RISC-V mode. 9535 static const unsigned Table = 9536 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | 9537 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | 9538 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | 9539 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | 9540 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); 9541 9542 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, 9543 DAG.getConstant(2, DL, XLenVT)); 9544 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 9545 DAG.getConstant(Table, DL, XLenVT), Shift); 9546 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 9547 DAG.getConstant(0x7, DL, XLenVT)); 9548 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, 9549 RMValue); 9550 } 9551 9552 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 9553 SelectionDAG &DAG) const { 9554 MachineFunction &MF = DAG.getMachineFunction(); 9555 9556 bool isRISCV64 = Subtarget.is64Bit(); 9557 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 9558 9559 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false); 9560 return DAG.getFrameIndex(FI, PtrVT); 9561 } 9562 9563 // Returns the opcode of the target-specific SDNode that implements the 32-bit 9564 // form of the given Opcode. 9565 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 9566 switch (Opcode) { 9567 default: 9568 llvm_unreachable("Unexpected opcode"); 9569 case ISD::SHL: 9570 return RISCVISD::SLLW; 9571 case ISD::SRA: 9572 return RISCVISD::SRAW; 9573 case ISD::SRL: 9574 return RISCVISD::SRLW; 9575 case ISD::SDIV: 9576 return RISCVISD::DIVW; 9577 case ISD::UDIV: 9578 return RISCVISD::DIVUW; 9579 case ISD::UREM: 9580 return RISCVISD::REMUW; 9581 case ISD::ROTL: 9582 return RISCVISD::ROLW; 9583 case ISD::ROTR: 9584 return RISCVISD::RORW; 9585 } 9586 } 9587 9588 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 9589 // node. 
Because i8/i16/i32 isn't a legal type for RV64, these operations would
9590 // otherwise be promoted to i64, making it difficult to select the
9591 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally
9592 // of type i8/i16/i32 is lost.
9593 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
9594 unsigned ExtOpc = ISD::ANY_EXTEND) {
9595 SDLoc DL(N);
9596 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
9597 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
9598 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
9599 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
9600 // ReplaceNodeResults requires we maintain the same type for the return value.
9601 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
9602 }
9603
9604 // Converts the given 32-bit operation to an i64 operation with sign-extension
9605 // semantics to reduce the number of sign-extension instructions.
9606 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
9607 SDLoc DL(N);
9608 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9609 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
9610 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
9611 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
9612 DAG.getValueType(MVT::i32));
9613 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
9614 }
9615
9616 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
9617 SmallVectorImpl<SDValue> &Results,
9618 SelectionDAG &DAG) const {
9619 SDLoc DL(N);
9620 switch (N->getOpcode()) {
9621 default:
9622 llvm_unreachable("Don't know how to custom type legalize this operation!");
9623 case ISD::STRICT_FP_TO_SINT:
9624 case ISD::STRICT_FP_TO_UINT:
9625 case ISD::FP_TO_SINT:
9626 case ISD::FP_TO_UINT: {
9627 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9628 "Unexpected custom legalisation");
9629 bool IsStrict = N->isStrictFPOpcode();
9630 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
9631 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
9632 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
9633 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
9634 TargetLowering::TypeSoftenFloat) {
9635 if (!isTypeLegal(Op0.getValueType()))
9636 return;
9637 if (IsStrict) {
9638 SDValue Chain = N->getOperand(0);
9639 // In the absence of Zfh, promote f16 to f32, then convert.
9640 if (Op0.getValueType() == MVT::f16 &&
9641 !Subtarget.hasStdExtZfhOrZhinx()) {
9642 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
9643 {Chain, Op0});
9644 Chain = Op0.getValue(1);
9645 }
9646 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
9647 : RISCVISD::STRICT_FCVT_WU_RV64;
9648 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
9649 SDValue Res = DAG.getNode(
9650 Opc, DL, VTs, Chain, Op0,
9651 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
9652 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9653 Results.push_back(Res.getValue(1));
9654 return;
9655 }
9656 // In the absence of Zfh, promote f16 to f32, then convert.
9657 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
9658 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
9659
9660 unsigned Opc = IsSigned ?
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 9661 SDValue Res = 9662 DAG.getNode(Opc, DL, MVT::i64, Op0, 9663 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 9664 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9665 return; 9666 } 9667 // If the FP type needs to be softened, emit a library call using the 'si' 9668 // version. If we left it to default legalization we'd end up with 'di'. If 9669 // the FP type doesn't need to be softened just let generic type 9670 // legalization promote the result type. 9671 RTLIB::Libcall LC; 9672 if (IsSigned) 9673 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 9674 else 9675 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 9676 MakeLibCallOptions CallOptions; 9677 EVT OpVT = Op0.getValueType(); 9678 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 9679 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 9680 SDValue Result; 9681 std::tie(Result, Chain) = 9682 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 9683 Results.push_back(Result); 9684 if (IsStrict) 9685 Results.push_back(Chain); 9686 break; 9687 } 9688 case ISD::LROUND: { 9689 SDValue Op0 = N->getOperand(0); 9690 EVT Op0VT = Op0.getValueType(); 9691 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 9692 TargetLowering::TypeSoftenFloat) { 9693 if (!isTypeLegal(Op0VT)) 9694 return; 9695 9696 // In absense of Zfh, promote f16 to f32, then convert. 9697 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) 9698 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 9699 9700 SDValue Res = 9701 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, 9702 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); 9703 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9704 return; 9705 } 9706 // If the FP type needs to be softened, emit a library call to lround. We'll 9707 // need to truncate the result. We assume any value that doesn't fit in i32 9708 // is allowed to return an unspecified value. 9709 RTLIB::Libcall LC = 9710 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; 9711 MakeLibCallOptions CallOptions; 9712 EVT OpVT = Op0.getValueType(); 9713 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); 9714 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; 9715 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); 9716 Results.push_back(Result); 9717 break; 9718 } 9719 case ISD::READCYCLECOUNTER: { 9720 assert(!Subtarget.is64Bit() && 9721 "READCYCLECOUNTER only has custom type legalization on riscv32"); 9722 9723 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 9724 SDValue RCW = 9725 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 9726 9727 Results.push_back( 9728 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 9729 Results.push_back(RCW.getValue(2)); 9730 break; 9731 } 9732 case ISD::LOAD: { 9733 if (!ISD::isNON_EXTLoad(N)) 9734 return; 9735 9736 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 9737 // sext_inreg we emit for ADD/SUB/MUL/SLLI. 
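// For example (illustrative), an i32 load on RV64 whose result feeds an i32
// add: a SEXTLOAD selects lw, which keeps the value sign-extended to 64
// bits, matching the sign-extended form ADDW produces, so no extra sext.w
// is needed where the two values meet.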
9738 LoadSDNode *Ld = cast<LoadSDNode>(N); 9739 9740 SDLoc dl(N); 9741 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), 9742 Ld->getBasePtr(), Ld->getMemoryVT(), 9743 Ld->getMemOperand()); 9744 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); 9745 Results.push_back(Res.getValue(1)); 9746 return; 9747 } 9748 case ISD::MUL: { 9749 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 9750 unsigned XLen = Subtarget.getXLen(); 9751 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 9752 if (Size > XLen) { 9753 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 9754 SDValue LHS = N->getOperand(0); 9755 SDValue RHS = N->getOperand(1); 9756 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 9757 9758 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 9759 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 9760 // We need exactly one side to be unsigned. 9761 if (LHSIsU == RHSIsU) 9762 return; 9763 9764 auto MakeMULPair = [&](SDValue S, SDValue U) { 9765 MVT XLenVT = Subtarget.getXLenVT(); 9766 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 9767 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 9768 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 9769 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 9770 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 9771 }; 9772 9773 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 9774 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 9775 9776 // The other operand should be signed, but still prefer MULH when 9777 // possible. 9778 if (RHSIsU && LHSIsS && !RHSIsS) 9779 Results.push_back(MakeMULPair(LHS, RHS)); 9780 else if (LHSIsU && RHSIsS && !LHSIsS) 9781 Results.push_back(MakeMULPair(RHS, LHS)); 9782 9783 return; 9784 } 9785 [[fallthrough]]; 9786 } 9787 case ISD::ADD: 9788 case ISD::SUB: 9789 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9790 "Unexpected custom legalisation"); 9791 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 9792 break; 9793 case ISD::SHL: 9794 case ISD::SRA: 9795 case ISD::SRL: 9796 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9797 "Unexpected custom legalisation"); 9798 if (N->getOperand(1).getOpcode() != ISD::Constant) { 9799 // If we can use a BSET instruction, allow default promotion to apply. 9800 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && 9801 isOneConstant(N->getOperand(0))) 9802 break; 9803 Results.push_back(customLegalizeToWOp(N, DAG)); 9804 break; 9805 } 9806 9807 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 9808 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 9809 // shift amount. 
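// Illustrative shape of the nodes built below for (shl i32 %x, %y) on RV64:
//   t0 = any_extend %x to i64
//   t1 = zero_extend %y to i64   ; the amount must not carry garbage bits
//   t2 = shl i64 t0, t1
//   t3 = sign_extend_inreg t2, i32
//   r  = truncate t3 to i32      ; this shape typically selects SLLW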
9810 if (N->getOpcode() == ISD::SHL) { 9811 SDLoc DL(N); 9812 SDValue NewOp0 = 9813 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 9814 SDValue NewOp1 = 9815 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 9816 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 9817 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 9818 DAG.getValueType(MVT::i32)); 9819 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 9820 } 9821 9822 break; 9823 case ISD::ROTL: 9824 case ISD::ROTR: 9825 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9826 "Unexpected custom legalisation"); 9827 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 9828 Subtarget.hasVendorXTHeadBb()) && 9829 "Unexpected custom legalization"); 9830 if (!isa<ConstantSDNode>(N->getOperand(1)) && 9831 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) 9832 return; 9833 Results.push_back(customLegalizeToWOp(N, DAG)); 9834 break; 9835 case ISD::CTTZ: 9836 case ISD::CTTZ_ZERO_UNDEF: 9837 case ISD::CTLZ: 9838 case ISD::CTLZ_ZERO_UNDEF: { 9839 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9840 "Unexpected custom legalisation"); 9841 9842 SDValue NewOp0 = 9843 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 9844 bool IsCTZ = 9845 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 9846 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 9847 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 9848 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9849 return; 9850 } 9851 case ISD::SDIV: 9852 case ISD::UDIV: 9853 case ISD::UREM: { 9854 MVT VT = N->getSimpleValueType(0); 9855 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 9856 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 9857 "Unexpected custom legalisation"); 9858 // Don't promote division/remainder by constant since we should expand those 9859 // to multiply by magic constant. 9860 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 9861 if (N->getOperand(1).getOpcode() == ISD::Constant && 9862 !isIntDivCheap(N->getValueType(0), Attr)) 9863 return; 9864 9865 // If the input is i32, use ANY_EXTEND since the W instructions don't read 9866 // the upper 32 bits. For other types we need to sign or zero extend 9867 // based on the opcode. 9868 unsigned ExtOpc = ISD::ANY_EXTEND; 9869 if (VT != MVT::i32) 9870 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 9871 : ISD::ZERO_EXTEND; 9872 9873 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 9874 break; 9875 } 9876 case ISD::SADDO: { 9877 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9878 "Unexpected custom legalisation"); 9879 9880 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 9881 // use the default legalization. 9882 if (!isa<ConstantSDNode>(N->getOperand(1))) 9883 return; 9884 9885 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 9886 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 9887 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); 9888 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 9889 DAG.getValueType(MVT::i32)); 9890 9891 SDValue Zero = DAG.getConstant(0, DL, MVT::i64); 9892 9893 // For an addition, the result should be less than one of the operands (LHS) 9894 // if and only if the other operand (RHS) is negative, otherwise there will 9895 // be overflow. 
9896 // For a subtraction, the result should be less than one of the operands 9897 // (LHS) if and only if the other operand (RHS) is (non-zero) positive, 9898 // otherwise there will be overflow. 9899 EVT OType = N->getValueType(1); 9900 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT); 9901 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT); 9902 9903 SDValue Overflow = 9904 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS); 9905 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9906 Results.push_back(Overflow); 9907 return; 9908 } 9909 case ISD::UADDO: 9910 case ISD::USUBO: { 9911 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9912 "Unexpected custom legalisation"); 9913 bool IsAdd = N->getOpcode() == ISD::UADDO; 9914 // Create an ADDW or SUBW. 9915 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 9916 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 9917 SDValue Res = 9918 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 9919 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 9920 DAG.getValueType(MVT::i32)); 9921 9922 SDValue Overflow; 9923 if (IsAdd && isOneConstant(RHS)) { 9924 // Special case uaddo X, 1 overflowed if the addition result is 0. 9925 // The general case (X + C) < C is not necessarily beneficial. Although we 9926 // reduce the live range of X, we may introduce the materialization of 9927 // constant C, especially when the setcc result is used by branch. We have 9928 // no compare with constant and branch instructions. 9929 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, 9930 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); 9931 } else if (IsAdd && isAllOnesConstant(RHS)) { 9932 // Special case uaddo X, -1 overflowed if X != 0. 9933 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), 9934 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); 9935 } else { 9936 // Sign extend the LHS and perform an unsigned compare with the ADDW 9937 // result. Since the inputs are sign extended from i32, this is equivalent 9938 // to comparing the lower 32 bits. 9939 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 9940 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 9941 IsAdd ? ISD::SETULT : ISD::SETUGT); 9942 } 9943 9944 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9945 Results.push_back(Overflow); 9946 return; 9947 } 9948 case ISD::UADDSAT: 9949 case ISD::USUBSAT: { 9950 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9951 "Unexpected custom legalisation"); 9952 if (Subtarget.hasStdExtZbb()) { 9953 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 9954 // sign extend allows overflow of the lower 32 bits to be detected on 9955 // the promoted size. 9956 SDValue LHS = 9957 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 9958 SDValue RHS = 9959 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 9960 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 9961 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 9962 return; 9963 } 9964 9965 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 9966 // promotion for UADDO/USUBO. 
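// For example (sketch), i32 uaddsat %x, %y then expands along the lines of
//   {sum, ov} = uaddo %x, %y
//   res       = select ov, -1, sum
// and the UADDO handling above supplies the ADDW-based overflow check.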
9967 Results.push_back(expandAddSubSat(N, DAG)); 9968 return; 9969 } 9970 case ISD::ABS: { 9971 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 9972 "Unexpected custom legalisation"); 9973 9974 if (Subtarget.hasStdExtZbb()) { 9975 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. 9976 // This allows us to remember that the result is sign extended. Expanding 9977 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. 9978 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, 9979 N->getOperand(0)); 9980 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); 9981 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); 9982 return; 9983 } 9984 9985 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) 9986 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 9987 9988 // Freeze the source so we can increase it's use count. 9989 Src = DAG.getFreeze(Src); 9990 9991 // Copy sign bit to all bits using the sraiw pattern. 9992 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, 9993 DAG.getValueType(MVT::i32)); 9994 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, 9995 DAG.getConstant(31, DL, MVT::i64)); 9996 9997 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); 9998 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); 9999 10000 // NOTE: The result is only required to be anyextended, but sext is 10001 // consistent with type legalization of sub. 10002 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, 10003 DAG.getValueType(MVT::i32)); 10004 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 10005 return; 10006 } 10007 case ISD::BITCAST: { 10008 EVT VT = N->getValueType(0); 10009 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 10010 SDValue Op0 = N->getOperand(0); 10011 EVT Op0VT = Op0.getValueType(); 10012 MVT XLenVT = Subtarget.getXLenVT(); 10013 if (VT == MVT::i16 && Op0VT == MVT::f16 && 10014 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { 10015 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 10016 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 10017 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && 10018 Subtarget.hasStdExtZfbfmin()) { 10019 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 10020 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 10021 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 10022 Subtarget.hasStdExtFOrZfinx()) { 10023 SDValue FPConv = 10024 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 10025 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 10026 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 && 10027 Subtarget.hasStdExtZfa()) { 10028 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, 10029 DAG.getVTList(MVT::i32, MVT::i32), Op0); 10030 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 10031 NewReg.getValue(0), NewReg.getValue(1)); 10032 Results.push_back(RetReg); 10033 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 10034 isTypeLegal(Op0VT)) { 10035 // Custom-legalize bitcasts from fixed-length vector types to illegal 10036 // scalar types in order to improve codegen. Bitcast the vector to a 10037 // one-element vector type whose element type is the same as the result 10038 // type, and extract the first element. 
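// For example (assuming the vector extension is enabled and v1i32 is
// legal), bitcasting <2 x i16> to i32 on RV64 is rewritten as a bitcast to
// <1 x i32> followed by extract_vector_elt index 0.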
10039 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 10040 if (isTypeLegal(BVT)) { 10041 SDValue BVec = DAG.getBitcast(BVT, Op0); 10042 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 10043 DAG.getConstant(0, DL, XLenVT))); 10044 } 10045 } 10046 break; 10047 } 10048 case RISCVISD::BREV8: { 10049 MVT VT = N->getSimpleValueType(0); 10050 MVT XLenVT = Subtarget.getXLenVT(); 10051 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && 10052 "Unexpected custom legalisation"); 10053 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 10054 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 10055 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); 10056 // ReplaceNodeResults requires we maintain the same type for the return 10057 // value. 10058 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); 10059 break; 10060 } 10061 case ISD::EXTRACT_VECTOR_ELT: { 10062 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 10063 // type is illegal (currently only vXi64 RV32). 10064 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 10065 // transferred to the destination register. We issue two of these from the 10066 // upper- and lower- halves of the SEW-bit vector element, slid down to the 10067 // first element. 10068 SDValue Vec = N->getOperand(0); 10069 SDValue Idx = N->getOperand(1); 10070 10071 // The vector type hasn't been legalized yet so we can't issue target 10072 // specific nodes if it needs legalization. 10073 // FIXME: We would manually legalize if it's important. 10074 if (!isTypeLegal(Vec.getValueType())) 10075 return; 10076 10077 MVT VecVT = Vec.getSimpleValueType(); 10078 10079 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 10080 VecVT.getVectorElementType() == MVT::i64 && 10081 "Unexpected EXTRACT_VECTOR_ELT legalization"); 10082 10083 // If this is a fixed vector, we need to convert it to a scalable vector. 10084 MVT ContainerVT = VecVT; 10085 if (VecVT.isFixedLengthVector()) { 10086 ContainerVT = getContainerForFixedLengthVector(VecVT); 10087 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 10088 } 10089 10090 MVT XLenVT = Subtarget.getXLenVT(); 10091 10092 // Use a VL of 1 to avoid processing more elements than we need. 10093 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 10094 10095 // Unless the index is known to be 0, we must slide the vector down to get 10096 // the desired element into index 0. 10097 if (!isNullConstant(Idx)) { 10098 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 10099 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 10100 } 10101 10102 // Extract the lower XLEN bits of the correct vector element. 10103 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 10104 10105 // To extract the upper XLEN bits of the vector element, shift the first 10106 // element right by 32 bits and re-extract the lower XLEN bits. 
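// Illustrative RV32 sequence (register names are examples, vsetvli elided):
//   vslidedown.vx v8, v8, a2   ; only if the index is not known to be 0
//   vmv.x.s a0, v8             ; low 32 bits
//   vsrl.vx v8, v8, a3         ; a3 holds 32
//   vmv.x.s a1, v8             ; high 32 bits
//   ; the result is built as build_pair(a0, a1)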
10107 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10108 DAG.getUNDEF(ContainerVT), 10109 DAG.getConstant(32, DL, XLenVT), VL); 10110 SDValue LShr32 = 10111 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, 10112 DAG.getUNDEF(ContainerVT), Mask, VL); 10113 10114 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 10115 10116 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 10117 break; 10118 } 10119 case ISD::INTRINSIC_WO_CHAIN: { 10120 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 10121 switch (IntNo) { 10122 default: 10123 llvm_unreachable( 10124 "Don't know how to custom type legalize this intrinsic!"); 10125 case Intrinsic::experimental_get_vector_length: { 10126 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); 10127 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 10128 return; 10129 } 10130 case Intrinsic::riscv_orc_b: 10131 case Intrinsic::riscv_brev8: 10132 case Intrinsic::riscv_sha256sig0: 10133 case Intrinsic::riscv_sha256sig1: 10134 case Intrinsic::riscv_sha256sum0: 10135 case Intrinsic::riscv_sha256sum1: 10136 case Intrinsic::riscv_sm3p0: 10137 case Intrinsic::riscv_sm3p1: { 10138 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 10139 return; 10140 unsigned Opc; 10141 switch (IntNo) { 10142 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 10143 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 10144 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 10145 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 10146 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 10147 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 10148 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 10149 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 10150 } 10151 10152 SDValue NewOp = 10153 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 10154 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 10155 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 10156 return; 10157 } 10158 case Intrinsic::riscv_sm4ks: 10159 case Intrinsic::riscv_sm4ed: { 10160 unsigned Opc = 10161 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 10162 SDValue NewOp0 = 10163 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 10164 SDValue NewOp1 = 10165 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 10166 SDValue Res = 10167 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); 10168 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 10169 return; 10170 } 10171 case Intrinsic::riscv_clmul: { 10172 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 10173 return; 10174 10175 SDValue NewOp0 = 10176 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 10177 SDValue NewOp1 = 10178 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 10179 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 10180 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 10181 return; 10182 } 10183 case Intrinsic::riscv_clmulh: 10184 case Intrinsic::riscv_clmulr: { 10185 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 10186 return; 10187 10188 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 10189 // to the full 128-bit clmul result of multiplying two xlen values. 
10190 // Perform clmulr or clmulh on the shifted values. Finally, extract the 10191 // upper 32 bits. 10192 // 10193 // The alternative is to mask the inputs to 32 bits and use clmul, but 10194 // that requires two shifts to mask each input without zext.w. 10195 // FIXME: If the inputs are known zero extended or could be freely 10196 // zero extended, the mask form would be better. 10197 SDValue NewOp0 = 10198 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 10199 SDValue NewOp1 = 10200 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 10201 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 10202 DAG.getConstant(32, DL, MVT::i64)); 10203 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 10204 DAG.getConstant(32, DL, MVT::i64)); 10205 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH 10206 : RISCVISD::CLMULR; 10207 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 10208 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 10209 DAG.getConstant(32, DL, MVT::i64)); 10210 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 10211 return; 10212 } 10213 case Intrinsic::riscv_vmv_x_s: { 10214 EVT VT = N->getValueType(0); 10215 MVT XLenVT = Subtarget.getXLenVT(); 10216 if (VT.bitsLT(XLenVT)) { 10217 // Simple case just extract using vmv.x.s and truncate. 10218 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 10219 Subtarget.getXLenVT(), N->getOperand(1)); 10220 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 10221 return; 10222 } 10223 10224 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 10225 "Unexpected custom legalization"); 10226 10227 // We need to do the move in two steps. 10228 SDValue Vec = N->getOperand(1); 10229 MVT VecVT = Vec.getSimpleValueType(); 10230 10231 // First extract the lower XLEN bits of the element. 10232 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 10233 10234 // To extract the upper XLEN bits of the vector element, shift the first 10235 // element right by 32 bits and re-extract the lower XLEN bits. 
10236 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget); 10237 10238 SDValue ThirtyTwoV = 10239 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 10240 DAG.getConstant(32, DL, XLenVT), VL); 10241 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, 10242 DAG.getUNDEF(VecVT), Mask, VL); 10243 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 10244 10245 Results.push_back( 10246 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 10247 break; 10248 } 10249 } 10250 break; 10251 } 10252 case ISD::VECREDUCE_ADD: 10253 case ISD::VECREDUCE_AND: 10254 case ISD::VECREDUCE_OR: 10255 case ISD::VECREDUCE_XOR: 10256 case ISD::VECREDUCE_SMAX: 10257 case ISD::VECREDUCE_UMAX: 10258 case ISD::VECREDUCE_SMIN: 10259 case ISD::VECREDUCE_UMIN: 10260 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 10261 Results.push_back(V); 10262 break; 10263 case ISD::VP_REDUCE_ADD: 10264 case ISD::VP_REDUCE_AND: 10265 case ISD::VP_REDUCE_OR: 10266 case ISD::VP_REDUCE_XOR: 10267 case ISD::VP_REDUCE_SMAX: 10268 case ISD::VP_REDUCE_UMAX: 10269 case ISD::VP_REDUCE_SMIN: 10270 case ISD::VP_REDUCE_UMIN: 10271 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) 10272 Results.push_back(V); 10273 break; 10274 case ISD::GET_ROUNDING: { 10275 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); 10276 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0)); 10277 Results.push_back(Res.getValue(0)); 10278 Results.push_back(Res.getValue(1)); 10279 break; 10280 } 10281 } 10282 } 10283 10284 // Try to fold (<bop> x, (reduction.<bop> vec, start)) 10285 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, 10286 const RISCVSubtarget &Subtarget) { 10287 auto BinOpToRVVReduce = [](unsigned Opc) { 10288 switch (Opc) { 10289 default: 10290 llvm_unreachable("Unhandled binary to transfrom reduction"); 10291 case ISD::ADD: 10292 return RISCVISD::VECREDUCE_ADD_VL; 10293 case ISD::UMAX: 10294 return RISCVISD::VECREDUCE_UMAX_VL; 10295 case ISD::SMAX: 10296 return RISCVISD::VECREDUCE_SMAX_VL; 10297 case ISD::UMIN: 10298 return RISCVISD::VECREDUCE_UMIN_VL; 10299 case ISD::SMIN: 10300 return RISCVISD::VECREDUCE_SMIN_VL; 10301 case ISD::AND: 10302 return RISCVISD::VECREDUCE_AND_VL; 10303 case ISD::OR: 10304 return RISCVISD::VECREDUCE_OR_VL; 10305 case ISD::XOR: 10306 return RISCVISD::VECREDUCE_XOR_VL; 10307 case ISD::FADD: 10308 return RISCVISD::VECREDUCE_FADD_VL; 10309 case ISD::FMAXNUM: 10310 return RISCVISD::VECREDUCE_FMAX_VL; 10311 case ISD::FMINNUM: 10312 return RISCVISD::VECREDUCE_FMIN_VL; 10313 } 10314 }; 10315 10316 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { 10317 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 10318 isNullConstant(V.getOperand(1)) && 10319 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); 10320 }; 10321 10322 unsigned Opc = N->getOpcode(); 10323 unsigned ReduceIdx; 10324 if (IsReduction(N->getOperand(0), Opc)) 10325 ReduceIdx = 0; 10326 else if (IsReduction(N->getOperand(1), Opc)) 10327 ReduceIdx = 1; 10328 else 10329 return SDValue(); 10330 10331 // Skip if FADD disallows reassociation but the combiner needs. 
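// For example, rewriting (fadd (reduce.fadd 0.0, %v), %x) into
// (reduce.fadd %x, %v) changes the order in which the partial sums are
// combined, so it is only sound when the reassoc fast-math flag is present.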
10332 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) 10333 return SDValue(); 10334 10335 SDValue Extract = N->getOperand(ReduceIdx); 10336 SDValue Reduce = Extract.getOperand(0); 10337 if (!Extract.hasOneUse() || !Reduce.hasOneUse()) 10338 return SDValue(); 10339 10340 SDValue ScalarV = Reduce.getOperand(2); 10341 EVT ScalarVT = ScalarV.getValueType(); 10342 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && 10343 ScalarV.getOperand(0)->isUndef() && 10344 isNullConstant(ScalarV.getOperand(2))) 10345 ScalarV = ScalarV.getOperand(1); 10346 10347 // Make sure that ScalarV is a splat with VL=1. 10348 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && 10349 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && 10350 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) 10351 return SDValue(); 10352 10353 if (!isNonZeroAVL(ScalarV.getOperand(2))) 10354 return SDValue(); 10355 10356 // Check the scalar of ScalarV is neutral element 10357 // TODO: Deal with value other than neutral element. 10358 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), 10359 0)) 10360 return SDValue(); 10361 10362 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 10363 // FIXME: We might be able to improve this if operand 0 is undef. 10364 if (!isNonZeroAVL(Reduce.getOperand(5))) 10365 return SDValue(); 10366 10367 SDValue NewStart = N->getOperand(1 - ReduceIdx); 10368 10369 SDLoc DL(N); 10370 SDValue NewScalarV = 10371 lowerScalarInsert(NewStart, ScalarV.getOperand(2), 10372 ScalarV.getSimpleValueType(), DL, DAG, Subtarget); 10373 10374 // If we looked through an INSERT_SUBVECTOR we need to restore it. 10375 if (ScalarVT != ScalarV.getValueType()) 10376 NewScalarV = 10377 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), 10378 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 10379 10380 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1), 10381 NewScalarV, Reduce.getOperand(3), 10382 Reduce.getOperand(4), Reduce.getOperand(5)}; 10383 SDValue NewReduce = 10384 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops); 10385 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, 10386 Extract.getOperand(1)); 10387 } 10388 10389 // Optimize (add (shl x, c0), (shl y, c1)) -> 10390 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 10391 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 10392 const RISCVSubtarget &Subtarget) { 10393 // Perform this optimization only in the zba extension. 10394 if (!Subtarget.hasStdExtZba()) 10395 return SDValue(); 10396 10397 // Skip for vector types and larger types. 10398 EVT VT = N->getValueType(0); 10399 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 10400 return SDValue(); 10401 10402 // The two operand nodes must be SHL and have no other use. 10403 SDValue N0 = N->getOperand(0); 10404 SDValue N1 = N->getOperand(1); 10405 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 10406 !N0->hasOneUse() || !N1->hasOneUse()) 10407 return SDValue(); 10408 10409 // Check c0 and c1. 10410 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 10411 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 10412 if (!N0C || !N1C) 10413 return SDValue(); 10414 int64_t C0 = N0C->getSExtValue(); 10415 int64_t C1 = N1C->getSExtValue(); 10416 if (C0 <= 0 || C1 <= 0) 10417 return SDValue(); 10418 10419 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 
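// Worked example: (add (shl x, 5), (shl y, 8)) has c0 = 5 and c1 = 8, so
// Diff = 3 and Bits = 5; it becomes (shl (add (shl y, 3), x), 5), i.e.
// "sh3add y, x" followed by an "slli" by 5.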
10420 int64_t Bits = std::min(C0, C1); 10421 int64_t Diff = std::abs(C0 - C1); 10422 if (Diff != 1 && Diff != 2 && Diff != 3) 10423 return SDValue(); 10424 10425 // Build nodes. 10426 SDLoc DL(N); 10427 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 10428 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); 10429 SDValue NA0 = 10430 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); 10431 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); 10432 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); 10433 } 10434 10435 // Combine a constant select operand into its use: 10436 // 10437 // (and (select cond, -1, c), x) 10438 // -> (select cond, x, (and x, c)) [AllOnes=1] 10439 // (or (select cond, 0, c), x) 10440 // -> (select cond, x, (or x, c)) [AllOnes=0] 10441 // (xor (select cond, 0, c), x) 10442 // -> (select cond, x, (xor x, c)) [AllOnes=0] 10443 // (add (select cond, 0, c), x) 10444 // -> (select cond, x, (add x, c)) [AllOnes=0] 10445 // (sub x, (select cond, 0, c)) 10446 // -> (select cond, x, (sub x, c)) [AllOnes=0] 10447 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 10448 SelectionDAG &DAG, bool AllOnes, 10449 const RISCVSubtarget &Subtarget) { 10450 EVT VT = N->getValueType(0); 10451 10452 // Skip vectors. 10453 if (VT.isVector()) 10454 return SDValue(); 10455 10456 if (!Subtarget.hasShortForwardBranchOpt() || 10457 (Slct.getOpcode() != ISD::SELECT && 10458 Slct.getOpcode() != RISCVISD::SELECT_CC) || 10459 !Slct.hasOneUse()) 10460 return SDValue(); 10461 10462 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { 10463 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); 10464 }; 10465 10466 bool SwapSelectOps; 10467 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; 10468 SDValue TrueVal = Slct.getOperand(1 + OpOffset); 10469 SDValue FalseVal = Slct.getOperand(2 + OpOffset); 10470 SDValue NonConstantVal; 10471 if (isZeroOrAllOnes(TrueVal, AllOnes)) { 10472 SwapSelectOps = false; 10473 NonConstantVal = FalseVal; 10474 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { 10475 SwapSelectOps = true; 10476 NonConstantVal = TrueVal; 10477 } else 10478 return SDValue(); 10479 10480 // Slct is now know to be the desired identity constant when CC is true. 10481 TrueVal = OtherOp; 10482 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); 10483 // Unless SwapSelectOps says the condition should be false. 10484 if (SwapSelectOps) 10485 std::swap(TrueVal, FalseVal); 10486 10487 if (Slct.getOpcode() == RISCVISD::SELECT_CC) 10488 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, 10489 {Slct.getOperand(0), Slct.getOperand(1), 10490 Slct.getOperand(2), TrueVal, FalseVal}); 10491 10492 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 10493 {Slct.getOperand(0), TrueVal, FalseVal}); 10494 } 10495 10496 // Attempt combineSelectAndUse on each operand of a commutative operator N. 10497 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, 10498 bool AllOnes, 10499 const RISCVSubtarget &Subtarget) { 10500 SDValue N0 = N->getOperand(0); 10501 SDValue N1 = N->getOperand(1); 10502 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget)) 10503 return Result; 10504 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget)) 10505 return Result; 10506 return SDValue(); 10507 } 10508 10509 // Transform (add (mul x, c0), c1) -> 10510 // (add (mul (add x, c1/c0), c0), c1%c0). 
10511 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 10512 // that should be excluded is when c0*(c1/c0) is simm12, which will lead 10513 // to an infinite loop in DAGCombine if transformed. 10514 // Or transform (add (mul x, c0), c1) -> 10515 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 10516 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 10517 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will 10518 // lead to an infinite loop in DAGCombine if transformed. 10519 // Or transform (add (mul x, c0), c1) -> 10520 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 10521 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 10522 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will 10523 // lead to an infinite loop in DAGCombine if transformed. 10524 // Or transform (add (mul x, c0), c1) -> 10525 // (mul (add x, c1/c0), c0). 10526 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 10527 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 10528 const RISCVSubtarget &Subtarget) { 10529 // Skip for vector types and larger types. 10530 EVT VT = N->getValueType(0); 10531 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 10532 return SDValue(); 10533 // The first operand node must be a MUL and has no other use. 10534 SDValue N0 = N->getOperand(0); 10535 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 10536 return SDValue(); 10537 // Check if c0 and c1 match above conditions. 10538 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 10539 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 10540 if (!N0C || !N1C) 10541 return SDValue(); 10542 // If N0C has multiple uses it's possible one of the cases in 10543 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result 10544 // in an infinite loop. 10545 if (!N0C->hasOneUse()) 10546 return SDValue(); 10547 int64_t C0 = N0C->getSExtValue(); 10548 int64_t C1 = N1C->getSExtValue(); 10549 int64_t CA, CB; 10550 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 10551 return SDValue(); 10552 // Search for proper CA (non-zero) and CB that both are simm12. 10553 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 10554 !isInt<12>(C0 * (C1 / C0))) { 10555 CA = C1 / C0; 10556 CB = C1 % C0; 10557 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 10558 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 10559 CA = C1 / C0 + 1; 10560 CB = C1 % C0 - C0; 10561 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 10562 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 10563 CA = C1 / C0 - 1; 10564 CB = C1 % C0 + C0; 10565 } else 10566 return SDValue(); 10567 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 10568 SDLoc DL(N); 10569 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 10570 DAG.getConstant(CA, DL, VT)); 10571 SDValue New1 = 10572 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 10573 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 10574 } 10575 10576 // Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)). 10577 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { 10578 SDValue N0 = N->getOperand(0); 10579 SDValue N1 = N->getOperand(1); 10580 EVT VT = N->getValueType(0); 10581 SDLoc DL(N); 10582 10583 // RHS should be -1. 10584 if (!isAllOnesConstant(N1)) 10585 return SDValue(); 10586 10587 // Look for an (xor (setcc X, Y), 1). 
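// Since the setcc produces 0 or 1, (xor (setcc X, Y), 1) equals 1 - setcc,
// and adding -1 gives -setcc, i.e. (sub 0, (setcc X, Y)).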
10588 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) || 10589 N0.getOperand(0).getOpcode() != ISD::SETCC) 10590 return SDValue(); 10591 10592 // Emit a negate of the setcc. 10593 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), 10594 N0.getOperand(0)); 10595 } 10596 10597 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, 10598 const RISCVSubtarget &Subtarget) { 10599 if (SDValue V = combineAddOfBooleanXor(N, DAG)) 10600 return V; 10601 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 10602 return V; 10603 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 10604 return V; 10605 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 10606 return V; 10607 // fold (add (select lhs, rhs, cc, 0, y), x) -> 10608 // (select lhs, rhs, cc, x, (add x, y)) 10609 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 10610 } 10611 10612 // Try to turn a sub boolean RHS and constant LHS into an addi. 10613 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { 10614 SDValue N0 = N->getOperand(0); 10615 SDValue N1 = N->getOperand(1); 10616 EVT VT = N->getValueType(0); 10617 SDLoc DL(N); 10618 10619 // Require a constant LHS. 10620 auto *N0C = dyn_cast<ConstantSDNode>(N0); 10621 if (!N0C) 10622 return SDValue(); 10623 10624 // All our optimizations involve subtracting 1 from the immediate and forming 10625 // an ADDI. Make sure the new immediate is valid for an ADDI. 10626 APInt ImmValMinus1 = N0C->getAPIntValue() - 1; 10627 if (!ImmValMinus1.isSignedIntN(12)) 10628 return SDValue(); 10629 10630 SDValue NewLHS; 10631 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { 10632 // (sub constant, (setcc x, y, eq/neq)) -> 10633 // (add (setcc x, y, neq/eq), constant - 1) 10634 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 10635 EVT SetCCOpVT = N1.getOperand(0).getValueType(); 10636 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) 10637 return SDValue(); 10638 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 10639 NewLHS = 10640 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); 10641 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && 10642 N1.getOperand(0).getOpcode() == ISD::SETCC) { 10643 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 10644 // Since setcc returns a bool the xor is equivalent to 1-setcc. 
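// For example, (sub 5, (xor (setcc x, y, eq), 1)) = 5 - (1 - setcc)
// = (add (setcc x, y, eq), 4).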
10645 NewLHS = N1.getOperand(0); 10646 } else 10647 return SDValue(); 10648 10649 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); 10650 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); 10651 } 10652 10653 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, 10654 const RISCVSubtarget &Subtarget) { 10655 if (SDValue V = combineSubOfBoolean(N, DAG)) 10656 return V; 10657 10658 SDValue N0 = N->getOperand(0); 10659 SDValue N1 = N->getOperand(1); 10660 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 10661 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && 10662 isNullConstant(N1.getOperand(1))) { 10663 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 10664 if (CCVal == ISD::SETLT) { 10665 EVT VT = N->getValueType(0); 10666 SDLoc DL(N); 10667 unsigned ShAmt = N0.getValueSizeInBits() - 1; 10668 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), 10669 DAG.getConstant(ShAmt, DL, VT)); 10670 } 10671 } 10672 10673 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 10674 // (select lhs, rhs, cc, x, (sub x, y)) 10675 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); 10676 } 10677 10678 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 10679 // Legalizing setcc can introduce xors like this. Doing this transform reduces 10680 // the number of xors and may allow the xor to fold into a branch condition. 10681 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { 10682 SDValue N0 = N->getOperand(0); 10683 SDValue N1 = N->getOperand(1); 10684 bool IsAnd = N->getOpcode() == ISD::AND; 10685 10686 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) 10687 return SDValue(); 10688 10689 if (!N0.hasOneUse() || !N1.hasOneUse()) 10690 return SDValue(); 10691 10692 SDValue N01 = N0.getOperand(1); 10693 SDValue N11 = N1.getOperand(1); 10694 10695 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 10696 // (xor X, -1) based on the upper bits of the other operand being 0. If the 10697 // operation is And, allow one of the Xors to use -1. 10698 if (isOneConstant(N01)) { 10699 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) 10700 return SDValue(); 10701 } else if (isOneConstant(N11)) { 10702 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 10703 if (!(IsAnd && isAllOnesConstant(N01))) 10704 return SDValue(); 10705 } else 10706 return SDValue(); 10707 10708 EVT VT = N->getValueType(0); 10709 10710 SDValue N00 = N0.getOperand(0); 10711 SDValue N10 = N1.getOperand(0); 10712 10713 // The LHS of the xors needs to be 0/1. 10714 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 10715 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) 10716 return SDValue(); 10717 10718 // Invert the opcode and insert a new xor. 10719 SDLoc DL(N); 10720 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 10721 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); 10722 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); 10723 } 10724 10725 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, 10726 const RISCVSubtarget &Subtarget) { 10727 SDValue N0 = N->getOperand(0); 10728 EVT VT = N->getValueType(0); 10729 10730 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 10731 // extending X. This is safe since we only need the LSB after the shift and 10732 // shift amounts larger than 31 would produce poison. 
If we wait until 10733 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 10734 // to use a BEXT instruction. 10735 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && 10736 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && 10737 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { 10738 SDLoc DL(N0); 10739 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 10740 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 10741 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 10742 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl); 10743 } 10744 10745 return SDValue(); 10746 } 10747 10748 // Combines two comparison operation and logic operation to one selection 10749 // operation(min, max) and logic operation. Returns new constructed Node if 10750 // conditions for optimization are satisfied. 10751 static SDValue performANDCombine(SDNode *N, 10752 TargetLowering::DAGCombinerInfo &DCI, 10753 const RISCVSubtarget &Subtarget) { 10754 SelectionDAG &DAG = DCI.DAG; 10755 10756 SDValue N0 = N->getOperand(0); 10757 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 10758 // extending X. This is safe since we only need the LSB after the shift and 10759 // shift amounts larger than 31 would produce poison. If we wait until 10760 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 10761 // to use a BEXT instruction. 10762 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && 10763 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && 10764 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && 10765 N0.hasOneUse()) { 10766 SDLoc DL(N); 10767 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 10768 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 10769 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 10770 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, 10771 DAG.getConstant(1, DL, MVT::i64)); 10772 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); 10773 } 10774 10775 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 10776 return V; 10777 10778 if (DCI.isAfterLegalizeDAG()) 10779 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 10780 return V; 10781 10782 // fold (and (select lhs, rhs, cc, -1, y), x) -> 10783 // (select lhs, rhs, cc, x, (and x, y)) 10784 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); 10785 } 10786 10787 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, 10788 const RISCVSubtarget &Subtarget) { 10789 SelectionDAG &DAG = DCI.DAG; 10790 10791 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 10792 return V; 10793 10794 if (DCI.isAfterLegalizeDAG()) 10795 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 10796 return V; 10797 10798 // fold (or (select cond, 0, y), x) -> 10799 // (select cond, x, (or x, y)) 10800 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 10801 } 10802 10803 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 10804 const RISCVSubtarget &Subtarget) { 10805 SDValue N0 = N->getOperand(0); 10806 SDValue N1 = N->getOperand(1); 10807 10808 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) 10809 // NOTE: Assumes ROL being legal means ROLW is legal. 
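// (sllw 1, x) sets only bit (x mod 32), so xor'ing it with -1 clears exactly
// that bit. Rotating the constant ~1 (all ones except bit 0) left by x with
// rolw produces the same mask, which is the form generated below.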
10810 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 10811 if (N0.getOpcode() == RISCVISD::SLLW && 10812 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && 10813 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { 10814 SDLoc DL(N); 10815 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, 10816 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); 10817 } 10818 10819 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 10820 if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) { 10821 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 10822 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 10823 if (ConstN00 && CC == ISD::SETLT) { 10824 EVT VT = N0.getValueType(); 10825 SDLoc DL(N0); 10826 const APInt &Imm = ConstN00->getAPIntValue(); 10827 if ((Imm + 1).isSignedIntN(12)) 10828 return DAG.getSetCC(DL, VT, N0.getOperand(1), 10829 DAG.getConstant(Imm + 1, DL, VT), CC); 10830 } 10831 } 10832 10833 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 10834 return V; 10835 // fold (xor (select cond, 0, y), x) -> 10836 // (select cond, x, (xor x, y)) 10837 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 10838 } 10839 10840 // According to the property that indexed load/store instructions 10841 // zero-extended their indices, \p narrowIndex tries to narrow the type of index 10842 // operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C < 10843 // bits(ty). 10844 static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { 10845 if (N.getOpcode() != ISD::SHL || !N->hasOneUse()) 10846 return SDValue(); 10847 10848 SDValue N0 = N.getOperand(0); 10849 if (N0.getOpcode() != ISD::ZERO_EXTEND && 10850 N0.getOpcode() != RISCVISD::VZEXT_VL) 10851 return SDValue(); 10852 if (!N0->hasOneUse()) 10853 return SDValue(); 10854 10855 APInt ShAmt; 10856 SDValue N1 = N.getOperand(1); 10857 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) 10858 return SDValue(); 10859 10860 SDLoc DL(N); 10861 SDValue Src = N0.getOperand(0); 10862 EVT SrcVT = Src.getValueType(); 10863 unsigned SrcElen = SrcVT.getScalarSizeInBits(); 10864 unsigned ShAmtV = ShAmt.getZExtValue(); 10865 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); 10866 NewElen = std::max(NewElen, 8U); 10867 10868 // Skip if NewElen is not narrower than the original extended type. 10869 if (NewElen >= N0.getValueType().getScalarSizeInBits()) 10870 return SDValue(); 10871 10872 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); 10873 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); 10874 10875 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); 10876 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); 10877 return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); 10878 } 10879 10880 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with 10881 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 10882 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 10883 // can become a sext.w instead of a shift pair. 10884 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 10885 const RISCVSubtarget &Subtarget) { 10886 SDValue N0 = N->getOperand(0); 10887 SDValue N1 = N->getOperand(1); 10888 EVT VT = N->getValueType(0); 10889 EVT OpVT = N0.getValueType(); 10890 10891 if (OpVT != MVT::i64 || !Subtarget.is64Bit()) 10892 return SDValue(); 10893 10894 // RHS needs to be a constant. 
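// (As an illustration of the benefit: for C1 = 0x80000000 the zero-extended
// constant takes two instructions to materialize on RV64, while the
// sign-extended form 0xFFFFFFFF80000000 is a single lui, and the AND becomes
// a sext.w.)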
10895 auto *N1C = dyn_cast<ConstantSDNode>(N1); 10896 if (!N1C) 10897 return SDValue(); 10898 10899 // LHS needs to be (and X, 0xffffffff). 10900 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 10901 !isa<ConstantSDNode>(N0.getOperand(1)) || 10902 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) 10903 return SDValue(); 10904 10905 // Looking for an equality compare. 10906 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); 10907 if (!isIntEqualitySetCC(Cond)) 10908 return SDValue(); 10909 10910 // Don't do this if the sign bit is provably zero, it will be turned back into 10911 // an AND. 10912 APInt SignMask = APInt::getOneBitSet(64, 31); 10913 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) 10914 return SDValue(); 10915 10916 const APInt &C1 = N1C->getAPIntValue(); 10917 10918 SDLoc dl(N); 10919 // If the constant is larger than 2^32 - 1 it is impossible for both sides 10920 // to be equal. 10921 if (C1.getActiveBits() > 32) 10922 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); 10923 10924 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, 10925 N0.getOperand(0), DAG.getValueType(MVT::i32)); 10926 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), 10927 dl, OpVT), Cond); 10928 } 10929 10930 static SDValue 10931 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 10932 const RISCVSubtarget &Subtarget) { 10933 SDValue Src = N->getOperand(0); 10934 EVT VT = N->getValueType(0); 10935 10936 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 10937 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && 10938 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) 10939 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, 10940 Src.getOperand(0)); 10941 10942 return SDValue(); 10943 } 10944 10945 namespace { 10946 // Forward declaration of the structure holding the necessary information to 10947 // apply a combine. 10948 struct CombineResult; 10949 10950 /// Helper class for folding sign/zero extensions. 10951 /// In particular, this class is used for the following combines: 10952 /// add_vl -> vwadd(u) | vwadd(u)_w 10953 /// sub_vl -> vwsub(u) | vwsub(u)_w 10954 /// mul_vl -> vwmul(u) | vwmul_su 10955 /// 10956 /// An object of this class represents an operand of the operation we want to 10957 /// combine. 10958 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 10959 /// NodeExtensionHelper for `a` and one for `b`. 10960 /// 10961 /// This class abstracts away how the extension is materialized and 10962 /// how its Mask, VL, number of users affect the combines. 10963 /// 10964 /// In particular: 10965 /// - VWADD_W is conceptually == add(op0, sext(op1)) 10966 /// - VWADDU_W == add(op0, zext(op1)) 10967 /// - VWSUB_W == sub(op0, sext(op1)) 10968 /// - VWSUBU_W == sub(op0, zext(op1)) 10969 /// 10970 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 10971 /// zext|sext(smaller_value). 10972 struct NodeExtensionHelper { 10973 /// Records if this operand is like being zero extended. 10974 bool SupportsZExt; 10975 /// Records if this operand is like being sign extended. 10976 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 10977 /// instance, a splat constant (e.g., 3), would support being both sign and 10978 /// zero extended. 10979 bool SupportsSExt; 10980 /// This boolean captures whether we care if this operand would still be 10981 /// around after the folding happens. 
10982 bool EnforceOneUse; 10983 /// Records if this operand's mask needs to match the mask of the operation 10984 /// that it will fold into. 10985 bool CheckMask; 10986 /// Value of the Mask for this operand. 10987 /// It may be SDValue(). 10988 SDValue Mask; 10989 /// Value of the vector length operand. 10990 /// It may be SDValue(). 10991 SDValue VL; 10992 /// Original value that this NodeExtensionHelper represents. 10993 SDValue OrigOperand; 10994 10995 /// Get the value feeding the extension or the value itself. 10996 /// E.g., for zext(a), this would return a. 10997 SDValue getSource() const { 10998 switch (OrigOperand.getOpcode()) { 10999 case RISCVISD::VSEXT_VL: 11000 case RISCVISD::VZEXT_VL: 11001 return OrigOperand.getOperand(0); 11002 default: 11003 return OrigOperand; 11004 } 11005 } 11006 11007 /// Check if this instance represents a splat. 11008 bool isSplat() const { 11009 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL; 11010 } 11011 11012 /// Get or create a value that can feed \p Root with the given extension \p 11013 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. 11014 /// \see ::getSource(). 11015 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, 11016 std::optional<bool> SExt) const { 11017 if (!SExt.has_value()) 11018 return OrigOperand; 11019 11020 MVT NarrowVT = getNarrowType(Root); 11021 11022 SDValue Source = getSource(); 11023 if (Source.getValueType() == NarrowVT) 11024 return Source; 11025 11026 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; 11027 11028 // If we need an extension, we should be changing the type. 11029 SDLoc DL(Root); 11030 auto [Mask, VL] = getMaskAndVL(Root); 11031 switch (OrigOperand.getOpcode()) { 11032 case RISCVISD::VSEXT_VL: 11033 case RISCVISD::VZEXT_VL: 11034 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); 11035 case RISCVISD::VMV_V_X_VL: 11036 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, 11037 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); 11038 default: 11039 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL 11040 // and that operand should already have the right NarrowVT so no 11041 // extension should be required at this point. 11042 llvm_unreachable("Unsupported opcode"); 11043 } 11044 } 11045 11046 /// Helper function to get the narrow type for \p Root. 11047 /// The narrow type is the type of \p Root where we divided the size of each 11048 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. 11049 /// \pre The size of the type of the elements of Root must be a multiple of 2 11050 /// and be greater than 16. 11051 static MVT getNarrowType(const SDNode *Root) { 11052 MVT VT = Root->getSimpleValueType(0); 11053 11054 // Determine the narrow size. 11055 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 11056 assert(NarrowSize >= 8 && "Trying to extend something we can't represent"); 11057 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), 11058 VT.getVectorElementCount()); 11059 return NarrowVT; 11060 } 11061 11062 /// Return the opcode required to materialize the folding of the sign 11063 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for 11064 /// both operands for \p Opcode. 
11065 /// Put differently, get the opcode to materialize: 11066 /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b) 11067 /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b) 11068 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). 11069 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { 11070 switch (Opcode) { 11071 case RISCVISD::ADD_VL: 11072 case RISCVISD::VWADD_W_VL: 11073 case RISCVISD::VWADDU_W_VL: 11074 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; 11075 case RISCVISD::MUL_VL: 11076 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; 11077 case RISCVISD::SUB_VL: 11078 case RISCVISD::VWSUB_W_VL: 11079 case RISCVISD::VWSUBU_W_VL: 11080 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL; 11081 default: 11082 llvm_unreachable("Unexpected opcode"); 11083 } 11084 } 11085 11086 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> 11087 /// newOpcode(a, b). 11088 static unsigned getSUOpcode(unsigned Opcode) { 11089 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); 11090 return RISCVISD::VWMULSU_VL; 11091 } 11092 11093 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) -> 11094 /// newOpcode(a, b). 11095 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { 11096 switch (Opcode) { 11097 case RISCVISD::ADD_VL: 11098 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; 11099 case RISCVISD::SUB_VL: 11100 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; 11101 default: 11102 llvm_unreachable("Unexpected opcode"); 11103 } 11104 } 11105 11106 using CombineToTry = std::function<std::optional<CombineResult>( 11107 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, 11108 const NodeExtensionHelper & /*RHS*/)>; 11109 11110 /// Check if this node needs to be fully folded or extended for all users. 11111 bool needToPromoteOtherUsers() const { return EnforceOneUse; } 11112 11113 /// Helper method to set the various fields of this struct based on the 11114 /// type of \p Root. 11115 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { 11116 SupportsZExt = false; 11117 SupportsSExt = false; 11118 EnforceOneUse = true; 11119 CheckMask = true; 11120 switch (OrigOperand.getOpcode()) { 11121 case RISCVISD::VZEXT_VL: 11122 SupportsZExt = true; 11123 Mask = OrigOperand.getOperand(1); 11124 VL = OrigOperand.getOperand(2); 11125 break; 11126 case RISCVISD::VSEXT_VL: 11127 SupportsSExt = true; 11128 Mask = OrigOperand.getOperand(1); 11129 VL = OrigOperand.getOperand(2); 11130 break; 11131 case RISCVISD::VMV_V_X_VL: { 11132 // Historically, we didn't care about splat values not disappearing during 11133 // combines. 11134 EnforceOneUse = false; 11135 CheckMask = false; 11136 VL = OrigOperand.getOperand(2); 11137 11138 // The operand is a splat of a scalar. 11139 11140 // The pasthru must be undef for tail agnostic. 11141 if (!OrigOperand.getOperand(0).isUndef()) 11142 break; 11143 11144 // Get the scalar value. 11145 SDValue Op = OrigOperand.getOperand(1); 11146 11147 // See if we have enough sign bits or zero bits in the scalar to use a 11148 // widening opcode by splatting to smaller element size. 11149 MVT VT = Root->getSimpleValueType(0); 11150 unsigned EltBits = VT.getScalarSizeInBits(); 11151 unsigned ScalarBits = Op.getValueSizeInBits(); 11152 // Make sure we're getting all element bits from the scalar register. 11153 // FIXME: Support implicit sign extension of vmv.v.x? 
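// For example, a vmv.v.x splat of the constant 3 into an i32 element vector
// has enough leading zero/sign bits that it can equally be viewed as a zext
// or sext of an i16 splat, so it can feed a widening op.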
11154 if (ScalarBits < EltBits) 11155 break; 11156 11157 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 11158 // If the narrow type cannot be expressed with a legal VMV, 11159 // this is not a valid candidate. 11160 if (NarrowSize < 8) 11161 break; 11162 11163 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) 11164 SupportsSExt = true; 11165 if (DAG.MaskedValueIsZero(Op, 11166 APInt::getBitsSetFrom(ScalarBits, NarrowSize))) 11167 SupportsZExt = true; 11168 break; 11169 } 11170 default: 11171 break; 11172 } 11173 } 11174 11175 /// Check if \p Root supports any extension folding combines. 11176 static bool isSupportedRoot(const SDNode *Root) { 11177 switch (Root->getOpcode()) { 11178 case RISCVISD::ADD_VL: 11179 case RISCVISD::MUL_VL: 11180 case RISCVISD::VWADD_W_VL: 11181 case RISCVISD::VWADDU_W_VL: 11182 case RISCVISD::SUB_VL: 11183 case RISCVISD::VWSUB_W_VL: 11184 case RISCVISD::VWSUBU_W_VL: 11185 return true; 11186 default: 11187 return false; 11188 } 11189 } 11190 11191 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 11192 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { 11193 assert(isSupportedRoot(Root) && "Trying to build an helper with an " 11194 "unsupported root"); 11195 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); 11196 OrigOperand = Root->getOperand(OperandIdx); 11197 11198 unsigned Opc = Root->getOpcode(); 11199 switch (Opc) { 11200 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were 11201 // <ADD|SUB>(LHS, S|ZEXT(RHS)) 11202 case RISCVISD::VWADD_W_VL: 11203 case RISCVISD::VWADDU_W_VL: 11204 case RISCVISD::VWSUB_W_VL: 11205 case RISCVISD::VWSUBU_W_VL: 11206 if (OperandIdx == 1) { 11207 SupportsZExt = 11208 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; 11209 SupportsSExt = !SupportsZExt; 11210 std::tie(Mask, VL) = getMaskAndVL(Root); 11211 CheckMask = true; 11212 // There's no existing extension here, so we don't have to worry about 11213 // making sure it gets removed. 11214 EnforceOneUse = false; 11215 break; 11216 } 11217 [[fallthrough]]; 11218 default: 11219 fillUpExtensionSupport(Root, DAG); 11220 break; 11221 } 11222 } 11223 11224 /// Check if this operand is compatible with the given vector length \p VL. 11225 bool isVLCompatible(SDValue VL) const { 11226 return this->VL != SDValue() && this->VL == VL; 11227 } 11228 11229 /// Check if this operand is compatible with the given \p Mask. 11230 bool isMaskCompatible(SDValue Mask) const { 11231 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask); 11232 } 11233 11234 /// Helper function to get the Mask and VL from \p Root. 11235 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { 11236 assert(isSupportedRoot(Root) && "Unexpected root"); 11237 return std::make_pair(Root->getOperand(3), Root->getOperand(4)); 11238 } 11239 11240 /// Check if the Mask and VL of this operand are compatible with \p Root. 11241 bool areVLAndMaskCompatible(const SDNode *Root) const { 11242 auto [Mask, VL] = getMaskAndVL(Root); 11243 return isMaskCompatible(Mask) && isVLCompatible(VL); 11244 } 11245 11246 /// Helper function to check if \p N is commutative with respect to the 11247 /// foldings that are supported by this class. 
11248 static bool isCommutative(const SDNode *N) {
11249 switch (N->getOpcode()) {
11250 case RISCVISD::ADD_VL:
11251 case RISCVISD::MUL_VL:
11252 case RISCVISD::VWADD_W_VL:
11253 case RISCVISD::VWADDU_W_VL:
11254 return true;
11255 case RISCVISD::SUB_VL:
11256 case RISCVISD::VWSUB_W_VL:
11257 case RISCVISD::VWSUBU_W_VL:
11258 return false;
11259 default:
11260 llvm_unreachable("Unexpected opcode");
11261 }
11262 }
11263 
11264 /// Get a list of combines to try for folding extensions in \p Root.
11265 /// Note that each returned CombineToTry function doesn't actually modify
11266 /// anything. Instead, they produce an optional CombineResult that, if not
11267 /// std::nullopt, needs to be materialized for the combine to be applied.
11268 /// \see CombineResult::materialize.
11269 /// If the related CombineToTry function returns std::nullopt, that means the
11270 /// combine didn't match.
11271 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
11272 };
11273 
11274 /// Helper structure that holds all the necessary information to materialize a
11275 /// combine that does some extension folding.
11276 struct CombineResult {
11277 /// Opcode to be generated when materializing the combine.
11278 unsigned TargetOpcode;
11279 // No value means no extension is needed. If extension is needed, the value
11280 // indicates if it needs to be sign extended.
11281 std::optional<bool> SExtLHS;
11282 std::optional<bool> SExtRHS;
11283 /// Root of the combine.
11284 SDNode *Root;
11285 /// LHS of the TargetOpcode.
11286 NodeExtensionHelper LHS;
11287 /// RHS of the TargetOpcode.
11288 NodeExtensionHelper RHS;
11289 
11290 CombineResult(unsigned TargetOpcode, SDNode *Root,
11291 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
11292 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
11293 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
11294 Root(Root), LHS(LHS), RHS(RHS) {}
11295 
11296 /// Return a value that uses TargetOpcode and that can be used to replace
11297 /// Root.
11298 /// The actual replacement is *not* done in that method.
11299 SDValue materialize(SelectionDAG &DAG) const {
11300 SDValue Mask, VL, Merge;
11301 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
11302 Merge = Root->getOperand(2);
11303 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
11304 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
11305 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
11306 Mask, VL);
11307 }
11308 };
11309 
11310 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11311 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11312 /// are zext) and LHS and RHS can be folded into Root.
11313 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
11314 ///
11315 /// \note If the pattern can match with both zext and sext, the returned
11316 /// CombineResult will feature the zext result.
11317 ///
11318 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11319 /// can be used to apply the pattern.
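/// For example, with Root = add_vl and both operands wrapped in vsext_vl,
/// this returns a CombineResult describing a vwadd_vl of the narrow sources
/// (assuming the masks and VLs are compatible with Root).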
11320 static std::optional<CombineResult> 11321 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, 11322 const NodeExtensionHelper &RHS, bool AllowSExt, 11323 bool AllowZExt) { 11324 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); 11325 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) 11326 return std::nullopt; 11327 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) 11328 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 11329 Root->getOpcode(), /*IsSExt=*/false), 11330 Root, LHS, /*SExtLHS=*/false, RHS, 11331 /*SExtRHS=*/false); 11332 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) 11333 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 11334 Root->getOpcode(), /*IsSExt=*/true), 11335 Root, LHS, /*SExtLHS=*/true, RHS, 11336 /*SExtRHS=*/true); 11337 return std::nullopt; 11338 } 11339 11340 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 11341 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 11342 /// are zext) and LHS and RHS can be folded into Root. 11343 /// 11344 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 11345 /// can be used to apply the pattern. 11346 static std::optional<CombineResult> 11347 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, 11348 const NodeExtensionHelper &RHS) { 11349 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 11350 /*AllowZExt=*/true); 11351 } 11352 11353 /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 11354 /// 11355 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 11356 /// can be used to apply the pattern. 11357 static std::optional<CombineResult> 11358 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, 11359 const NodeExtensionHelper &RHS) { 11360 if (!RHS.areVLAndMaskCompatible(Root)) 11361 return std::nullopt; 11362 11363 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 11364 // sext/zext? 11365 // Control this behavior behind an option (AllowSplatInVW_W) for testing 11366 // purposes. 11367 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) 11368 return CombineResult( 11369 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), 11370 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); 11371 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) 11372 return CombineResult( 11373 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), 11374 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); 11375 return std::nullopt; 11376 } 11377 11378 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 11379 /// 11380 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 11381 /// can be used to apply the pattern. 11382 static std::optional<CombineResult> 11383 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, 11384 const NodeExtensionHelper &RHS) { 11385 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 11386 /*AllowZExt=*/false); 11387 } 11388 11389 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 11390 /// 11391 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 11392 /// can be used to apply the pattern. 
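/// For example, vwaddu_w_vl (vzext_vl a), b computes add(zext(a), zext(b)),
/// so it can be rewritten as vwaddu_vl a, b when masks and VLs match.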
11393 static std::optional<CombineResult>
11394 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11395 const NodeExtensionHelper &RHS) {
11396 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
11397 /*AllowZExt=*/true);
11398 }
11399 
11400 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
11401 ///
11402 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11403 /// can be used to apply the pattern.
11404 static std::optional<CombineResult>
11405 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
11406 const NodeExtensionHelper &RHS) {
11407 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
11408 return std::nullopt;
11409 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11410 return std::nullopt;
11411 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
11412 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
11413 }
11414 
11415 SmallVector<NodeExtensionHelper::CombineToTry>
11416 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
11417 SmallVector<CombineToTry> Strategies;
11418 switch (Root->getOpcode()) {
11419 case RISCVISD::ADD_VL:
11420 case RISCVISD::SUB_VL:
11421 // add|sub -> vwadd(u)|vwsub(u)
11422 Strategies.push_back(canFoldToVWWithSameExtension);
11423 // add|sub -> vwadd(u)_w|vwsub(u)_w
11424 Strategies.push_back(canFoldToVW_W);
11425 break;
11426 case RISCVISD::MUL_VL:
11427 // mul -> vwmul(u)
11428 Strategies.push_back(canFoldToVWWithSameExtension);
11429 // mul -> vwmulsu
11430 Strategies.push_back(canFoldToVW_SU);
11431 break;
11432 case RISCVISD::VWADD_W_VL:
11433 case RISCVISD::VWSUB_W_VL:
11434 // vwadd_w|vwsub_w -> vwadd|vwsub
11435 Strategies.push_back(canFoldToVWWithSEXT);
11436 break;
11437 case RISCVISD::VWADDU_W_VL:
11438 case RISCVISD::VWSUBU_W_VL:
11439 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
11440 Strategies.push_back(canFoldToVWWithZEXT);
11441 break;
11442 default:
11443 llvm_unreachable("Unexpected opcode");
11444 }
11445 return Strategies;
11446 }
11447 } // End anonymous namespace.
11448 
11449 /// Combine a binary operation to its equivalent VW or VW_W form.
11450 /// The supported combines are:
11451 /// add_vl -> vwadd(u) | vwadd(u)_w
11452 /// sub_vl -> vwsub(u) | vwsub(u)_w
11453 /// mul_vl -> vwmul(u) | vwmul_su
11454 /// vwadd_w(u) -> vwadd(u)
11455 /// vwsub_w(u) -> vwsub(u)
11456 static SDValue
11457 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11458 SelectionDAG &DAG = DCI.DAG;
11459 
11460 assert(NodeExtensionHelper::isSupportedRoot(N) &&
11461 "Shouldn't have called this method");
11462 SmallVector<SDNode *> Worklist;
11463 SmallSet<SDNode *, 8> Inserted;
11464 Worklist.push_back(N);
11465 Inserted.insert(N);
11466 SmallVector<CombineResult> CombinesToApply;
11467 
11468 while (!Worklist.empty()) {
11469 SDNode *Root = Worklist.pop_back_val();
11470 if (!NodeExtensionHelper::isSupportedRoot(Root))
11471 return SDValue();
11472 
11473 NodeExtensionHelper LHS(N, 0, DAG);
11474 NodeExtensionHelper RHS(N, 1, DAG);
11475 auto AppendUsersIfNeeded = [&Worklist,
11476 &Inserted](const NodeExtensionHelper &Op) {
11477 if (Op.needToPromoteOtherUsers()) {
11478 for (SDNode *TheUse : Op.OrigOperand->uses()) {
11479 if (Inserted.insert(TheUse).second)
11480 Worklist.push_back(TheUse);
11481 }
11482 }
11483 };
11484 
11485 // Control the compile time by limiting the number of nodes we look at in
11486 // total.
11487 if (Inserted.size() > ExtensionMaxWebSize) 11488 return SDValue(); 11489 11490 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = 11491 NodeExtensionHelper::getSupportedFoldings(N); 11492 11493 assert(!FoldingStrategies.empty() && "Nothing to be folded"); 11494 bool Matched = false; 11495 for (int Attempt = 0; 11496 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched; 11497 ++Attempt) { 11498 11499 for (NodeExtensionHelper::CombineToTry FoldingStrategy : 11500 FoldingStrategies) { 11501 std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); 11502 if (Res) { 11503 Matched = true; 11504 CombinesToApply.push_back(*Res); 11505 // All the inputs that are extended need to be folded, otherwise 11506 // we would be leaving the old input (since it is may still be used), 11507 // and the new one. 11508 if (Res->SExtLHS.has_value()) 11509 AppendUsersIfNeeded(LHS); 11510 if (Res->SExtRHS.has_value()) 11511 AppendUsersIfNeeded(RHS); 11512 break; 11513 } 11514 } 11515 std::swap(LHS, RHS); 11516 } 11517 // Right now we do an all or nothing approach. 11518 if (!Matched) 11519 return SDValue(); 11520 } 11521 // Store the value for the replacement of the input node separately. 11522 SDValue InputRootReplacement; 11523 // We do the RAUW after we materialize all the combines, because some replaced 11524 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, 11525 // some of these nodes may appear in the NodeExtensionHelpers of some of the 11526 // yet-to-be-visited CombinesToApply roots. 11527 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; 11528 ValuesToReplace.reserve(CombinesToApply.size()); 11529 for (CombineResult Res : CombinesToApply) { 11530 SDValue NewValue = Res.materialize(DAG); 11531 if (!InputRootReplacement) { 11532 assert(Res.Root == N && 11533 "First element is expected to be the current node"); 11534 InputRootReplacement = NewValue; 11535 } else { 11536 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); 11537 } 11538 } 11539 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { 11540 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); 11541 DCI.AddToWorklist(OldNewValues.second.getNode()); 11542 } 11543 return InputRootReplacement; 11544 } 11545 11546 // Helper function for performMemPairCombine. 11547 // Try to combine the memory loads/stores LSNode1 and LSNode2 11548 // into a single memory pair operation. 11549 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, 11550 LSBaseSDNode *LSNode2, SDValue BasePtr, 11551 uint64_t Imm) { 11552 SmallPtrSet<const SDNode *, 32> Visited; 11553 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; 11554 11555 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) || 11556 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist)) 11557 return SDValue(); 11558 11559 MachineFunction &MF = DAG.getMachineFunction(); 11560 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 11561 11562 // The new operation has twice the width. 11563 MVT XLenVT = Subtarget.getXLenVT(); 11564 EVT MemVT = LSNode1->getMemoryVT(); 11565 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; 11566 MachineMemOperand *MMO = LSNode1->getMemOperand(); 11567 MachineMemOperand *NewMMO = MF.getMachineMemOperand( 11568 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 
8 : 16); 11569 11570 if (LSNode1->getOpcode() == ISD::LOAD) { 11571 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType(); 11572 unsigned Opcode; 11573 if (MemVT == MVT::i32) 11574 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; 11575 else 11576 Opcode = RISCVISD::TH_LDD; 11577 11578 SDValue Res = DAG.getMemIntrinsicNode( 11579 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), 11580 {LSNode1->getChain(), BasePtr, 11581 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 11582 NewMemVT, NewMMO); 11583 11584 SDValue Node1 = 11585 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1)); 11586 SDValue Node2 = 11587 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2)); 11588 11589 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode()); 11590 return Node1; 11591 } else { 11592 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; 11593 11594 SDValue Res = DAG.getMemIntrinsicNode( 11595 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), 11596 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), 11597 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 11598 NewMemVT, NewMMO); 11599 11600 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode()); 11601 return Res; 11602 } 11603 } 11604 11605 // Try to combine two adjacent loads/stores to a single pair instruction from 11606 // the XTHeadMemPair vendor extension. 11607 static SDValue performMemPairCombine(SDNode *N, 11608 TargetLowering::DAGCombinerInfo &DCI) { 11609 SelectionDAG &DAG = DCI.DAG; 11610 MachineFunction &MF = DAG.getMachineFunction(); 11611 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 11612 11613 // Target does not support load/store pair. 11614 if (!Subtarget.hasVendorXTHeadMemPair()) 11615 return SDValue(); 11616 11617 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N); 11618 EVT MemVT = LSNode1->getMemoryVT(); 11619 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; 11620 11621 // No volatile, indexed or atomic loads/stores. 11622 if (!LSNode1->isSimple() || LSNode1->isIndexed()) 11623 return SDValue(); 11624 11625 // Function to get a base + constant representation from a memory value. 11626 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { 11627 if (Ptr->getOpcode() == ISD::ADD) 11628 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) 11629 return {Ptr->getOperand(0), C1->getZExtValue()}; 11630 return {Ptr, 0}; 11631 }; 11632 11633 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum)); 11634 11635 SDValue Chain = N->getOperand(0); 11636 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); 11637 UI != UE; ++UI) { 11638 SDUse &Use = UI.getUse(); 11639 if (Use.getUser() != N && Use.getResNo() == 0 && 11640 Use.getUser()->getOpcode() == N->getOpcode()) { 11641 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser()); 11642 11643 // No volatile, indexed or atomic loads/stores. 11644 if (!LSNode2->isSimple() || LSNode2->isIndexed()) 11645 continue; 11646 11647 // Check if LSNode1 and LSNode2 have the same type and extension. 
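// For example, two simple i64 loads from (base + 16) and (base + 24) with the
// same extension kind can be merged into a single th.ldd; the first offset
// must also fit the scaled 2-bit immediate, which is checked further below.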
11648 if (LSNode1->getOpcode() == ISD::LOAD) 11649 if (cast<LoadSDNode>(LSNode2)->getExtensionType() != 11650 cast<LoadSDNode>(LSNode1)->getExtensionType()) 11651 continue; 11652 11653 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) 11654 continue; 11655 11656 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum)); 11657 11658 // Check if the base pointer is the same for both instruction. 11659 if (Base1 != Base2) 11660 continue; 11661 11662 // Check if the offsets match the XTHeadMemPair encoding contraints. 11663 bool Valid = false; 11664 if (MemVT == MVT::i32) { 11665 // Check for adjacent i32 values and a 2-bit index. 11666 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1)) 11667 Valid = true; 11668 } else if (MemVT == MVT::i64) { 11669 // Check for adjacent i64 values and a 2-bit index. 11670 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1)) 11671 Valid = true; 11672 } 11673 11674 if (!Valid) 11675 continue; 11676 11677 // Try to combine. 11678 if (SDValue Res = 11679 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1)) 11680 return Res; 11681 } 11682 } 11683 11684 return SDValue(); 11685 } 11686 11687 // Fold 11688 // (fp_to_int (froundeven X)) -> fcvt X, rne 11689 // (fp_to_int (ftrunc X)) -> fcvt X, rtz 11690 // (fp_to_int (ffloor X)) -> fcvt X, rdn 11691 // (fp_to_int (fceil X)) -> fcvt X, rup 11692 // (fp_to_int (fround X)) -> fcvt X, rmm 11693 static SDValue performFP_TO_INTCombine(SDNode *N, 11694 TargetLowering::DAGCombinerInfo &DCI, 11695 const RISCVSubtarget &Subtarget) { 11696 SelectionDAG &DAG = DCI.DAG; 11697 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 11698 MVT XLenVT = Subtarget.getXLenVT(); 11699 11700 SDValue Src = N->getOperand(0); 11701 11702 // Don't do this for strict-fp Src. 11703 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 11704 return SDValue(); 11705 11706 // Ensure the FP type is legal. 11707 if (!TLI.isTypeLegal(Src.getValueType())) 11708 return SDValue(); 11709 11710 // Don't do this for f16 with Zfhmin and not Zfh. 11711 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 11712 return SDValue(); 11713 11714 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 11715 // If the result is invalid, we didn't find a foldable instruction. 11716 // If the result is dynamic, then we found an frint which we don't yet 11717 // support. It will cause 7 to be written to the FRM CSR for vector. 11718 // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below. 11719 if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN) 11720 return SDValue(); 11721 11722 SDLoc DL(N); 11723 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; 11724 EVT VT = N->getValueType(0); 11725 11726 if (VT.isVector() && TLI.isTypeLegal(VT)) { 11727 MVT SrcVT = Src.getSimpleValueType(); 11728 MVT SrcContainerVT = SrcVT; 11729 MVT ContainerVT = VT.getSimpleVT(); 11730 SDValue XVal = Src.getOperand(0); 11731 11732 // For widening and narrowing conversions we just combine it into a 11733 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. 
They 11734 // end up getting lowered to their appropriate pseudo instructions based on 11735 // their operand types 11736 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || 11737 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) 11738 return SDValue(); 11739 11740 // Make fixed-length vectors scalable first 11741 if (SrcVT.isFixedLengthVector()) { 11742 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 11743 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); 11744 ContainerVT = 11745 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 11746 } 11747 11748 auto [Mask, VL] = 11749 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); 11750 11751 SDValue FpToInt; 11752 if (FRM == RISCVFPRndMode::RTZ) { 11753 // Use the dedicated trunc static rounding mode if we're truncating so we 11754 // don't need to generate calls to fsrmi/fsrm 11755 unsigned Opc = 11756 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 11757 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 11758 } else { 11759 unsigned Opc = 11760 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; 11761 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, 11762 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 11763 } 11764 11765 // If converted from fixed-length to scalable, convert back 11766 if (VT.isFixedLengthVector()) 11767 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); 11768 11769 return FpToInt; 11770 } 11771 11772 // Only handle XLen or i32 types. Other types narrower than XLen will 11773 // eventually be legalized to XLenVT. 11774 if (VT != MVT::i32 && VT != XLenVT) 11775 return SDValue(); 11776 11777 unsigned Opc; 11778 if (VT == XLenVT) 11779 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 11780 else 11781 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 11782 11783 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), 11784 DAG.getTargetConstant(FRM, DL, XLenVT)); 11785 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); 11786 } 11787 11788 // Fold 11789 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 11790 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 11791 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 11792 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 11793 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 11794 static SDValue performFP_TO_INT_SATCombine(SDNode *N, 11795 TargetLowering::DAGCombinerInfo &DCI, 11796 const RISCVSubtarget &Subtarget) { 11797 SelectionDAG &DAG = DCI.DAG; 11798 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 11799 MVT XLenVT = Subtarget.getXLenVT(); 11800 11801 // Only handle XLen types. Other types narrower than XLen will eventually be 11802 // legalized to XLenVT. 11803 EVT DstVT = N->getValueType(0); 11804 if (DstVT != XLenVT) 11805 return SDValue(); 11806 11807 SDValue Src = N->getOperand(0); 11808 11809 // Don't do this for strict-fp Src. 11810 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 11811 return SDValue(); 11812 11813 // Ensure the FP type is also legal. 11814 if (!TLI.isTypeLegal(Src.getValueType())) 11815 return SDValue(); 11816 11817 // Don't do this for f16 with Zfhmin and not Zfh. 
11818 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 11819 return SDValue(); 11820 11821 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 11822 11823 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 11824 if (FRM == RISCVFPRndMode::Invalid) 11825 return SDValue(); 11826 11827 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; 11828 11829 unsigned Opc; 11830 if (SatVT == DstVT) 11831 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 11832 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 11833 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 11834 else 11835 return SDValue(); 11836 // FIXME: Support other SatVTs by clamping before or after the conversion. 11837 11838 Src = Src.getOperand(0); 11839 11840 SDLoc DL(N); 11841 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, 11842 DAG.getTargetConstant(FRM, DL, XLenVT)); 11843 11844 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 11845 // extend. 11846 if (Opc == RISCVISD::FCVT_WU_RV64) 11847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 11848 11849 // RISC-V FP-to-int conversions saturate to the destination register size, but 11850 // don't produce 0 for nan. 11851 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 11852 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 11853 } 11854 11855 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 11856 // smaller than XLenVT. 11857 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, 11858 const RISCVSubtarget &Subtarget) { 11859 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 11860 11861 SDValue Src = N->getOperand(0); 11862 if (Src.getOpcode() != ISD::BSWAP) 11863 return SDValue(); 11864 11865 EVT VT = N->getValueType(0); 11866 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || 11867 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) 11868 return SDValue(); 11869 11870 SDLoc DL(N); 11871 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); 11872 } 11873 11874 // Convert from one FMA opcode to another based on whether we are negating the 11875 // multiply result and/or the accumulator. 11876 // NOTE: Only supports RVV operations with VL. 11877 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { 11878 // Negating the multiply result changes ADD<->SUB and toggles 'N'. 11879 if (NegMul) { 11880 // clang-format off 11881 switch (Opcode) { 11882 default: llvm_unreachable("Unexpected opcode"); 11883 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 11884 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 11885 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 11886 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 11887 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 11888 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 11889 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 11890 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 11891 } 11892 // clang-format on 11893 } 11894 11895 // Negating the accumulator changes ADD<->SUB. 
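  // For example, starting from VFMADD_VL (a * b + c): negating the product
  // gives VFNMSUB_VL (-(a * b) + c), while negating the accumulator below
  // gives VFMSUB_VL (a * b - c).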
11896 if (NegAcc) { 11897 // clang-format off 11898 switch (Opcode) { 11899 default: llvm_unreachable("Unexpected opcode"); 11900 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 11901 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 11902 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 11903 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 11904 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 11905 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 11906 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 11907 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 11908 } 11909 // clang-format on 11910 } 11911 11912 return Opcode; 11913 } 11914 11915 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { 11916 // Fold FNEG_VL into FMA opcodes. 11917 // The first operand of strict-fp is chain. 11918 unsigned Offset = N->isTargetStrictFPOpcode(); 11919 SDValue A = N->getOperand(0 + Offset); 11920 SDValue B = N->getOperand(1 + Offset); 11921 SDValue C = N->getOperand(2 + Offset); 11922 SDValue Mask = N->getOperand(3 + Offset); 11923 SDValue VL = N->getOperand(4 + Offset); 11924 11925 auto invertIfNegative = [&Mask, &VL](SDValue &V) { 11926 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && 11927 V.getOperand(2) == VL) { 11928 // Return the negated input. 11929 V = V.getOperand(0); 11930 return true; 11931 } 11932 11933 return false; 11934 }; 11935 11936 bool NegA = invertIfNegative(A); 11937 bool NegB = invertIfNegative(B); 11938 bool NegC = invertIfNegative(C); 11939 11940 // If no operands are negated, we're done. 11941 if (!NegA && !NegB && !NegC) 11942 return SDValue(); 11943 11944 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); 11945 if (N->isTargetStrictFPOpcode()) 11946 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(), 11947 {N->getOperand(0), A, B, C, Mask, VL}); 11948 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, 11949 VL); 11950 } 11951 11952 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) { 11953 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) 11954 return V; 11955 11956 // FIXME: Ignore strict opcodes for now. 11957 if (N->isTargetStrictFPOpcode()) 11958 return SDValue(); 11959 11960 // Try to form widening FMA. 11961 SDValue Op0 = N->getOperand(0); 11962 SDValue Op1 = N->getOperand(1); 11963 SDValue Mask = N->getOperand(3); 11964 SDValue VL = N->getOperand(4); 11965 11966 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 11967 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 11968 return SDValue(); 11969 11970 // TODO: Refactor to handle more complex cases similar to 11971 // combineBinOp_VLToVWBinOp_VL. 11972 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 11973 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 11974 return SDValue(); 11975 11976 // Check the mask and VL are the same. 
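  // e.g. (vfmadd_vl (fpext_vl a, m, vl), (fpext_vl b, m, vl), c, m, vl)
  //        -> (vfwmadd_vl a, b, c, m, vl)
  // is only sound when both extends were computed under the same mask and VL
  // as the FMA itself.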
11977 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 11978 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 11979 return SDValue(); 11980 11981 unsigned NewOpc; 11982 switch (N->getOpcode()) { 11983 default: 11984 llvm_unreachable("Unexpected opcode"); 11985 case RISCVISD::VFMADD_VL: 11986 NewOpc = RISCVISD::VFWMADD_VL; 11987 break; 11988 case RISCVISD::VFNMSUB_VL: 11989 NewOpc = RISCVISD::VFWNMSUB_VL; 11990 break; 11991 case RISCVISD::VFNMADD_VL: 11992 NewOpc = RISCVISD::VFWNMADD_VL; 11993 break; 11994 case RISCVISD::VFMSUB_VL: 11995 NewOpc = RISCVISD::VFWMSUB_VL; 11996 break; 11997 } 11998 11999 Op0 = Op0.getOperand(0); 12000 Op1 = Op1.getOperand(0); 12001 12002 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1, 12003 N->getOperand(2), Mask, VL); 12004 } 12005 12006 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) { 12007 // FIXME: Ignore strict opcodes for now. 12008 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode"); 12009 12010 // Try to form widening multiply. 12011 SDValue Op0 = N->getOperand(0); 12012 SDValue Op1 = N->getOperand(1); 12013 SDValue Merge = N->getOperand(2); 12014 SDValue Mask = N->getOperand(3); 12015 SDValue VL = N->getOperand(4); 12016 12017 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 12018 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 12019 return SDValue(); 12020 12021 // TODO: Refactor to handle more complex cases similar to 12022 // combineBinOp_VLToVWBinOp_VL. 12023 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 12024 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 12025 return SDValue(); 12026 12027 // Check the mask and VL are the same. 12028 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 12029 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 12030 return SDValue(); 12031 12032 Op0 = Op0.getOperand(0); 12033 Op1 = Op1.getOperand(0); 12034 12035 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0, 12036 Op1, Merge, Mask, VL); 12037 } 12038 12039 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) { 12040 SDValue Op0 = N->getOperand(0); 12041 SDValue Op1 = N->getOperand(1); 12042 SDValue Merge = N->getOperand(2); 12043 SDValue Mask = N->getOperand(3); 12044 SDValue VL = N->getOperand(4); 12045 12046 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL; 12047 12048 // Look for foldable FP_EXTENDS. 12049 bool Op0IsExtend = 12050 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL && 12051 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0))); 12052 bool Op1IsExtend = 12053 (Op0 == Op1 && Op0IsExtend) || 12054 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse()); 12055 12056 // Check the mask and VL. 12057 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)) 12058 Op0IsExtend = false; 12059 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)) 12060 Op1IsExtend = false; 12061 12062 // Canonicalize. 12063 if (!Op1IsExtend) { 12064 // Sub requires at least operand 1 to be an extend. 12065 if (!IsAdd) 12066 return SDValue(); 12067 12068 // Add is commutable, if the other operand is foldable, swap them. 12069 if (!Op0IsExtend) 12070 return SDValue(); 12071 12072 std::swap(Op0, Op1); 12073 std::swap(Op0IsExtend, Op1IsExtend); 12074 } 12075 12076 // Op1 is a foldable extend. Op0 might be foldable. 12077 Op1 = Op1.getOperand(0); 12078 if (Op0IsExtend) 12079 Op0 = Op0.getOperand(0); 12080 12081 unsigned Opc; 12082 if (IsAdd) 12083 Opc = Op0IsExtend ? 
RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL; 12084 else 12085 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL; 12086 12087 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask, 12088 VL); 12089 } 12090 12091 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 12092 const RISCVSubtarget &Subtarget) { 12093 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); 12094 12095 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) 12096 return SDValue(); 12097 12098 if (!isa<ConstantSDNode>(N->getOperand(1))) 12099 return SDValue(); 12100 uint64_t ShAmt = N->getConstantOperandVal(1); 12101 if (ShAmt > 32) 12102 return SDValue(); 12103 12104 SDValue N0 = N->getOperand(0); 12105 12106 // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> 12107 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of 12108 // SLLIW+SRAIW. SLLI+SRAI have compressed forms. 12109 if (ShAmt < 32 && 12110 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && 12111 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && 12112 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && 12113 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 12114 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); 12115 if (LShAmt < 32) { 12116 SDLoc ShlDL(N0.getOperand(0)); 12117 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, 12118 N0.getOperand(0).getOperand(0), 12119 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); 12120 SDLoc DL(N); 12121 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, 12122 DAG.getConstant(ShAmt + 32, DL, MVT::i64)); 12123 } 12124 } 12125 12126 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 12127 // FIXME: Should this be a generic combine? There's a similar combine on X86. 12128 // 12129 // Also try these folds where an add or sub is in the middle. 12130 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) 12131 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) 12132 SDValue Shl; 12133 ConstantSDNode *AddC = nullptr; 12134 12135 // We might have an ADD or SUB between the SRA and SHL. 12136 bool IsAdd = N0.getOpcode() == ISD::ADD; 12137 if ((IsAdd || N0.getOpcode() == ISD::SUB)) { 12138 // Other operand needs to be a constant we can modify. 12139 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0)); 12140 if (!AddC) 12141 return SDValue(); 12142 12143 // AddC needs to have at least 32 trailing zeros. 12144 if (AddC->getAPIntValue().countr_zero() < 32) 12145 return SDValue(); 12146 12147 // All users should be a shift by constant less than or equal to 32. This 12148 // ensures we'll do this optimization for each of them to produce an 12149 // add/sub+sext_inreg they can all share. 12150 for (SDNode *U : N0->uses()) { 12151 if (U->getOpcode() != ISD::SRA || 12152 !isa<ConstantSDNode>(U->getOperand(1)) || 12153 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32) 12154 return SDValue(); 12155 } 12156 12157 Shl = N0.getOperand(IsAdd ? 0 : 1); 12158 } else { 12159 // Not an ADD or SUB. 12160 Shl = N0; 12161 } 12162 12163 // Look for a shift left by 32. 12164 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) || 12165 Shl.getConstantOperandVal(1) != 32) 12166 return SDValue(); 12167 12168 // We if we didn't look through an add/sub, then the shl should have one use. 12169 // If we did look through an add/sub, the sext_inreg we create is free so 12170 // we're only creating 2 new instructions. 
It's enough to only remove the 12171 // original sra+add/sub. 12172 if (!AddC && !Shl.hasOneUse()) 12173 return SDValue(); 12174 12175 SDLoc DL(N); 12176 SDValue In = Shl.getOperand(0); 12177 12178 // If we looked through an ADD or SUB, we need to rebuild it with the shifted 12179 // constant. 12180 if (AddC) { 12181 SDValue ShiftedAddC = 12182 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64); 12183 if (IsAdd) 12184 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC); 12185 else 12186 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In); 12187 } 12188 12189 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, 12190 DAG.getValueType(MVT::i32)); 12191 if (ShAmt == 32) 12192 return SExt; 12193 12194 return DAG.getNode( 12195 ISD::SHL, DL, MVT::i64, SExt, 12196 DAG.getConstant(32 - ShAmt, DL, MVT::i64)); 12197 } 12198 12199 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if 12200 // the result is used as the conditon of a br_cc or select_cc we can invert, 12201 // inverting the setcc is free, and Z is 0/1. Caller will invert the 12202 // br_cc/select_cc. 12203 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) { 12204 bool IsAnd = Cond.getOpcode() == ISD::AND; 12205 if (!IsAnd && Cond.getOpcode() != ISD::OR) 12206 return SDValue(); 12207 12208 if (!Cond.hasOneUse()) 12209 return SDValue(); 12210 12211 SDValue Setcc = Cond.getOperand(0); 12212 SDValue Xor = Cond.getOperand(1); 12213 // Canonicalize setcc to LHS. 12214 if (Setcc.getOpcode() != ISD::SETCC) 12215 std::swap(Setcc, Xor); 12216 // LHS should be a setcc and RHS should be an xor. 12217 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() || 12218 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) 12219 return SDValue(); 12220 12221 // If the condition is an And, SimplifyDemandedBits may have changed 12222 // (xor Z, 1) to (not Z). 12223 SDValue Xor1 = Xor.getOperand(1); 12224 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1))) 12225 return SDValue(); 12226 12227 EVT VT = Cond.getValueType(); 12228 SDValue Xor0 = Xor.getOperand(0); 12229 12230 // The LHS of the xor needs to be 0/1. 12231 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 12232 if (!DAG.MaskedValueIsZero(Xor0, Mask)) 12233 return SDValue(); 12234 12235 // We can only invert integer setccs. 12236 EVT SetCCOpVT = Setcc.getOperand(0).getValueType(); 12237 if (!SetCCOpVT.isScalarInteger()) 12238 return SDValue(); 12239 12240 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); 12241 if (ISD::isIntEqualitySetCC(CCVal)) { 12242 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 12243 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0), 12244 Setcc.getOperand(1), CCVal); 12245 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) { 12246 // Invert (setlt 0, X) by converting to (setlt X, 1). 12247 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1), 12248 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal); 12249 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) { 12250 // (setlt X, 1) by converting to (setlt 0, X). 12251 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, 12252 DAG.getConstant(0, SDLoc(Setcc), VT), 12253 Setcc.getOperand(0), CCVal); 12254 } else 12255 return SDValue(); 12256 12257 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 12258 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0)); 12259 } 12260 12261 // Perform common combines for BR_CC and SELECT_CC condtions. 
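// For example:
//   (br_cc (setcc X, Y, lt), 0, ne, dest) -> (br_cc X, Y, lt, dest)
// The same operand rewrite applies to select_cc.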
12262 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, 12263 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 12264 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 12265 12266 // As far as arithmetic right shift always saves the sign, 12267 // shift can be omitted. 12268 // Fold setlt (sra X, N), 0 -> setlt X, 0 and 12269 // setge (sra X, N), 0 -> setge X, 0 12270 if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) { 12271 if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) && 12272 LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) { 12273 LHS = LHS.getOperand(0); 12274 return true; 12275 } 12276 } 12277 12278 if (!ISD::isIntEqualitySetCC(CCVal)) 12279 return false; 12280 12281 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) 12282 // Sometimes the setcc is introduced after br_cc/select_cc has been formed. 12283 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 12284 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 12285 // If we're looking for eq 0 instead of ne 0, we need to invert the 12286 // condition. 12287 bool Invert = CCVal == ISD::SETEQ; 12288 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 12289 if (Invert) 12290 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 12291 12292 RHS = LHS.getOperand(1); 12293 LHS = LHS.getOperand(0); 12294 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 12295 12296 CC = DAG.getCondCode(CCVal); 12297 return true; 12298 } 12299 12300 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 12301 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { 12302 RHS = LHS.getOperand(1); 12303 LHS = LHS.getOperand(0); 12304 return true; 12305 } 12306 12307 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 12308 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && 12309 LHS.getOperand(1).getOpcode() == ISD::Constant) { 12310 SDValue LHS0 = LHS.getOperand(0); 12311 if (LHS0.getOpcode() == ISD::AND && 12312 LHS0.getOperand(1).getOpcode() == ISD::Constant) { 12313 uint64_t Mask = LHS0.getConstantOperandVal(1); 12314 uint64_t ShAmt = LHS.getConstantOperandVal(1); 12315 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { 12316 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 12317 CC = DAG.getCondCode(CCVal); 12318 12319 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; 12320 LHS = LHS0.getOperand(0); 12321 if (ShAmt != 0) 12322 LHS = 12323 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), 12324 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 12325 return true; 12326 } 12327 } 12328 } 12329 12330 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. 12331 // This can occur when legalizing some floating point comparisons. 12332 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 12333 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 12334 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 12335 CC = DAG.getCondCode(CCVal); 12336 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 12337 return true; 12338 } 12339 12340 if (isNullConstant(RHS)) { 12341 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { 12342 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 12343 CC = DAG.getCondCode(CCVal); 12344 LHS = NewCond; 12345 return true; 12346 } 12347 } 12348 12349 return false; 12350 } 12351 12352 // Fold 12353 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). 12354 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). 
12355 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). 12356 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). 12357 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, 12358 SDValue TrueVal, SDValue FalseVal, 12359 bool Swapped) { 12360 bool Commutative = true; 12361 switch (TrueVal.getOpcode()) { 12362 default: 12363 return SDValue(); 12364 case ISD::SUB: 12365 Commutative = false; 12366 break; 12367 case ISD::ADD: 12368 case ISD::OR: 12369 case ISD::XOR: 12370 break; 12371 } 12372 12373 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) 12374 return SDValue(); 12375 12376 unsigned OpToFold; 12377 if (FalseVal == TrueVal.getOperand(0)) 12378 OpToFold = 0; 12379 else if (Commutative && FalseVal == TrueVal.getOperand(1)) 12380 OpToFold = 1; 12381 else 12382 return SDValue(); 12383 12384 EVT VT = N->getValueType(0); 12385 SDLoc DL(N); 12386 SDValue Zero = DAG.getConstant(0, DL, VT); 12387 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); 12388 12389 if (Swapped) 12390 std::swap(OtherOp, Zero); 12391 SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero); 12392 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); 12393 } 12394 12395 // This tries to get rid of `select` and `icmp` that are being used to handle 12396 // `Targets` that do not support `cttz(0)`/`ctlz(0)`. 12397 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { 12398 SDValue Cond = N->getOperand(0); 12399 12400 // This represents either CTTZ or CTLZ instruction. 12401 SDValue CountZeroes; 12402 12403 SDValue ValOnZero; 12404 12405 if (Cond.getOpcode() != ISD::SETCC) 12406 return SDValue(); 12407 12408 if (!isNullConstant(Cond->getOperand(1))) 12409 return SDValue(); 12410 12411 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); 12412 if (CCVal == ISD::CondCode::SETEQ) { 12413 CountZeroes = N->getOperand(2); 12414 ValOnZero = N->getOperand(1); 12415 } else if (CCVal == ISD::CondCode::SETNE) { 12416 CountZeroes = N->getOperand(1); 12417 ValOnZero = N->getOperand(2); 12418 } else { 12419 return SDValue(); 12420 } 12421 12422 if (CountZeroes.getOpcode() == ISD::TRUNCATE || 12423 CountZeroes.getOpcode() == ISD::ZERO_EXTEND) 12424 CountZeroes = CountZeroes.getOperand(0); 12425 12426 if (CountZeroes.getOpcode() != ISD::CTTZ && 12427 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && 12428 CountZeroes.getOpcode() != ISD::CTLZ && 12429 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) 12430 return SDValue(); 12431 12432 if (!isNullConstant(ValOnZero)) 12433 return SDValue(); 12434 12435 SDValue CountZeroesArgument = CountZeroes->getOperand(0); 12436 if (Cond->getOperand(0) != CountZeroesArgument) 12437 return SDValue(); 12438 12439 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 12440 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), 12441 CountZeroes.getValueType(), CountZeroesArgument); 12442 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { 12443 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes), 12444 CountZeroes.getValueType(), CountZeroesArgument); 12445 } 12446 12447 unsigned BitWidth = CountZeroes.getValueSizeInBits(); 12448 SDValue BitWidthMinusOne = 12449 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); 12450 12451 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(), 12452 CountZeroes, BitWidthMinusOne); 12453 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); 12454 } 12455 12456 static SDValue performSELECTCombine(SDNode *N, 
SelectionDAG &DAG, 12457 const RISCVSubtarget &Subtarget) { 12458 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) 12459 return Folded; 12460 12461 if (Subtarget.hasShortForwardBranchOpt()) 12462 return SDValue(); 12463 12464 SDValue TrueVal = N->getOperand(1); 12465 SDValue FalseVal = N->getOperand(2); 12466 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) 12467 return V; 12468 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); 12469 } 12470 12471 // If we're concatenating a series of vector loads like 12472 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 12473 // Then we can turn this into a strided load by widening the vector elements 12474 // vlse32 p, stride=n 12475 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, 12476 const RISCVSubtarget &Subtarget, 12477 const RISCVTargetLowering &TLI) { 12478 SDLoc DL(N); 12479 EVT VT = N->getValueType(0); 12480 12481 // Only perform this combine on legal MVTs. 12482 if (!TLI.isTypeLegal(VT)) 12483 return SDValue(); 12484 12485 // TODO: Potentially extend this to scalable vectors 12486 if (VT.isScalableVector()) 12487 return SDValue(); 12488 12489 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0)); 12490 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) || 12491 !SDValue(BaseLd, 0).hasOneUse()) 12492 return SDValue(); 12493 12494 EVT BaseLdVT = BaseLd->getValueType(0); 12495 SDValue BasePtr = BaseLd->getBasePtr(); 12496 12497 // Go through the loads and check that they're strided 12498 SDValue CurPtr = BasePtr; 12499 SDValue Stride; 12500 Align Align = BaseLd->getAlign(); 12501 12502 for (SDValue Op : N->ops().drop_front()) { 12503 auto *Ld = dyn_cast<LoadSDNode>(Op); 12504 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || 12505 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) || 12506 Ld->getValueType(0) != BaseLdVT) 12507 return SDValue(); 12508 12509 SDValue Ptr = Ld->getBasePtr(); 12510 // Check that each load's pointer is (add CurPtr, Stride) 12511 if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr) 12512 return SDValue(); 12513 SDValue Offset = Ptr.getOperand(1); 12514 if (!Stride) 12515 Stride = Offset; 12516 else if (Offset != Stride) 12517 return SDValue(); 12518 12519 // The common alignment is the most restrictive (smallest) of all the loads 12520 Align = std::min(Align, Ld->getAlign()); 12521 12522 CurPtr = Ptr; 12523 } 12524 12525 // A special case is if the stride is exactly the width of one of the loads, 12526 // in which case it's contiguous and can be combined into a regular vle 12527 // without changing the element size 12528 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); 12529 ConstStride && 12530 ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) { 12531 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 12532 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), 12533 VT.getStoreSize(), Align); 12534 // Can't do the combine if the load isn't naturally aligned with the element 12535 // type 12536 if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), 12537 DAG.getDataLayout(), VT, *MMO)) 12538 return SDValue(); 12539 12540 SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO); 12541 for (SDValue Ld : N->ops()) 12542 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad); 12543 return WideLoad; 12544 } 12545 12546 // Get the widened scalar type, e.g. 
v4i8 -> i64 12547 unsigned WideScalarBitWidth = 12548 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); 12549 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth); 12550 12551 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 12552 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands()); 12553 if (!TLI.isTypeLegal(WideVecVT)) 12554 return SDValue(); 12555 12556 // Check that the operation is legal 12557 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) 12558 return SDValue(); 12559 12560 MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT); 12561 SDValue VL = 12562 getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second; 12563 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 12564 SDValue IntID = 12565 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT()); 12566 SDValue Ops[] = {BaseLd->getChain(), 12567 IntID, 12568 DAG.getUNDEF(ContainerVT), 12569 BasePtr, 12570 Stride, 12571 VL}; 12572 12573 uint64_t MemSize; 12574 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride)) 12575 // total size = (elsize * n) + (stride - elsize) * (n-1) 12576 // = elsize + stride * (n-1) 12577 MemSize = WideScalarVT.getSizeInBits() + 12578 ConstStride->getSExtValue() * (N->getNumOperands() - 1); 12579 else 12580 // If Stride isn't constant, then we can't know how much it will load 12581 MemSize = MemoryLocation::UnknownSize; 12582 12583 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 12584 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize, 12585 Align); 12586 12587 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, 12588 Ops, WideVecVT, MMO); 12589 for (SDValue Ld : N->ops()) 12590 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); 12591 12592 // Note: Perform the bitcast before the convertFromScalableVector so we have 12593 // balanced pairs of convertFromScalable/convertToScalable 12594 SDValue Res = DAG.getBitcast( 12595 TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad); 12596 return convertFromScalableVector(VT, Res, DAG, Subtarget); 12597 } 12598 12599 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, 12600 const RISCVSubtarget &Subtarget) { 12601 assert(N->getOpcode() == RISCVISD::ADD_VL); 12602 SDValue Addend = N->getOperand(0); 12603 SDValue MulOp = N->getOperand(1); 12604 SDValue AddMergeOp = N->getOperand(2); 12605 12606 if (!AddMergeOp.isUndef()) 12607 return SDValue(); 12608 12609 auto IsVWMulOpc = [](unsigned Opc) { 12610 switch (Opc) { 12611 case RISCVISD::VWMUL_VL: 12612 case RISCVISD::VWMULU_VL: 12613 case RISCVISD::VWMULSU_VL: 12614 return true; 12615 default: 12616 return false; 12617 } 12618 }; 12619 12620 if (!IsVWMulOpc(MulOp.getOpcode())) 12621 std::swap(Addend, MulOp); 12622 12623 if (!IsVWMulOpc(MulOp.getOpcode())) 12624 return SDValue(); 12625 12626 SDValue MulMergeOp = MulOp.getOperand(2); 12627 12628 if (!MulMergeOp.isUndef()) 12629 return SDValue(); 12630 12631 SDValue AddMask = N->getOperand(3); 12632 SDValue AddVL = N->getOperand(4); 12633 SDValue MulMask = MulOp.getOperand(3); 12634 SDValue MulVL = MulOp.getOperand(4); 12635 12636 if (AddMask != MulMask || AddVL != MulVL) 12637 return SDValue(); 12638 12639 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; 12640 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, 12641 "Unexpected opcode after VWMACC_VL"); 12642 static_assert(RISCVISD::VWMACC_VL + 2 == 
RISCVISD::VWMACCSU_VL, 12643 "Unexpected opcode after VWMACC_VL!"); 12644 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, 12645 "Unexpected opcode after VWMUL_VL!"); 12646 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, 12647 "Unexpected opcode after VWMUL_VL!"); 12648 12649 SDLoc DL(N); 12650 EVT VT = N->getValueType(0); 12651 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask, 12652 AddVL}; 12653 return DAG.getNode(Opc, DL, VT, Ops); 12654 } 12655 12656 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 12657 DAGCombinerInfo &DCI) const { 12658 SelectionDAG &DAG = DCI.DAG; 12659 12660 // Helper to call SimplifyDemandedBits on an operand of N where only some low 12661 // bits are demanded. N will be added to the Worklist if it was not deleted. 12662 // Caller should return SDValue(N, 0) if this returns true. 12663 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { 12664 SDValue Op = N->getOperand(OpNo); 12665 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); 12666 if (!SimplifyDemandedBits(Op, Mask, DCI)) 12667 return false; 12668 12669 if (N->getOpcode() != ISD::DELETED_NODE) 12670 DCI.AddToWorklist(N); 12671 return true; 12672 }; 12673 12674 switch (N->getOpcode()) { 12675 default: 12676 break; 12677 case RISCVISD::SplitF64: { 12678 SDValue Op0 = N->getOperand(0); 12679 // If the input to SplitF64 is just BuildPairF64 then the operation is 12680 // redundant. Instead, use BuildPairF64's operands directly. 12681 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 12682 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 12683 12684 if (Op0->isUndef()) { 12685 SDValue Lo = DAG.getUNDEF(MVT::i32); 12686 SDValue Hi = DAG.getUNDEF(MVT::i32); 12687 return DCI.CombineTo(N, Lo, Hi); 12688 } 12689 12690 SDLoc DL(N); 12691 12692 // It's cheaper to materialise two 32-bit integers than to load a double 12693 // from the constant pool and transfer it to integer registers through the 12694 // stack. 12695 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 12696 APInt V = C->getValueAPF().bitcastToAPInt(); 12697 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 12698 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 12699 return DCI.CombineTo(N, Lo, Hi); 12700 } 12701 12702 // This is a target-specific version of a DAGCombine performed in 12703 // DAGCombiner::visitBITCAST. 
It performs the equivalent of: 12704 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 12705 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 12706 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 12707 !Op0.getNode()->hasOneUse()) 12708 break; 12709 SDValue NewSplitF64 = 12710 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 12711 Op0.getOperand(0)); 12712 SDValue Lo = NewSplitF64.getValue(0); 12713 SDValue Hi = NewSplitF64.getValue(1); 12714 APInt SignBit = APInt::getSignMask(32); 12715 if (Op0.getOpcode() == ISD::FNEG) { 12716 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 12717 DAG.getConstant(SignBit, DL, MVT::i32)); 12718 return DCI.CombineTo(N, Lo, NewHi); 12719 } 12720 assert(Op0.getOpcode() == ISD::FABS); 12721 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 12722 DAG.getConstant(~SignBit, DL, MVT::i32)); 12723 return DCI.CombineTo(N, Lo, NewHi); 12724 } 12725 case RISCVISD::SLLW: 12726 case RISCVISD::SRAW: 12727 case RISCVISD::SRLW: 12728 case RISCVISD::RORW: 12729 case RISCVISD::ROLW: { 12730 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 12731 if (SimplifyDemandedLowBitsHelper(0, 32) || 12732 SimplifyDemandedLowBitsHelper(1, 5)) 12733 return SDValue(N, 0); 12734 12735 break; 12736 } 12737 case RISCVISD::CLZW: 12738 case RISCVISD::CTZW: { 12739 // Only the lower 32 bits of the first operand are read 12740 if (SimplifyDemandedLowBitsHelper(0, 32)) 12741 return SDValue(N, 0); 12742 break; 12743 } 12744 case RISCVISD::FMV_W_X_RV64: { 12745 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the 12746 // conversion is unnecessary and can be replaced with the 12747 // FMV_X_ANYEXTW_RV64 operand. 12748 SDValue Op0 = N->getOperand(0); 12749 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) 12750 return Op0.getOperand(0); 12751 break; 12752 } 12753 case RISCVISD::FMV_X_ANYEXTH: 12754 case RISCVISD::FMV_X_ANYEXTW_RV64: { 12755 SDLoc DL(N); 12756 SDValue Op0 = N->getOperand(0); 12757 MVT VT = N->getSimpleValueType(0); 12758 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 12759 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 12760 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 12761 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && 12762 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || 12763 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && 12764 Op0->getOpcode() == RISCVISD::FMV_H_X)) { 12765 assert(Op0.getOperand(0).getValueType() == VT && 12766 "Unexpected value type!"); 12767 return Op0.getOperand(0); 12768 } 12769 12770 // This is a target-specific version of a DAGCombine performed in 12771 // DAGCombiner::visitBITCAST. It performs the equivalent of: 12772 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 12773 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 12774 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 12775 !Op0.getNode()->hasOneUse()) 12776 break; 12777 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 12778 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 
32 : 16; 12779 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); 12780 if (Op0.getOpcode() == ISD::FNEG) 12781 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 12782 DAG.getConstant(SignBit, DL, VT)); 12783 12784 assert(Op0.getOpcode() == ISD::FABS); 12785 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 12786 DAG.getConstant(~SignBit, DL, VT)); 12787 } 12788 case ISD::ADD: 12789 return performADDCombine(N, DAG, Subtarget); 12790 case ISD::SUB: 12791 return performSUBCombine(N, DAG, Subtarget); 12792 case ISD::AND: 12793 return performANDCombine(N, DCI, Subtarget); 12794 case ISD::OR: 12795 return performORCombine(N, DCI, Subtarget); 12796 case ISD::XOR: 12797 return performXORCombine(N, DAG, Subtarget); 12798 case ISD::FADD: 12799 case ISD::UMAX: 12800 case ISD::UMIN: 12801 case ISD::SMAX: 12802 case ISD::SMIN: 12803 case ISD::FMAXNUM: 12804 case ISD::FMINNUM: 12805 return combineBinOpToReduce(N, DAG, Subtarget); 12806 case ISD::SETCC: 12807 return performSETCCCombine(N, DAG, Subtarget); 12808 case ISD::SIGN_EXTEND_INREG: 12809 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); 12810 case ISD::ZERO_EXTEND: 12811 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 12812 // type legalization. This is safe because fp_to_uint produces poison if 12813 // it overflows. 12814 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { 12815 SDValue Src = N->getOperand(0); 12816 if (Src.getOpcode() == ISD::FP_TO_UINT && 12817 isTypeLegal(Src.getOperand(0).getValueType())) 12818 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 12819 Src.getOperand(0)); 12820 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && 12821 isTypeLegal(Src.getOperand(1).getValueType())) { 12822 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 12823 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, 12824 Src.getOperand(0), Src.getOperand(1)); 12825 DCI.CombineTo(N, Res); 12826 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); 12827 DCI.recursivelyDeleteUnusedNodes(Src.getNode()); 12828 return SDValue(N, 0); // Return N so it doesn't get rechecked. 12829 } 12830 } 12831 return SDValue(); 12832 case ISD::TRUNCATE: 12833 return performTRUNCATECombine(N, DAG, Subtarget); 12834 case ISD::SELECT: 12835 return performSELECTCombine(N, DAG, Subtarget); 12836 case RISCVISD::CZERO_EQZ: 12837 case RISCVISD::CZERO_NEZ: 12838 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1. 12839 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1. 12840 if (N->getOperand(1).getOpcode() == ISD::XOR && 12841 isOneConstant(N->getOperand(1).getOperand(1))) { 12842 SDValue Cond = N->getOperand(1).getOperand(0); 12843 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1); 12844 if (DAG.MaskedValueIsZero(Cond, Mask)) { 12845 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ 12846 ? RISCVISD::CZERO_NEZ 12847 : RISCVISD::CZERO_EQZ; 12848 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), 12849 N->getOperand(0), Cond); 12850 } 12851 } 12852 return SDValue(); 12853 12854 case RISCVISD::SELECT_CC: { 12855 // Transform 12856 SDValue LHS = N->getOperand(0); 12857 SDValue RHS = N->getOperand(1); 12858 SDValue CC = N->getOperand(2); 12859 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 12860 SDValue TrueV = N->getOperand(3); 12861 SDValue FalseV = N->getOperand(4); 12862 SDLoc DL(N); 12863 EVT VT = N->getValueType(0); 12864 12865 // If the True and False values are the same, we don't need a select_cc. 
12866 if (TrueV == FalseV) 12867 return TrueV; 12868 12869 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 12870 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 12871 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && 12872 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && 12873 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { 12874 if (CCVal == ISD::CondCode::SETGE) 12875 std::swap(TrueV, FalseV); 12876 12877 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); 12878 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); 12879 // Only handle simm12, if it is not in this range, it can be considered as 12880 // register. 12881 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && 12882 isInt<12>(TrueSImm - FalseSImm)) { 12883 SDValue SRA = 12884 DAG.getNode(ISD::SRA, DL, VT, LHS, 12885 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); 12886 SDValue AND = 12887 DAG.getNode(ISD::AND, DL, VT, SRA, 12888 DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); 12889 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); 12890 } 12891 12892 if (CCVal == ISD::CondCode::SETGE) 12893 std::swap(TrueV, FalseV); 12894 } 12895 12896 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 12897 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 12898 {LHS, RHS, CC, TrueV, FalseV}); 12899 12900 if (!Subtarget.hasShortForwardBranchOpt()) { 12901 // (select c, -1, y) -> -c | y 12902 if (isAllOnesConstant(TrueV)) { 12903 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 12904 SDValue Neg = DAG.getNegative(C, DL, VT); 12905 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 12906 } 12907 // (select c, y, -1) -> -!c | y 12908 if (isAllOnesConstant(FalseV)) { 12909 SDValue C = 12910 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 12911 SDValue Neg = DAG.getNegative(C, DL, VT); 12912 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 12913 } 12914 12915 // (select c, 0, y) -> -!c & y 12916 if (isNullConstant(TrueV)) { 12917 SDValue C = 12918 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 12919 SDValue Neg = DAG.getNegative(C, DL, VT); 12920 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 12921 } 12922 // (select c, y, 0) -> -c & y 12923 if (isNullConstant(FalseV)) { 12924 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 12925 SDValue Neg = DAG.getNegative(C, DL, VT); 12926 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 12927 } 12928 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 12929 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 12930 if (((isOneConstant(FalseV) && LHS == TrueV && 12931 CCVal == ISD::CondCode::SETNE) || 12932 (isOneConstant(TrueV) && LHS == FalseV && 12933 CCVal == ISD::CondCode::SETEQ)) && 12934 isNullConstant(RHS)) { 12935 // freeze it to be safe. 
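        // (LHS feeds both the setcc and the add in the replacement, so it is
        // frozen first to keep the two uses consistent if it happens to be
        // undef or poison.)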
12936 LHS = DAG.getFreeze(LHS); 12937 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ); 12938 return DAG.getNode(ISD::ADD, DL, VT, LHS, C); 12939 } 12940 } 12941 12942 return SDValue(); 12943 } 12944 case RISCVISD::BR_CC: { 12945 SDValue LHS = N->getOperand(1); 12946 SDValue RHS = N->getOperand(2); 12947 SDValue CC = N->getOperand(3); 12948 SDLoc DL(N); 12949 12950 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 12951 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 12952 N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); 12953 12954 return SDValue(); 12955 } 12956 case ISD::BITREVERSE: 12957 return performBITREVERSECombine(N, DAG, Subtarget); 12958 case ISD::FP_TO_SINT: 12959 case ISD::FP_TO_UINT: 12960 return performFP_TO_INTCombine(N, DCI, Subtarget); 12961 case ISD::FP_TO_SINT_SAT: 12962 case ISD::FP_TO_UINT_SAT: 12963 return performFP_TO_INT_SATCombine(N, DCI, Subtarget); 12964 case ISD::FCOPYSIGN: { 12965 EVT VT = N->getValueType(0); 12966 if (!VT.isVector()) 12967 break; 12968 // There is a form of VFSGNJ which injects the negated sign of its second 12969 // operand. Try and bubble any FNEG up after the extend/round to produce 12970 // this optimized pattern. Avoid modifying cases where FP_ROUND and 12971 // TRUNC=1. 12972 SDValue In2 = N->getOperand(1); 12973 // Avoid cases where the extend/round has multiple uses, as duplicating 12974 // those is typically more expensive than removing a fneg. 12975 if (!In2.hasOneUse()) 12976 break; 12977 if (In2.getOpcode() != ISD::FP_EXTEND && 12978 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 12979 break; 12980 In2 = In2.getOperand(0); 12981 if (In2.getOpcode() != ISD::FNEG) 12982 break; 12983 SDLoc DL(N); 12984 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 12985 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 12986 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 12987 } 12988 case ISD::MGATHER: 12989 case ISD::MSCATTER: 12990 case ISD::VP_GATHER: 12991 case ISD::VP_SCATTER: { 12992 if (!DCI.isBeforeLegalize()) 12993 break; 12994 SDValue Index, ScaleOp; 12995 bool IsIndexSigned = false; 12996 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) { 12997 Index = VPGSN->getIndex(); 12998 ScaleOp = VPGSN->getScale(); 12999 IsIndexSigned = VPGSN->isIndexSigned(); 13000 assert(!VPGSN->isIndexScaled() && 13001 "Scaled gather/scatter should not be formed"); 13002 } else { 13003 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N); 13004 Index = MGSN->getIndex(); 13005 ScaleOp = MGSN->getScale(); 13006 IsIndexSigned = MGSN->isIndexSigned(); 13007 assert(!MGSN->isIndexScaled() && 13008 "Scaled gather/scatter should not be formed"); 13009 13010 } 13011 EVT IndexVT = Index.getValueType(); 13012 MVT XLenVT = Subtarget.getXLenVT(); 13013 // RISC-V indexed loads only support the "unsigned unscaled" addressing 13014 // mode, so anything else must be manually legalized. 13015 bool NeedsIdxLegalization = 13016 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT)); 13017 if (!NeedsIdxLegalization) 13018 break; 13019 13020 SDLoc DL(N); 13021 13022 // Any index legalization should first promote to XLenVT, so we don't lose 13023 // bits when scaling. This may create an illegal index type so we let 13024 // LLVM's legalization take care of the splitting. 13025 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 
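    // (Sign-extending a narrow signed index to XLenVT is safe for the
    // unsigned-unscaled addressing mode: address arithmetic wraps modulo
    // 2^XLEN, so a negative index still reaches the intended address.)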
13026 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 13027 IndexVT = IndexVT.changeVectorElementType(XLenVT); 13028 Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, 13029 DL, IndexVT, Index); 13030 } 13031 13032 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED; 13033 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N)) 13034 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 13035 {VPGN->getChain(), VPGN->getBasePtr(), Index, 13036 ScaleOp, VPGN->getMask(), 13037 VPGN->getVectorLength()}, 13038 VPGN->getMemOperand(), NewIndexTy); 13039 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N)) 13040 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 13041 {VPSN->getChain(), VPSN->getValue(), 13042 VPSN->getBasePtr(), Index, ScaleOp, 13043 VPSN->getMask(), VPSN->getVectorLength()}, 13044 VPSN->getMemOperand(), NewIndexTy); 13045 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) 13046 return DAG.getMaskedGather( 13047 N->getVTList(), MGN->getMemoryVT(), DL, 13048 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 13049 MGN->getBasePtr(), Index, ScaleOp}, 13050 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); 13051 const auto *MSN = cast<MaskedScatterSDNode>(N); 13052 return DAG.getMaskedScatter( 13053 N->getVTList(), MSN->getMemoryVT(), DL, 13054 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 13055 Index, ScaleOp}, 13056 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); 13057 } 13058 case RISCVISD::SRA_VL: 13059 case RISCVISD::SRL_VL: 13060 case RISCVISD::SHL_VL: { 13061 SDValue ShAmt = N->getOperand(1); 13062 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 13063 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 13064 SDLoc DL(N); 13065 SDValue VL = N->getOperand(3); 13066 EVT VT = N->getValueType(0); 13067 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 13068 ShAmt.getOperand(1), VL); 13069 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 13070 N->getOperand(2), N->getOperand(3), N->getOperand(4)); 13071 } 13072 break; 13073 } 13074 case ISD::SRA: 13075 if (SDValue V = performSRACombine(N, DAG, Subtarget)) 13076 return V; 13077 [[fallthrough]]; 13078 case ISD::SRL: 13079 case ISD::SHL: { 13080 SDValue ShAmt = N->getOperand(1); 13081 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 13082 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 
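      // (Vector shifts only use the low log2(SEW) bits of each shift-amount
      // element, so splatting just the low 32 bits with vmv.v.x behaves the
      // same as splatting the full 64-bit value.)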
13083 SDLoc DL(N); 13084 EVT VT = N->getValueType(0); 13085 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 13086 ShAmt.getOperand(1), 13087 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); 13088 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 13089 } 13090 break; 13091 } 13092 case RISCVISD::ADD_VL: 13093 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) 13094 return V; 13095 return combineToVWMACC(N, DAG, Subtarget); 13096 case RISCVISD::SUB_VL: 13097 case RISCVISD::VWADD_W_VL: 13098 case RISCVISD::VWADDU_W_VL: 13099 case RISCVISD::VWSUB_W_VL: 13100 case RISCVISD::VWSUBU_W_VL: 13101 case RISCVISD::MUL_VL: 13102 return combineBinOp_VLToVWBinOp_VL(N, DCI); 13103 case RISCVISD::VFMADD_VL: 13104 case RISCVISD::VFNMADD_VL: 13105 case RISCVISD::VFMSUB_VL: 13106 case RISCVISD::VFNMSUB_VL: 13107 case RISCVISD::STRICT_VFMADD_VL: 13108 case RISCVISD::STRICT_VFNMADD_VL: 13109 case RISCVISD::STRICT_VFMSUB_VL: 13110 case RISCVISD::STRICT_VFNMSUB_VL: 13111 return performVFMADD_VLCombine(N, DAG); 13112 case RISCVISD::FMUL_VL: 13113 return performVFMUL_VLCombine(N, DAG); 13114 case RISCVISD::FADD_VL: 13115 case RISCVISD::FSUB_VL: 13116 return performFADDSUB_VLCombine(N, DAG); 13117 case ISD::LOAD: 13118 case ISD::STORE: { 13119 if (DCI.isAfterLegalizeDAG()) 13120 if (SDValue V = performMemPairCombine(N, DCI)) 13121 return V; 13122 13123 if (N->getOpcode() != ISD::STORE) 13124 break; 13125 13126 auto *Store = cast<StoreSDNode>(N); 13127 SDValue Chain = Store->getChain(); 13128 EVT MemVT = Store->getMemoryVT(); 13129 SDValue Val = Store->getValue(); 13130 SDLoc DL(N); 13131 13132 bool IsScalarizable = 13133 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) && 13134 Store->isSimple() && 13135 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && 13136 isPowerOf2_64(MemVT.getSizeInBits()) && 13137 MemVT.getSizeInBits() <= Subtarget.getXLen(); 13138 13139 // If sufficiently aligned we can scalarize stores of constant vectors of 13140 // any power-of-two size up to XLen bits, provided that they aren't too 13141 // expensive to materialize. 13142 // vsetivli zero, 2, e8, m1, ta, ma 13143 // vmv.v.i v8, 4 13144 // vse64.v v8, (a0) 13145 // -> 13146 // li a1, 1028 13147 // sh a1, 0(a0) 13148 if (DCI.isBeforeLegalize() && IsScalarizable && 13149 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { 13150 // Get the constant vector bits 13151 APInt NewC(Val.getValueSizeInBits(), 0); 13152 for (unsigned i = 0; i < Val.getNumOperands(); i++) { 13153 if (Val.getOperand(i).isUndef()) 13154 continue; 13155 NewC.insertBits(Val.getConstantOperandAPInt(i), 13156 i * Val.getScalarValueSizeInBits()); 13157 } 13158 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 13159 13160 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), 13161 Subtarget.getFeatureBits(), true) <= 2 && 13162 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 13163 NewVT, *Store->getMemOperand())) { 13164 SDValue NewV = DAG.getConstant(NewC, DL, NewVT); 13165 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), 13166 Store->getPointerInfo(), Store->getOriginalAlign(), 13167 Store->getMemOperand()->getFlags()); 13168 } 13169 } 13170 13171 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 
13172 // vsetivli zero, 2, e16, m1, ta, ma 13173 // vle16.v v8, (a0) 13174 // vse16.v v8, (a1) 13175 if (auto *L = dyn_cast<LoadSDNode>(Val); 13176 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && 13177 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) && 13178 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) && 13179 L->getMemoryVT() == MemVT) { 13180 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 13181 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 13182 NewVT, *Store->getMemOperand()) && 13183 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 13184 NewVT, *L->getMemOperand())) { 13185 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(), 13186 L->getPointerInfo(), L->getOriginalAlign(), 13187 L->getMemOperand()->getFlags()); 13188 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(), 13189 Store->getPointerInfo(), Store->getOriginalAlign(), 13190 Store->getMemOperand()->getFlags()); 13191 } 13192 } 13193 13194 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 13195 // vfmv.f.s is represented as extract element from 0. Match it late to avoid 13196 // any illegal types. 13197 if (Val.getOpcode() == RISCVISD::VMV_X_S || 13198 (DCI.isAfterLegalizeDAG() && 13199 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 13200 isNullConstant(Val.getOperand(1)))) { 13201 SDValue Src = Val.getOperand(0); 13202 MVT VecVT = Src.getSimpleValueType(); 13203 // VecVT should be scalable and memory VT should match the element type. 13204 if (VecVT.isScalableVector() && 13205 MemVT == VecVT.getVectorElementType()) { 13206 SDLoc DL(N); 13207 MVT MaskVT = getMaskTypeFor(VecVT); 13208 return DAG.getStoreVP( 13209 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), 13210 DAG.getConstant(1, DL, MaskVT), 13211 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, 13212 Store->getMemOperand(), Store->getAddressingMode(), 13213 Store->isTruncatingStore(), /*IsCompress*/ false); 13214 } 13215 } 13216 13217 break; 13218 } 13219 case ISD::SPLAT_VECTOR: { 13220 EVT VT = N->getValueType(0); 13221 // Only perform this combine on legal MVT types. 13222 if (!isTypeLegal(VT)) 13223 break; 13224 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, 13225 DAG, Subtarget)) 13226 return Gather; 13227 break; 13228 } 13229 case ISD::CONCAT_VECTORS: 13230 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) 13231 return V; 13232 break; 13233 case RISCVISD::VMV_V_X_VL: { 13234 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the 13235 // scalar input. 13236 unsigned ScalarSize = N->getOperand(1).getValueSizeInBits(); 13237 unsigned EltWidth = N->getValueType(0).getScalarSizeInBits(); 13238 if (ScalarSize > EltWidth && N->getOperand(0).isUndef()) 13239 if (SimplifyDemandedLowBitsHelper(1, EltWidth)) 13240 return SDValue(N, 0); 13241 13242 break; 13243 } 13244 case RISCVISD::VFMV_S_F_VL: { 13245 SDValue Src = N->getOperand(1); 13246 // Try to remove vector->scalar->vector if the scalar->vector is inserting 13247 // into an undef vector. 13248 // TODO: Could use a vslide or vmv.v.v for non-undef. 
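    // For example, (vfmv_s_f_vl undef, (extract_vector_elt X, 0), vl) simply
    // becomes X when X is a scalable vector of the same type as the result.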
13249 if (N->getOperand(0).isUndef() && 13250 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 13251 isNullConstant(Src.getOperand(1)) && 13252 Src.getOperand(0).getValueType().isScalableVector()) { 13253 EVT VT = N->getValueType(0); 13254 EVT SrcVT = Src.getOperand(0).getValueType(); 13255 assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); 13256 // Widths match, just return the original vector. 13257 if (SrcVT == VT) 13258 return Src.getOperand(0); 13259 // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 13260 } 13261 break; 13262 } 13263 case ISD::INTRINSIC_VOID: 13264 case ISD::INTRINSIC_W_CHAIN: 13265 case ISD::INTRINSIC_WO_CHAIN: { 13266 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; 13267 unsigned IntNo = N->getConstantOperandVal(IntOpNo); 13268 switch (IntNo) { 13269 // By default we do not combine any intrinsic. 13270 default: 13271 return SDValue(); 13272 case Intrinsic::riscv_vcpop: 13273 case Intrinsic::riscv_vcpop_mask: 13274 case Intrinsic::riscv_vfirst: 13275 case Intrinsic::riscv_vfirst_mask: { 13276 SDValue VL = N->getOperand(2); 13277 if (IntNo == Intrinsic::riscv_vcpop_mask || 13278 IntNo == Intrinsic::riscv_vfirst_mask) 13279 VL = N->getOperand(3); 13280 if (!isNullConstant(VL)) 13281 return SDValue(); 13282 // If VL is 0, vcpop -> li 0, vfirst -> li -1. 13283 SDLoc DL(N); 13284 EVT VT = N->getValueType(0); 13285 if (IntNo == Intrinsic::riscv_vfirst || 13286 IntNo == Intrinsic::riscv_vfirst_mask) 13287 return DAG.getConstant(-1, DL, VT); 13288 return DAG.getConstant(0, DL, VT); 13289 } 13290 case Intrinsic::riscv_vloxei: 13291 case Intrinsic::riscv_vloxei_mask: 13292 case Intrinsic::riscv_vluxei: 13293 case Intrinsic::riscv_vluxei_mask: 13294 case Intrinsic::riscv_vsoxei: 13295 case Intrinsic::riscv_vsoxei_mask: 13296 case Intrinsic::riscv_vsuxei: 13297 case Intrinsic::riscv_vsuxei_mask: 13298 if (SDValue V = narrowIndex(N->getOperand(4), DAG)) { 13299 SmallVector<SDValue, 8> Ops(N->ops()); 13300 Ops[4] = V; 13301 const auto *MemSD = cast<MemIntrinsicSDNode>(N); 13302 return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), 13303 Ops, MemSD->getMemoryVT(), 13304 MemSD->getMemOperand()); 13305 } 13306 return SDValue(); 13307 } 13308 } 13309 case ISD::BITCAST: { 13310 assert(Subtarget.useRVVForFixedLengthVectors()); 13311 SDValue N0 = N->getOperand(0); 13312 EVT VT = N->getValueType(0); 13313 EVT SrcVT = N0.getValueType(); 13314 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 13315 // type, widen both sides to avoid a trip through memory. 13316 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && 13317 VT.isScalarInteger()) { 13318 unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); 13319 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); 13320 Ops[0] = N0; 13321 SDLoc DL(N); 13322 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); 13323 N0 = DAG.getBitcast(MVT::i8, N0); 13324 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); 13325 } 13326 13327 return SDValue(); 13328 } 13329 } 13330 13331 return SDValue(); 13332 } 13333 13334 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( 13335 EVT XVT, unsigned KeptBits) const { 13336 // For vectors, we don't have a preference.. 13337 if (XVT.isVector()) 13338 return false; 13339 13340 if (XVT != MVT::i32 && XVT != MVT::i64) 13341 return false; 13342 13343 // We can use sext.w for RV64 or an srai 31 on RV32. 
13344 if (KeptBits == 32 || KeptBits == 64) 13345 return true; 13346 13347 // With Zbb we can use sext.h/sext.b. 13348 return Subtarget.hasStdExtZbb() && 13349 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || 13350 KeptBits == 16); 13351 } 13352 13353 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 13354 const SDNode *N, CombineLevel Level) const { 13355 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 13356 N->getOpcode() == ISD::SRL) && 13357 "Expected shift op"); 13358 13359 // The following folds are only desirable if `(OP _, c1 << c2)` can be 13360 // materialised in fewer instructions than `(OP _, c1)`: 13361 // 13362 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 13363 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 13364 SDValue N0 = N->getOperand(0); 13365 EVT Ty = N0.getValueType(); 13366 if (Ty.isScalarInteger() && 13367 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 13368 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 13369 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 13370 if (C1 && C2) { 13371 const APInt &C1Int = C1->getAPIntValue(); 13372 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 13373 13374 // We can materialise `c1 << c2` into an add immediate, so it's "free", 13375 // and the combine should happen, to potentially allow further combines 13376 // later. 13377 if (ShiftedC1Int.getSignificantBits() <= 64 && 13378 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 13379 return true; 13380 13381 // We can materialise `c1` in an add immediate, so it's "free", and the 13382 // combine should be prevented. 13383 if (C1Int.getSignificantBits() <= 64 && 13384 isLegalAddImmediate(C1Int.getSExtValue())) 13385 return false; 13386 13387 // Neither constant will fit into an immediate, so find materialisation 13388 // costs. 13389 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 13390 Subtarget.getFeatureBits(), 13391 /*CompressionCost*/true); 13392 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 13393 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(), 13394 /*CompressionCost*/true); 13395 13396 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 13397 // combine should be prevented. 13398 if (C1Cost < ShiftedC1Cost) 13399 return false; 13400 } 13401 } 13402 return true; 13403 } 13404 13405 bool RISCVTargetLowering::targetShrinkDemandedConstant( 13406 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 13407 TargetLoweringOpt &TLO) const { 13408 // Delay this optimization as late as possible. 13409 if (!TLO.LegalOps) 13410 return false; 13411 13412 EVT VT = Op.getValueType(); 13413 if (VT.isVector()) 13414 return false; 13415 13416 unsigned Opcode = Op.getOpcode(); 13417 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) 13418 return false; 13419 13420 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 13421 if (!C) 13422 return false; 13423 13424 const APInt &Mask = C->getAPIntValue(); 13425 13426 // Clear all non-demanded bits initially. 13427 APInt ShrunkMask = Mask & DemandedBits; 13428 13429 // Try to make a smaller immediate by setting undemanded bits. 
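// Worked example (hypothetical values, RV64): for (and X, 0x1FFFFFFFF) with
// only the low 32 bits demanded, ShrunkMask is 0xFFFFFFFF and still lies
// within the expanded mask, so the checks below replace the constant with
// 0xFFFFFFFF; that AND then lowers to a single zext.w (or slli+srli) rather
// than a multi-instruction materialisation of 0x1FFFFFFFF.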
13430 13431 APInt ExpandedMask = Mask | ~DemandedBits; 13432 13433 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 13434 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 13435 }; 13436 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { 13437 if (NewMask == Mask) 13438 return true; 13439 SDLoc DL(Op); 13440 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); 13441 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 13442 Op.getOperand(0), NewC); 13443 return TLO.CombineTo(Op, NewOp); 13444 }; 13445 13446 // If the shrunk mask fits in sign extended 12 bits, let the target 13447 // independent code apply it. 13448 if (ShrunkMask.isSignedIntN(12)) 13449 return false; 13450 13451 // And has a few special cases for zext. 13452 if (Opcode == ISD::AND) { 13453 // Preserve (and X, 0xffff), if zext.h exists use zext.h, 13454 // otherwise use SLLI + SRLI. 13455 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 13456 if (IsLegalMask(NewMask)) 13457 return UseMask(NewMask); 13458 13459 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 13460 if (VT == MVT::i64) { 13461 APInt NewMask = APInt(64, 0xffffffff); 13462 if (IsLegalMask(NewMask)) 13463 return UseMask(NewMask); 13464 } 13465 } 13466 13467 // For the remaining optimizations, we need to be able to make a negative 13468 // number through a combination of mask and undemanded bits. 13469 if (!ExpandedMask.isNegative()) 13470 return false; 13471 13472 // What is the fewest number of bits we need to represent the negative number. 13473 unsigned MinSignedBits = ExpandedMask.getSignificantBits(); 13474 13475 // Try to make a 12 bit negative immediate. If that fails try to make a 32 13476 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 13477 // If we can't create a simm12, we shouldn't change opaque constants. 13478 APInt NewMask = ShrunkMask; 13479 if (MinSignedBits <= 12) 13480 NewMask.setBitsFrom(11); 13481 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 13482 NewMask.setBitsFrom(31); 13483 else 13484 return false; 13485 13486 // Check that our new mask is a subset of the demanded mask. 
13487 assert(IsLegalMask(NewMask)); 13488 return UseMask(NewMask); 13489 } 13490 13491 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { 13492 static const uint64_t GREVMasks[] = { 13493 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 13494 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 13495 13496 for (unsigned Stage = 0; Stage != 6; ++Stage) { 13497 unsigned Shift = 1 << Stage; 13498 if (ShAmt & Shift) { 13499 uint64_t Mask = GREVMasks[Stage]; 13500 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); 13501 if (IsGORC) 13502 Res |= x; 13503 x = Res; 13504 } 13505 } 13506 13507 return x; 13508 } 13509 13510 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 13511 KnownBits &Known, 13512 const APInt &DemandedElts, 13513 const SelectionDAG &DAG, 13514 unsigned Depth) const { 13515 unsigned BitWidth = Known.getBitWidth(); 13516 unsigned Opc = Op.getOpcode(); 13517 assert((Opc >= ISD::BUILTIN_OP_END || 13518 Opc == ISD::INTRINSIC_WO_CHAIN || 13519 Opc == ISD::INTRINSIC_W_CHAIN || 13520 Opc == ISD::INTRINSIC_VOID) && 13521 "Should use MaskedValueIsZero if you don't know whether Op" 13522 " is a target node!"); 13523 13524 Known.resetAll(); 13525 switch (Opc) { 13526 default: break; 13527 case RISCVISD::SELECT_CC: { 13528 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 13529 // If we don't know any bits, early out. 13530 if (Known.isUnknown()) 13531 break; 13532 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 13533 13534 // Only known if known in both the LHS and RHS. 13535 Known = Known.intersectWith(Known2); 13536 break; 13537 } 13538 case RISCVISD::CZERO_EQZ: 13539 case RISCVISD::CZERO_NEZ: 13540 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 13541 // Result is either all zero or operand 0. We can propagate zeros, but not 13542 // ones. 13543 Known.One.clearAllBits(); 13544 break; 13545 case RISCVISD::REMUW: { 13546 KnownBits Known2; 13547 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 13548 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 13549 // We only care about the lower 32 bits. 13550 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 13551 // Restore the original width by sign extending. 13552 Known = Known.sext(BitWidth); 13553 break; 13554 } 13555 case RISCVISD::DIVUW: { 13556 KnownBits Known2; 13557 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 13558 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 13559 // We only care about the lower 32 bits. 13560 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 13561 // Restore the original width by sign extending. 
13562 Known = Known.sext(BitWidth); 13563 break; 13564 } 13565 case RISCVISD::CTZW: { 13566 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 13567 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 13568 unsigned LowBits = llvm::bit_width(PossibleTZ); 13569 Known.Zero.setBitsFrom(LowBits); 13570 break; 13571 } 13572 case RISCVISD::CLZW: { 13573 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 13574 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 13575 unsigned LowBits = llvm::bit_width(PossibleLZ); 13576 Known.Zero.setBitsFrom(LowBits); 13577 break; 13578 } 13579 case RISCVISD::BREV8: 13580 case RISCVISD::ORC_B: { 13581 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a 13582 // control value of 7 is equivalent to brev8 and orc.b. 13583 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 13584 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; 13585 // To compute zeros, we need to invert the value and invert it back after. 13586 Known.Zero = 13587 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); 13588 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); 13589 break; 13590 } 13591 case RISCVISD::READ_VLENB: { 13592 // We can use the minimum and maximum VLEN values to bound VLENB. We 13593 // know VLEN must be a power of two. 13594 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; 13595 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; 13596 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); 13597 Known.Zero.setLowBits(Log2_32(MinVLenB)); 13598 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); 13599 if (MaxVLenB == MinVLenB) 13600 Known.One.setBit(Log2_32(MinVLenB)); 13601 break; 13602 } 13603 case RISCVISD::FPCLASS: { 13604 // fclass will only set one of the low 10 bits. 13605 Known.Zero.setBitsFrom(10); 13606 break; 13607 } 13608 case ISD::INTRINSIC_W_CHAIN: 13609 case ISD::INTRINSIC_WO_CHAIN: { 13610 unsigned IntNo = 13611 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); 13612 switch (IntNo) { 13613 default: 13614 // We can't do anything for most intrinsics. 13615 break; 13616 case Intrinsic::riscv_vsetvli: 13617 case Intrinsic::riscv_vsetvlimax: 13618 // Assume that VL output is <= 65536. 13619 // TODO: Take SEW and LMUL into account. 13620 if (BitWidth > 17) 13621 Known.Zero.setBitsFrom(17); 13622 break; 13623 } 13624 break; 13625 } 13626 } 13627 } 13628 13629 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 13630 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 13631 unsigned Depth) const { 13632 switch (Op.getOpcode()) { 13633 default: 13634 break; 13635 case RISCVISD::SELECT_CC: { 13636 unsigned Tmp = 13637 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 13638 if (Tmp == 1) return 1; // Early out. 13639 unsigned Tmp2 = 13640 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 13641 return std::min(Tmp, Tmp2); 13642 } 13643 case RISCVISD::CZERO_EQZ: 13644 case RISCVISD::CZERO_NEZ: 13645 // Output is either all zero or operand 0. We can propagate sign bit count 13646 // from operand 0. 13647 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 13648 case RISCVISD::ABSW: { 13649 // We expand this at isel to negw+max. The result will have 33 sign bits 13650 // if the input has at least 33 sign bits.
13651 unsigned Tmp = 13652 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 13653 if (Tmp < 33) return 1; 13654 return 33; 13655 } 13656 case RISCVISD::SLLW: 13657 case RISCVISD::SRAW: 13658 case RISCVISD::SRLW: 13659 case RISCVISD::DIVW: 13660 case RISCVISD::DIVUW: 13661 case RISCVISD::REMUW: 13662 case RISCVISD::ROLW: 13663 case RISCVISD::RORW: 13664 case RISCVISD::FCVT_W_RV64: 13665 case RISCVISD::FCVT_WU_RV64: 13666 case RISCVISD::STRICT_FCVT_W_RV64: 13667 case RISCVISD::STRICT_FCVT_WU_RV64: 13668 // TODO: As the result is sign-extended, this is conservatively correct. A 13669 // more precise answer could be calculated for SRAW depending on known 13670 // bits in the shift amount. 13671 return 33; 13672 case RISCVISD::VMV_X_S: { 13673 // The number of sign bits of the scalar result is computed by obtaining the 13674 // element type of the input vector operand, subtracting its width from the 13675 // XLEN, and then adding one (sign bit within the element type). If the 13676 // element type is wider than XLen, the least-significant XLEN bits are 13677 // taken. 13678 unsigned XLen = Subtarget.getXLen(); 13679 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); 13680 if (EltBits <= XLen) 13681 return XLen - EltBits + 1; 13682 break; 13683 } 13684 case ISD::INTRINSIC_W_CHAIN: { 13685 unsigned IntNo = Op.getConstantOperandVal(1); 13686 switch (IntNo) { 13687 default: 13688 break; 13689 case Intrinsic::riscv_masked_atomicrmw_xchg_i64: 13690 case Intrinsic::riscv_masked_atomicrmw_add_i64: 13691 case Intrinsic::riscv_masked_atomicrmw_sub_i64: 13692 case Intrinsic::riscv_masked_atomicrmw_nand_i64: 13693 case Intrinsic::riscv_masked_atomicrmw_max_i64: 13694 case Intrinsic::riscv_masked_atomicrmw_min_i64: 13695 case Intrinsic::riscv_masked_atomicrmw_umax_i64: 13696 case Intrinsic::riscv_masked_atomicrmw_umin_i64: 13697 case Intrinsic::riscv_masked_cmpxchg_i64: 13698 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated 13699 // narrow atomic operation. These are implemented using atomic 13700 // operations at the minimum supported atomicrmw/cmpxchg width whose 13701 // result is then sign extended to XLEN. With +A, the minimum width is 13702 // 32 for both 64 and 32. 13703 assert(Subtarget.getXLen() == 64); 13704 assert(getMinCmpXchgSizeInBits() == 32); 13705 assert(Subtarget.hasStdExtA()); 13706 return 33; 13707 } 13708 } 13709 } 13710 13711 return 1; 13712 } 13713 13714 const Constant * 13715 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { 13716 assert(Ld && "Unexpected null LoadSDNode"); 13717 if (!ISD::isNormalLoad(Ld)) 13718 return nullptr; 13719 13720 SDValue Ptr = Ld->getBasePtr(); 13721 13722 // Only constant pools with no offset are supported. 13723 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { 13724 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); 13725 if (!CNode || CNode->isMachineConstantPoolEntry() || 13726 CNode->getOffset() != 0) 13727 return nullptr; 13728 13729 return CNode; 13730 }; 13731 13732 // Simple case, LLA. 13733 if (Ptr.getOpcode() == RISCVISD::LLA) { 13734 auto *CNode = GetSupportedConstantPool(Ptr); 13735 if (!CNode || CNode->getTargetFlags() != 0) 13736 return nullptr; 13737 13738 return CNode->getConstVal(); 13739 } 13740 13741 // Look for a HI and ADD_LO pair. 
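// Illustrative shape (assuming the usual medlow lowering of a constant pool
// address): (ADD_LO (HI cp), cp), i.e. a lui %hi(.LCPI..) paired with an
// %lo(.LCPI..) addend; both halves must name the same constant pool entry.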
13742 if (Ptr.getOpcode() != RISCVISD::ADD_LO || 13743 Ptr.getOperand(0).getOpcode() != RISCVISD::HI) 13744 return nullptr; 13745 13746 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); 13747 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); 13748 13749 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || 13750 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) 13751 return nullptr; 13752 13753 if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) 13754 return nullptr; 13755 13756 return CNodeLo->getConstVal(); 13757 } 13758 13759 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 13760 MachineBasicBlock *BB) { 13761 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 13762 13763 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 13764 // Should the count have wrapped while it was being read, we need to try 13765 // again. 13766 // ... 13767 // read: 13768 // rdcycleh x3 # load high word of cycle 13769 // rdcycle x2 # load low word of cycle 13770 // rdcycleh x4 # load high word of cycle 13771 // bne x3, x4, read # check if high word reads match, otherwise try again 13772 // ... 13773 13774 MachineFunction &MF = *BB->getParent(); 13775 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 13776 MachineFunction::iterator It = ++BB->getIterator(); 13777 13778 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 13779 MF.insert(It, LoopMBB); 13780 13781 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 13782 MF.insert(It, DoneMBB); 13783 13784 // Transfer the remainder of BB and its successor edges to DoneMBB. 13785 DoneMBB->splice(DoneMBB->begin(), BB, 13786 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 13787 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 13788 13789 BB->addSuccessor(LoopMBB); 13790 13791 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 13792 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 13793 Register LoReg = MI.getOperand(0).getReg(); 13794 Register HiReg = MI.getOperand(1).getReg(); 13795 DebugLoc DL = MI.getDebugLoc(); 13796 13797 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 13798 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 13799 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 13800 .addReg(RISCV::X0); 13801 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 13802 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 13803 .addReg(RISCV::X0); 13804 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 13805 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 13806 .addReg(RISCV::X0); 13807 13808 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 13809 .addReg(HiReg) 13810 .addReg(ReadAgainReg) 13811 .addMBB(LoopMBB); 13812 13813 LoopMBB->addSuccessor(LoopMBB); 13814 LoopMBB->addSuccessor(DoneMBB); 13815 13816 MI.eraseFromParent(); 13817 13818 return DoneMBB; 13819 } 13820 13821 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 13822 MachineBasicBlock *BB, 13823 const RISCVSubtarget &Subtarget) { 13824 assert((MI.getOpcode() == RISCV::SplitF64Pseudo || 13825 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) && 13826 "Unexpected instruction"); 13827 13828 MachineFunction &MF = *BB->getParent(); 13829 DebugLoc DL = MI.getDebugLoc(); 13830 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 13831 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 13832 Register LoReg = MI.getOperand(0).getReg(); 13833 Register 
HiReg = MI.getOperand(1).getReg(); 13834 Register SrcReg = MI.getOperand(2).getReg(); 13835 13836 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX 13837 ? &RISCV::GPRPF64RegClass 13838 : &RISCV::FPR64RegClass; 13839 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 13840 13841 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 13842 RI, Register()); 13843 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 13844 MachineMemOperand *MMOLo = 13845 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 13846 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 13847 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 13848 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 13849 .addFrameIndex(FI) 13850 .addImm(0) 13851 .addMemOperand(MMOLo); 13852 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 13853 .addFrameIndex(FI) 13854 .addImm(4) 13855 .addMemOperand(MMOHi); 13856 MI.eraseFromParent(); // The pseudo instruction is gone now. 13857 return BB; 13858 } 13859 13860 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 13861 MachineBasicBlock *BB, 13862 const RISCVSubtarget &Subtarget) { 13863 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo || 13864 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) && 13865 "Unexpected instruction"); 13866 13867 MachineFunction &MF = *BB->getParent(); 13868 DebugLoc DL = MI.getDebugLoc(); 13869 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 13870 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 13871 Register DstReg = MI.getOperand(0).getReg(); 13872 Register LoReg = MI.getOperand(1).getReg(); 13873 Register HiReg = MI.getOperand(2).getReg(); 13874 13875 const TargetRegisterClass *DstRC = 13876 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass 13877 : &RISCV::FPR64RegClass; 13878 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 13879 13880 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 13881 MachineMemOperand *MMOLo = 13882 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 13883 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 13884 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 13885 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 13886 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 13887 .addFrameIndex(FI) 13888 .addImm(0) 13889 .addMemOperand(MMOLo); 13890 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 13891 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 13892 .addFrameIndex(FI) 13893 .addImm(4) 13894 .addMemOperand(MMOHi); 13895 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); 13896 MI.eraseFromParent(); // The pseudo instruction is gone now. 
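// (Net effect for the FPR64 variant, as an informal sketch:
//    sw lo, 0(slot); sw hi, 4(slot); fld dst, 0(slot)
//  where 'slot' is the shared MoveF64 spill slot obtained above.)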
13897 return BB; 13898 } 13899 13900 static bool isSelectPseudo(MachineInstr &MI) { 13901 switch (MI.getOpcode()) { 13902 default: 13903 return false; 13904 case RISCV::Select_GPR_Using_CC_GPR: 13905 case RISCV::Select_FPR16_Using_CC_GPR: 13906 case RISCV::Select_FPR16INX_Using_CC_GPR: 13907 case RISCV::Select_FPR32_Using_CC_GPR: 13908 case RISCV::Select_FPR32INX_Using_CC_GPR: 13909 case RISCV::Select_FPR64_Using_CC_GPR: 13910 case RISCV::Select_FPR64INX_Using_CC_GPR: 13911 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 13912 return true; 13913 } 13914 } 13915 13916 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, 13917 unsigned RelOpcode, unsigned EqOpcode, 13918 const RISCVSubtarget &Subtarget) { 13919 DebugLoc DL = MI.getDebugLoc(); 13920 Register DstReg = MI.getOperand(0).getReg(); 13921 Register Src1Reg = MI.getOperand(1).getReg(); 13922 Register Src2Reg = MI.getOperand(2).getReg(); 13923 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 13924 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); 13925 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 13926 13927 // Save the current FFLAGS. 13928 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); 13929 13930 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) 13931 .addReg(Src1Reg) 13932 .addReg(Src2Reg); 13933 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 13934 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 13935 13936 // Restore the FFLAGS. 13937 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 13938 .addReg(SavedFFlags, RegState::Kill); 13939 13940 // Issue a dummy FEQ opcode to raise exception for signaling NaNs. 13941 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) 13942 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) 13943 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); 13944 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 13945 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); 13946 13947 // Erase the pseudoinstruction. 13948 MI.eraseFromParent(); 13949 return BB; 13950 } 13951 13952 static MachineBasicBlock * 13953 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, 13954 MachineBasicBlock *ThisMBB, 13955 const RISCVSubtarget &Subtarget) { 13956 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) 13957 // Without this, custom-inserter would have generated: 13958 // 13959 // A 13960 // | \ 13961 // | B 13962 // | / 13963 // C 13964 // | \ 13965 // | D 13966 // | / 13967 // E 13968 // 13969 // A: X = ...; Y = ... 13970 // B: empty 13971 // C: Z = PHI [X, A], [Y, B] 13972 // D: empty 13973 // E: PHI [X, C], [Z, D] 13974 // 13975 // If we lower both Select_FPRX_ in a single step, we can instead generate: 13976 // 13977 // A 13978 // | \ 13979 // | C 13980 // | /| 13981 // |/ | 13982 // | | 13983 // | D 13984 // | / 13985 // E 13986 // 13987 // A: X = ...; Y = ... 
13988 // D: empty 13989 // E: PHI [X, A], [X, C], [Y, D] 13990 13991 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 13992 const DebugLoc &DL = First.getDebugLoc(); 13993 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 13994 MachineFunction *F = ThisMBB->getParent(); 13995 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB); 13996 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB); 13997 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 13998 MachineFunction::iterator It = ++ThisMBB->getIterator(); 13999 F->insert(It, FirstMBB); 14000 F->insert(It, SecondMBB); 14001 F->insert(It, SinkMBB); 14002 14003 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. 14004 SinkMBB->splice(SinkMBB->begin(), ThisMBB, 14005 std::next(MachineBasicBlock::iterator(First)), 14006 ThisMBB->end()); 14007 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); 14008 14009 // Fallthrough block for ThisMBB. 14010 ThisMBB->addSuccessor(FirstMBB); 14011 // Fallthrough block for FirstMBB. 14012 FirstMBB->addSuccessor(SecondMBB); 14013 ThisMBB->addSuccessor(SinkMBB); 14014 FirstMBB->addSuccessor(SinkMBB); 14015 // This is fallthrough. 14016 SecondMBB->addSuccessor(SinkMBB); 14017 14018 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm()); 14019 Register FLHS = First.getOperand(1).getReg(); 14020 Register FRHS = First.getOperand(2).getReg(); 14021 // Insert appropriate branch. 14022 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) 14023 .addReg(FLHS) 14024 .addReg(FRHS) 14025 .addMBB(SinkMBB); 14026 14027 Register SLHS = Second.getOperand(1).getReg(); 14028 Register SRHS = Second.getOperand(2).getReg(); 14029 Register Op1Reg4 = First.getOperand(4).getReg(); 14030 Register Op1Reg5 = First.getOperand(5).getReg(); 14031 14032 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); 14033 // Insert appropriate branch. 14034 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) 14035 .addReg(SLHS) 14036 .addReg(SRHS) 14037 .addMBB(SinkMBB); 14038 14039 Register DestReg = Second.getOperand(0).getReg(); 14040 Register Op2Reg4 = Second.getOperand(4).getReg(); 14041 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) 14042 .addReg(Op2Reg4) 14043 .addMBB(ThisMBB) 14044 .addReg(Op1Reg4) 14045 .addMBB(FirstMBB) 14046 .addReg(Op1Reg5) 14047 .addMBB(SecondMBB); 14048 14049 // Now remove the Select_FPRX_s. 14050 First.eraseFromParent(); 14051 Second.eraseFromParent(); 14052 return SinkMBB; 14053 } 14054 14055 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 14056 MachineBasicBlock *BB, 14057 const RISCVSubtarget &Subtarget) { 14058 // To "insert" Select_* instructions, we actually have to insert the triangle 14059 // control-flow pattern. The incoming instructions know the destination vreg 14060 // to set, the condition code register to branch on, the true/false values to 14061 // select between, and the condcode to use to select the appropriate branch. 14062 // 14063 // We produce the following control flow: 14064 // HeadMBB 14065 // | \ 14066 // | IfFalseMBB 14067 // | / 14068 // TailMBB 14069 // 14070 // When we find a sequence of selects we attempt to optimize their emission 14071 // by sharing the control flow. Currently we only handle cases where we have 14072 // multiple selects with the exact same condition (same LHS, RHS and CC). 
14073 // The selects may be interleaved with other instructions if the other 14074 // instructions meet some requirements we deem safe: 14075 // - They are not pseudo instructions. 14076 // - They are debug instructions. Otherwise, 14077 // - They do not have side-effects, do not access memory and their inputs do 14078 // not depend on the results of the select pseudo-instructions. 14079 // The TrueV/FalseV operands of the selects cannot depend on the result of 14080 // previous selects in the sequence. 14081 // These conditions could be further relaxed. See the X86 target for a 14082 // related approach and more information. 14083 // 14084 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) 14085 // is checked here and handled by a separate function - 14086 // EmitLoweredCascadedSelect. 14087 Register LHS = MI.getOperand(1).getReg(); 14088 Register RHS = MI.getOperand(2).getReg(); 14089 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 14090 14091 SmallVector<MachineInstr *, 4> SelectDebugValues; 14092 SmallSet<Register, 4> SelectDests; 14093 SelectDests.insert(MI.getOperand(0).getReg()); 14094 14095 MachineInstr *LastSelectPseudo = &MI; 14096 auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); 14097 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && 14098 Next->getOpcode() == MI.getOpcode() && 14099 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && 14100 Next->getOperand(5).isKill()) { 14101 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); 14102 } 14103 14104 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 14105 SequenceMBBI != E; ++SequenceMBBI) { 14106 if (SequenceMBBI->isDebugInstr()) 14107 continue; 14108 if (isSelectPseudo(*SequenceMBBI)) { 14109 if (SequenceMBBI->getOperand(1).getReg() != LHS || 14110 SequenceMBBI->getOperand(2).getReg() != RHS || 14111 SequenceMBBI->getOperand(3).getImm() != CC || 14112 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 14113 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 14114 break; 14115 LastSelectPseudo = &*SequenceMBBI; 14116 SequenceMBBI->collectDebugValues(SelectDebugValues); 14117 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 14118 continue; 14119 } 14120 if (SequenceMBBI->hasUnmodeledSideEffects() || 14121 SequenceMBBI->mayLoadOrStore() || 14122 SequenceMBBI->usesCustomInsertionHook()) 14123 break; 14124 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 14125 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 14126 })) 14127 break; 14128 } 14129 14130 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 14131 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 14132 DebugLoc DL = MI.getDebugLoc(); 14133 MachineFunction::iterator I = ++BB->getIterator(); 14134 14135 MachineBasicBlock *HeadMBB = BB; 14136 MachineFunction *F = BB->getParent(); 14137 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 14138 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 14139 14140 F->insert(I, IfFalseMBB); 14141 F->insert(I, TailMBB); 14142 14143 // Transfer debug instructions associated with the selects to TailMBB. 14144 for (MachineInstr *DebugInstr : SelectDebugValues) { 14145 TailMBB->push_back(DebugInstr->removeFromParent()); 14146 } 14147 14148 // Move all instructions after the sequence to TailMBB. 
14149 TailMBB->splice(TailMBB->end(), HeadMBB, 14150 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 14151 // Update machine-CFG edges by transferring all successors of the current 14152 // block to the new block which will contain the Phi nodes for the selects. 14153 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 14154 // Set the successors for HeadMBB. 14155 HeadMBB->addSuccessor(IfFalseMBB); 14156 HeadMBB->addSuccessor(TailMBB); 14157 14158 // Insert appropriate branch. 14159 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 14160 .addReg(LHS) 14161 .addReg(RHS) 14162 .addMBB(TailMBB); 14163 14164 // IfFalseMBB just falls through to TailMBB. 14165 IfFalseMBB->addSuccessor(TailMBB); 14166 14167 // Create PHIs for all of the select pseudo-instructions. 14168 auto SelectMBBI = MI.getIterator(); 14169 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 14170 auto InsertionPoint = TailMBB->begin(); 14171 while (SelectMBBI != SelectEnd) { 14172 auto Next = std::next(SelectMBBI); 14173 if (isSelectPseudo(*SelectMBBI)) { 14174 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 14175 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 14176 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 14177 .addReg(SelectMBBI->getOperand(4).getReg()) 14178 .addMBB(HeadMBB) 14179 .addReg(SelectMBBI->getOperand(5).getReg()) 14180 .addMBB(IfFalseMBB); 14181 SelectMBBI->eraseFromParent(); 14182 } 14183 SelectMBBI = Next; 14184 } 14185 14186 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 14187 return TailMBB; 14188 } 14189 14190 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB, 14191 unsigned Opcode) { 14192 DebugLoc DL = MI.getDebugLoc(); 14193 14194 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 14195 14196 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 14197 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass); 14198 14199 assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7); 14200 unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3; 14201 14202 // Update FRM and save the old value. 14203 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM) 14204 .addImm(MI.getOperand(FRMIdx).getImm()); 14205 14206 // Emit an VFCVT with the FRM == DYN 14207 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode)); 14208 14209 for (unsigned I = 0; I < MI.getNumOperands(); I++) 14210 if (I != FRMIdx) 14211 MIB = MIB.add(MI.getOperand(I)); 14212 else 14213 MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN 14214 14215 MIB.add(MachineOperand::CreateReg(RISCV::FRM, 14216 /*IsDef*/ false, 14217 /*IsImp*/ true)); 14218 14219 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 14220 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 14221 14222 // Restore FRM. 14223 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM)) 14224 .addReg(SavedFRM, RegState::Kill); 14225 14226 // Erase the pseudoinstruction. 14227 MI.eraseFromParent(); 14228 return BB; 14229 } 14230 14231 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, 14232 MachineBasicBlock *BB, 14233 unsigned CVTXOpc, 14234 unsigned CVTFOpc) { 14235 DebugLoc DL = MI.getDebugLoc(); 14236 14237 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 14238 14239 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 14240 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); 14241 14242 // Save the old value of FFLAGS. 
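// (Informal sketch of the overall expansion being built here:
//    frflags  savedfflags
//    vfcvt.x.f.v  tmp, src, v0.t    # frm = DYN
//    vfcvt.f.x.v  dst, tmp, v0.t    # frm = DYN
//    fsflags  savedfflags
//  i.e. round to integer and back while discarding any FP flags raised.)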
14243 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); 14244 14245 assert(MI.getNumOperands() == 7); 14246 14247 // Emit a VFCVT_X_F 14248 const TargetRegisterInfo *TRI = 14249 BB->getParent()->getSubtarget().getRegisterInfo(); 14250 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); 14251 Register Tmp = MRI.createVirtualRegister(RC); 14252 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) 14253 .add(MI.getOperand(1)) 14254 .add(MI.getOperand(2)) 14255 .add(MI.getOperand(3)) 14256 .add(MachineOperand::CreateImm(7)) // frm = DYN 14257 .add(MI.getOperand(4)) 14258 .add(MI.getOperand(5)) 14259 .add(MI.getOperand(6)) 14260 .add(MachineOperand::CreateReg(RISCV::FRM, 14261 /*IsDef*/ false, 14262 /*IsImp*/ true)); 14263 14264 // Emit a VFCVT_F_X 14265 BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) 14266 .add(MI.getOperand(0)) 14267 .add(MI.getOperand(1)) 14268 .addReg(Tmp) 14269 .add(MI.getOperand(3)) 14270 .add(MachineOperand::CreateImm(7)) // frm = DYN 14271 .add(MI.getOperand(4)) 14272 .add(MI.getOperand(5)) 14273 .add(MI.getOperand(6)) 14274 .add(MachineOperand::CreateReg(RISCV::FRM, 14275 /*IsDef*/ false, 14276 /*IsImp*/ true)); 14277 14278 // Restore FFLAGS. 14279 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 14280 .addReg(SavedFFLAGS, RegState::Kill); 14281 14282 // Erase the pseudoinstruction. 14283 MI.eraseFromParent(); 14284 return BB; 14285 } 14286 14287 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, 14288 const RISCVSubtarget &Subtarget) { 14289 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; 14290 const TargetRegisterClass *RC; 14291 switch (MI.getOpcode()) { 14292 default: 14293 llvm_unreachable("Unexpected opcode"); 14294 case RISCV::PseudoFROUND_H: 14295 CmpOpc = RISCV::FLT_H; 14296 F2IOpc = RISCV::FCVT_W_H; 14297 I2FOpc = RISCV::FCVT_H_W; 14298 FSGNJOpc = RISCV::FSGNJ_H; 14299 FSGNJXOpc = RISCV::FSGNJX_H; 14300 RC = &RISCV::FPR16RegClass; 14301 break; 14302 case RISCV::PseudoFROUND_H_INX: 14303 CmpOpc = RISCV::FLT_H_INX; 14304 F2IOpc = RISCV::FCVT_W_H_INX; 14305 I2FOpc = RISCV::FCVT_H_W_INX; 14306 FSGNJOpc = RISCV::FSGNJ_H_INX; 14307 FSGNJXOpc = RISCV::FSGNJX_H_INX; 14308 RC = &RISCV::GPRF16RegClass; 14309 break; 14310 case RISCV::PseudoFROUND_S: 14311 CmpOpc = RISCV::FLT_S; 14312 F2IOpc = RISCV::FCVT_W_S; 14313 I2FOpc = RISCV::FCVT_S_W; 14314 FSGNJOpc = RISCV::FSGNJ_S; 14315 FSGNJXOpc = RISCV::FSGNJX_S; 14316 RC = &RISCV::FPR32RegClass; 14317 break; 14318 case RISCV::PseudoFROUND_S_INX: 14319 CmpOpc = RISCV::FLT_S_INX; 14320 F2IOpc = RISCV::FCVT_W_S_INX; 14321 I2FOpc = RISCV::FCVT_S_W_INX; 14322 FSGNJOpc = RISCV::FSGNJ_S_INX; 14323 FSGNJXOpc = RISCV::FSGNJX_S_INX; 14324 RC = &RISCV::GPRF32RegClass; 14325 break; 14326 case RISCV::PseudoFROUND_D: 14327 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 14328 CmpOpc = RISCV::FLT_D; 14329 F2IOpc = RISCV::FCVT_L_D; 14330 I2FOpc = RISCV::FCVT_D_L; 14331 FSGNJOpc = RISCV::FSGNJ_D; 14332 FSGNJXOpc = RISCV::FSGNJX_D; 14333 RC = &RISCV::FPR64RegClass; 14334 break; 14335 case RISCV::PseudoFROUND_D_INX: 14336 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 14337 CmpOpc = RISCV::FLT_D_INX; 14338 F2IOpc = RISCV::FCVT_L_D_INX; 14339 I2FOpc = RISCV::FCVT_D_L_INX; 14340 FSGNJOpc = RISCV::FSGNJ_D_INX; 14341 FSGNJXOpc = RISCV::FSGNJX_D_INX; 14342 RC = &RISCV::GPRRegClass; 14343 break; 14344 } 14345 14346 const BasicBlock *BB = MBB->getBasicBlock(); 14347 DebugLoc DL = MI.getDebugLoc(); 14348 MachineFunction::iterator I = ++MBB->getIterator(); 14349 14350 MachineFunction *F = 
MBB->getParent(); 14351 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); 14352 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); 14353 14354 F->insert(I, CvtMBB); 14355 F->insert(I, DoneMBB); 14356 // Move all instructions after the sequence to DoneMBB. 14357 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), 14358 MBB->end()); 14359 // Update machine-CFG edges by transferring all successors of the current 14360 // block to the new block which will contain the Phi nodes for the selects. 14361 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 14362 // Set the successors for MBB. 14363 MBB->addSuccessor(CvtMBB); 14364 MBB->addSuccessor(DoneMBB); 14365 14366 Register DstReg = MI.getOperand(0).getReg(); 14367 Register SrcReg = MI.getOperand(1).getReg(); 14368 Register MaxReg = MI.getOperand(2).getReg(); 14369 int64_t FRM = MI.getOperand(3).getImm(); 14370 14371 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 14372 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 14373 14374 Register FabsReg = MRI.createVirtualRegister(RC); 14375 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); 14376 14377 // Compare the FP value to the max value. 14378 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 14379 auto MIB = 14380 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); 14381 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 14382 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 14383 14384 // Insert branch. 14385 BuildMI(MBB, DL, TII.get(RISCV::BEQ)) 14386 .addReg(CmpReg) 14387 .addReg(RISCV::X0) 14388 .addMBB(DoneMBB); 14389 14390 CvtMBB->addSuccessor(DoneMBB); 14391 14392 // Convert to integer. 14393 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 14394 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); 14395 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 14396 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 14397 14398 // Convert back to FP. 14399 Register I2FReg = MRI.createVirtualRegister(RC); 14400 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); 14401 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 14402 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 14403 14404 // Restore the sign bit. 14405 Register CvtReg = MRI.createVirtualRegister(RC); 14406 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); 14407 14408 // Merge the results. 
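// (For reference, an informal sketch of the whole expansion, f32 flavour,
//  register names invented:
//    fabs = fsgnjx.s src, src
//    cmp  = flt.s fabs, max       # max: threshold at or above which every
//                                 # finite value is already integral
//    beq  cmp, x0, done           # too large or NaN: keep src
//  cvt:
//    i    = fcvt.w.s src, frm
//    f    = fcvt.s.w i, frm
//    res  = fsgnj.s f, src        # reattach the original sign (e.g. -0.0)
//  done:
//    dst  = phi(src from head, res from cvt))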
14409 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) 14410 .addReg(SrcReg) 14411 .addMBB(MBB) 14412 .addReg(CvtReg) 14413 .addMBB(CvtMBB); 14414 14415 MI.eraseFromParent(); 14416 return DoneMBB; 14417 } 14418 14419 MachineBasicBlock * 14420 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 14421 MachineBasicBlock *BB) const { 14422 switch (MI.getOpcode()) { 14423 default: 14424 llvm_unreachable("Unexpected instr type to insert"); 14425 case RISCV::ReadCycleWide: 14426 assert(!Subtarget.is64Bit() && 14427 "ReadCycleWide is only to be used on riscv32"); 14428 return emitReadCycleWidePseudo(MI, BB); 14429 case RISCV::Select_GPR_Using_CC_GPR: 14430 case RISCV::Select_FPR16_Using_CC_GPR: 14431 case RISCV::Select_FPR16INX_Using_CC_GPR: 14432 case RISCV::Select_FPR32_Using_CC_GPR: 14433 case RISCV::Select_FPR32INX_Using_CC_GPR: 14434 case RISCV::Select_FPR64_Using_CC_GPR: 14435 case RISCV::Select_FPR64INX_Using_CC_GPR: 14436 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 14437 return emitSelectPseudo(MI, BB, Subtarget); 14438 case RISCV::BuildPairF64Pseudo: 14439 case RISCV::BuildPairF64Pseudo_INX: 14440 return emitBuildPairF64Pseudo(MI, BB, Subtarget); 14441 case RISCV::SplitF64Pseudo: 14442 case RISCV::SplitF64Pseudo_INX: 14443 return emitSplitF64Pseudo(MI, BB, Subtarget); 14444 case RISCV::PseudoQuietFLE_H: 14445 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); 14446 case RISCV::PseudoQuietFLE_H_INX: 14447 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); 14448 case RISCV::PseudoQuietFLT_H: 14449 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); 14450 case RISCV::PseudoQuietFLT_H_INX: 14451 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); 14452 case RISCV::PseudoQuietFLE_S: 14453 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); 14454 case RISCV::PseudoQuietFLE_S_INX: 14455 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); 14456 case RISCV::PseudoQuietFLT_S: 14457 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); 14458 case RISCV::PseudoQuietFLT_S_INX: 14459 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); 14460 case RISCV::PseudoQuietFLE_D: 14461 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); 14462 case RISCV::PseudoQuietFLE_D_INX: 14463 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); 14464 case RISCV::PseudoQuietFLE_D_IN32X: 14465 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, 14466 Subtarget); 14467 case RISCV::PseudoQuietFLT_D: 14468 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); 14469 case RISCV::PseudoQuietFLT_D_INX: 14470 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); 14471 case RISCV::PseudoQuietFLT_D_IN32X: 14472 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, 14473 Subtarget); 14474 14475 #define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \ 14476 case RISCV::RMOpc##_##LMUL: \ 14477 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \ 14478 case RISCV::RMOpc##_##LMUL##_MASK: \ 14479 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK); 14480 14481 #define PseudoVFCVT_RM_CASE(RMOpc, Opc) \ 14482 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \ 14483 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \ 14484 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \ 14485 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \ 14486 PseudoVFCVT_RM_LMUL_CASE(RMOpc,
Opc, MF4) 14487 14488 #define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \ 14489 PseudoVFCVT_RM_CASE(RMOpc, Opc) \ 14490 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8) 14491 14492 #define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \ 14493 PseudoVFCVT_RM_CASE(RMOpc, Opc) \ 14494 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8) 14495 14496 // VFCVT 14497 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V) 14498 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V) 14499 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V) 14500 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V) 14501 14502 // VFWCVT 14503 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V); 14504 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V); 14505 14506 // VFNCVT 14507 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W); 14508 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W); 14509 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W); 14510 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W); 14511 14512 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: 14513 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, 14514 RISCV::PseudoVFCVT_F_X_V_M1_MASK); 14515 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: 14516 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK, 14517 RISCV::PseudoVFCVT_F_X_V_M2_MASK); 14518 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: 14519 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK, 14520 RISCV::PseudoVFCVT_F_X_V_M4_MASK); 14521 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: 14522 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK, 14523 RISCV::PseudoVFCVT_F_X_V_M8_MASK); 14524 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: 14525 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK, 14526 RISCV::PseudoVFCVT_F_X_V_MF2_MASK); 14527 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: 14528 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK, 14529 RISCV::PseudoVFCVT_F_X_V_MF4_MASK); 14530 case RISCV::PseudoFROUND_H: 14531 case RISCV::PseudoFROUND_H_INX: 14532 case RISCV::PseudoFROUND_S: 14533 case RISCV::PseudoFROUND_S_INX: 14534 case RISCV::PseudoFROUND_D: 14535 case RISCV::PseudoFROUND_D_INX: 14536 case RISCV::PseudoFROUND_D_IN32X: 14537 return emitFROUND(MI, BB, Subtarget); 14538 } 14539 } 14540 14541 // Returns the index to the rounding mode immediate value if any, otherwise the 14542 // function will return None. 14543 static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) { 14544 uint64_t TSFlags = MI.getDesc().TSFlags; 14545 if (!RISCVII::hasRoundModeOp(TSFlags)) 14546 return std::nullopt; 14547 14548 // The operand order 14549 // ------------------------------------- 14550 // | n-1 (if any) | n-2 | n-3 | n-4 | 14551 // | policy | sew | vl | rm | 14552 // ------------------------------------- 14553 return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3; 14554 } 14555 14556 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, 14557 SDNode *Node) const { 14558 // Add FRM dependency to vector floating-point instructions with dynamic 14559 // rounding mode. 
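// For example (illustrative): a vector FP pseudo whose TSFlags record a
// rounding-mode operand, with that operand set to RISCVFPRndMode::DYN, gets
// an implicit use of $frm appended so later passes see its dependency on the
// frm CSR.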
14560 if (auto RoundModeIdx = getRoundModeIdx(MI)) { 14561 unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm(); 14562 if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) { 14563 MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, 14564 /*isImp*/ true)); 14565 } 14566 } 14567 14568 // Add FRM dependency to any instructions with dynamic rounding mode. 14569 unsigned Opc = MI.getOpcode(); 14570 auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm); 14571 if (Idx < 0) 14572 return; 14573 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) 14574 return; 14575 // If the instruction already reads FRM, don't add another read. 14576 if (MI.readsRegister(RISCV::FRM)) 14577 return; 14578 MI.addOperand( 14579 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); 14580 } 14581 14582 // Calling Convention Implementation. 14583 // The expectations for frontend ABI lowering vary from target to target. 14584 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 14585 // details, but this is a longer term goal. For now, we simply try to keep the 14586 // role of the frontend as simple and well-defined as possible. The rules can 14587 // be summarised as: 14588 // * Never split up large scalar arguments. We handle them here. 14589 // * If a hardfloat calling convention is being used, and the struct may be 14590 // passed in a pair of registers (fp+fp, int+fp), and both registers are 14591 // available, then pass as two separate arguments. If either the GPRs or FPRs 14592 // are exhausted, then pass according to the rule below. 14593 // * If a struct could never be passed in registers or directly in a stack 14594 // slot (as it is larger than 2*XLEN and the floating point rules don't 14595 // apply), then pass it using a pointer with the byval attribute. 14596 // * If a struct is less than 2*XLEN, then coerce to either a two-element 14597 // word-sized array or a 2*XLEN scalar (depending on alignment). 14598 // * The frontend can determine whether a struct is returned by reference or 14599 // not based on its size and fields. If it will be returned by reference, the 14600 // frontend must modify the prototype so a pointer with the sret annotation is 14601 // passed as the first argument. This is not necessary for large scalar 14602 // returns. 14603 // * Struct return values and varargs should be coerced to structs containing 14604 // register-size fields in the same situations they would be for fixed 14605 // arguments. 14606 14607 static const MCPhysReg ArgGPRs[] = { 14608 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 14609 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 14610 }; 14611 static const MCPhysReg ArgFPR16s[] = { 14612 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 14613 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 14614 }; 14615 static const MCPhysReg ArgFPR32s[] = { 14616 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 14617 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 14618 }; 14619 static const MCPhysReg ArgFPR64s[] = { 14620 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 14621 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 14622 }; 14623 // This is an interim calling convention and it may be changed in the future. 
14624 static const MCPhysReg ArgVRs[] = { 14625 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 14626 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 14627 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 14628 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 14629 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 14630 RISCV::V20M2, RISCV::V22M2}; 14631 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 14632 RISCV::V20M4}; 14633 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 14634 14635 // Pass a 2*XLEN argument that has been split into two XLEN values through 14636 // registers or the stack as necessary. 14637 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 14638 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 14639 MVT ValVT2, MVT LocVT2, 14640 ISD::ArgFlagsTy ArgFlags2) { 14641 unsigned XLenInBytes = XLen / 8; 14642 if (Register Reg = State.AllocateReg(ArgGPRs)) { 14643 // At least one half can be passed via register. 14644 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 14645 VA1.getLocVT(), CCValAssign::Full)); 14646 } else { 14647 // Both halves must be passed on the stack, with proper alignment. 14648 Align StackAlign = 14649 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 14650 State.addLoc( 14651 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 14652 State.AllocateStack(XLenInBytes, StackAlign), 14653 VA1.getLocVT(), CCValAssign::Full)); 14654 State.addLoc(CCValAssign::getMem( 14655 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 14656 LocVT2, CCValAssign::Full)); 14657 return false; 14658 } 14659 14660 if (Register Reg = State.AllocateReg(ArgGPRs)) { 14661 // The second half can also be passed via register. 14662 State.addLoc( 14663 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 14664 } else { 14665 // The second half is passed via the stack, without additional alignment. 14666 State.addLoc(CCValAssign::getMem( 14667 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 14668 LocVT2, CCValAssign::Full)); 14669 } 14670 14671 return false; 14672 } 14673 14674 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, 14675 std::optional<unsigned> FirstMaskArgument, 14676 CCState &State, const RISCVTargetLowering &TLI) { 14677 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 14678 if (RC == &RISCV::VRRegClass) { 14679 // Assign the first mask argument to V0. 14680 // This is an interim calling convention and it may be changed in the 14681 // future. 14682 if (FirstMaskArgument && ValNo == *FirstMaskArgument) 14683 return State.AllocateReg(RISCV::V0); 14684 return State.AllocateReg(ArgVRs); 14685 } 14686 if (RC == &RISCV::VRM2RegClass) 14687 return State.AllocateReg(ArgVRM2s); 14688 if (RC == &RISCV::VRM4RegClass) 14689 return State.AllocateReg(ArgVRM4s); 14690 if (RC == &RISCV::VRM8RegClass) 14691 return State.AllocateReg(ArgVRM8s); 14692 llvm_unreachable("Unhandled register class for ValueType"); 14693 } 14694 14695 // Implements the RISC-V calling convention. Returns true upon failure. 
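// Worked example (illustrative, ilp32d): for
//   void f(double a, int64_t b, double c);
// 'a' is assigned fa0, the two halves of 'b' go to a0/a1, and 'c' goes to
// fa1. Once the FPRs are exhausted (or for variadic arguments), doubles fall
// back to GPR pairs or the stack via the f64-on-RV32 handling below.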
14696 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 14697 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 14698 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 14699 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 14700 std::optional<unsigned> FirstMaskArgument) { 14701 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 14702 assert(XLen == 32 || XLen == 64); 14703 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 14704 14705 // Static chain parameter must not be passed in normal argument registers, 14706 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain. 14707 if (ArgFlags.isNest()) { 14708 if (unsigned Reg = State.AllocateReg(RISCV::X7)) { 14709 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 14710 return false; 14711 } 14712 } 14713 14714 // Any return value split into more than two values can't be returned 14715 // directly. Vectors are returned via the available vector registers. 14716 if (!LocVT.isVector() && IsRet && ValNo > 1) 14717 return true; 14718 14719 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 14720 // variadic argument, or if no F16/F32 argument registers are available. 14721 bool UseGPRForF16_F32 = true; 14722 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 14723 // variadic argument, or if no F64 argument registers are available. 14724 bool UseGPRForF64 = true; 14725 14726 switch (ABI) { 14727 default: 14728 llvm_unreachable("Unexpected ABI"); 14729 case RISCVABI::ABI_ILP32: 14730 case RISCVABI::ABI_LP64: 14731 break; 14732 case RISCVABI::ABI_ILP32F: 14733 case RISCVABI::ABI_LP64F: 14734 UseGPRForF16_F32 = !IsFixed; 14735 break; 14736 case RISCVABI::ABI_ILP32D: 14737 case RISCVABI::ABI_LP64D: 14738 UseGPRForF16_F32 = !IsFixed; 14739 UseGPRForF64 = !IsFixed; 14740 break; 14741 } 14742 14743 // FPR16, FPR32, and FPR64 alias each other. 14744 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { 14745 UseGPRForF16_F32 = true; 14746 UseGPRForF64 = true; 14747 } 14748 14749 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 14750 // similar local variables rather than directly checking against the target 14751 // ABI. 14752 14753 if (UseGPRForF16_F32 && 14754 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { 14755 LocVT = XLenVT; 14756 LocInfo = CCValAssign::BCvt; 14757 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 14758 LocVT = MVT::i64; 14759 LocInfo = CCValAssign::BCvt; 14760 } 14761 14762 // If this is a variadic argument, the RISC-V calling convention requires 14763 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 14764 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 14765 // be used regardless of whether the original argument was split during 14766 // legalisation or not. The argument will not be passed by registers if the 14767 // original type is larger than 2*XLEN, so the register alignment rule does 14768 // not apply. 14769 unsigned TwoXLenInBytes = (2 * XLen) / 8; 14770 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 14771 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 14772 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 14773 // Skip 'odd' register if necessary.
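// e.g. (illustrative) for printf("%f", d) on RV32: the format pointer takes
// a0, and the variadic double must start at an even register, so a1 is
// skipped and d is passed in a2/a3.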
14774 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 14775 State.AllocateReg(ArgGPRs); 14776 } 14777 14778 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 14779 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 14780 State.getPendingArgFlags(); 14781 14782 assert(PendingLocs.size() == PendingArgFlags.size() && 14783 "PendingLocs and PendingArgFlags out of sync"); 14784 14785 // Handle passing f64 on RV32D with a soft float ABI or when floating point 14786 // registers are exhausted. 14787 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 14788 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 14789 "Can't lower f64 if it is split"); 14790 // Depending on available argument GPRS, f64 may be passed in a pair of 14791 // GPRs, split between a GPR and the stack, or passed completely on the 14792 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 14793 // cases. 14794 Register Reg = State.AllocateReg(ArgGPRs); 14795 LocVT = MVT::i32; 14796 if (!Reg) { 14797 unsigned StackOffset = State.AllocateStack(8, Align(8)); 14798 State.addLoc( 14799 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 14800 return false; 14801 } 14802 if (!State.AllocateReg(ArgGPRs)) 14803 State.AllocateStack(4, Align(4)); 14804 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 14805 return false; 14806 } 14807 14808 // Fixed-length vectors are located in the corresponding scalable-vector 14809 // container types. 14810 if (ValVT.isFixedLengthVector()) 14811 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 14812 14813 // Split arguments might be passed indirectly, so keep track of the pending 14814 // values. Split vectors are passed via a mix of registers and indirectly, so 14815 // treat them as we would any other argument. 14816 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 14817 LocVT = XLenVT; 14818 LocInfo = CCValAssign::Indirect; 14819 PendingLocs.push_back( 14820 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 14821 PendingArgFlags.push_back(ArgFlags); 14822 if (!ArgFlags.isSplitEnd()) { 14823 return false; 14824 } 14825 } 14826 14827 // If the split argument only had two elements, it should be passed directly 14828 // in registers or on the stack. 14829 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 14830 PendingLocs.size() <= 2) { 14831 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 14832 // Apply the normal calling convention rules to the first half of the 14833 // split argument. 14834 CCValAssign VA = PendingLocs[0]; 14835 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 14836 PendingLocs.clear(); 14837 PendingArgFlags.clear(); 14838 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 14839 ArgFlags); 14840 } 14841 14842 // Allocate to a register if possible, or else a stack slot. 14843 Register Reg; 14844 unsigned StoreSizeBytes = XLen / 8; 14845 Align StackAlign = Align(XLen / 8); 14846 14847 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) 14848 Reg = State.AllocateReg(ArgFPR16s); 14849 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 14850 Reg = State.AllocateReg(ArgFPR32s); 14851 else if (ValVT == MVT::f64 && !UseGPRForF64) 14852 Reg = State.AllocateReg(ArgFPR64s); 14853 else if (ValVT.isVector()) { 14854 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); 14855 if (!Reg) { 14856 // For return values, the vector must be passed fully via registers or 14857 // via the stack. 
14858 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 14859 // but we're using all of them. 14860 if (IsRet) 14861 return true; 14862 // Try using a GPR to pass the address 14863 if ((Reg = State.AllocateReg(ArgGPRs))) { 14864 LocVT = XLenVT; 14865 LocInfo = CCValAssign::Indirect; 14866 } else if (ValVT.isScalableVector()) { 14867 LocVT = XLenVT; 14868 LocInfo = CCValAssign::Indirect; 14869 } else { 14870 // Pass fixed-length vectors on the stack. 14871 LocVT = ValVT; 14872 StoreSizeBytes = ValVT.getStoreSize(); 14873 // Align vectors to their element sizes, being careful for vXi1 14874 // vectors. 14875 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 14876 } 14877 } 14878 } else { 14879 Reg = State.AllocateReg(ArgGPRs); 14880 } 14881 14882 unsigned StackOffset = 14883 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 14884 14885 // If we reach this point and PendingLocs is non-empty, we must be at the 14886 // end of a split argument that must be passed indirectly. 14887 if (!PendingLocs.empty()) { 14888 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 14889 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 14890 14891 for (auto &It : PendingLocs) { 14892 if (Reg) 14893 It.convertToReg(Reg); 14894 else 14895 It.convertToMem(StackOffset); 14896 State.addLoc(It); 14897 } 14898 PendingLocs.clear(); 14899 PendingArgFlags.clear(); 14900 return false; 14901 } 14902 14903 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 14904 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 14905 "Expected an XLenVT or vector types at this stage"); 14906 14907 if (Reg) { 14908 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 14909 return false; 14910 } 14911 14912 // When a scalar floating-point value is passed on the stack, no 14913 // bit-conversion is needed. 
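  // The value is simply stored in its natural floating-point format rather
  // than being bitcast to an XLen-sized integer first.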
14914 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { 14915 assert(!ValVT.isVector()); 14916 LocVT = ValVT; 14917 LocInfo = CCValAssign::Full; 14918 } 14919 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 14920 return false; 14921 } 14922 14923 template <typename ArgTy> 14924 static std::optional<unsigned> preAssignMask(const ArgTy &Args) { 14925 for (const auto &ArgIdx : enumerate(Args)) { 14926 MVT ArgVT = ArgIdx.value().VT; 14927 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 14928 return ArgIdx.index(); 14929 } 14930 return std::nullopt; 14931 } 14932 14933 void RISCVTargetLowering::analyzeInputArgs( 14934 MachineFunction &MF, CCState &CCInfo, 14935 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 14936 RISCVCCAssignFn Fn) const { 14937 unsigned NumArgs = Ins.size(); 14938 FunctionType *FType = MF.getFunction().getFunctionType(); 14939 14940 std::optional<unsigned> FirstMaskArgument; 14941 if (Subtarget.hasVInstructions()) 14942 FirstMaskArgument = preAssignMask(Ins); 14943 14944 for (unsigned i = 0; i != NumArgs; ++i) { 14945 MVT ArgVT = Ins[i].VT; 14946 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 14947 14948 Type *ArgTy = nullptr; 14949 if (IsRet) 14950 ArgTy = FType->getReturnType(); 14951 else if (Ins[i].isOrigArg()) 14952 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 14953 14954 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 14955 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 14956 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 14957 FirstMaskArgument)) { 14958 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 14959 << ArgVT << '\n'); 14960 llvm_unreachable(nullptr); 14961 } 14962 } 14963 } 14964 14965 void RISCVTargetLowering::analyzeOutputArgs( 14966 MachineFunction &MF, CCState &CCInfo, 14967 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 14968 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 14969 unsigned NumArgs = Outs.size(); 14970 14971 std::optional<unsigned> FirstMaskArgument; 14972 if (Subtarget.hasVInstructions()) 14973 FirstMaskArgument = preAssignMask(Outs); 14974 14975 for (unsigned i = 0; i != NumArgs; i++) { 14976 MVT ArgVT = Outs[i].VT; 14977 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 14978 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 14979 14980 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 14981 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 14982 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 14983 FirstMaskArgument)) { 14984 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 14985 << ArgVT << "\n"); 14986 llvm_unreachable(nullptr); 14987 } 14988 } 14989 } 14990 14991 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 14992 // values. 
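// For example, an f32 value that was passed in a GPR on RV64 arrives here
// with LocVT i64, and the BCvt case below moves it back into an FPR via
// RISCVISD::FMV_W_X_RV64. convertValVTToLocVT performs the reverse
// conversions for outgoing values.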
14993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 14994 const CCValAssign &VA, const SDLoc &DL, 14995 const RISCVSubtarget &Subtarget) { 14996 switch (VA.getLocInfo()) { 14997 default: 14998 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 14999 case CCValAssign::Full: 15000 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 15001 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 15002 break; 15003 case CCValAssign::BCvt: 15004 if (VA.getLocVT().isInteger() && 15005 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) 15006 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); 15007 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 15008 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 15009 else 15010 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 15011 break; 15012 } 15013 return Val; 15014 } 15015 15016 // The caller is responsible for loading the full value if the argument is 15017 // passed with CCValAssign::Indirect. 15018 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 15019 const CCValAssign &VA, const SDLoc &DL, 15020 const ISD::InputArg &In, 15021 const RISCVTargetLowering &TLI) { 15022 MachineFunction &MF = DAG.getMachineFunction(); 15023 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 15024 EVT LocVT = VA.getLocVT(); 15025 SDValue Val; 15026 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 15027 Register VReg = RegInfo.createVirtualRegister(RC); 15028 RegInfo.addLiveIn(VA.getLocReg(), VReg); 15029 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 15030 15031 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. 15032 if (In.isOrigArg()) { 15033 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 15034 if (OrigArg->getType()->isIntegerTy()) { 15035 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 15036 // An input zero extended from i31 can also be considered sign extended. 15037 if ((BitWidth <= 32 && In.Flags.isSExt()) || 15038 (BitWidth < 32 && In.Flags.isZExt())) { 15039 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 15040 RVFI->addSExt32Register(VReg); 15041 } 15042 } 15043 } 15044 15045 if (VA.getLocInfo() == CCValAssign::Indirect) 15046 return Val; 15047 15048 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 15049 } 15050 15051 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 15052 const CCValAssign &VA, const SDLoc &DL, 15053 const RISCVSubtarget &Subtarget) { 15054 EVT LocVT = VA.getLocVT(); 15055 15056 switch (VA.getLocInfo()) { 15057 default: 15058 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 15059 case CCValAssign::Full: 15060 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 15061 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 15062 break; 15063 case CCValAssign::BCvt: 15064 if (VA.getLocVT().isInteger() && 15065 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) 15066 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 15067 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 15068 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 15069 else 15070 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 15071 break; 15072 } 15073 return Val; 15074 } 15075 15076 // The caller is responsible for loading the full value if the argument is 15077 // passed with CCValAssign::Indirect. 
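// unpackFromMemLoc recreates an incoming stack argument as a fixed frame
// object at the offset assigned by the calling convention and loads the
// value (or, for scalable vectors, the pointer to the value) from it.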
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, what is saved on the stack is a
    // pointer to the scalable vector value, so ValVT becomes the pointer
    // type instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI =
        MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// FastCC gives less than a 1% performance improvement on some particular
// benchmarks, but it may theoretically be beneficial in some cases.
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            std::optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for the save-restore libcalls.
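  // In addition to the usual a0-a7 argument registers, the fast calling
  // convention below also hands out t2 and t3-t6 (plus extra temporary FPRs)
  // before resorting to the stack.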
15161 static const MCPhysReg GPRList[] = { 15162 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 15163 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 15164 RISCV::X29, RISCV::X30, RISCV::X31}; 15165 15166 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 15167 if (unsigned Reg = State.AllocateReg(GPRList)) { 15168 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15169 return false; 15170 } 15171 } 15172 15173 const RISCVSubtarget &Subtarget = TLI.getSubtarget(); 15174 15175 if (LocVT == MVT::f16 && 15176 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { 15177 static const MCPhysReg FPR16List[] = { 15178 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 15179 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 15180 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 15181 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 15182 if (unsigned Reg = State.AllocateReg(FPR16List)) { 15183 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15184 return false; 15185 } 15186 } 15187 15188 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 15189 static const MCPhysReg FPR32List[] = { 15190 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 15191 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 15192 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 15193 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 15194 if (unsigned Reg = State.AllocateReg(FPR32List)) { 15195 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15196 return false; 15197 } 15198 } 15199 15200 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 15201 static const MCPhysReg FPR64List[] = { 15202 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 15203 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 15204 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 15205 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 15206 if (unsigned Reg = State.AllocateReg(FPR64List)) { 15207 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15208 return false; 15209 } 15210 } 15211 15212 // Check if there is an available GPR before hitting the stack. 
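  // With the Zhinx/Zfinx/Zdinx extensions, floating-point values live in
  // GPRs, so the same GPR list is reused for them here.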
15213 if ((LocVT == MVT::f16 && 15214 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || 15215 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 15216 (LocVT == MVT::f64 && Subtarget.is64Bit() && 15217 Subtarget.hasStdExtZdinx())) { 15218 if (unsigned Reg = State.AllocateReg(GPRList)) { 15219 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15220 return false; 15221 } 15222 } 15223 15224 if (LocVT == MVT::f16) { 15225 unsigned Offset2 = State.AllocateStack(2, Align(2)); 15226 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo)); 15227 return false; 15228 } 15229 15230 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 15231 unsigned Offset4 = State.AllocateStack(4, Align(4)); 15232 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 15233 return false; 15234 } 15235 15236 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 15237 unsigned Offset5 = State.AllocateStack(8, Align(8)); 15238 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 15239 return false; 15240 } 15241 15242 if (LocVT.isVector()) { 15243 if (unsigned Reg = 15244 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { 15245 // Fixed-length vectors are located in the corresponding scalable-vector 15246 // container types. 15247 if (ValVT.isFixedLengthVector()) 15248 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 15249 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15250 } else { 15251 // Try and pass the address via a "fast" GPR. 15252 if (unsigned GPRReg = State.AllocateReg(GPRList)) { 15253 LocInfo = CCValAssign::Indirect; 15254 LocVT = TLI.getSubtarget().getXLenVT(); 15255 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 15256 } else if (ValVT.isFixedLengthVector()) { 15257 auto StackAlign = 15258 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 15259 unsigned StackOffset = 15260 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 15261 State.addLoc( 15262 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 15263 } else { 15264 // Can't pass scalable vectors on the stack. 15265 return true; 15266 } 15267 } 15268 15269 return false; 15270 } 15271 15272 return true; // CC didn't match. 15273 } 15274 15275 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 15276 CCValAssign::LocInfo LocInfo, 15277 ISD::ArgFlagsTy ArgFlags, CCState &State) { 15278 if (ArgFlags.isNest()) { 15279 report_fatal_error( 15280 "Attribute 'nest' is not supported in GHC calling convention"); 15281 } 15282 15283 static const MCPhysReg GPRList[] = { 15284 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 15285 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 15286 15287 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 15288 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 15289 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 15290 if (unsigned Reg = State.AllocateReg(GPRList)) { 15291 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 15292 return false; 15293 } 15294 } 15295 15296 const RISCVSubtarget &Subtarget = 15297 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 15298 15299 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 15300 // Pass in STG registers: F1, ..., F6 15301 // fs0 ... 
fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
       Subtarget.is64Bit())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
                         "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                   : RISCV::CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
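    // In that case the value arrives either in a GPR pair, split between a
    // GPR and the stack, or entirely on the stack; unpackF64OnRV32DSoftABI
    // reassembles it with RISCVISD::BuildPairF64.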
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument,
    // which is needed when lowering VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers, then create an extra stack slot
    // to ensure that the frame pointer is 2*XLEN-aligned, which in turn
    // ensures offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped into one node so that the sizes of Ins and InVals
  // can be matched up. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly: the address of the value is passed in a register or,
  // if no register is available, on the stack. Passing indirectly often also
  // requires stack space to store the value itself, so the
  // CCInfo.getStackSize() != 0 check above is not sufficient on its own; we
  // must also check whether any of the CCValAssigns in ArgLocs are
  // CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
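  // If the calling conventions differ, the callee's preserved-register mask
  // must cover everything preserved by the caller's mask, which is what the
  // regmaskSubsetEqual check below verifies.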
15539 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 15540 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 15541 if (CalleeCC != CallerCC) { 15542 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 15543 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 15544 return false; 15545 } 15546 15547 // Byval parameters hand the function a pointer directly into the stack area 15548 // we want to reuse during a tail call. Working around this *is* possible 15549 // but less efficient and uglier in LowerCall. 15550 for (auto &Arg : Outs) 15551 if (Arg.Flags.isByVal()) 15552 return false; 15553 15554 return true; 15555 } 15556 15557 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 15558 return DAG.getDataLayout().getPrefTypeAlign( 15559 VT.getTypeForEVT(*DAG.getContext())); 15560 } 15561 15562 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 15563 // and output parameter nodes. 15564 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 15565 SmallVectorImpl<SDValue> &InVals) const { 15566 SelectionDAG &DAG = CLI.DAG; 15567 SDLoc &DL = CLI.DL; 15568 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 15569 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 15570 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 15571 SDValue Chain = CLI.Chain; 15572 SDValue Callee = CLI.Callee; 15573 bool &IsTailCall = CLI.IsTailCall; 15574 CallingConv::ID CallConv = CLI.CallConv; 15575 bool IsVarArg = CLI.IsVarArg; 15576 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 15577 MVT XLenVT = Subtarget.getXLenVT(); 15578 15579 MachineFunction &MF = DAG.getMachineFunction(); 15580 15581 // Analyze the operands of the call, assigning locations to each operand. 15582 SmallVector<CCValAssign, 16> ArgLocs; 15583 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 15584 15585 if (CallConv == CallingConv::GHC) 15586 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); 15587 else 15588 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 15589 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC 15590 : RISCV::CC_RISCV); 15591 15592 // Check if it's really possible to do a tail call. 15593 if (IsTailCall) 15594 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 15595 15596 if (IsTailCall) 15597 ++NumTailCalls; 15598 else if (CLI.CB && CLI.CB->isMustTailCall()) 15599 report_fatal_error("failed to perform tail call elimination on a call " 15600 "site marked musttail"); 15601 15602 // Get a count of how many bytes are to be pushed on the stack. 
15603 unsigned NumBytes = ArgCCInfo.getStackSize(); 15604 15605 // Create local copies for byval args 15606 SmallVector<SDValue, 8> ByValArgs; 15607 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 15608 ISD::ArgFlagsTy Flags = Outs[i].Flags; 15609 if (!Flags.isByVal()) 15610 continue; 15611 15612 SDValue Arg = OutVals[i]; 15613 unsigned Size = Flags.getByValSize(); 15614 Align Alignment = Flags.getNonZeroByValAlign(); 15615 15616 int FI = 15617 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 15618 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 15619 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 15620 15621 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 15622 /*IsVolatile=*/false, 15623 /*AlwaysInline=*/false, IsTailCall, 15624 MachinePointerInfo(), MachinePointerInfo()); 15625 ByValArgs.push_back(FIPtr); 15626 } 15627 15628 if (!IsTailCall) 15629 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 15630 15631 // Copy argument values to their designated locations. 15632 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 15633 SmallVector<SDValue, 8> MemOpChains; 15634 SDValue StackPtr; 15635 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 15636 CCValAssign &VA = ArgLocs[i]; 15637 SDValue ArgValue = OutVals[i]; 15638 ISD::ArgFlagsTy Flags = Outs[i].Flags; 15639 15640 // Handle passing f64 on RV32D with a soft float ABI as a special case. 15641 bool IsF64OnRV32DSoftABI = 15642 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 15643 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 15644 SDValue SplitF64 = DAG.getNode( 15645 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 15646 SDValue Lo = SplitF64.getValue(0); 15647 SDValue Hi = SplitF64.getValue(1); 15648 15649 Register RegLo = VA.getLocReg(); 15650 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 15651 15652 if (RegLo == RISCV::X17) { 15653 // Second half of f64 is passed on the stack. 15654 // Work out the address of the stack slot. 15655 if (!StackPtr.getNode()) 15656 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 15657 // Emit the store. 15658 MemOpChains.push_back( 15659 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 15660 } else { 15661 // Second half of f64 is passed in another GPR. 15662 assert(RegLo < RISCV::X31 && "Invalid register pair"); 15663 Register RegHigh = RegLo + 1; 15664 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 15665 } 15666 continue; 15667 } 15668 15669 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 15670 // as any other MemLoc. 15671 15672 // Promote the value if needed. 15673 // For now, only handle fully promoted and indirect arguments. 15674 if (VA.getLocInfo() == CCValAssign::Indirect) { 15675 // Store the argument in a stack slot and pass its address. 15676 Align StackAlign = 15677 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 15678 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 15679 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 15680 // If the original argument was split (e.g. i128), we need 15681 // to store the required parts of it here (and pass just one address). 15682 // Vectors may be partly split to registers and partly to the stack, in 15683 // which case the base address is partly offset and subsequent stores are 15684 // relative to that. 
15685 unsigned ArgIndex = Outs[i].OrigArgIndex; 15686 unsigned ArgPartOffset = Outs[i].PartOffset; 15687 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 15688 // Calculate the total size to store. We don't have access to what we're 15689 // actually storing other than performing the loop and collecting the 15690 // info. 15691 SmallVector<std::pair<SDValue, SDValue>> Parts; 15692 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 15693 SDValue PartValue = OutVals[i + 1]; 15694 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 15695 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 15696 EVT PartVT = PartValue.getValueType(); 15697 if (PartVT.isScalableVector()) 15698 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 15699 StoredSize += PartVT.getStoreSize(); 15700 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 15701 Parts.push_back(std::make_pair(PartValue, Offset)); 15702 ++i; 15703 } 15704 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 15705 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 15706 MemOpChains.push_back( 15707 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 15708 MachinePointerInfo::getFixedStack(MF, FI))); 15709 for (const auto &Part : Parts) { 15710 SDValue PartValue = Part.first; 15711 SDValue PartOffset = Part.second; 15712 SDValue Address = 15713 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 15714 MemOpChains.push_back( 15715 DAG.getStore(Chain, DL, PartValue, Address, 15716 MachinePointerInfo::getFixedStack(MF, FI))); 15717 } 15718 ArgValue = SpillSlot; 15719 } else { 15720 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 15721 } 15722 15723 // Use local copy if it is a byval arg. 15724 if (Flags.isByVal()) 15725 ArgValue = ByValArgs[j++]; 15726 15727 if (VA.isRegLoc()) { 15728 // Queue up the argument copies and emit them at the end. 15729 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 15730 } else { 15731 assert(VA.isMemLoc() && "Argument not register or memory"); 15732 assert(!IsTailCall && "Tail call not allowed if stack is used " 15733 "for passing parameters"); 15734 15735 // Work out the address of the stack slot. 15736 if (!StackPtr.getNode()) 15737 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 15738 SDValue Address = 15739 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 15740 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 15741 15742 // Emit the store. 15743 MemOpChains.push_back( 15744 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 15745 } 15746 } 15747 15748 // Join the stores, which are independent of one another. 15749 if (!MemOpChains.empty()) 15750 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 15751 15752 SDValue Glue; 15753 15754 // Build a sequence of copy-to-reg nodes, chained and glued together. 15755 for (auto &Reg : RegsToPass) { 15756 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 15757 Glue = Chain.getValue(1); 15758 } 15759 15760 // Validate that none of the argument registers have been marked as 15761 // reserved, if so report an error. Do the same for the return address if this 15762 // is not a tailcall. 
15763 validateCCReservedRegs(RegsToPass, MF); 15764 if (!IsTailCall && 15765 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 15766 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 15767 MF.getFunction(), 15768 "Return address register required, but has been reserved."}); 15769 15770 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 15771 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 15772 // split it and then direct call can be matched by PseudoCALL. 15773 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 15774 const GlobalValue *GV = S->getGlobal(); 15775 15776 unsigned OpFlags = RISCVII::MO_CALL; 15777 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 15778 OpFlags = RISCVII::MO_PLT; 15779 15780 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 15781 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 15782 unsigned OpFlags = RISCVII::MO_CALL; 15783 15784 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 15785 nullptr)) 15786 OpFlags = RISCVII::MO_PLT; 15787 15788 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 15789 } 15790 15791 // The first call operand is the chain and the second is the target address. 15792 SmallVector<SDValue, 8> Ops; 15793 Ops.push_back(Chain); 15794 Ops.push_back(Callee); 15795 15796 // Add argument registers to the end of the list so that they are 15797 // known live into the call. 15798 for (auto &Reg : RegsToPass) 15799 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 15800 15801 if (!IsTailCall) { 15802 // Add a register mask operand representing the call-preserved registers. 15803 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 15804 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 15805 assert(Mask && "Missing call preserved mask for calling convention"); 15806 Ops.push_back(DAG.getRegisterMask(Mask)); 15807 } 15808 15809 // Glue the call to the argument copies, if any. 15810 if (Glue.getNode()) 15811 Ops.push_back(Glue); 15812 15813 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && 15814 "Unexpected CFI type for a direct call"); 15815 15816 // Emit the call. 15817 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 15818 15819 if (IsTailCall) { 15820 MF.getFrameInfo().setHasTailCall(); 15821 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 15822 if (CLI.CFIType) 15823 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 15824 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 15825 return Ret; 15826 } 15827 15828 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 15829 if (CLI.CFIType) 15830 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 15831 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 15832 Glue = Chain.getValue(1); 15833 15834 // Mark the end of the call, which is glued to the call itself. 15835 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 15836 Glue = Chain.getValue(1); 15837 15838 // Assign locations to each value returned by this call. 15839 SmallVector<CCValAssign, 16> RVLocs; 15840 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 15841 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); 15842 15843 // Copy all of the result registers out of their specified physreg. 
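  // An f64 returned in a GPR pair on RV32 (LocVT i32) is special-cased below:
  // both halves are copied out of a0/a1 and recombined with
  // RISCVISD::BuildPairF64.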
15844 for (auto &VA : RVLocs) { 15845 // Copy the value out 15846 SDValue RetValue = 15847 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 15848 // Glue the RetValue to the end of the call sequence 15849 Chain = RetValue.getValue(1); 15850 Glue = RetValue.getValue(2); 15851 15852 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 15853 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 15854 SDValue RetValue2 = 15855 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 15856 Chain = RetValue2.getValue(1); 15857 Glue = RetValue2.getValue(2); 15858 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 15859 RetValue2); 15860 } 15861 15862 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 15863 15864 InVals.push_back(RetValue); 15865 } 15866 15867 return Chain; 15868 } 15869 15870 bool RISCVTargetLowering::CanLowerReturn( 15871 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 15872 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 15873 SmallVector<CCValAssign, 16> RVLocs; 15874 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 15875 15876 std::optional<unsigned> FirstMaskArgument; 15877 if (Subtarget.hasVInstructions()) 15878 FirstMaskArgument = preAssignMask(Outs); 15879 15880 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 15881 MVT VT = Outs[i].VT; 15882 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 15883 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 15884 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 15885 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 15886 *this, FirstMaskArgument)) 15887 return false; 15888 } 15889 return true; 15890 } 15891 15892 SDValue 15893 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 15894 bool IsVarArg, 15895 const SmallVectorImpl<ISD::OutputArg> &Outs, 15896 const SmallVectorImpl<SDValue> &OutVals, 15897 const SDLoc &DL, SelectionDAG &DAG) const { 15898 MachineFunction &MF = DAG.getMachineFunction(); 15899 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 15900 15901 // Stores the assignment of the return value to a location. 15902 SmallVector<CCValAssign, 16> RVLocs; 15903 15904 // Info about the registers and stack slot. 15905 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 15906 *DAG.getContext()); 15907 15908 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 15909 nullptr, RISCV::CC_RISCV); 15910 15911 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 15912 report_fatal_error("GHC functions return void only"); 15913 15914 SDValue Glue; 15915 SmallVector<SDValue, 4> RetOps(1, Chain); 15916 15917 // Copy the result values into the output registers. 15918 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 15919 SDValue Val = OutVals[i]; 15920 CCValAssign &VA = RVLocs[i]; 15921 assert(VA.isRegLoc() && "Can only return in registers!"); 15922 15923 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 15924 // Handle returning f64 on RV32D with a soft float ABI. 
15925 assert(VA.isRegLoc() && "Expected return via registers"); 15926 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 15927 DAG.getVTList(MVT::i32, MVT::i32), Val); 15928 SDValue Lo = SplitF64.getValue(0); 15929 SDValue Hi = SplitF64.getValue(1); 15930 Register RegLo = VA.getLocReg(); 15931 assert(RegLo < RISCV::X31 && "Invalid register pair"); 15932 Register RegHi = RegLo + 1; 15933 15934 if (STI.isRegisterReservedByUser(RegLo) || 15935 STI.isRegisterReservedByUser(RegHi)) 15936 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 15937 MF.getFunction(), 15938 "Return value register required, but has been reserved."}); 15939 15940 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 15941 Glue = Chain.getValue(1); 15942 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 15943 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 15944 Glue = Chain.getValue(1); 15945 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 15946 } else { 15947 // Handle a 'normal' return. 15948 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 15949 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 15950 15951 if (STI.isRegisterReservedByUser(VA.getLocReg())) 15952 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 15953 MF.getFunction(), 15954 "Return value register required, but has been reserved."}); 15955 15956 // Guarantee that all emitted copies are stuck together. 15957 Glue = Chain.getValue(1); 15958 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 15959 } 15960 } 15961 15962 RetOps[0] = Chain; // Update chain. 15963 15964 // Add the glue node if we have it. 15965 if (Glue.getNode()) { 15966 RetOps.push_back(Glue); 15967 } 15968 15969 if (any_of(RVLocs, 15970 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 15971 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 15972 15973 unsigned RetOpc = RISCVISD::RET_GLUE; 15974 // Interrupt service routines use different return instructions. 15975 const Function &Func = DAG.getMachineFunction().getFunction(); 15976 if (Func.hasFnAttribute("interrupt")) { 15977 if (!Func.getReturnType()->isVoidTy()) 15978 report_fatal_error( 15979 "Functions with the interrupt attribute must have void return type!"); 15980 15981 MachineFunction &MF = DAG.getMachineFunction(); 15982 StringRef Kind = 15983 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 15984 15985 if (Kind == "supervisor") 15986 RetOpc = RISCVISD::SRET_GLUE; 15987 else 15988 RetOpc = RISCVISD::MRET_GLUE; 15989 } 15990 15991 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 15992 } 15993 15994 void RISCVTargetLowering::validateCCReservedRegs( 15995 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 15996 MachineFunction &MF) const { 15997 const Function &F = MF.getFunction(); 15998 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 15999 16000 if (llvm::any_of(Regs, [&STI](auto Reg) { 16001 return STI.isRegisterReservedByUser(Reg.first); 16002 })) 16003 F.getContext().diagnose(DiagnosticInfoUnsupported{ 16004 F, "Argument register required, but has been reserved."}); 16005 } 16006 16007 // Check if the result of the node is only used as a return value, as 16008 // otherwise we can't perform a tail-call. 
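// Concretely, the value must feed (possibly through a bitcast) a single
// ISD::CopyToReg without a glue operand whose only users are
// RISCVISD::RET_GLUE nodes.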
16009 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 16010 if (N->getNumValues() != 1) 16011 return false; 16012 if (!N->hasNUsesOfValue(1, 0)) 16013 return false; 16014 16015 SDNode *Copy = *N->use_begin(); 16016 16017 if (Copy->getOpcode() == ISD::BITCAST) { 16018 return isUsedByReturnOnly(Copy, Chain); 16019 } 16020 16021 // TODO: Handle additional opcodes in order to support tail-calling libcalls 16022 // with soft float ABIs. 16023 if (Copy->getOpcode() != ISD::CopyToReg) { 16024 return false; 16025 } 16026 16027 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 16028 // isn't safe to perform a tail call. 16029 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) 16030 return false; 16031 16032 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. 16033 bool HasRet = false; 16034 for (SDNode *Node : Copy->uses()) { 16035 if (Node->getOpcode() != RISCVISD::RET_GLUE) 16036 return false; 16037 HasRet = true; 16038 } 16039 if (!HasRet) 16040 return false; 16041 16042 Chain = Copy->getOperand(0); 16043 return true; 16044 } 16045 16046 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 16047 return CI->isTailCall(); 16048 } 16049 16050 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 16051 #define NODE_NAME_CASE(NODE) \ 16052 case RISCVISD::NODE: \ 16053 return "RISCVISD::" #NODE; 16054 // clang-format off 16055 switch ((RISCVISD::NodeType)Opcode) { 16056 case RISCVISD::FIRST_NUMBER: 16057 break; 16058 NODE_NAME_CASE(RET_GLUE) 16059 NODE_NAME_CASE(SRET_GLUE) 16060 NODE_NAME_CASE(MRET_GLUE) 16061 NODE_NAME_CASE(CALL) 16062 NODE_NAME_CASE(SELECT_CC) 16063 NODE_NAME_CASE(BR_CC) 16064 NODE_NAME_CASE(BuildPairF64) 16065 NODE_NAME_CASE(SplitF64) 16066 NODE_NAME_CASE(TAIL) 16067 NODE_NAME_CASE(ADD_LO) 16068 NODE_NAME_CASE(HI) 16069 NODE_NAME_CASE(LLA) 16070 NODE_NAME_CASE(LGA) 16071 NODE_NAME_CASE(ADD_TPREL) 16072 NODE_NAME_CASE(LA_TLS_IE) 16073 NODE_NAME_CASE(LA_TLS_GD) 16074 NODE_NAME_CASE(MULHSU) 16075 NODE_NAME_CASE(SLLW) 16076 NODE_NAME_CASE(SRAW) 16077 NODE_NAME_CASE(SRLW) 16078 NODE_NAME_CASE(DIVW) 16079 NODE_NAME_CASE(DIVUW) 16080 NODE_NAME_CASE(REMUW) 16081 NODE_NAME_CASE(ROLW) 16082 NODE_NAME_CASE(RORW) 16083 NODE_NAME_CASE(CLZW) 16084 NODE_NAME_CASE(CTZW) 16085 NODE_NAME_CASE(ABSW) 16086 NODE_NAME_CASE(FMV_H_X) 16087 NODE_NAME_CASE(FMV_X_ANYEXTH) 16088 NODE_NAME_CASE(FMV_X_SIGNEXTH) 16089 NODE_NAME_CASE(FMV_W_X_RV64) 16090 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 16091 NODE_NAME_CASE(FCVT_X) 16092 NODE_NAME_CASE(FCVT_XU) 16093 NODE_NAME_CASE(FCVT_W_RV64) 16094 NODE_NAME_CASE(FCVT_WU_RV64) 16095 NODE_NAME_CASE(STRICT_FCVT_W_RV64) 16096 NODE_NAME_CASE(STRICT_FCVT_WU_RV64) 16097 NODE_NAME_CASE(FP_ROUND_BF16) 16098 NODE_NAME_CASE(FP_EXTEND_BF16) 16099 NODE_NAME_CASE(FROUND) 16100 NODE_NAME_CASE(FPCLASS) 16101 NODE_NAME_CASE(FMAX) 16102 NODE_NAME_CASE(FMIN) 16103 NODE_NAME_CASE(READ_CYCLE_WIDE) 16104 NODE_NAME_CASE(BREV8) 16105 NODE_NAME_CASE(ORC_B) 16106 NODE_NAME_CASE(ZIP) 16107 NODE_NAME_CASE(UNZIP) 16108 NODE_NAME_CASE(CLMUL) 16109 NODE_NAME_CASE(CLMULH) 16110 NODE_NAME_CASE(CLMULR) 16111 NODE_NAME_CASE(SHA256SIG0) 16112 NODE_NAME_CASE(SHA256SIG1) 16113 NODE_NAME_CASE(SHA256SUM0) 16114 NODE_NAME_CASE(SHA256SUM1) 16115 NODE_NAME_CASE(SM4KS) 16116 NODE_NAME_CASE(SM4ED) 16117 NODE_NAME_CASE(SM3P0) 16118 NODE_NAME_CASE(SM3P1) 16119 NODE_NAME_CASE(TH_LWD) 16120 NODE_NAME_CASE(TH_LWUD) 16121 NODE_NAME_CASE(TH_LDD) 16122 NODE_NAME_CASE(TH_SWD) 16123 
NODE_NAME_CASE(TH_SDD) 16124 NODE_NAME_CASE(VMV_V_V_VL) 16125 NODE_NAME_CASE(VMV_V_X_VL) 16126 NODE_NAME_CASE(VFMV_V_F_VL) 16127 NODE_NAME_CASE(VMV_X_S) 16128 NODE_NAME_CASE(VMV_S_X_VL) 16129 NODE_NAME_CASE(VFMV_S_F_VL) 16130 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) 16131 NODE_NAME_CASE(READ_VLENB) 16132 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 16133 NODE_NAME_CASE(VSLIDEUP_VL) 16134 NODE_NAME_CASE(VSLIDE1UP_VL) 16135 NODE_NAME_CASE(VSLIDEDOWN_VL) 16136 NODE_NAME_CASE(VSLIDE1DOWN_VL) 16137 NODE_NAME_CASE(VFSLIDE1UP_VL) 16138 NODE_NAME_CASE(VFSLIDE1DOWN_VL) 16139 NODE_NAME_CASE(VID_VL) 16140 NODE_NAME_CASE(VFNCVT_ROD_VL) 16141 NODE_NAME_CASE(VECREDUCE_ADD_VL) 16142 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 16143 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 16144 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 16145 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 16146 NODE_NAME_CASE(VECREDUCE_AND_VL) 16147 NODE_NAME_CASE(VECREDUCE_OR_VL) 16148 NODE_NAME_CASE(VECREDUCE_XOR_VL) 16149 NODE_NAME_CASE(VECREDUCE_FADD_VL) 16150 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 16151 NODE_NAME_CASE(VECREDUCE_FMIN_VL) 16152 NODE_NAME_CASE(VECREDUCE_FMAX_VL) 16153 NODE_NAME_CASE(ADD_VL) 16154 NODE_NAME_CASE(AND_VL) 16155 NODE_NAME_CASE(MUL_VL) 16156 NODE_NAME_CASE(OR_VL) 16157 NODE_NAME_CASE(SDIV_VL) 16158 NODE_NAME_CASE(SHL_VL) 16159 NODE_NAME_CASE(SREM_VL) 16160 NODE_NAME_CASE(SRA_VL) 16161 NODE_NAME_CASE(SRL_VL) 16162 NODE_NAME_CASE(SUB_VL) 16163 NODE_NAME_CASE(UDIV_VL) 16164 NODE_NAME_CASE(UREM_VL) 16165 NODE_NAME_CASE(XOR_VL) 16166 NODE_NAME_CASE(SADDSAT_VL) 16167 NODE_NAME_CASE(UADDSAT_VL) 16168 NODE_NAME_CASE(SSUBSAT_VL) 16169 NODE_NAME_CASE(USUBSAT_VL) 16170 NODE_NAME_CASE(FADD_VL) 16171 NODE_NAME_CASE(FSUB_VL) 16172 NODE_NAME_CASE(FMUL_VL) 16173 NODE_NAME_CASE(FDIV_VL) 16174 NODE_NAME_CASE(FNEG_VL) 16175 NODE_NAME_CASE(FABS_VL) 16176 NODE_NAME_CASE(FSQRT_VL) 16177 NODE_NAME_CASE(FCLASS_VL) 16178 NODE_NAME_CASE(VFMADD_VL) 16179 NODE_NAME_CASE(VFNMADD_VL) 16180 NODE_NAME_CASE(VFMSUB_VL) 16181 NODE_NAME_CASE(VFNMSUB_VL) 16182 NODE_NAME_CASE(VFWMADD_VL) 16183 NODE_NAME_CASE(VFWNMADD_VL) 16184 NODE_NAME_CASE(VFWMSUB_VL) 16185 NODE_NAME_CASE(VFWNMSUB_VL) 16186 NODE_NAME_CASE(FCOPYSIGN_VL) 16187 NODE_NAME_CASE(SMIN_VL) 16188 NODE_NAME_CASE(SMAX_VL) 16189 NODE_NAME_CASE(UMIN_VL) 16190 NODE_NAME_CASE(UMAX_VL) 16191 NODE_NAME_CASE(BITREVERSE_VL) 16192 NODE_NAME_CASE(BSWAP_VL) 16193 NODE_NAME_CASE(CTLZ_VL) 16194 NODE_NAME_CASE(CTTZ_VL) 16195 NODE_NAME_CASE(CTPOP_VL) 16196 NODE_NAME_CASE(FMINNUM_VL) 16197 NODE_NAME_CASE(FMAXNUM_VL) 16198 NODE_NAME_CASE(MULHS_VL) 16199 NODE_NAME_CASE(MULHU_VL) 16200 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) 16201 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) 16202 NODE_NAME_CASE(VFCVT_RM_X_F_VL) 16203 NODE_NAME_CASE(VFCVT_RM_XU_F_VL) 16204 NODE_NAME_CASE(VFCVT_X_F_VL) 16205 NODE_NAME_CASE(VFCVT_XU_F_VL) 16206 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) 16207 NODE_NAME_CASE(SINT_TO_FP_VL) 16208 NODE_NAME_CASE(UINT_TO_FP_VL) 16209 NODE_NAME_CASE(VFCVT_RM_F_XU_VL) 16210 NODE_NAME_CASE(VFCVT_RM_F_X_VL) 16211 NODE_NAME_CASE(FP_EXTEND_VL) 16212 NODE_NAME_CASE(FP_ROUND_VL) 16213 NODE_NAME_CASE(STRICT_FADD_VL) 16214 NODE_NAME_CASE(STRICT_FSUB_VL) 16215 NODE_NAME_CASE(STRICT_FMUL_VL) 16216 NODE_NAME_CASE(STRICT_FDIV_VL) 16217 NODE_NAME_CASE(STRICT_FSQRT_VL) 16218 NODE_NAME_CASE(STRICT_VFMADD_VL) 16219 NODE_NAME_CASE(STRICT_VFNMADD_VL) 16220 NODE_NAME_CASE(STRICT_VFMSUB_VL) 16221 NODE_NAME_CASE(STRICT_VFNMSUB_VL) 16222 NODE_NAME_CASE(STRICT_FP_ROUND_VL) 16223 NODE_NAME_CASE(STRICT_FP_EXTEND_VL) 16224 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) 16225 
NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) 16226 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) 16227 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) 16228 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) 16229 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) 16230 NODE_NAME_CASE(STRICT_FSETCC_VL) 16231 NODE_NAME_CASE(STRICT_FSETCCS_VL) 16232 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) 16233 NODE_NAME_CASE(VWMUL_VL) 16234 NODE_NAME_CASE(VWMULU_VL) 16235 NODE_NAME_CASE(VWMULSU_VL) 16236 NODE_NAME_CASE(VWADD_VL) 16237 NODE_NAME_CASE(VWADDU_VL) 16238 NODE_NAME_CASE(VWSUB_VL) 16239 NODE_NAME_CASE(VWSUBU_VL) 16240 NODE_NAME_CASE(VWADD_W_VL) 16241 NODE_NAME_CASE(VWADDU_W_VL) 16242 NODE_NAME_CASE(VWSUB_W_VL) 16243 NODE_NAME_CASE(VWSUBU_W_VL) 16244 NODE_NAME_CASE(VFWMUL_VL) 16245 NODE_NAME_CASE(VFWADD_VL) 16246 NODE_NAME_CASE(VFWSUB_VL) 16247 NODE_NAME_CASE(VFWADD_W_VL) 16248 NODE_NAME_CASE(VFWSUB_W_VL) 16249 NODE_NAME_CASE(VWMACC_VL) 16250 NODE_NAME_CASE(VWMACCU_VL) 16251 NODE_NAME_CASE(VWMACCSU_VL) 16252 NODE_NAME_CASE(VNSRL_VL) 16253 NODE_NAME_CASE(SETCC_VL) 16254 NODE_NAME_CASE(VSELECT_VL) 16255 NODE_NAME_CASE(VP_MERGE_VL) 16256 NODE_NAME_CASE(VMAND_VL) 16257 NODE_NAME_CASE(VMOR_VL) 16258 NODE_NAME_CASE(VMXOR_VL) 16259 NODE_NAME_CASE(VMCLR_VL) 16260 NODE_NAME_CASE(VMSET_VL) 16261 NODE_NAME_CASE(VRGATHER_VX_VL) 16262 NODE_NAME_CASE(VRGATHER_VV_VL) 16263 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 16264 NODE_NAME_CASE(VSEXT_VL) 16265 NODE_NAME_CASE(VZEXT_VL) 16266 NODE_NAME_CASE(VCPOP_VL) 16267 NODE_NAME_CASE(VFIRST_VL) 16268 NODE_NAME_CASE(READ_CSR) 16269 NODE_NAME_CASE(WRITE_CSR) 16270 NODE_NAME_CASE(SWAP_CSR) 16271 NODE_NAME_CASE(CZERO_EQZ) 16272 NODE_NAME_CASE(CZERO_NEZ) 16273 } 16274 // clang-format on 16275 return nullptr; 16276 #undef NODE_NAME_CASE 16277 } 16278 16279 /// getConstraintType - Given a constraint letter, return the type of 16280 /// constraint it is for this target. 16281 RISCVTargetLowering::ConstraintType 16282 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 16283 if (Constraint.size() == 1) { 16284 switch (Constraint[0]) { 16285 default: 16286 break; 16287 case 'f': 16288 return C_RegisterClass; 16289 case 'I': 16290 case 'J': 16291 case 'K': 16292 return C_Immediate; 16293 case 'A': 16294 return C_Memory; 16295 case 'S': // A symbolic address 16296 return C_Other; 16297 } 16298 } else { 16299 if (Constraint == "vr" || Constraint == "vm") 16300 return C_RegisterClass; 16301 } 16302 return TargetLowering::getConstraintType(Constraint); 16303 } 16304 16305 std::pair<unsigned, const TargetRegisterClass *> 16306 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 16307 StringRef Constraint, 16308 MVT VT) const { 16309 // First, see if this is a constraint that directly corresponds to a RISC-V 16310 // register class. 16311 if (Constraint.size() == 1) { 16312 switch (Constraint[0]) { 16313 case 'r': 16314 // TODO: Support fixed vectors up to XLen for P extension? 
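      // 'r' picks an integer register other than x0 (GPRNoX0 below); x0 is
      // hard-wired to zero, so it cannot carry an inline-asm operand's value.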
16315 if (VT.isVector()) 16316 break; 16317 return std::make_pair(0U, &RISCV::GPRNoX0RegClass); 16318 case 'f': 16319 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) 16320 return std::make_pair(0U, &RISCV::FPR16RegClass); 16321 if (Subtarget.hasStdExtF() && VT == MVT::f32) 16322 return std::make_pair(0U, &RISCV::FPR32RegClass); 16323 if (Subtarget.hasStdExtD() && VT == MVT::f64) 16324 return std::make_pair(0U, &RISCV::FPR64RegClass); 16325 break; 16326 default: 16327 break; 16328 } 16329 } else if (Constraint == "vr") { 16330 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, 16331 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 16332 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 16333 return std::make_pair(0U, RC); 16334 } 16335 } else if (Constraint == "vm") { 16336 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) 16337 return std::make_pair(0U, &RISCV::VMV0RegClass); 16338 } 16339 16340 // Clang will correctly decode the usage of register name aliases into their 16341 // official names. However, other frontends like `rustc` do not. This allows 16342 // users of these frontends to use the ABI names for registers in LLVM-style 16343 // register constraints. 16344 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 16345 .Case("{zero}", RISCV::X0) 16346 .Case("{ra}", RISCV::X1) 16347 .Case("{sp}", RISCV::X2) 16348 .Case("{gp}", RISCV::X3) 16349 .Case("{tp}", RISCV::X4) 16350 .Case("{t0}", RISCV::X5) 16351 .Case("{t1}", RISCV::X6) 16352 .Case("{t2}", RISCV::X7) 16353 .Cases("{s0}", "{fp}", RISCV::X8) 16354 .Case("{s1}", RISCV::X9) 16355 .Case("{a0}", RISCV::X10) 16356 .Case("{a1}", RISCV::X11) 16357 .Case("{a2}", RISCV::X12) 16358 .Case("{a3}", RISCV::X13) 16359 .Case("{a4}", RISCV::X14) 16360 .Case("{a5}", RISCV::X15) 16361 .Case("{a6}", RISCV::X16) 16362 .Case("{a7}", RISCV::X17) 16363 .Case("{s2}", RISCV::X18) 16364 .Case("{s3}", RISCV::X19) 16365 .Case("{s4}", RISCV::X20) 16366 .Case("{s5}", RISCV::X21) 16367 .Case("{s6}", RISCV::X22) 16368 .Case("{s7}", RISCV::X23) 16369 .Case("{s8}", RISCV::X24) 16370 .Case("{s9}", RISCV::X25) 16371 .Case("{s10}", RISCV::X26) 16372 .Case("{s11}", RISCV::X27) 16373 .Case("{t3}", RISCV::X28) 16374 .Case("{t4}", RISCV::X29) 16375 .Case("{t5}", RISCV::X30) 16376 .Case("{t6}", RISCV::X31) 16377 .Default(RISCV::NoRegister); 16378 if (XRegFromAlias != RISCV::NoRegister) 16379 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 16380 16381 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 16382 // TableGen record rather than the AsmName to choose registers for InlineAsm 16383 // constraints, plus we want to match those names to the widest floating point 16384 // register type available, manually select floating point registers here. 16385 // 16386 // The second case is the ABI name of the register, so that frontends can also 16387 // use the ABI names in register constraint lists. 
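  // For example, both "{f10}" and "{fa0}" name the same physical register: with
  // the D extension and an f64 (or unspecified) type the pair (F10_D, FPR64) is
  // returned; otherwise the FPR32 or FPR16 variant is selected below based on
  // the requested type.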
16388 if (Subtarget.hasStdExtF()) { 16389 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 16390 .Cases("{f0}", "{ft0}", RISCV::F0_F) 16391 .Cases("{f1}", "{ft1}", RISCV::F1_F) 16392 .Cases("{f2}", "{ft2}", RISCV::F2_F) 16393 .Cases("{f3}", "{ft3}", RISCV::F3_F) 16394 .Cases("{f4}", "{ft4}", RISCV::F4_F) 16395 .Cases("{f5}", "{ft5}", RISCV::F5_F) 16396 .Cases("{f6}", "{ft6}", RISCV::F6_F) 16397 .Cases("{f7}", "{ft7}", RISCV::F7_F) 16398 .Cases("{f8}", "{fs0}", RISCV::F8_F) 16399 .Cases("{f9}", "{fs1}", RISCV::F9_F) 16400 .Cases("{f10}", "{fa0}", RISCV::F10_F) 16401 .Cases("{f11}", "{fa1}", RISCV::F11_F) 16402 .Cases("{f12}", "{fa2}", RISCV::F12_F) 16403 .Cases("{f13}", "{fa3}", RISCV::F13_F) 16404 .Cases("{f14}", "{fa4}", RISCV::F14_F) 16405 .Cases("{f15}", "{fa5}", RISCV::F15_F) 16406 .Cases("{f16}", "{fa6}", RISCV::F16_F) 16407 .Cases("{f17}", "{fa7}", RISCV::F17_F) 16408 .Cases("{f18}", "{fs2}", RISCV::F18_F) 16409 .Cases("{f19}", "{fs3}", RISCV::F19_F) 16410 .Cases("{f20}", "{fs4}", RISCV::F20_F) 16411 .Cases("{f21}", "{fs5}", RISCV::F21_F) 16412 .Cases("{f22}", "{fs6}", RISCV::F22_F) 16413 .Cases("{f23}", "{fs7}", RISCV::F23_F) 16414 .Cases("{f24}", "{fs8}", RISCV::F24_F) 16415 .Cases("{f25}", "{fs9}", RISCV::F25_F) 16416 .Cases("{f26}", "{fs10}", RISCV::F26_F) 16417 .Cases("{f27}", "{fs11}", RISCV::F27_F) 16418 .Cases("{f28}", "{ft8}", RISCV::F28_F) 16419 .Cases("{f29}", "{ft9}", RISCV::F29_F) 16420 .Cases("{f30}", "{ft10}", RISCV::F30_F) 16421 .Cases("{f31}", "{ft11}", RISCV::F31_F) 16422 .Default(RISCV::NoRegister); 16423 if (FReg != RISCV::NoRegister) { 16424 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 16425 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { 16426 unsigned RegNo = FReg - RISCV::F0_F; 16427 unsigned DReg = RISCV::F0_D + RegNo; 16428 return std::make_pair(DReg, &RISCV::FPR64RegClass); 16429 } 16430 if (VT == MVT::f32 || VT == MVT::Other) 16431 return std::make_pair(FReg, &RISCV::FPR32RegClass); 16432 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) { 16433 unsigned RegNo = FReg - RISCV::F0_F; 16434 unsigned HReg = RISCV::F0_H + RegNo; 16435 return std::make_pair(HReg, &RISCV::FPR16RegClass); 16436 } 16437 } 16438 } 16439 16440 if (Subtarget.hasVInstructions()) { 16441 Register VReg = StringSwitch<Register>(Constraint.lower()) 16442 .Case("{v0}", RISCV::V0) 16443 .Case("{v1}", RISCV::V1) 16444 .Case("{v2}", RISCV::V2) 16445 .Case("{v3}", RISCV::V3) 16446 .Case("{v4}", RISCV::V4) 16447 .Case("{v5}", RISCV::V5) 16448 .Case("{v6}", RISCV::V6) 16449 .Case("{v7}", RISCV::V7) 16450 .Case("{v8}", RISCV::V8) 16451 .Case("{v9}", RISCV::V9) 16452 .Case("{v10}", RISCV::V10) 16453 .Case("{v11}", RISCV::V11) 16454 .Case("{v12}", RISCV::V12) 16455 .Case("{v13}", RISCV::V13) 16456 .Case("{v14}", RISCV::V14) 16457 .Case("{v15}", RISCV::V15) 16458 .Case("{v16}", RISCV::V16) 16459 .Case("{v17}", RISCV::V17) 16460 .Case("{v18}", RISCV::V18) 16461 .Case("{v19}", RISCV::V19) 16462 .Case("{v20}", RISCV::V20) 16463 .Case("{v21}", RISCV::V21) 16464 .Case("{v22}", RISCV::V22) 16465 .Case("{v23}", RISCV::V23) 16466 .Case("{v24}", RISCV::V24) 16467 .Case("{v25}", RISCV::V25) 16468 .Case("{v26}", RISCV::V26) 16469 .Case("{v27}", RISCV::V27) 16470 .Case("{v28}", RISCV::V28) 16471 .Case("{v29}", RISCV::V29) 16472 .Case("{v30}", RISCV::V30) 16473 .Case("{v31}", RISCV::V31) 16474 .Default(RISCV::NoRegister); 16475 if (VReg != RISCV::NoRegister) { 16476 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 16477 return 
std::make_pair(VReg, &RISCV::VMRegClass); 16478 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 16479 return std::make_pair(VReg, &RISCV::VRRegClass); 16480 for (const auto *RC : 16481 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 16482 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 16483 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 16484 return std::make_pair(VReg, RC); 16485 } 16486 } 16487 } 16488 } 16489 16490 std::pair<Register, const TargetRegisterClass *> Res = 16491 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 16492 16493 // If we picked one of the Zfinx register classes, remap it to the GPR class. 16494 // FIXME: When Zfinx is supported in CodeGen this will need to take the 16495 // Subtarget into account. 16496 if (Res.second == &RISCV::GPRF16RegClass || 16497 Res.second == &RISCV::GPRF32RegClass || 16498 Res.second == &RISCV::GPRPF64RegClass) 16499 return std::make_pair(Res.first, &RISCV::GPRRegClass); 16500 16501 return Res; 16502 } 16503 16504 unsigned 16505 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 16506 // Currently only support length 1 constraints. 16507 if (ConstraintCode.size() == 1) { 16508 switch (ConstraintCode[0]) { 16509 case 'A': 16510 return InlineAsm::Constraint_A; 16511 default: 16512 break; 16513 } 16514 } 16515 16516 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 16517 } 16518 16519 void RISCVTargetLowering::LowerAsmOperandForConstraint( 16520 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 16521 SelectionDAG &DAG) const { 16522 // Currently only support length 1 constraints. 16523 if (Constraint.length() == 1) { 16524 switch (Constraint[0]) { 16525 case 'I': 16526 // Validate & create a 12-bit signed immediate operand. 16527 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 16528 uint64_t CVal = C->getSExtValue(); 16529 if (isInt<12>(CVal)) 16530 Ops.push_back( 16531 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 16532 } 16533 return; 16534 case 'J': 16535 // Validate & create an integer zero operand. 16536 if (isNullConstant(Op)) 16537 Ops.push_back( 16538 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 16539 return; 16540 case 'K': 16541 // Validate & create a 5-bit unsigned immediate operand. 
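      // A 5-bit unsigned immediate covers the range [0, 31], e.g. an RV32 shift
      // amount.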
16542 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 16543 uint64_t CVal = C->getZExtValue(); 16544 if (isUInt<5>(CVal)) 16545 Ops.push_back( 16546 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 16547 } 16548 return; 16549 case 'S': 16550 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 16551 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), 16552 GA->getValueType(0))); 16553 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { 16554 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), 16555 BA->getValueType(0))); 16556 } 16557 return; 16558 default: 16559 break; 16560 } 16561 } 16562 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 16563 } 16564 16565 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, 16566 Instruction *Inst, 16567 AtomicOrdering Ord) const { 16568 if (Subtarget.hasStdExtZtso()) { 16569 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 16570 return Builder.CreateFence(Ord); 16571 return nullptr; 16572 } 16573 16574 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 16575 return Builder.CreateFence(Ord); 16576 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 16577 return Builder.CreateFence(AtomicOrdering::Release); 16578 return nullptr; 16579 } 16580 16581 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 16582 Instruction *Inst, 16583 AtomicOrdering Ord) const { 16584 if (Subtarget.hasStdExtZtso()) 16585 return nullptr; 16586 16587 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 16588 return Builder.CreateFence(AtomicOrdering::Acquire); 16589 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) && 16590 Ord == AtomicOrdering::SequentiallyConsistent) 16591 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); 16592 return nullptr; 16593 } 16594 16595 TargetLowering::AtomicExpansionKind 16596 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 16597 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 16598 // point operations can't be used in an lr/sc sequence without breaking the 16599 // forward-progress guarantee. 16600 if (AI->isFloatingPointOperation() || 16601 AI->getOperation() == AtomicRMWInst::UIncWrap || 16602 AI->getOperation() == AtomicRMWInst::UDecWrap) 16603 return AtomicExpansionKind::CmpXChg; 16604 16605 // Don't expand forced atomics, we want to have __sync libcalls instead. 
16606 if (Subtarget.hasForcedAtomics()) 16607 return AtomicExpansionKind::None; 16608 16609 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 16610 if (Size == 8 || Size == 16) 16611 return AtomicExpansionKind::MaskedIntrinsic; 16612 return AtomicExpansionKind::None; 16613 } 16614 16615 static Intrinsic::ID 16616 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 16617 if (XLen == 32) { 16618 switch (BinOp) { 16619 default: 16620 llvm_unreachable("Unexpected AtomicRMW BinOp"); 16621 case AtomicRMWInst::Xchg: 16622 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 16623 case AtomicRMWInst::Add: 16624 return Intrinsic::riscv_masked_atomicrmw_add_i32; 16625 case AtomicRMWInst::Sub: 16626 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 16627 case AtomicRMWInst::Nand: 16628 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 16629 case AtomicRMWInst::Max: 16630 return Intrinsic::riscv_masked_atomicrmw_max_i32; 16631 case AtomicRMWInst::Min: 16632 return Intrinsic::riscv_masked_atomicrmw_min_i32; 16633 case AtomicRMWInst::UMax: 16634 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 16635 case AtomicRMWInst::UMin: 16636 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 16637 } 16638 } 16639 16640 if (XLen == 64) { 16641 switch (BinOp) { 16642 default: 16643 llvm_unreachable("Unexpected AtomicRMW BinOp"); 16644 case AtomicRMWInst::Xchg: 16645 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 16646 case AtomicRMWInst::Add: 16647 return Intrinsic::riscv_masked_atomicrmw_add_i64; 16648 case AtomicRMWInst::Sub: 16649 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 16650 case AtomicRMWInst::Nand: 16651 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 16652 case AtomicRMWInst::Max: 16653 return Intrinsic::riscv_masked_atomicrmw_max_i64; 16654 case AtomicRMWInst::Min: 16655 return Intrinsic::riscv_masked_atomicrmw_min_i64; 16656 case AtomicRMWInst::UMax: 16657 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 16658 case AtomicRMWInst::UMin: 16659 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 16660 } 16661 } 16662 16663 llvm_unreachable("Unexpected XLen\n"); 16664 } 16665 16666 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 16667 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 16668 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 16669 unsigned XLen = Subtarget.getXLen(); 16670 Value *Ordering = 16671 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 16672 Type *Tys[] = {AlignedAddr->getType()}; 16673 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 16674 AI->getModule(), 16675 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 16676 16677 if (XLen == 64) { 16678 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 16679 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 16680 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 16681 } 16682 16683 Value *Result; 16684 16685 // Must pass the shift amount needed to sign extend the loaded value prior 16686 // to performing a signed comparison for min/max. ShiftAmt is the number of 16687 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 16688 // is the number of bits to left+right shift the value in order to 16689 // sign-extend. 
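  // For example, for an i8 atomicrmw min on RV64 whose byte occupies bits [7:0]
  // of the aligned word (ShiftAmt = 0), SextShamt is 64 - 8 - 0 = 56: the loaded
  // byte is shifted left and then arithmetic-shifted right by 56 before the
  // signed comparison.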
16690 if (AI->getOperation() == AtomicRMWInst::Min || 16691 AI->getOperation() == AtomicRMWInst::Max) { 16692 const DataLayout &DL = AI->getModule()->getDataLayout(); 16693 unsigned ValWidth = 16694 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 16695 Value *SextShamt = 16696 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 16697 Result = Builder.CreateCall(LrwOpScwLoop, 16698 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 16699 } else { 16700 Result = 16701 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 16702 } 16703 16704 if (XLen == 64) 16705 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 16706 return Result; 16707 } 16708 16709 TargetLowering::AtomicExpansionKind 16710 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 16711 AtomicCmpXchgInst *CI) const { 16712 // Don't expand forced atomics, we want to have __sync libcalls instead. 16713 if (Subtarget.hasForcedAtomics()) 16714 return AtomicExpansionKind::None; 16715 16716 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 16717 if (Size == 8 || Size == 16) 16718 return AtomicExpansionKind::MaskedIntrinsic; 16719 return AtomicExpansionKind::None; 16720 } 16721 16722 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 16723 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 16724 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 16725 unsigned XLen = Subtarget.getXLen(); 16726 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 16727 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 16728 if (XLen == 64) { 16729 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 16730 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 16731 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 16732 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 16733 } 16734 Type *Tys[] = {AlignedAddr->getType()}; 16735 Function *MaskedCmpXchg = 16736 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 16737 Value *Result = Builder.CreateCall( 16738 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 16739 if (XLen == 64) 16740 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 16741 return Result; 16742 } 16743 16744 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, 16745 EVT DataVT) const { 16746 return false; 16747 } 16748 16749 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, 16750 EVT VT) const { 16751 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) 16752 return false; 16753 16754 switch (FPVT.getSimpleVT().SimpleTy) { 16755 case MVT::f16: 16756 return Subtarget.hasStdExtZfhOrZfhmin(); 16757 case MVT::f32: 16758 return Subtarget.hasStdExtF(); 16759 case MVT::f64: 16760 return Subtarget.hasStdExtD(); 16761 default: 16762 return false; 16763 } 16764 } 16765 16766 unsigned RISCVTargetLowering::getJumpTableEncoding() const { 16767 // If we are using the small code model, we can reduce size of jump table 16768 // entry to 4 bytes. 
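  // For example, on RV64 with the small code model each entry becomes a 32-bit
  // absolute reference to the destination block (EK_Custom32, materialized by
  // LowerCustomJumpTableEntry below) rather than a full 64-bit pointer.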
16769 if (Subtarget.is64Bit() && !isPositionIndependent() && 16770 getTargetMachine().getCodeModel() == CodeModel::Small) { 16771 return MachineJumpTableInfo::EK_Custom32; 16772 } 16773 return TargetLowering::getJumpTableEncoding(); 16774 } 16775 16776 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( 16777 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, 16778 unsigned uid, MCContext &Ctx) const { 16779 assert(Subtarget.is64Bit() && !isPositionIndependent() && 16780 getTargetMachine().getCodeModel() == CodeModel::Small); 16781 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); 16782 } 16783 16784 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { 16785 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 16786 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 16787 // a power of two as well. 16788 // FIXME: This doesn't work for zve32, but that's already broken 16789 // elsewhere for the same reason. 16790 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); 16791 static_assert(RISCV::RVVBitsPerBlock == 64, 16792 "RVVBitsPerBlock changed, audit needed"); 16793 return true; 16794 } 16795 16796 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, 16797 SDValue &Offset, 16798 ISD::MemIndexedMode &AM, 16799 bool &IsInc, 16800 SelectionDAG &DAG) const { 16801 // Target does not support indexed loads. 16802 if (!Subtarget.hasVendorXTHeadMemIdx()) 16803 return false; 16804 16805 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) 16806 return false; 16807 16808 Base = Op->getOperand(0); 16809 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { 16810 int64_t RHSC = RHS->getSExtValue(); 16811 if (Op->getOpcode() == ISD::SUB) 16812 RHSC = -(uint64_t)RHSC; 16813 16814 // The constants that can be encoded in the THeadMemIdx instructions 16815 // are of the form (sign_extend(imm5) << imm2). 16816 bool isLegalIndexedOffset = false; 16817 for (unsigned i = 0; i < 4; i++) 16818 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { 16819 isLegalIndexedOffset = true; 16820 break; 16821 } 16822 16823 if (!isLegalIndexedOffset) 16824 return false; 16825 16826 IsInc = (Op->getOpcode() == ISD::ADD); 16827 Offset = Op->getOperand(1); 16828 return true; 16829 } 16830 16831 return false; 16832 } 16833 16834 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 16835 SDValue &Offset, 16836 ISD::MemIndexedMode &AM, 16837 SelectionDAG &DAG) const { 16838 EVT VT; 16839 SDValue Ptr; 16840 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 16841 VT = LD->getMemoryVT(); 16842 Ptr = LD->getBasePtr(); 16843 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 16844 VT = ST->getMemoryVT(); 16845 Ptr = ST->getBasePtr(); 16846 } else 16847 return false; 16848 16849 bool IsInc; 16850 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) 16851 return false; 16852 16853 AM = IsInc ? 
ISD::PRE_INC : ISD::PRE_DEC; 16854 return true; 16855 } 16856 16857 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 16858 SDValue &Base, 16859 SDValue &Offset, 16860 ISD::MemIndexedMode &AM, 16861 SelectionDAG &DAG) const { 16862 EVT VT; 16863 SDValue Ptr; 16864 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 16865 VT = LD->getMemoryVT(); 16866 Ptr = LD->getBasePtr(); 16867 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 16868 VT = ST->getMemoryVT(); 16869 Ptr = ST->getBasePtr(); 16870 } else 16871 return false; 16872 16873 bool IsInc; 16874 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) 16875 return false; 16876 // Post-indexing updates the base, so it's not a valid transform 16877 // if that's not the same as the load's pointer. 16878 if (Ptr != Base) 16879 return false; 16880 16881 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; 16882 return true; 16883 } 16884 16885 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 16886 EVT VT) const { 16887 EVT SVT = VT.getScalarType(); 16888 16889 if (!SVT.isSimple()) 16890 return false; 16891 16892 switch (SVT.getSimpleVT().SimpleTy) { 16893 case MVT::f16: 16894 return VT.isVector() ? Subtarget.hasVInstructionsF16() 16895 : Subtarget.hasStdExtZfhOrZhinx(); 16896 case MVT::f32: 16897 return Subtarget.hasStdExtFOrZfinx(); 16898 case MVT::f64: 16899 return Subtarget.hasStdExtDOrZdinx(); 16900 default: 16901 break; 16902 } 16903 16904 return false; 16905 } 16906 16907 Register RISCVTargetLowering::getExceptionPointerRegister( 16908 const Constant *PersonalityFn) const { 16909 return RISCV::X10; 16910 } 16911 16912 Register RISCVTargetLowering::getExceptionSelectorRegister( 16913 const Constant *PersonalityFn) const { 16914 return RISCV::X11; 16915 } 16916 16917 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 16918 // Return false to suppress the unnecessary extensions if the LibCall 16919 // arguments or return value is a float narrower than XLEN on a soft FP ABI. 16920 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && 16921 Type.getSizeInBits() < Subtarget.getXLen())) 16922 return false; 16923 16924 return true; 16925 } 16926 16927 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 16928 if (Subtarget.is64Bit() && Type == MVT::i32) 16929 return true; 16930 16931 return IsSigned; 16932 } 16933 16934 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 16935 SDValue C) const { 16936 // Check integral scalar types. 16937 const bool HasExtMOrZmmul = 16938 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); 16939 if (!VT.isScalarInteger()) 16940 return false; 16941 16942 // Omit the optimization if the sub target has the M extension and the data 16943 // size exceeds XLen. 16944 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) 16945 return false; 16946 16947 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 16948 // Break the MUL to a SLLI and an ADD/SUB. 16949 const APInt &Imm = ConstNode->getAPIntValue(); 16950 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 16951 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 16952 return true; 16953 16954 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 
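  // For example, with Zba: x * 4100 = (x << 12) + (x << 2), i.e.
  //   slli t0, x, 12
  //   sh2add dst, x, t0
  // since 4100 is not a simm12 but 4100 - 4 = 4096 is a power of two.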
16955 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && 16956 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 16957 (Imm - 8).isPowerOf2())) 16958 return true; 16959 16960 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 16961 // a pair of LUI/ADDI. 16962 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 && 16963 ConstNode->hasOneUse()) { 16964 APInt ImmS = Imm.ashr(Imm.countr_zero()); 16965 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 16966 (1 - ImmS).isPowerOf2()) 16967 return true; 16968 } 16969 } 16970 16971 return false; 16972 } 16973 16974 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, 16975 SDValue ConstNode) const { 16976 // Let the DAGCombiner decide for vectors. 16977 EVT VT = AddNode.getValueType(); 16978 if (VT.isVector()) 16979 return true; 16980 16981 // Let the DAGCombiner decide for larger types. 16982 if (VT.getScalarSizeInBits() > Subtarget.getXLen()) 16983 return true; 16984 16985 // It is worse if c1 is simm12 while c1*c2 is not. 16986 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); 16987 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); 16988 const APInt &C1 = C1Node->getAPIntValue(); 16989 const APInt &C2 = C2Node->getAPIntValue(); 16990 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) 16991 return false; 16992 16993 // Default to true and let the DAGCombiner decide. 16994 return true; 16995 } 16996 16997 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 16998 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 16999 unsigned *Fast) const { 17000 if (!VT.isVector()) { 17001 if (Fast) 17002 *Fast = Subtarget.enableUnalignedScalarMem(); 17003 return Subtarget.enableUnalignedScalarMem(); 17004 } 17005 17006 // All vector implementations must support element alignment 17007 EVT ElemVT = VT.getVectorElementType(); 17008 if (Alignment >= ElemVT.getStoreSize()) { 17009 if (Fast) 17010 *Fast = 1; 17011 return true; 17012 } 17013 17014 // Note: We lower an unmasked unaligned vector access to an equally sized 17015 // e8 element type access. Given this, we effectively support all unmasked 17016 // misaligned accesses. TODO: Work through the codegen implications of 17017 // allowing such accesses to be formed, and considered fast. 17018 if (Fast) 17019 *Fast = Subtarget.enableUnalignedVectorMem(); 17020 return Subtarget.enableUnalignedVectorMem(); 17021 } 17022 17023 bool RISCVTargetLowering::splitValueIntoRegisterParts( 17024 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 17025 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 17026 bool IsABIRegCopy = CC.has_value(); 17027 EVT ValueVT = Val.getValueType(); 17028 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 17029 PartVT == MVT::f32) { 17030 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float 17031 // nan, and cast to f32. 
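  // For example, f16 1.0 (0x3C00) is passed as the f32 bit pattern 0xFFFF3C00,
  // a NaN whose low 16 bits hold the original half, matching the NaN-boxing
  // convention for scalars narrower than the FP register width.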
17032 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); 17033 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); 17034 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, 17035 DAG.getConstant(0xFFFF0000, DL, MVT::i32)); 17036 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 17037 Parts[0] = Val; 17038 return true; 17039 } 17040 17041 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 17042 LLVMContext &Context = *DAG.getContext(); 17043 EVT ValueEltVT = ValueVT.getVectorElementType(); 17044 EVT PartEltVT = PartVT.getVectorElementType(); 17045 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); 17046 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); 17047 if (PartVTBitSize % ValueVTBitSize == 0) { 17048 assert(PartVTBitSize >= ValueVTBitSize); 17049 // If the element types are different, bitcast to the same element type of 17050 // PartVT first. 17051 // Give an example here, we want copy a <vscale x 1 x i8> value to 17052 // <vscale x 4 x i16>. 17053 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert 17054 // subvector, then we can bitcast to <vscale x 4 x i16>. 17055 if (ValueEltVT != PartEltVT) { 17056 if (PartVTBitSize > ValueVTBitSize) { 17057 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); 17058 assert(Count != 0 && "The number of element should not be zero."); 17059 EVT SameEltTypeVT = 17060 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); 17061 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT, 17062 DAG.getUNDEF(SameEltTypeVT), Val, 17063 DAG.getVectorIdxConstant(0, DL)); 17064 } 17065 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 17066 } else { 17067 Val = 17068 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), 17069 Val, DAG.getVectorIdxConstant(0, DL)); 17070 } 17071 Parts[0] = Val; 17072 return true; 17073 } 17074 } 17075 return false; 17076 } 17077 17078 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( 17079 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 17080 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 17081 bool IsABIRegCopy = CC.has_value(); 17082 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 17083 PartVT == MVT::f32) { 17084 SDValue Val = Parts[0]; 17085 17086 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. 17087 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); 17088 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); 17089 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 17090 return Val; 17091 } 17092 17093 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 17094 LLVMContext &Context = *DAG.getContext(); 17095 SDValue Val = Parts[0]; 17096 EVT ValueEltVT = ValueVT.getVectorElementType(); 17097 EVT PartEltVT = PartVT.getVectorElementType(); 17098 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); 17099 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); 17100 if (PartVTBitSize % ValueVTBitSize == 0) { 17101 assert(PartVTBitSize >= ValueVTBitSize); 17102 EVT SameEltTypeVT = ValueVT; 17103 // If the element types are different, convert it to the same element type 17104 // of PartVT. 17105 // Give an example here, we want copy a <vscale x 1 x i8> value from 17106 // <vscale x 4 x i16>. 17107 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first, 17108 // then we can extract <vscale x 1 x i8>. 
17109       if (ValueEltVT != PartEltVT) {
17110         unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17111         assert(Count != 0 && "The number of elements should not be zero.");
17112         SameEltTypeVT =
17113             EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17114         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
17115       }
17116       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
17117                         DAG.getVectorIdxConstant(0, DL));
17118       return Val;
17119     }
17120   }
17121   return SDValue();
17122 }
17123 
17124 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17125   // When aggressively optimizing for code size, we prefer to use a div
17126   // instruction, as it is usually smaller than the alternative sequence.
17127   // TODO: Add vector division?
17128   bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17129   return OptSize && !VT.isVector();
17130 }
17131 
17132 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
17133   // Scalarizing a splatted zero_extend or sign_extend can prevent it from
17134   // being matched to a widening vector instruction, so keep these as splats.
17135   unsigned Opc = N->getOpcode();
17136   if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
17137     return false;
17138   return true;
17139 }
17140 
17141 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
17142   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17143   Function *ThreadPointerFunc =
17144       Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17145   return IRB.CreatePointerCast(
17146       IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
17147                              IRB.CreateCall(ThreadPointerFunc), Offset),
17148       IRB.getInt8PtrTy()->getPointerTo(0));
17149 }
17150 
17151 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17152   // Fuchsia provides a fixed TLS slot for the stack cookie.
17153   // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17154   if (Subtarget.isTargetFuchsia())
17155     return useTpOffset(IRB, -0x10);
17156 
17157   return TargetLowering::getIRStackGuard(IRB);
17158 }
17159 
17160 bool RISCVTargetLowering::isLegalInterleavedAccessType(
17161     VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
17162     const DataLayout &DL) const {
17163   EVT VT = getValueType(DL, VTy);
17164   // Don't lower vlseg/vsseg for vector types that can't be split.
17165   if (!isTypeLegal(VT))
17166     return false;
17167 
17168   if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
17169       !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
17170                                       Alignment))
17171     return false;
17172 
17173   MVT ContainerVT = VT.getSimpleVT();
17174 
17175   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17176     if (!Subtarget.useRVVForFixedLengthVectors())
17177       return false;
17178     // Sometimes the interleaved access pass picks up splats as interleaves of
17179     // one element. Don't lower these.
17180     if (FVTy->getNumElements() < 2)
17181       return false;
17182 
17183     ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
17184   }
17185 
17186   // Need to make sure that EMUL * NFIELDS ≤ 8
17187   auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
17188   if (Fractional)
17189     return true;
17190   return Factor * LMUL <= 8;
17191 }
17192 
17193 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
17194                                                   Align Alignment) const {
17195   if (!Subtarget.hasVInstructions())
17196     return false;
17197 
17198   // Only support fixed vectors if we know the minimum vector size.
17199 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors()) 17200 return false; 17201 17202 EVT ScalarType = DataType.getScalarType(); 17203 if (!isLegalElementTypeForRVV(ScalarType)) 17204 return false; 17205 17206 if (!Subtarget.enableUnalignedVectorMem() && 17207 Alignment < ScalarType.getStoreSize()) 17208 return false; 17209 17210 return true; 17211 } 17212 17213 static const Intrinsic::ID FixedVlsegIntrIds[] = { 17214 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, 17215 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, 17216 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, 17217 Intrinsic::riscv_seg8_load}; 17218 17219 /// Lower an interleaved load into a vlsegN intrinsic. 17220 /// 17221 /// E.g. Lower an interleaved load (Factor = 2): 17222 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr 17223 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements 17224 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements 17225 /// 17226 /// Into: 17227 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( 17228 /// %ptr, i64 4) 17229 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 17230 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 17231 bool RISCVTargetLowering::lowerInterleavedLoad( 17232 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, 17233 ArrayRef<unsigned> Indices, unsigned Factor) const { 17234 IRBuilder<> Builder(LI); 17235 17236 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); 17237 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), 17238 LI->getPointerAddressSpace(), 17239 LI->getModule()->getDataLayout())) 17240 return false; 17241 17242 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 17243 17244 Function *VlsegNFunc = 17245 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2], 17246 {VTy, LI->getPointerOperandType(), XLenTy}); 17247 17248 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 17249 17250 CallInst *VlsegN = 17251 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); 17252 17253 for (unsigned i = 0; i < Shuffles.size(); i++) { 17254 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); 17255 Shuffles[i]->replaceAllUsesWith(SubVec); 17256 } 17257 17258 return true; 17259 } 17260 17261 static const Intrinsic::ID FixedVssegIntrIds[] = { 17262 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, 17263 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, 17264 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, 17265 Intrinsic::riscv_seg8_store}; 17266 17267 /// Lower an interleaved store into a vssegN intrinsic. 17268 /// 17269 /// E.g. Lower an interleaved store (Factor = 3): 17270 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 17271 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 17272 /// store <12 x i32> %i.vec, <12 x i32>* %ptr 17273 /// 17274 /// Into: 17275 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> 17276 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> 17277 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> 17278 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, 17279 /// %ptr, i32 4) 17280 /// 17281 /// Note that the new shufflevectors will be removed and we'll only generate one 17282 /// vsseg3 instruction in CodeGen. 
17283 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, 17284 ShuffleVectorInst *SVI, 17285 unsigned Factor) const { 17286 IRBuilder<> Builder(SI); 17287 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); 17288 // Given SVI : <n*factor x ty>, then VTy : <n x ty> 17289 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), 17290 ShuffleVTy->getNumElements() / Factor); 17291 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), 17292 SI->getPointerAddressSpace(), 17293 SI->getModule()->getDataLayout())) 17294 return false; 17295 17296 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 17297 17298 Function *VssegNFunc = 17299 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2], 17300 {VTy, SI->getPointerOperandType(), XLenTy}); 17301 17302 auto Mask = SVI->getShuffleMask(); 17303 SmallVector<Value *, 10> Ops; 17304 17305 for (unsigned i = 0; i < Factor; i++) { 17306 Value *Shuffle = Builder.CreateShuffleVector( 17307 SVI->getOperand(0), SVI->getOperand(1), 17308 createSequentialMask(Mask[i], VTy->getNumElements(), 0)); 17309 Ops.push_back(Shuffle); 17310 } 17311 // This VL should be OK (should be executable in one vsseg instruction, 17312 // potentially under larger LMULs) because we checked that the fixed vector 17313 // type fits in isLegalInterleavedAccessType 17314 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 17315 Ops.append({SI->getPointerOperand(), VL}); 17316 17317 Builder.CreateCall(VssegNFunc, Ops); 17318 17319 return true; 17320 } 17321 17322 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, 17323 LoadInst *LI) const { 17324 assert(LI->isSimple()); 17325 IRBuilder<> Builder(LI); 17326 17327 // Only deinterleave2 supported at present. 
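  // For example, a deinterleave2 of a loaded <vscale x 8 x i32> is lowered to a
  // llvm.riscv.vlseg2 call producing two <vscale x 4 x i32> results (scalable
  // path below); fixed-length vectors reuse the riscv.seg2.load intrinsic.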
17328 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2) 17329 return false; 17330 17331 unsigned Factor = 2; 17332 17333 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType()); 17334 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0)); 17335 17336 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), 17337 LI->getPointerAddressSpace(), 17338 LI->getModule()->getDataLayout())) 17339 return false; 17340 17341 Function *VlsegNFunc; 17342 Value *VL; 17343 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 17344 SmallVector<Value *, 10> Ops; 17345 17346 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 17347 VlsegNFunc = Intrinsic::getDeclaration( 17348 LI->getModule(), FixedVlsegIntrIds[Factor - 2], 17349 {ResVTy, LI->getPointerOperandType(), XLenTy}); 17350 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 17351 } else { 17352 static const Intrinsic::ID IntrIds[] = { 17353 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 17354 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 17355 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 17356 Intrinsic::riscv_vlseg8}; 17357 17358 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], 17359 {ResVTy, XLenTy}); 17360 VL = Constant::getAllOnesValue(XLenTy); 17361 Ops.append(Factor, PoisonValue::get(ResVTy)); 17362 } 17363 17364 Ops.append({LI->getPointerOperand(), VL}); 17365 17366 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); 17367 DI->replaceAllUsesWith(Vlseg); 17368 17369 return true; 17370 } 17371 17372 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, 17373 StoreInst *SI) const { 17374 assert(SI->isSimple()); 17375 IRBuilder<> Builder(SI); 17376 17377 // Only interleave2 supported at present. 
17378 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2) 17379 return false; 17380 17381 unsigned Factor = 2; 17382 17383 VectorType *VTy = cast<VectorType>(II->getType()); 17384 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType()); 17385 17386 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), 17387 SI->getPointerAddressSpace(), 17388 SI->getModule()->getDataLayout())) 17389 return false; 17390 17391 Function *VssegNFunc; 17392 Value *VL; 17393 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 17394 17395 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 17396 VssegNFunc = Intrinsic::getDeclaration( 17397 SI->getModule(), FixedVssegIntrIds[Factor - 2], 17398 {InVTy, SI->getPointerOperandType(), XLenTy}); 17399 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 17400 } else { 17401 static const Intrinsic::ID IntrIds[] = { 17402 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 17403 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 17404 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 17405 Intrinsic::riscv_vsseg8}; 17406 17407 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], 17408 {InVTy, XLenTy}); 17409 VL = Constant::getAllOnesValue(XLenTy); 17410 } 17411 17412 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1), 17413 SI->getPointerOperand(), VL}); 17414 17415 return true; 17416 } 17417 17418 MachineInstr * 17419 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, 17420 MachineBasicBlock::instr_iterator &MBBI, 17421 const TargetInstrInfo *TII) const { 17422 assert(MBBI->isCall() && MBBI->getCFIType() && 17423 "Invalid call instruction for a KCFI check"); 17424 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, 17425 MBBI->getOpcode())); 17426 17427 MachineOperand &Target = MBBI->getOperand(0); 17428 Target.setIsRenamable(false); 17429 17430 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK)) 17431 .addReg(Target.getReg()) 17432 .addImm(MBBI->getCFIType()) 17433 .getInstr(); 17434 } 17435 17436 #define GET_REGISTER_MATCHER 17437 #include "RISCVGenAsmMatcher.inc" 17438 17439 Register 17440 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 17441 const MachineFunction &MF) const { 17442 Register Reg = MatchRegisterAltName(RegName); 17443 if (Reg == RISCV::NoRegister) 17444 Reg = MatchRegisterName(RegName); 17445 if (Reg == RISCV::NoRegister) 17446 report_fatal_error( 17447 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 17448 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 17449 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 17450 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 17451 StringRef(RegName) + "\".")); 17452 return Reg; 17453 } 17454 17455 MachineMemOperand::Flags 17456 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { 17457 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal); 17458 17459 if (NontemporalInfo == nullptr) 17460 return MachineMemOperand::MONone; 17461 17462 // 1 for default value work as __RISCV_NTLH_ALL 17463 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE 17464 // 3 -> __RISCV_NTLH_ALL_PRIVATE 17465 // 4 -> __RISCV_NTLH_INNERMOST_SHARED 17466 // 5 -> __RISCV_NTLH_ALL 17467 int NontemporalLevel = 5; 17468 const MDNode *RISCVNontemporalInfo = 17469 I.getMetadata("riscv-nontemporal-domain"); 17470 if (RISCVNontemporalInfo != nullptr) 17471 
NontemporalLevel = 17472 cast<ConstantInt>( 17473 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0)) 17474 ->getValue()) 17475 ->getZExtValue(); 17476 17477 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) && 17478 "RISC-V target doesn't support this non-temporal domain."); 17479 17480 NontemporalLevel -= 2; 17481 MachineMemOperand::Flags Flags = MachineMemOperand::MONone; 17482 if (NontemporalLevel & 0b1) 17483 Flags |= MONontemporalBit0; 17484 if (NontemporalLevel & 0b10) 17485 Flags |= MONontemporalBit1; 17486 17487 return Flags; 17488 } 17489 17490 MachineMemOperand::Flags 17491 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { 17492 17493 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); 17494 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; 17495 TargetFlags |= (NodeFlags & MONontemporalBit0); 17496 TargetFlags |= (NodeFlags & MONontemporalBit1); 17497 17498 return TargetFlags; 17499 } 17500 17501 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( 17502 const MemSDNode &NodeX, const MemSDNode &NodeY) const { 17503 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); 17504 } 17505 17506 namespace llvm::RISCVVIntrinsicsTable { 17507 17508 #define GET_RISCVVIntrinsicsTable_IMPL 17509 #include "RISCVGenSearchableTables.inc" 17510 17511 } // namespace llvm::RISCVVIntrinsicsTable 17512