1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/MemoryLocation.h" 24 #include "llvm/Analysis/VectorUtils.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineJumpTableInfo.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 32 #include "llvm/CodeGen/ValueTypes.h" 33 #include "llvm/IR/DiagnosticInfo.h" 34 #include "llvm/IR/DiagnosticPrinter.h" 35 #include "llvm/IR/IRBuilder.h" 36 #include "llvm/IR/Instructions.h" 37 #include "llvm/IR/IntrinsicsRISCV.h" 38 #include "llvm/IR/PatternMatch.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/InstructionCost.h" 43 #include "llvm/Support/KnownBits.h" 44 #include "llvm/Support/MathExtras.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include <optional> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "riscv-lower" 51 52 STATISTIC(NumTailCalls, "Number of tail calls"); 53 54 static cl::opt<unsigned> ExtensionMaxWebSize( 55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden, 56 cl::desc("Give the maximum size (in number of nodes) of the web of " 57 "instructions that we will consider for VW expansion"), 58 cl::init(18)); 59 60 static cl::opt<bool> 61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, 62 cl::desc("Allow the formation of VW_W operations (e.g., " 63 "VWADD_W) with splat constants"), 64 cl::init(false)); 65 66 static cl::opt<unsigned> NumRepeatedDivisors( 67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, 68 cl::desc("Set the minimum number of repetitions of a divisor to allow " 69 "transformation to multiplications by the reciprocal"), 70 cl::init(2)); 71 72 static cl::opt<int> 73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, 74 cl::desc("Give the maximum number of instructions that we will " 75 "use for creating a floating-point immediate value"), 76 cl::init(2)); 77 78 static cl::opt<bool> 79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, 80 cl::desc("Make i32 a legal type for SelectionDAG on RV64.")); 81 82 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 83 const RISCVSubtarget &STI) 84 : TargetLowering(TM), Subtarget(STI) { 85 86 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 87 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 88 89 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 90 !Subtarget.hasStdExtF()) { 91 errs() << "Hard-float 'f' ABI can't be used 
for a target that " 92 "doesn't support the F instruction set extension (ignoring " 93 "target-abi)\n"; 94 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 95 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 96 !Subtarget.hasStdExtD()) { 97 errs() << "Hard-float 'd' ABI can't be used for a target that " 98 "doesn't support the D instruction set extension (ignoring " 99 "target-abi)\n"; 100 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 101 } 102 103 switch (ABI) { 104 default: 105 report_fatal_error("Don't know how to lower this ABI"); 106 case RISCVABI::ABI_ILP32: 107 case RISCVABI::ABI_ILP32E: 108 case RISCVABI::ABI_LP64E: 109 case RISCVABI::ABI_ILP32F: 110 case RISCVABI::ABI_ILP32D: 111 case RISCVABI::ABI_LP64: 112 case RISCVABI::ABI_LP64F: 113 case RISCVABI::ABI_LP64D: 114 break; 115 } 116 117 MVT XLenVT = Subtarget.getXLenVT(); 118 119 // Set up the register classes. 120 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 121 if (Subtarget.is64Bit() && RV64LegalI32) 122 addRegisterClass(MVT::i32, &RISCV::GPRRegClass); 123 124 if (Subtarget.hasStdExtZfhmin()) 125 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 126 if (Subtarget.hasStdExtZfbfmin()) 127 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass); 128 if (Subtarget.hasStdExtF()) 129 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 130 if (Subtarget.hasStdExtD()) 131 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 132 if (Subtarget.hasStdExtZhinxmin()) 133 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass); 134 if (Subtarget.hasStdExtZfinx()) 135 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass); 136 if (Subtarget.hasStdExtZdinx()) { 137 if (Subtarget.is64Bit()) 138 addRegisterClass(MVT::f64, &RISCV::GPRRegClass); 139 else 140 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); 141 } 142 143 static const MVT::SimpleValueType BoolVecVTs[] = { 144 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 145 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 146 static const MVT::SimpleValueType IntVecVTs[] = { 147 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 148 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 149 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 150 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 151 MVT::nxv4i64, MVT::nxv8i64}; 152 static const MVT::SimpleValueType F16VecVTs[] = { 153 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 154 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 155 static const MVT::SimpleValueType BF16VecVTs[] = { 156 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16, 157 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16}; 158 static const MVT::SimpleValueType F32VecVTs[] = { 159 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 160 static const MVT::SimpleValueType F64VecVTs[] = { 161 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 162 163 if (Subtarget.hasVInstructions()) { 164 auto addRegClassForRVV = [this](MVT VT) { 165 // Disable the smallest fractional LMUL types if ELEN is less than 166 // RVVBitsPerBlock. 
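      // For example, with RVVBitsPerBlock (64) and ELEN=32 (Zve32*), MinElts
      // below is 2, so types such as nxv1i8/nxv1i16/nxv1f16 with a minimum of
      // one element are skipped and never get a register class.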
167 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen(); 168 if (VT.getVectorMinNumElements() < MinElts) 169 return; 170 171 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 172 const TargetRegisterClass *RC; 173 if (Size <= RISCV::RVVBitsPerBlock) 174 RC = &RISCV::VRRegClass; 175 else if (Size == 2 * RISCV::RVVBitsPerBlock) 176 RC = &RISCV::VRM2RegClass; 177 else if (Size == 4 * RISCV::RVVBitsPerBlock) 178 RC = &RISCV::VRM4RegClass; 179 else if (Size == 8 * RISCV::RVVBitsPerBlock) 180 RC = &RISCV::VRM8RegClass; 181 else 182 llvm_unreachable("Unexpected size"); 183 184 addRegisterClass(VT, RC); 185 }; 186 187 for (MVT VT : BoolVecVTs) 188 addRegClassForRVV(VT); 189 for (MVT VT : IntVecVTs) { 190 if (VT.getVectorElementType() == MVT::i64 && 191 !Subtarget.hasVInstructionsI64()) 192 continue; 193 addRegClassForRVV(VT); 194 } 195 196 if (Subtarget.hasVInstructionsF16Minimal()) 197 for (MVT VT : F16VecVTs) 198 addRegClassForRVV(VT); 199 200 if (Subtarget.hasVInstructionsBF16()) 201 for (MVT VT : BF16VecVTs) 202 addRegClassForRVV(VT); 203 204 if (Subtarget.hasVInstructionsF32()) 205 for (MVT VT : F32VecVTs) 206 addRegClassForRVV(VT); 207 208 if (Subtarget.hasVInstructionsF64()) 209 for (MVT VT : F64VecVTs) 210 addRegClassForRVV(VT); 211 212 if (Subtarget.useRVVForFixedLengthVectors()) { 213 auto addRegClassForFixedVectors = [this](MVT VT) { 214 MVT ContainerVT = getContainerForFixedLengthVector(VT); 215 unsigned RCID = getRegClassIDForVecVT(ContainerVT); 216 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 217 addRegisterClass(VT, TRI.getRegClass(RCID)); 218 }; 219 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 220 if (useRVVForFixedLengthVectorVT(VT)) 221 addRegClassForFixedVectors(VT); 222 223 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 224 if (useRVVForFixedLengthVectorVT(VT)) 225 addRegClassForFixedVectors(VT); 226 } 227 } 228 229 // Compute derived properties from the register classes. 230 computeRegisterProperties(STI.getRegisterInfo()); 231 232 setStackPointerRegisterToSaveRestore(RISCV::X2); 233 234 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT, 235 MVT::i1, Promote); 236 // DAGCombiner can call isLoadExtLegal for types that aren't legal. 237 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, 238 MVT::i1, Promote); 239 240 // TODO: add all necessary setOperationAction calls. 
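  // Legalization actions used throughout this constructor:
  //   Legal   - selected directly by isel patterns.
  //   Custom  - routed to LowerOperation()/ReplaceNodeResults() in this file.
  //   Expand  - rewritten by the legalizer in terms of other operations.
  //   Promote - performed in a wider (or otherwise different) type first.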
241 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 242 243 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 244 setOperationAction(ISD::BR_CC, XLenVT, Expand); 245 if (RV64LegalI32 && Subtarget.is64Bit()) 246 setOperationAction(ISD::BR_CC, MVT::i32, Expand); 247 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 248 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 249 if (RV64LegalI32 && Subtarget.is64Bit()) 250 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 251 252 setCondCodeAction(ISD::SETLE, XLenVT, Expand); 253 setCondCodeAction(ISD::SETGT, XLenVT, Custom); 254 setCondCodeAction(ISD::SETGE, XLenVT, Expand); 255 setCondCodeAction(ISD::SETULE, XLenVT, Expand); 256 setCondCodeAction(ISD::SETUGT, XLenVT, Custom); 257 setCondCodeAction(ISD::SETUGE, XLenVT, Expand); 258 259 if (RV64LegalI32 && Subtarget.is64Bit()) 260 setOperationAction(ISD::SETCC, MVT::i32, Promote); 261 262 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); 263 264 setOperationAction(ISD::VASTART, MVT::Other, Custom); 265 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); 266 267 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 268 269 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 270 271 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb()) 272 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); 273 274 if (Subtarget.is64Bit()) { 275 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 276 277 if (!RV64LegalI32) { 278 setOperationAction(ISD::LOAD, MVT::i32, Custom); 279 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, 280 MVT::i32, Custom); 281 setOperationAction(ISD::SADDO, MVT::i32, Custom); 282 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, 283 MVT::i32, Custom); 284 } 285 } else { 286 setLibcallName( 287 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, 288 nullptr); 289 setLibcallName(RTLIB::MULO_I64, nullptr); 290 } 291 292 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { 293 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); 294 if (RV64LegalI32 && Subtarget.is64Bit()) 295 setOperationAction(ISD::MUL, MVT::i32, Promote); 296 } else if (Subtarget.is64Bit()) { 297 setOperationAction(ISD::MUL, MVT::i128, Custom); 298 if (!RV64LegalI32) 299 setOperationAction(ISD::MUL, MVT::i32, Custom); 300 } else { 301 setOperationAction(ISD::MUL, MVT::i64, Custom); 302 } 303 304 if (!Subtarget.hasStdExtM()) { 305 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, 306 XLenVT, Expand); 307 if (RV64LegalI32 && Subtarget.is64Bit()) 308 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32, 309 Promote); 310 } else if (Subtarget.is64Bit()) { 311 if (!RV64LegalI32) 312 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, 313 {MVT::i8, MVT::i16, MVT::i32}, Custom); 314 } 315 316 if (RV64LegalI32 && Subtarget.is64Bit()) { 317 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand); 318 setOperationAction( 319 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, 320 Expand); 321 } 322 323 setOperationAction( 324 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, 325 Expand); 326 327 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, 328 Custom); 329 330 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { 331 if (!RV64LegalI32 && Subtarget.is64Bit()) 332 setOperationAction({ISD::ROTL, 
ISD::ROTR}, MVT::i32, Custom); 333 } else if (Subtarget.hasVendorXTHeadBb()) { 334 if (Subtarget.is64Bit()) 335 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); 336 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom); 337 } else if (Subtarget.hasVendorXCVbitmanip()) { 338 setOperationAction(ISD::ROTL, XLenVT, Expand); 339 } else { 340 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); 341 if (RV64LegalI32 && Subtarget.is64Bit()) 342 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); 343 } 344 345 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 346 // pattern match it directly in isel. 347 setOperationAction(ISD::BSWAP, XLenVT, 348 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 349 Subtarget.hasVendorXTHeadBb()) 350 ? Legal 351 : Expand); 352 if (RV64LegalI32 && Subtarget.is64Bit()) 353 setOperationAction(ISD::BSWAP, MVT::i32, 354 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 355 Subtarget.hasVendorXTHeadBb()) 356 ? Promote 357 : Expand); 358 359 360 if (Subtarget.hasVendorXCVbitmanip()) { 361 setOperationAction(ISD::BITREVERSE, XLenVT, Legal); 362 } else { 363 // Zbkb can use rev8+brev8 to implement bitreverse. 364 setOperationAction(ISD::BITREVERSE, XLenVT, 365 Subtarget.hasStdExtZbkb() ? Custom : Expand); 366 } 367 368 if (Subtarget.hasStdExtZbb()) { 369 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, 370 Legal); 371 if (RV64LegalI32 && Subtarget.is64Bit()) 372 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32, 373 Promote); 374 375 if (Subtarget.is64Bit()) { 376 if (RV64LegalI32) 377 setOperationAction(ISD::CTTZ, MVT::i32, Legal); 378 else 379 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); 380 } 381 } else if (!Subtarget.hasVendorXCVbitmanip()) { 382 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand); 383 if (RV64LegalI32 && Subtarget.is64Bit()) 384 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand); 385 } 386 387 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 388 Subtarget.hasVendorXCVbitmanip()) { 389 // We need the custom lowering to make sure that the resulting sequence 390 // for the 32bit case is efficient on 64bit targets. 391 if (Subtarget.is64Bit()) { 392 if (RV64LegalI32) { 393 setOperationAction(ISD::CTLZ, MVT::i32, 394 Subtarget.hasStdExtZbb() ? Legal : Promote); 395 if (!Subtarget.hasStdExtZbb()) 396 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 397 } else 398 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); 399 } 400 } else { 401 setOperationAction(ISD::CTLZ, XLenVT, Expand); 402 if (RV64LegalI32 && Subtarget.is64Bit()) 403 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 404 } 405 406 if (!RV64LegalI32 && Subtarget.is64Bit() && 407 !Subtarget.hasShortForwardBranchOpt()) 408 setOperationAction(ISD::ABS, MVT::i32, Custom); 409 410 // We can use PseudoCCSUB to implement ABS. 
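  // That is, abs(x) stays Legal and selects to "res = x; if (x < 0) res = 0 - x",
  // with the conditional subtract later expanded to a short forward branch.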
411 if (Subtarget.hasShortForwardBranchOpt()) 412 setOperationAction(ISD::ABS, XLenVT, Legal); 413 414 if (!Subtarget.hasVendorXTHeadCondMov()) 415 setOperationAction(ISD::SELECT, XLenVT, Custom); 416 417 if (RV64LegalI32 && Subtarget.is64Bit()) 418 setOperationAction(ISD::SELECT, MVT::i32, Promote); 419 420 static const unsigned FPLegalNodeTypes[] = { 421 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, 422 ISD::LLRINT, ISD::LROUND, ISD::LLROUND, 423 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, 424 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, 425 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 426 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; 427 428 static const ISD::CondCode FPCCToExpand[] = { 429 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 430 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 431 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 432 433 static const unsigned FPOpToExpand[] = { 434 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, 435 ISD::FREM}; 436 437 static const unsigned FPRndMode[] = { 438 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 439 ISD::FROUNDEVEN}; 440 441 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 442 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 443 444 static const unsigned ZfhminZfbfminPromoteOps[] = { 445 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, 446 ISD::FSUB, ISD::FMUL, ISD::FMA, 447 ISD::FDIV, ISD::FSQRT, ISD::FABS, 448 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, 449 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 450 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, 451 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, 452 ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 453 ISD::FROUNDEVEN, ISD::SELECT}; 454 455 if (Subtarget.hasStdExtZfbfmin()) { 456 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 457 setOperationAction(ISD::BITCAST, MVT::bf16, Custom); 458 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom); 459 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); 460 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); 461 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); 462 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); 463 setOperationAction(ISD::BR_CC, MVT::bf16, Expand); 464 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); 465 setOperationAction(ISD::FREM, MVT::bf16, Promote); 466 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the 467 // DAGCombiner::visitFP_ROUND probably needs improvements first. 468 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); 469 } 470 471 if (Subtarget.hasStdExtZfhminOrZhinxmin()) { 472 if (Subtarget.hasStdExtZfhOrZhinx()) { 473 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); 474 setOperationAction(FPRndMode, MVT::f16, 475 Subtarget.hasStdExtZfa() ? Legal : Custom); 476 setOperationAction(ISD::SELECT, MVT::f16, Custom); 477 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); 478 } else { 479 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); 480 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, 481 ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, 482 MVT::f16, Legal); 483 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the 484 // DAGCombiner::visitFP_ROUND probably needs improvements first. 
485 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); 486 } 487 488 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); 489 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); 490 setCondCodeAction(FPCCToExpand, MVT::f16, Expand); 491 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 492 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 493 494 setOperationAction(ISD::FNEARBYINT, MVT::f16, 495 Subtarget.hasStdExtZfa() ? Legal : Promote); 496 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, 497 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, 498 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, 499 ISD::FLOG10}, 500 MVT::f16, Promote); 501 502 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have 503 // complete support for all operations in LegalizeDAG. 504 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, 505 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, 506 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, 507 ISD::STRICT_FTRUNC}, 508 MVT::f16, Promote); 509 510 // We need to custom promote this. 511 if (Subtarget.is64Bit()) 512 setOperationAction(ISD::FPOWI, MVT::i32, Custom); 513 514 if (!Subtarget.hasStdExtZfa()) 515 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom); 516 } 517 518 if (Subtarget.hasStdExtFOrZfinx()) { 519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); 520 setOperationAction(FPRndMode, MVT::f32, 521 Subtarget.hasStdExtZfa() ? Legal : Custom); 522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand); 523 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 524 setOperationAction(ISD::SELECT, MVT::f32, Custom); 525 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 526 setOperationAction(FPOpToExpand, MVT::f32, Expand); 527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 528 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); 530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand); 531 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); 532 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); 533 setOperationAction(ISD::FP_TO_BF16, MVT::f32, 534 Subtarget.isSoftFPABI() ? 
LibCall : Custom); 535 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); 536 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); 537 538 if (Subtarget.hasStdExtZfa()) 539 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); 540 else 541 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); 542 } 543 544 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit()) 545 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 546 547 if (Subtarget.hasStdExtDOrZdinx()) { 548 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); 549 550 if (Subtarget.hasStdExtZfa()) { 551 setOperationAction(FPRndMode, MVT::f64, Legal); 552 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); 553 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 554 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 555 } else { 556 if (Subtarget.is64Bit()) 557 setOperationAction(FPRndMode, MVT::f64, Custom); 558 559 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom); 560 } 561 562 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); 563 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); 564 setCondCodeAction(FPCCToExpand, MVT::f64, Expand); 565 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 566 setOperationAction(ISD::SELECT, MVT::f64, Custom); 567 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 568 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 569 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 570 setOperationAction(FPOpToExpand, MVT::f64, Expand); 571 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 572 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 573 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 574 setTruncStoreAction(MVT::f64, MVT::bf16, Expand); 575 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); 576 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); 577 setOperationAction(ISD::FP_TO_BF16, MVT::f64, 578 Subtarget.isSoftFPABI() ? LibCall : Custom); 579 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); 580 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 581 } 582 583 if (Subtarget.is64Bit()) { 584 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, 585 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, 586 MVT::i32, Custom); 587 setOperationAction(ISD::LROUND, MVT::i32, Custom); 588 } 589 590 if (Subtarget.hasStdExtFOrZfinx()) { 591 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, 592 Custom); 593 594 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 595 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 596 XLenVT, Legal); 597 598 if (RV64LegalI32 && Subtarget.is64Bit()) 599 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 600 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 601 MVT::i32, Legal); 602 603 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); 604 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); 605 } 606 607 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, 608 ISD::JumpTable}, 609 XLenVT, Custom); 610 611 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 612 613 if (Subtarget.is64Bit()) 614 setOperationAction(ISD::Constant, MVT::i64, Custom); 615 616 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 617 // Unfortunately this can't be determined just from the ISA naming string. 618 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 619 Subtarget.is64Bit() ? 
Legal : Custom); 620 621 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); 622 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 623 if (Subtarget.is64Bit()) 624 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 625 626 if (Subtarget.hasStdExtZicbop()) { 627 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 628 } 629 630 if (Subtarget.hasStdExtA()) { 631 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 632 setMinCmpXchgSizeInBits(32); 633 } else if (Subtarget.hasForcedAtomics()) { 634 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 635 } else { 636 setMaxAtomicSizeInBitsSupported(0); 637 } 638 639 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 640 641 setBooleanContents(ZeroOrOneBooleanContent); 642 643 if (Subtarget.hasVInstructions()) { 644 setBooleanVectorContents(ZeroOrOneBooleanContent); 645 646 setOperationAction(ISD::VSCALE, XLenVT, Custom); 647 if (RV64LegalI32 && Subtarget.is64Bit()) 648 setOperationAction(ISD::VSCALE, MVT::i32, Custom); 649 650 // RVV intrinsics may have illegal operands. 651 // We also need to custom legalize vmv.x.s. 652 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, 653 ISD::INTRINSIC_VOID}, 654 {MVT::i8, MVT::i16}, Custom); 655 if (Subtarget.is64Bit()) 656 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 657 MVT::i32, Custom); 658 else 659 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, 660 MVT::i64, Custom); 661 662 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 663 MVT::Other, Custom); 664 665 static const unsigned IntegerVPOps[] = { 666 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, 667 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, 668 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, 669 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, 670 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 671 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, 672 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, 673 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, 674 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, 675 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, 676 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, 677 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; 678 679 static const unsigned FloatingPointVPOps[] = { 680 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 681 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 682 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 683 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, 684 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, 685 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, 686 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, 687 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, 688 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, 689 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, 690 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE, 691 ISD::EXPERIMENTAL_VP_SPLICE}; 692 693 static const unsigned IntegerVecReduceOps[] = { 694 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, 695 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, 696 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; 697 698 static const unsigned FloatingPointVecReduceOps[] = { 699 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, 700 ISD::VECREDUCE_FMAX}; 701 702 if (!Subtarget.is64Bit()) { 703 // We must custom-lower certain vXi64 operations on RV32 due to the vector 704 // element type being 
illegal. 705 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 706 MVT::i64, Custom); 707 708 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); 709 710 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 711 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, 712 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, 713 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, 714 MVT::i64, Custom); 715 } 716 717 for (MVT VT : BoolVecVTs) { 718 if (!isTypeLegal(VT)) 719 continue; 720 721 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 722 723 // Mask VTs are custom-expanded into a series of standard nodes 724 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, 725 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, 726 ISD::SCALAR_TO_VECTOR}, 727 VT, Custom); 728 729 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 730 Custom); 731 732 setOperationAction(ISD::SELECT, VT, Custom); 733 setOperationAction( 734 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, 735 Expand); 736 737 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); 738 739 setOperationAction( 740 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 741 Custom); 742 743 setOperationAction( 744 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 745 Custom); 746 747 // RVV has native int->float & float->int conversions where the 748 // element type sizes are within one power-of-two of each other. Any 749 // wider distances between type sizes have to be lowered as sequences 750 // which progressively narrow the gap in stages. 751 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 752 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 753 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 754 ISD::STRICT_FP_TO_UINT}, 755 VT, Custom); 756 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 757 Custom); 758 759 // Expand all extending loads to types larger than this, and truncating 760 // stores from types larger than this. 761 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 762 setTruncStoreAction(VT, OtherVT, Expand); 763 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 764 OtherVT, Expand); 765 } 766 767 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 768 ISD::VP_TRUNCATE, ISD::VP_SETCC}, 769 VT, Custom); 770 771 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 772 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 773 774 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 775 776 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); 777 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); 778 779 setOperationPromotedToType( 780 ISD::VECTOR_SPLICE, VT, 781 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); 782 } 783 784 for (MVT VT : IntVecVTs) { 785 if (!isTypeLegal(VT)) 786 continue; 787 788 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 789 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 790 791 // Vectors implement MULHS/MULHU. 792 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); 793 794 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. 795 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) 796 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); 797 798 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, 799 Legal); 800 801 // Custom-lower extensions and truncations from/to mask types. 
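      // A mask-to-integer extend is emitted as a vmerge selecting between 0
      // and -1 (or 0 and 1 for zero-extend); an integer-to-mask truncate
      // becomes "vand.vi v, v, 1" followed by "vmsne.vi v0, v, 0".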
802 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, 803 VT, Custom); 804 805 // RVV has native int->float & float->int conversions where the 806 // element type sizes are within one power-of-two of each other. Any 807 // wider distances between type sizes have to be lowered as sequences 808 // which progressively narrow the gap in stages. 809 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 810 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 811 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 812 ISD::STRICT_FP_TO_UINT}, 813 VT, Custom); 814 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 815 Custom); 816 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); 817 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, 818 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, 819 VT, Legal); 820 821 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" 822 // nodes which truncate by one power of two at a time. 823 setOperationAction(ISD::TRUNCATE, VT, Custom); 824 825 // Custom-lower insert/extract operations to simplify patterns. 826 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 827 Custom); 828 829 // Custom-lower reduction operations to set up the corresponding custom 830 // nodes' operands. 831 setOperationAction(IntegerVecReduceOps, VT, Custom); 832 833 setOperationAction(IntegerVPOps, VT, Custom); 834 835 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 836 837 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 838 VT, Custom); 839 840 setOperationAction( 841 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 842 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 843 VT, Custom); 844 845 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 846 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 847 VT, Custom); 848 849 setOperationAction(ISD::SELECT, VT, Custom); 850 setOperationAction(ISD::SELECT_CC, VT, Expand); 851 852 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); 853 854 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 855 setTruncStoreAction(VT, OtherVT, Expand); 856 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 857 OtherVT, Expand); 858 } 859 860 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 861 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 862 863 // Splice 864 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); 865 866 if (Subtarget.hasStdExtZvkb()) { 867 setOperationAction(ISD::BSWAP, VT, Legal); 868 setOperationAction(ISD::VP_BSWAP, VT, Custom); 869 } else { 870 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand); 871 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); 872 } 873 874 if (Subtarget.hasStdExtZvbb()) { 875 setOperationAction(ISD::BITREVERSE, VT, Legal); 876 setOperationAction(ISD::VP_BITREVERSE, VT, Custom); 877 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 878 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 879 VT, Custom); 880 } else { 881 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand); 882 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand); 883 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 884 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 885 VT, Expand); 886 887 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 888 // range of f32. 
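        // The trick: convert the value (for CTTZ, first isolate the lowest
        // set bit with x & -x) to f32 and read the biased exponent field,
        // which yields floor(log2(x)) without a loop.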
889 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 890 if (isTypeLegal(FloatVT)) { 891 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 892 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ, 893 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF}, 894 VT, Custom); 895 } 896 } 897 } 898 899 // Expand various CCs to best match the RVV ISA, which natively supports UNE 900 // but no other unordered comparisons, and supports all ordered comparisons 901 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 902 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 903 // and we pattern-match those back to the "original", swapping operands once 904 // more. This way we catch both operations and both "vf" and "fv" forms with 905 // fewer patterns. 906 static const ISD::CondCode VFPCCToExpand[] = { 907 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 908 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 909 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 910 }; 911 912 // TODO: support more ops. 913 static const unsigned ZvfhminPromoteOps[] = { 914 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, 915 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, 916 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, 917 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, 918 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM, 919 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 920 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; 921 922 // TODO: support more vp ops. 923 static const unsigned ZvfhminPromoteVPOps[] = { 924 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 925 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 926 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 927 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, 928 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, 929 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, 930 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, 931 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, 932 ISD::VP_FMAXIMUM}; 933 934 // Sets common operation actions on RVV floating-point vector types. 935 const auto SetCommonVFPActions = [&](MVT VT) { 936 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 937 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 938 // sizes are within one power-of-two of each other. Therefore conversions 939 // between vXf16 and vXf64 must be lowered as sequences which convert via 940 // vXf32. 941 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 942 // Custom-lower insert/extract operations to simplify patterns. 943 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 944 Custom); 945 // Expand various condition codes (explained above). 946 setCondCodeAction(VFPCCToExpand, VT, Expand); 947 948 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); 949 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); 950 951 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 952 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, 953 ISD::IS_FPCLASS}, 954 VT, Custom); 955 956 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 957 958 // Expand FP operations that need libcalls. 
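      // None of the operations below map to RVV instructions, and there are
      // no vector variants of the libm routines to fall back on.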
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                           VT, Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                           VT, Custom);
        setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
        // load/store
        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
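        // (Promoting nxv32f16 would require nxv32f32, i.e. an LMUL=16
        // register group, which does not exist; these ops are instead split
        // into two nxv16f16 halves and each half is promoted to nxv16f32.)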
1047 if (VT == MVT::nxv32f16) { 1048 setOperationAction(ZvfhminPromoteOps, VT, Custom); 1049 setOperationAction(ZvfhminPromoteVPOps, VT, Custom); 1050 continue; 1051 } 1052 // Add more promote ops. 1053 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1054 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1055 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1056 } 1057 } 1058 1059 if (Subtarget.hasVInstructionsF32()) { 1060 for (MVT VT : F32VecVTs) { 1061 if (!isTypeLegal(VT)) 1062 continue; 1063 SetCommonVFPActions(VT); 1064 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1065 } 1066 } 1067 1068 if (Subtarget.hasVInstructionsF64()) { 1069 for (MVT VT : F64VecVTs) { 1070 if (!isTypeLegal(VT)) 1071 continue; 1072 SetCommonVFPActions(VT); 1073 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1074 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); 1075 } 1076 } 1077 1078 if (Subtarget.useRVVForFixedLengthVectors()) { 1079 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 1080 if (!useRVVForFixedLengthVectorVT(VT)) 1081 continue; 1082 1083 // By default everything must be expanded. 1084 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1085 setOperationAction(Op, VT, Expand); 1086 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { 1087 setTruncStoreAction(VT, OtherVT, Expand); 1088 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 1089 OtherVT, Expand); 1090 } 1091 1092 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1093 // expansion to a build_vector of 0s. 1094 setOperationAction(ISD::UNDEF, VT, Custom); 1095 1096 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 1097 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 1098 Custom); 1099 1100 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT, 1101 Custom); 1102 1103 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 1104 VT, Custom); 1105 1106 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 1107 1108 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1109 1110 setOperationAction(ISD::SETCC, VT, Custom); 1111 1112 setOperationAction(ISD::SELECT, VT, Custom); 1113 1114 setOperationAction(ISD::TRUNCATE, VT, Custom); 1115 1116 setOperationAction(ISD::BITCAST, VT, Custom); 1117 1118 setOperationAction( 1119 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 1120 Custom); 1121 1122 setOperationAction( 1123 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 1124 Custom); 1125 1126 setOperationAction( 1127 { 1128 ISD::SINT_TO_FP, 1129 ISD::UINT_TO_FP, 1130 ISD::FP_TO_SINT, 1131 ISD::FP_TO_UINT, 1132 ISD::STRICT_SINT_TO_FP, 1133 ISD::STRICT_UINT_TO_FP, 1134 ISD::STRICT_FP_TO_SINT, 1135 ISD::STRICT_FP_TO_UINT, 1136 }, 1137 VT, Custom); 1138 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 1139 Custom); 1140 1141 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 1142 1143 // Operations below are different for between masks and other vectors. 
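      // Fixed-length i1 vectors legalize into RVV mask registers, so they
      // only get logical ops, compares and predication support here; the
      // `continue` below then skips the rest of the integer-vector setup.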
1144 if (VT.getVectorElementType() == MVT::i1) { 1145 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND, 1146 ISD::OR, ISD::XOR}, 1147 VT, Custom); 1148 1149 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 1150 ISD::VP_SETCC, ISD::VP_TRUNCATE}, 1151 VT, Custom); 1152 1153 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); 1154 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); 1155 continue; 1156 } 1157 1158 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to 1159 // it before type legalization for i64 vectors on RV32. It will then be 1160 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle. 1161 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs 1162 // improvements first. 1163 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { 1164 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 1165 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 1166 } 1167 1168 setOperationAction( 1169 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom); 1170 1171 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 1172 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 1173 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 1174 ISD::VP_SCATTER}, 1175 VT, Custom); 1176 1177 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, 1178 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, 1179 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL}, 1180 VT, Custom); 1181 1182 setOperationAction( 1183 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom); 1184 1185 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. 1186 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) 1187 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); 1188 1189 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, 1190 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, 1191 VT, Custom); 1192 1193 setOperationAction(ISD::VSELECT, VT, Custom); 1194 setOperationAction(ISD::SELECT_CC, VT, Expand); 1195 1196 setOperationAction( 1197 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); 1198 1199 // Custom-lower reduction operations to set up the corresponding custom 1200 // nodes' operands. 1201 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, 1202 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, 1203 ISD::VECREDUCE_UMIN}, 1204 VT, Custom); 1205 1206 setOperationAction(IntegerVPOps, VT, Custom); 1207 1208 if (Subtarget.hasStdExtZvkb()) 1209 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); 1210 1211 if (Subtarget.hasStdExtZvbb()) { 1212 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 1213 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, 1214 VT, Custom); 1215 } else { 1216 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 1217 // range of f32. 1218 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1219 if (isTypeLegal(FloatVT)) 1220 setOperationAction( 1221 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, 1222 Custom); 1223 } 1224 } 1225 1226 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 1227 // There are no extending loads or truncating stores. 1228 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { 1229 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 1230 setTruncStoreAction(VT, InnerVT, Expand); 1231 } 1232 1233 if (!useRVVForFixedLengthVectorVT(VT)) 1234 continue; 1235 1236 // By default everything must be expanded. 
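      // Start from a clean slate for each fixed-length FP vector type and
      // then re-enable only the operations RVV can actually handle below.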
1237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1238 setOperationAction(Op, VT, Expand); 1239 1240 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1241 // expansion to a build_vector of 0s. 1242 setOperationAction(ISD::UNDEF, VT, Custom); 1243 1244 if (VT.getVectorElementType() == MVT::f16 && 1245 !Subtarget.hasVInstructionsF16()) { 1246 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1247 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, 1248 Custom); 1249 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1250 setOperationAction( 1251 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, 1252 Custom); 1253 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, 1254 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, 1255 VT, Custom); 1256 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 1257 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 1258 VT, Custom); 1259 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1260 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1261 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1262 // Don't promote f16 vector operations to f32 if f32 vector type is 1263 // not legal. 1264 // TODO: could split the f16 vector into two vectors and do promotion. 1265 if (!isTypeLegal(F32VecVT)) 1266 continue; 1267 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1268 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1269 continue; 1270 } 1271 1272 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 1273 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 1274 Custom); 1275 1276 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, 1277 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT, 1278 ISD::EXTRACT_VECTOR_ELT}, 1279 VT, Custom); 1280 1281 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, 1282 ISD::MGATHER, ISD::MSCATTER}, 1283 VT, Custom); 1284 1285 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 1286 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 1287 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 1288 ISD::VP_SCATTER}, 1289 VT, Custom); 1290 1291 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, 1292 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, 1293 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, 1294 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, 1295 VT, Custom); 1296 1297 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1298 1299 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 1300 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, 1301 VT, Custom); 1302 1303 setCondCodeAction(VFPCCToExpand, VT, Expand); 1304 1305 setOperationAction(ISD::SETCC, VT, Custom); 1306 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); 1307 setOperationAction(ISD::SELECT_CC, VT, Expand); 1308 1309 setOperationAction(ISD::BITCAST, VT, Custom); 1310 1311 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 1312 1313 setOperationAction(FloatingPointVPOps, VT, Custom); 1314 1315 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, 1316 Custom); 1317 setOperationAction( 1318 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 1319 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, 1320 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, 1321 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, 1322 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, 1323 VT, Custom); 1324 } 1325 1326 // 
Custom-legalize bitcasts from fixed-length vectors to scalar types. 1327 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, 1328 Custom); 1329 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 1330 setOperationAction(ISD::BITCAST, MVT::f16, Custom); 1331 if (Subtarget.hasStdExtFOrZfinx()) 1332 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 1333 if (Subtarget.hasStdExtDOrZdinx()) 1334 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 1335 } 1336 } 1337 1338 if (Subtarget.hasStdExtA()) { 1339 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); 1340 if (RV64LegalI32 && Subtarget.is64Bit()) 1341 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 1342 } 1343 1344 if (Subtarget.hasForcedAtomics()) { 1345 // Force __sync libcalls to be emitted for atomic rmw/cas operations. 1346 setOperationAction( 1347 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, 1348 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, 1349 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, 1350 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, 1351 XLenVT, LibCall); 1352 } 1353 1354 if (Subtarget.hasVendorXTHeadMemIdx()) { 1355 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { 1356 setIndexedLoadAction(im, MVT::i8, Legal); 1357 setIndexedStoreAction(im, MVT::i8, Legal); 1358 setIndexedLoadAction(im, MVT::i16, Legal); 1359 setIndexedStoreAction(im, MVT::i16, Legal); 1360 setIndexedLoadAction(im, MVT::i32, Legal); 1361 setIndexedStoreAction(im, MVT::i32, Legal); 1362 1363 if (Subtarget.is64Bit()) { 1364 setIndexedLoadAction(im, MVT::i64, Legal); 1365 setIndexedStoreAction(im, MVT::i64, Legal); 1366 } 1367 } 1368 } 1369 1370 // Function alignments. 1371 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); 1372 setMinFunctionAlignment(FunctionAlignment); 1373 // Set preferred alignments. 
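  // (The minimum above is dictated by the encoding: 2-byte alignment is only
  // possible when the compressed C/Zca instructions are available; the
  // preferred values come from the subtarget's tuning information.)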
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
                       ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                         ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
                         ISD::INSERT_VECTOR_ELT});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      unsigned VF,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())
    return true;

  if (!IsScalable)
    return true;

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
    return true;

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
    return true;

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    return true;

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
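  // With RVVBitsPerBlock = 64 this works out to (64 / 8) * 8 = 64.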
1456 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; 1457 return VF > MaxVF || !isPowerOf2_32(VF); 1458 } 1459 1460 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1461 const CallInst &I, 1462 MachineFunction &MF, 1463 unsigned Intrinsic) const { 1464 auto &DL = I.getModule()->getDataLayout(); 1465 1466 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, 1467 bool IsUnitStrided) { 1468 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; 1469 Info.ptrVal = I.getArgOperand(PtrOp); 1470 Type *MemTy; 1471 if (IsStore) { 1472 // Store value is the first operand. 1473 MemTy = I.getArgOperand(0)->getType(); 1474 } else { 1475 // Use return type. If it's segment load, return type is a struct. 1476 MemTy = I.getType(); 1477 if (MemTy->isStructTy()) 1478 MemTy = MemTy->getStructElementType(0); 1479 } 1480 if (!IsUnitStrided) 1481 MemTy = MemTy->getScalarType(); 1482 1483 Info.memVT = getValueType(DL, MemTy); 1484 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8); 1485 Info.size = MemoryLocation::UnknownSize; 1486 Info.flags |= 1487 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; 1488 return true; 1489 }; 1490 1491 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) 1492 Info.flags |= MachineMemOperand::MONonTemporal; 1493 1494 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); 1495 switch (Intrinsic) { 1496 default: 1497 return false; 1498 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 1499 case Intrinsic::riscv_masked_atomicrmw_add_i32: 1500 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 1501 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 1502 case Intrinsic::riscv_masked_atomicrmw_max_i32: 1503 case Intrinsic::riscv_masked_atomicrmw_min_i32: 1504 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 1505 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 1506 case Intrinsic::riscv_masked_cmpxchg_i32: 1507 Info.opc = ISD::INTRINSIC_W_CHAIN; 1508 Info.memVT = MVT::i32; 1509 Info.ptrVal = I.getArgOperand(0); 1510 Info.offset = 0; 1511 Info.align = Align(4); 1512 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 1513 MachineMemOperand::MOVolatile; 1514 return true; 1515 case Intrinsic::riscv_masked_strided_load: 1516 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, 1517 /*IsUnitStrided*/ false); 1518 case Intrinsic::riscv_masked_strided_store: 1519 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, 1520 /*IsUnitStrided*/ false); 1521 case Intrinsic::riscv_seg2_load: 1522 case Intrinsic::riscv_seg3_load: 1523 case Intrinsic::riscv_seg4_load: 1524 case Intrinsic::riscv_seg5_load: 1525 case Intrinsic::riscv_seg6_load: 1526 case Intrinsic::riscv_seg7_load: 1527 case Intrinsic::riscv_seg8_load: 1528 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, 1529 /*IsUnitStrided*/ false); 1530 case Intrinsic::riscv_seg2_store: 1531 case Intrinsic::riscv_seg3_store: 1532 case Intrinsic::riscv_seg4_store: 1533 case Intrinsic::riscv_seg5_store: 1534 case Intrinsic::riscv_seg6_store: 1535 case Intrinsic::riscv_seg7_store: 1536 case Intrinsic::riscv_seg8_store: 1537 // Operands are (vec, ..., vec, ptr, vl) 1538 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1539 /*IsStore*/ true, 1540 /*IsUnitStrided*/ false); 1541 case Intrinsic::riscv_vle: 1542 case Intrinsic::riscv_vle_mask: 1543 case Intrinsic::riscv_vleff: 1544 case Intrinsic::riscv_vleff_mask: 1545 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1546 /*IsStore*/ false, 1547 /*IsUnitStrided*/ true); 1548 case 
Intrinsic::riscv_vse: 1549 case Intrinsic::riscv_vse_mask: 1550 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1551 /*IsStore*/ true, 1552 /*IsUnitStrided*/ true); 1553 case Intrinsic::riscv_vlse: 1554 case Intrinsic::riscv_vlse_mask: 1555 case Intrinsic::riscv_vloxei: 1556 case Intrinsic::riscv_vloxei_mask: 1557 case Intrinsic::riscv_vluxei: 1558 case Intrinsic::riscv_vluxei_mask: 1559 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1560 /*IsStore*/ false, 1561 /*IsUnitStrided*/ false); 1562 case Intrinsic::riscv_vsse: 1563 case Intrinsic::riscv_vsse_mask: 1564 case Intrinsic::riscv_vsoxei: 1565 case Intrinsic::riscv_vsoxei_mask: 1566 case Intrinsic::riscv_vsuxei: 1567 case Intrinsic::riscv_vsuxei_mask: 1568 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1569 /*IsStore*/ true, 1570 /*IsUnitStrided*/ false); 1571 case Intrinsic::riscv_vlseg2: 1572 case Intrinsic::riscv_vlseg3: 1573 case Intrinsic::riscv_vlseg4: 1574 case Intrinsic::riscv_vlseg5: 1575 case Intrinsic::riscv_vlseg6: 1576 case Intrinsic::riscv_vlseg7: 1577 case Intrinsic::riscv_vlseg8: 1578 case Intrinsic::riscv_vlseg2ff: 1579 case Intrinsic::riscv_vlseg3ff: 1580 case Intrinsic::riscv_vlseg4ff: 1581 case Intrinsic::riscv_vlseg5ff: 1582 case Intrinsic::riscv_vlseg6ff: 1583 case Intrinsic::riscv_vlseg7ff: 1584 case Intrinsic::riscv_vlseg8ff: 1585 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1586 /*IsStore*/ false, 1587 /*IsUnitStrided*/ false); 1588 case Intrinsic::riscv_vlseg2_mask: 1589 case Intrinsic::riscv_vlseg3_mask: 1590 case Intrinsic::riscv_vlseg4_mask: 1591 case Intrinsic::riscv_vlseg5_mask: 1592 case Intrinsic::riscv_vlseg6_mask: 1593 case Intrinsic::riscv_vlseg7_mask: 1594 case Intrinsic::riscv_vlseg8_mask: 1595 case Intrinsic::riscv_vlseg2ff_mask: 1596 case Intrinsic::riscv_vlseg3ff_mask: 1597 case Intrinsic::riscv_vlseg4ff_mask: 1598 case Intrinsic::riscv_vlseg5ff_mask: 1599 case Intrinsic::riscv_vlseg6ff_mask: 1600 case Intrinsic::riscv_vlseg7ff_mask: 1601 case Intrinsic::riscv_vlseg8ff_mask: 1602 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1603 /*IsStore*/ false, 1604 /*IsUnitStrided*/ false); 1605 case Intrinsic::riscv_vlsseg2: 1606 case Intrinsic::riscv_vlsseg3: 1607 case Intrinsic::riscv_vlsseg4: 1608 case Intrinsic::riscv_vlsseg5: 1609 case Intrinsic::riscv_vlsseg6: 1610 case Intrinsic::riscv_vlsseg7: 1611 case Intrinsic::riscv_vlsseg8: 1612 case Intrinsic::riscv_vloxseg2: 1613 case Intrinsic::riscv_vloxseg3: 1614 case Intrinsic::riscv_vloxseg4: 1615 case Intrinsic::riscv_vloxseg5: 1616 case Intrinsic::riscv_vloxseg6: 1617 case Intrinsic::riscv_vloxseg7: 1618 case Intrinsic::riscv_vloxseg8: 1619 case Intrinsic::riscv_vluxseg2: 1620 case Intrinsic::riscv_vluxseg3: 1621 case Intrinsic::riscv_vluxseg4: 1622 case Intrinsic::riscv_vluxseg5: 1623 case Intrinsic::riscv_vluxseg6: 1624 case Intrinsic::riscv_vluxseg7: 1625 case Intrinsic::riscv_vluxseg8: 1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1627 /*IsStore*/ false, 1628 /*IsUnitStrided*/ false); 1629 case Intrinsic::riscv_vlsseg2_mask: 1630 case Intrinsic::riscv_vlsseg3_mask: 1631 case Intrinsic::riscv_vlsseg4_mask: 1632 case Intrinsic::riscv_vlsseg5_mask: 1633 case Intrinsic::riscv_vlsseg6_mask: 1634 case Intrinsic::riscv_vlsseg7_mask: 1635 case Intrinsic::riscv_vlsseg8_mask: 1636 case Intrinsic::riscv_vloxseg2_mask: 1637 case Intrinsic::riscv_vloxseg3_mask: 1638 case Intrinsic::riscv_vloxseg4_mask: 1639 case Intrinsic::riscv_vloxseg5_mask: 1640 case Intrinsic::riscv_vloxseg6_mask: 1641 case Intrinsic::riscv_vloxseg7_mask: 1642 case 
Intrinsic::riscv_vloxseg8_mask: 1643 case Intrinsic::riscv_vluxseg2_mask: 1644 case Intrinsic::riscv_vluxseg3_mask: 1645 case Intrinsic::riscv_vluxseg4_mask: 1646 case Intrinsic::riscv_vluxseg5_mask: 1647 case Intrinsic::riscv_vluxseg6_mask: 1648 case Intrinsic::riscv_vluxseg7_mask: 1649 case Intrinsic::riscv_vluxseg8_mask: 1650 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, 1651 /*IsStore*/ false, 1652 /*IsUnitStrided*/ false); 1653 case Intrinsic::riscv_vsseg2: 1654 case Intrinsic::riscv_vsseg3: 1655 case Intrinsic::riscv_vsseg4: 1656 case Intrinsic::riscv_vsseg5: 1657 case Intrinsic::riscv_vsseg6: 1658 case Intrinsic::riscv_vsseg7: 1659 case Intrinsic::riscv_vsseg8: 1660 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1661 /*IsStore*/ true, 1662 /*IsUnitStrided*/ false); 1663 case Intrinsic::riscv_vsseg2_mask: 1664 case Intrinsic::riscv_vsseg3_mask: 1665 case Intrinsic::riscv_vsseg4_mask: 1666 case Intrinsic::riscv_vsseg5_mask: 1667 case Intrinsic::riscv_vsseg6_mask: 1668 case Intrinsic::riscv_vsseg7_mask: 1669 case Intrinsic::riscv_vsseg8_mask: 1670 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1671 /*IsStore*/ true, 1672 /*IsUnitStrided*/ false); 1673 case Intrinsic::riscv_vssseg2: 1674 case Intrinsic::riscv_vssseg3: 1675 case Intrinsic::riscv_vssseg4: 1676 case Intrinsic::riscv_vssseg5: 1677 case Intrinsic::riscv_vssseg6: 1678 case Intrinsic::riscv_vssseg7: 1679 case Intrinsic::riscv_vssseg8: 1680 case Intrinsic::riscv_vsoxseg2: 1681 case Intrinsic::riscv_vsoxseg3: 1682 case Intrinsic::riscv_vsoxseg4: 1683 case Intrinsic::riscv_vsoxseg5: 1684 case Intrinsic::riscv_vsoxseg6: 1685 case Intrinsic::riscv_vsoxseg7: 1686 case Intrinsic::riscv_vsoxseg8: 1687 case Intrinsic::riscv_vsuxseg2: 1688 case Intrinsic::riscv_vsuxseg3: 1689 case Intrinsic::riscv_vsuxseg4: 1690 case Intrinsic::riscv_vsuxseg5: 1691 case Intrinsic::riscv_vsuxseg6: 1692 case Intrinsic::riscv_vsuxseg7: 1693 case Intrinsic::riscv_vsuxseg8: 1694 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1695 /*IsStore*/ true, 1696 /*IsUnitStrided*/ false); 1697 case Intrinsic::riscv_vssseg2_mask: 1698 case Intrinsic::riscv_vssseg3_mask: 1699 case Intrinsic::riscv_vssseg4_mask: 1700 case Intrinsic::riscv_vssseg5_mask: 1701 case Intrinsic::riscv_vssseg6_mask: 1702 case Intrinsic::riscv_vssseg7_mask: 1703 case Intrinsic::riscv_vssseg8_mask: 1704 case Intrinsic::riscv_vsoxseg2_mask: 1705 case Intrinsic::riscv_vsoxseg3_mask: 1706 case Intrinsic::riscv_vsoxseg4_mask: 1707 case Intrinsic::riscv_vsoxseg5_mask: 1708 case Intrinsic::riscv_vsoxseg6_mask: 1709 case Intrinsic::riscv_vsoxseg7_mask: 1710 case Intrinsic::riscv_vsoxseg8_mask: 1711 case Intrinsic::riscv_vsuxseg2_mask: 1712 case Intrinsic::riscv_vsuxseg3_mask: 1713 case Intrinsic::riscv_vsuxseg4_mask: 1714 case Intrinsic::riscv_vsuxseg5_mask: 1715 case Intrinsic::riscv_vsuxseg6_mask: 1716 case Intrinsic::riscv_vsuxseg7_mask: 1717 case Intrinsic::riscv_vsuxseg8_mask: 1718 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1719 /*IsStore*/ true, 1720 /*IsUnitStrided*/ false); 1721 } 1722 } 1723 1724 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1725 const AddrMode &AM, Type *Ty, 1726 unsigned AS, 1727 Instruction *I) const { 1728 // No global is ever allowed as a base. 1729 if (AM.BaseGV) 1730 return false; 1731 1732 // RVV instructions only support register addressing. 
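// For example, vle32.v cannot encode an address like (base + 16); the offset
// must first be folded into the base with a scalar add, so only a bare base
// register (Scale == 0, no BaseOffs) is accepted below.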
1733 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty)) 1734 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; 1735 1736 // Require a 12-bit signed offset. 1737 if (!isInt<12>(AM.BaseOffs)) 1738 return false; 1739 1740 switch (AM.Scale) { 1741 case 0: // "r+i" or just "i", depending on HasBaseReg. 1742 break; 1743 case 1: 1744 if (!AM.HasBaseReg) // allow "r+i". 1745 break; 1746 return false; // disallow "r+r" or "r+r+i". 1747 default: 1748 return false; 1749 } 1750 1751 return true; 1752 } 1753 1754 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1755 return isInt<12>(Imm); 1756 } 1757 1758 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1759 return isInt<12>(Imm); 1760 } 1761 1762 // On RV32, 64-bit integers are split into their high and low parts and held 1763 // in two different registers, so the trunc is free since the low register can 1764 // just be used. 1765 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of 1766 // isTruncateFree? 1767 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1768 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1769 return false; 1770 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1771 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1772 return (SrcBits == 64 && DestBits == 32); 1773 } 1774 1775 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1776 // We consider i64->i32 free on RV64 since we have good selection of W 1777 // instructions that make promoting operations back to i64 free in many cases. 1778 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || 1779 !DstVT.isInteger()) 1780 return false; 1781 unsigned SrcBits = SrcVT.getSizeInBits(); 1782 unsigned DestBits = DstVT.getSizeInBits(); 1783 return (SrcBits == 64 && DestBits == 32); 1784 } 1785 1786 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1787 // Zexts are free if they can be combined with a load. 1788 // Don't advertise i32->i64 zextload as being free for RV64. It interacts 1789 // poorly with type legalization of compares preferring sext. 1790 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 1791 EVT MemVT = LD->getMemoryVT(); 1792 if ((MemVT == MVT::i8 || MemVT == MVT::i16) && 1793 (LD->getExtensionType() == ISD::NON_EXTLOAD || 1794 LD->getExtensionType() == ISD::ZEXTLOAD)) 1795 return true; 1796 } 1797 1798 return TargetLowering::isZExtFree(Val, VT2); 1799 } 1800 1801 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 1802 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 1803 } 1804 1805 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { 1806 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32); 1807 } 1808 1809 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { 1810 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); 1811 } 1812 1813 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { 1814 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 1815 Subtarget.hasVendorXCVbitmanip(); 1816 } 1817 1818 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( 1819 const Instruction &AndI) const { 1820 // We expect to be able to match a bit extraction instruction if the Zbs 1821 // extension is supported and the mask is a power of two. 
However, we 1822 // conservatively return false if the mask would fit in an ANDI instruction, 1823 // on the basis that it's possible the sinking+duplication of the AND in 1824 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction 1825 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). 1826 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) 1827 return false; 1828 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); 1829 if (!Mask) 1830 return false; 1831 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2(); 1832 } 1833 1834 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { 1835 EVT VT = Y.getValueType(); 1836 1837 // FIXME: Support vectors once we have tests. 1838 if (VT.isVector()) 1839 return false; 1840 1841 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 1842 !isa<ConstantSDNode>(Y); 1843 } 1844 1845 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { 1846 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. 1847 if (Subtarget.hasStdExtZbs()) 1848 return X.getValueType().isScalarInteger(); 1849 auto *C = dyn_cast<ConstantSDNode>(Y); 1850 // XTheadBs provides th.tst (similar to bexti), if Y is a constant 1851 if (Subtarget.hasVendorXTHeadBs()) 1852 return C != nullptr; 1853 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position. 1854 return C && C->getAPIntValue().ule(10); 1855 } 1856 1857 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, 1858 EVT VT) const { 1859 // Only enable for rvv. 1860 if (!VT.isVector() || !Subtarget.hasVInstructions()) 1861 return false; 1862 1863 if (VT.isFixedLengthVector() && !isTypeLegal(VT)) 1864 return false; 1865 1866 return true; 1867 } 1868 1869 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 1870 Type *Ty) const { 1871 assert(Ty->isIntegerTy()); 1872 1873 unsigned BitSize = Ty->getIntegerBitWidth(); 1874 if (BitSize > Subtarget.getXLen()) 1875 return false; 1876 1877 // Fast path, assume 32-bit immediates are cheap. 1878 int64_t Val = Imm.getSExtValue(); 1879 if (isInt<32>(Val)) 1880 return true; 1881 1882 // A constant pool entry may be more aligned thant he load we're trying to 1883 // replace. If we don't support unaligned scalar mem, prefer the constant 1884 // pool. 1885 // TODO: Can the caller pass down the alignment? 1886 if (!Subtarget.hasFastUnalignedAccess()) 1887 return true; 1888 1889 // Prefer to keep the load if it would require many instructions. 1890 // This uses the same threshold we use for constant pools but doesn't 1891 // check useConstantPoolForLargeInts. 1892 // TODO: Should we keep the load only when we're definitely going to emit a 1893 // constant pool? 1894 1895 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); 1896 return Seq.size() <= Subtarget.getMaxBuildIntsCost(); 1897 } 1898 1899 bool RISCVTargetLowering:: 1900 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 1901 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 1902 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 1903 SelectionDAG &DAG) const { 1904 // One interesting pattern that we'd want to form is 'bit extract': 1905 // ((1 >> Y) & 1) ==/!= 0 1906 // But we also need to be careful not to try to reverse that fold. 1907 1908 // Is this '((1 >> Y) & 1)'? 1909 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) 1910 return false; // Keep the 'bit extract' pattern. 
1911 1912 // Will this be '((1 >> Y) & 1)' after the transform? 1913 if (NewShiftOpcode == ISD::SRL && CC->isOne()) 1914 return true; // Do form the 'bit extract' pattern. 1915 1916 // If 'X' is a constant, and we transform, then we will immediately 1917 // try to undo the fold, thus causing endless combine loop. 1918 // So only do the transform if X is not a constant. This matches the default 1919 // implementation of this function. 1920 return !XC; 1921 } 1922 1923 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { 1924 switch (Opcode) { 1925 case Instruction::Add: 1926 case Instruction::Sub: 1927 case Instruction::Mul: 1928 case Instruction::And: 1929 case Instruction::Or: 1930 case Instruction::Xor: 1931 case Instruction::FAdd: 1932 case Instruction::FSub: 1933 case Instruction::FMul: 1934 case Instruction::FDiv: 1935 case Instruction::ICmp: 1936 case Instruction::FCmp: 1937 return true; 1938 case Instruction::Shl: 1939 case Instruction::LShr: 1940 case Instruction::AShr: 1941 case Instruction::UDiv: 1942 case Instruction::SDiv: 1943 case Instruction::URem: 1944 case Instruction::SRem: 1945 return Operand == 1; 1946 default: 1947 return false; 1948 } 1949 } 1950 1951 1952 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { 1953 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1954 return false; 1955 1956 if (canSplatOperand(I->getOpcode(), Operand)) 1957 return true; 1958 1959 auto *II = dyn_cast<IntrinsicInst>(I); 1960 if (!II) 1961 return false; 1962 1963 switch (II->getIntrinsicID()) { 1964 case Intrinsic::fma: 1965 case Intrinsic::vp_fma: 1966 return Operand == 0 || Operand == 1; 1967 case Intrinsic::vp_shl: 1968 case Intrinsic::vp_lshr: 1969 case Intrinsic::vp_ashr: 1970 case Intrinsic::vp_udiv: 1971 case Intrinsic::vp_sdiv: 1972 case Intrinsic::vp_urem: 1973 case Intrinsic::vp_srem: 1974 return Operand == 1; 1975 // These intrinsics are commutative. 1976 case Intrinsic::vp_add: 1977 case Intrinsic::vp_mul: 1978 case Intrinsic::vp_and: 1979 case Intrinsic::vp_or: 1980 case Intrinsic::vp_xor: 1981 case Intrinsic::vp_fadd: 1982 case Intrinsic::vp_fmul: 1983 case Intrinsic::vp_icmp: 1984 case Intrinsic::vp_fcmp: 1985 // These intrinsics have 'vr' versions. 1986 case Intrinsic::vp_sub: 1987 case Intrinsic::vp_fsub: 1988 case Intrinsic::vp_fdiv: 1989 return Operand == 0 || Operand == 1; 1990 default: 1991 return false; 1992 } 1993 } 1994 1995 /// Check if sinking \p I's operands to I's basic block is profitable, because 1996 /// the operands can be folded into a target instruction, e.g. 1997 /// splats of scalars can fold into vector instructions. 1998 bool RISCVTargetLowering::shouldSinkOperands( 1999 Instruction *I, SmallVectorImpl<Use *> &Ops) const { 2000 using namespace llvm::PatternMatch; 2001 2002 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 2003 return false; 2004 2005 for (auto OpIdx : enumerate(I->operands())) { 2006 if (!canSplatOperand(I, OpIdx.index())) 2007 continue; 2008 2009 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); 2010 // Make sure we are not already sinking this operand 2011 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) 2012 continue; 2013 2014 // We are looking for a splat that can be sunk. 2015 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 2016 m_Undef(), m_ZeroMask()))) 2017 continue; 2018 2019 // Don't sink i1 splats. 
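// (For reference, the shape matched above is the canonical splat idiom:
// insertelement of a scalar at index 0 followed by a zero-mask shufflevector.
// Sinking it next to its user lets isel pick a .vx/.vf instruction form.)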
2020 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1)) 2021 continue; 2022 2023 // All uses of the shuffle should be sunk to avoid duplicating it across GPR 2024 // and vector registers. 2025 for (Use &U : Op->uses()) { 2026 Instruction *Insn = cast<Instruction>(U.getUser()); 2027 if (!canSplatOperand(Insn, U.getOperandNo())) 2028 return false; 2029 } 2030 2031 Ops.push_back(&Op->getOperandUse(0)); 2032 Ops.push_back(&OpIdx.value()); 2033 } 2034 return true; 2035 } 2036 2037 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { 2038 unsigned Opc = VecOp.getOpcode(); 2039 2040 // Assume target opcodes can't be scalarized. 2041 // TODO - do we have any exceptions? 2042 if (Opc >= ISD::BUILTIN_OP_END) 2043 return false; 2044 2045 // If the vector op is not supported, try to convert to scalar. 2046 EVT VecVT = VecOp.getValueType(); 2047 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) 2048 return true; 2049 2050 // If the vector op is supported, but the scalar op is not, the transform may 2051 // not be worthwhile. 2052 // Permit a vector binary operation to be converted to a scalar binary 2053 // operation which is custom lowered with an illegal type. 2054 EVT ScalarVT = VecVT.getScalarType(); 2055 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) || 2056 isOperationCustom(Opc, ScalarVT); 2057 } 2058 2059 bool RISCVTargetLowering::isOffsetFoldingLegal( 2060 const GlobalAddressSDNode *GA) const { 2061 // In order to maximise the opportunity for common subexpression elimination, 2062 // keep a separate ADD node for the global address offset instead of folding 2063 // it in the global address node. Later peephole optimisations may choose to 2064 // fold it back in when profitable. 2065 return false; 2066 } 2067 2068 // Return one of the following: 2069 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value. 2070 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its 2071 // positive counterpart, which will be materialized from the first returned 2072 // element. The second returned element indicates that an FNEG should 2073 // follow. 2074 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. 2075 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, 2076 EVT VT) const { 2077 if (!Subtarget.hasStdExtZfa()) 2078 return std::make_pair(-1, false); 2079 2080 bool IsSupportedVT = false; 2081 if (VT == MVT::f16) { 2082 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); 2083 } else if (VT == MVT::f32) { 2084 IsSupportedVT = true; 2085 } else if (VT == MVT::f64) { 2086 assert(Subtarget.hasStdExtD() && "Expect D extension"); 2087 IsSupportedVT = true; 2088 } 2089 2090 if (!IsSupportedVT) 2091 return std::make_pair(-1, false); 2092 2093 int Index = RISCVLoadFPImm::getLoadFPImm(Imm); 2094 if (Index < 0 && Imm.isNegative()) 2095 // Try the combination of its positive counterpart + FNEG.
2096 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true); 2097 else 2098 return std::make_pair(Index, false); 2099 } 2100 2101 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 2102 bool ForCodeSize) const { 2103 bool IsLegalVT = false; 2104 if (VT == MVT::f16) 2105 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); 2106 else if (VT == MVT::f32) 2107 IsLegalVT = Subtarget.hasStdExtFOrZfinx(); 2108 else if (VT == MVT::f64) 2109 IsLegalVT = Subtarget.hasStdExtDOrZdinx(); 2110 else if (VT == MVT::bf16) 2111 IsLegalVT = Subtarget.hasStdExtZfbfmin(); 2112 2113 if (!IsLegalVT) 2114 return false; 2115 2116 if (getLegalZfaFPImm(Imm, VT).first >= 0) 2117 return true; 2118 2119 // Cannot create a 64 bit floating-point immediate value for rv32. 2120 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { 2121 // td can handle +0.0 or -0.0 already. 2122 // -0.0 can be created by fmv + fneg. 2123 return Imm.isZero(); 2124 } 2125 2126 // Special case: fmv + fneg 2127 if (Imm.isNegZero()) 2128 return true; 2129 2130 // Building an integer and then converting requires a fmv at the end of 2131 // the integer sequence. 2132 const int Cost = 2133 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), 2134 Subtarget); 2135 return Cost <= FPImmCost; 2136 } 2137 2138 // TODO: This is very conservative. 2139 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 2140 unsigned Index) const { 2141 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) 2142 return false; 2143 2144 // Only support extracting a fixed from a fixed vector for now. 2145 if (ResVT.isScalableVector() || SrcVT.isScalableVector()) 2146 return false; 2147 2148 unsigned ResElts = ResVT.getVectorNumElements(); 2149 unsigned SrcElts = SrcVT.getVectorNumElements(); 2150 2151 // Convervatively only handle extracting half of a vector. 2152 // TODO: Relax this. 2153 if ((ResElts * 2) != SrcElts) 2154 return false; 2155 2156 // The smallest type we can slide is i8. 2157 // TODO: We can extract index 0 from a mask vector without a slide. 2158 if (ResVT.getVectorElementType() == MVT::i1) 2159 return false; 2160 2161 // Slide can support arbitrary index, but we only treat vslidedown.vi as 2162 // cheap. 2163 if (Index >= 32) 2164 return false; 2165 2166 // TODO: We can do arbitrary slidedowns, but for now only support extracting 2167 // the upper half of a vector until we have more test coverage. 2168 return Index == 0 || Index == ResElts; 2169 } 2170 2171 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 2172 CallingConv::ID CC, 2173 EVT VT) const { 2174 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2175 // We might still end up using a GPR but that will be decided based on ABI. 2176 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2177 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2178 return MVT::f32; 2179 2180 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); 2181 2182 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) 2183 return MVT::i64; 2184 2185 return PartVT; 2186 } 2187 2188 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 2189 CallingConv::ID CC, 2190 EVT VT) const { 2191 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2192 // We might still end up using a GPR but that will be decided based on ABI. 
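// For instance, with F but without Zfhmin, an f16 argument is widened and
// passed as a single f32 part, so only one register is needed; hence the
// early return of 1 below.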
2193 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2194 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2195 return 1; 2196 2197 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); 2198 } 2199 2200 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( 2201 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 2202 unsigned &NumIntermediates, MVT &RegisterVT) const { 2203 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( 2204 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); 2205 2206 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) 2207 IntermediateVT = MVT::i64; 2208 2209 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) 2210 RegisterVT = MVT::i64; 2211 2212 return NumRegs; 2213 } 2214 2215 // Changes the condition code and swaps operands if necessary, so the SetCC 2216 // operation matches one of the comparisons supported directly by branches 2217 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare 2218 // with 1/-1. 2219 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, 2220 ISD::CondCode &CC, SelectionDAG &DAG) { 2221 // If this is a single bit test that can't be handled by ANDI, shift the 2222 // bit to be tested to the MSB and perform a signed compare with 0. 2223 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && 2224 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && 2225 isa<ConstantSDNode>(LHS.getOperand(1))) { 2226 uint64_t Mask = LHS.getConstantOperandVal(1); 2227 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) { 2228 unsigned ShAmt = 0; 2229 if (isPowerOf2_64(Mask)) { 2230 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 2231 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); 2232 } else { 2233 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); 2234 } 2235 2236 LHS = LHS.getOperand(0); 2237 if (ShAmt != 0) 2238 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, 2239 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 2240 return; 2241 } 2242 } 2243 2244 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 2245 int64_t C = RHSC->getSExtValue(); 2246 switch (CC) { 2247 default: break; 2248 case ISD::SETGT: 2249 // Convert X > -1 to X >= 0. 2250 if (C == -1) { 2251 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 2252 CC = ISD::SETGE; 2253 return; 2254 } 2255 break; 2256 case ISD::SETLT: 2257 // Convert X < 1 to 0 >= X. 
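// (RISC-V conditional branches compare two registers; there is no
// compare-with-immediate form, so "x < 1" cannot be encoded directly, while
// "0 >= x" maps onto bge zero, x.)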
2258 if (C == 1) { 2259 RHS = LHS; 2260 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 2261 CC = ISD::SETGE; 2262 return; 2263 } 2264 break; 2265 } 2266 } 2267 2268 switch (CC) { 2269 default: 2270 break; 2271 case ISD::SETGT: 2272 case ISD::SETLE: 2273 case ISD::SETUGT: 2274 case ISD::SETULE: 2275 CC = ISD::getSetCCSwappedOperands(CC); 2276 std::swap(LHS, RHS); 2277 break; 2278 } 2279 } 2280 2281 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 2282 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 2283 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 2284 if (VT.getVectorElementType() == MVT::i1) 2285 KnownSize *= 8; 2286 2287 switch (KnownSize) { 2288 default: 2289 llvm_unreachable("Invalid LMUL."); 2290 case 8: 2291 return RISCVII::VLMUL::LMUL_F8; 2292 case 16: 2293 return RISCVII::VLMUL::LMUL_F4; 2294 case 32: 2295 return RISCVII::VLMUL::LMUL_F2; 2296 case 64: 2297 return RISCVII::VLMUL::LMUL_1; 2298 case 128: 2299 return RISCVII::VLMUL::LMUL_2; 2300 case 256: 2301 return RISCVII::VLMUL::LMUL_4; 2302 case 512: 2303 return RISCVII::VLMUL::LMUL_8; 2304 } 2305 } 2306 2307 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 2308 switch (LMul) { 2309 default: 2310 llvm_unreachable("Invalid LMUL."); 2311 case RISCVII::VLMUL::LMUL_F8: 2312 case RISCVII::VLMUL::LMUL_F4: 2313 case RISCVII::VLMUL::LMUL_F2: 2314 case RISCVII::VLMUL::LMUL_1: 2315 return RISCV::VRRegClassID; 2316 case RISCVII::VLMUL::LMUL_2: 2317 return RISCV::VRM2RegClassID; 2318 case RISCVII::VLMUL::LMUL_4: 2319 return RISCV::VRM4RegClassID; 2320 case RISCVII::VLMUL::LMUL_8: 2321 return RISCV::VRM8RegClassID; 2322 } 2323 } 2324 2325 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 2326 RISCVII::VLMUL LMUL = getLMUL(VT); 2327 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 2328 LMUL == RISCVII::VLMUL::LMUL_F4 || 2329 LMUL == RISCVII::VLMUL::LMUL_F2 || 2330 LMUL == RISCVII::VLMUL::LMUL_1) { 2331 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 2332 "Unexpected subreg numbering"); 2333 return RISCV::sub_vrm1_0 + Index; 2334 } 2335 if (LMUL == RISCVII::VLMUL::LMUL_2) { 2336 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 2337 "Unexpected subreg numbering"); 2338 return RISCV::sub_vrm2_0 + Index; 2339 } 2340 if (LMUL == RISCVII::VLMUL::LMUL_4) { 2341 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 2342 "Unexpected subreg numbering"); 2343 return RISCV::sub_vrm4_0 + Index; 2344 } 2345 llvm_unreachable("Invalid vector type."); 2346 } 2347 2348 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 2349 if (VT.getVectorElementType() == MVT::i1) 2350 return RISCV::VRRegClassID; 2351 return getRegClassIDForLMUL(getLMUL(VT)); 2352 } 2353 2354 // Attempt to decompose a subvector insert/extract between VecVT and 2355 // SubVecVT via subregister indices. Returns the subregister index that 2356 // can perform the subvector insert/extract with the given element index, as 2357 // well as the index corresponding to any leftover subvectors that must be 2358 // further inserted/extracted within the register class for SubVecVT. 
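// A sketch of the intent: extracting nxv4i32 at element index 6 from nxv16i32
// first composes subregister indices down to the register group holding
// elements 4..7 and returns a leftover index of 2 for an in-register slide.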
2359 std::pair<unsigned, unsigned> 2360 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2361 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 2362 const RISCVRegisterInfo *TRI) { 2363 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 2364 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 2365 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 2366 "Register classes not ordered"); 2367 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 2368 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 2369 // Try to compose a subregister index that takes us from the incoming 2370 // LMUL>1 register class down to the outgoing one. At each step we half 2371 // the LMUL: 2372 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 2373 // Note that this is not guaranteed to find a subregister index, such as 2374 // when we are extracting from one VR type to another. 2375 unsigned SubRegIdx = RISCV::NoSubRegister; 2376 for (const unsigned RCID : 2377 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 2378 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 2379 VecVT = VecVT.getHalfNumVectorElementsVT(); 2380 bool IsHi = 2381 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 2382 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 2383 getSubregIndexByMVT(VecVT, IsHi)); 2384 if (IsHi) 2385 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 2386 } 2387 return {SubRegIdx, InsertExtractIdx}; 2388 } 2389 2390 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 2391 // stores for those types. 2392 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 2393 return !Subtarget.useRVVForFixedLengthVectors() || 2394 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 2395 } 2396 2397 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { 2398 if (!ScalarTy.isSimple()) 2399 return false; 2400 switch (ScalarTy.getSimpleVT().SimpleTy) { 2401 case MVT::iPTR: 2402 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; 2403 case MVT::i8: 2404 case MVT::i16: 2405 case MVT::i32: 2406 return true; 2407 case MVT::i64: 2408 return Subtarget.hasVInstructionsI64(); 2409 case MVT::f16: 2410 return Subtarget.hasVInstructionsF16(); 2411 case MVT::f32: 2412 return Subtarget.hasVInstructionsF32(); 2413 case MVT::f64: 2414 return Subtarget.hasVInstructionsF64(); 2415 default: 2416 return false; 2417 } 2418 } 2419 2420 2421 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { 2422 return NumRepeatedDivisors; 2423 } 2424 2425 static SDValue getVLOperand(SDValue Op) { 2426 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2427 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2428 "Unexpected opcode"); 2429 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2430 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
1 : 0); 2431 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2432 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2433 if (!II) 2434 return SDValue(); 2435 return Op.getOperand(II->VLOperand + 1 + HasChain); 2436 } 2437 2438 static bool useRVVForFixedLengthVectorVT(MVT VT, 2439 const RISCVSubtarget &Subtarget) { 2440 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 2441 if (!Subtarget.useRVVForFixedLengthVectors()) 2442 return false; 2443 2444 // We only support a set of vector types with a consistent maximum fixed size 2445 // across all supported vector element types to avoid legalization issues. 2446 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 2447 // fixed-length vector type we support is 1024 bytes. 2448 if (VT.getFixedSizeInBits() > 1024 * 8) 2449 return false; 2450 2451 unsigned MinVLen = Subtarget.getRealMinVLen(); 2452 2453 MVT EltVT = VT.getVectorElementType(); 2454 2455 // Don't use RVV for vectors we cannot scalarize if required. 2456 switch (EltVT.SimpleTy) { 2457 // i1 is supported but has different rules. 2458 default: 2459 return false; 2460 case MVT::i1: 2461 // Masks can only use a single register. 2462 if (VT.getVectorNumElements() > MinVLen) 2463 return false; 2464 MinVLen /= 8; 2465 break; 2466 case MVT::i8: 2467 case MVT::i16: 2468 case MVT::i32: 2469 break; 2470 case MVT::i64: 2471 if (!Subtarget.hasVInstructionsI64()) 2472 return false; 2473 break; 2474 case MVT::f16: 2475 if (!Subtarget.hasVInstructionsF16Minimal()) 2476 return false; 2477 break; 2478 case MVT::f32: 2479 if (!Subtarget.hasVInstructionsF32()) 2480 return false; 2481 break; 2482 case MVT::f64: 2483 if (!Subtarget.hasVInstructionsF64()) 2484 return false; 2485 break; 2486 } 2487 2488 // Reject elements larger than ELEN. 2489 if (EltVT.getSizeInBits() > Subtarget.getELen()) 2490 return false; 2491 2492 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 2493 // Don't use RVV for types that don't fit. 2494 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 2495 return false; 2496 2497 // TODO: Perhaps an artificial restriction, but worth having whilst getting 2498 // the base fixed length RVV support in place. 2499 if (!VT.isPow2VectorType()) 2500 return false; 2501 2502 return true; 2503 } 2504 2505 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 2506 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 2507 } 2508 2509 // Return the largest legal scalable vector type that matches VT's element type. 2510 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 2511 const RISCVSubtarget &Subtarget) { 2512 // This may be called before legal types are setup. 2513 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 2514 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 2515 "Expected legal fixed length vector!"); 2516 2517 unsigned MinVLen = Subtarget.getRealMinVLen(); 2518 unsigned MaxELen = Subtarget.getELen(); 2519 2520 MVT EltVT = VT.getVectorElementType(); 2521 switch (EltVT.SimpleTy) { 2522 default: 2523 llvm_unreachable("unexpected element type for RVV container"); 2524 case MVT::i1: 2525 case MVT::i8: 2526 case MVT::i16: 2527 case MVT::i32: 2528 case MVT::i64: 2529 case MVT::f16: 2530 case MVT::f32: 2531 case MVT::f64: { 2532 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 2533 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within 2534 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 
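// A worked example, assuming RVVBitsPerBlock is 64: with a minimum VLEN of 128
// and ELEN of 64, v4i16 maps to (4 * 64) / 128 = 2 elements, clamped to at
// least 64 / 64 = 1, giving an nxv2i16 container.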
2535 unsigned NumElts = 2536 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; 2537 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); 2538 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); 2539 return MVT::getScalableVectorVT(EltVT, NumElts); 2540 } 2541 } 2542 } 2543 2544 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 2545 const RISCVSubtarget &Subtarget) { 2546 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 2547 Subtarget); 2548 } 2549 2550 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 2551 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); 2552 } 2553 2554 // Grow V to consume an entire RVV register. 2555 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2556 const RISCVSubtarget &Subtarget) { 2557 assert(VT.isScalableVector() && 2558 "Expected to convert into a scalable vector!"); 2559 assert(V.getValueType().isFixedLengthVector() && 2560 "Expected a fixed length vector operand!"); 2561 SDLoc DL(V); 2562 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2563 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 2564 } 2565 2566 // Shrink V so it's just big enough to maintain a VT's worth of data. 2567 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2568 const RISCVSubtarget &Subtarget) { 2569 assert(VT.isFixedLengthVector() && 2570 "Expected to convert into a fixed length vector!"); 2571 assert(V.getValueType().isScalableVector() && 2572 "Expected a scalable vector operand!"); 2573 SDLoc DL(V); 2574 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2575 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 2576 } 2577 2578 /// Return the type of the mask type suitable for masking the provided 2579 /// vector type. This is simply an i1 element type vector of the same 2580 /// (possibly scalable) length. 2581 static MVT getMaskTypeFor(MVT VecVT) { 2582 assert(VecVT.isVector()); 2583 ElementCount EC = VecVT.getVectorElementCount(); 2584 return MVT::getVectorVT(MVT::i1, EC); 2585 } 2586 2587 /// Creates an all ones mask suitable for masking a vector of type VecTy with 2588 /// vector length VL. . 2589 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, 2590 SelectionDAG &DAG) { 2591 MVT MaskVT = getMaskTypeFor(VecVT); 2592 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2593 } 2594 2595 static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2596 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2597 // If we know the exact VLEN, and our VL is exactly equal to VLMAX, 2598 // canonicalize the representation. InsertVSETVLI will pick the immediate 2599 // encoding later if profitable. 
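// For example, if the exact VLEN is known to be 128, a v4i32 operation in an
// nxv2i32 container has VLMAX == 4, so an AVL of 4 is emitted as the X0/VLMAX
// form rather than as a constant.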
2600 const auto [MinVLMAX, MaxVLMAX] = 2601 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 2602 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) 2603 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2604 2605 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); 2606 } 2607 2608 static std::pair<SDValue, SDValue> 2609 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, 2610 const RISCVSubtarget &Subtarget) { 2611 assert(VecVT.isScalableVector() && "Expecting a scalable vector"); 2612 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2613 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); 2614 return {Mask, VL}; 2615 } 2616 2617 static std::pair<SDValue, SDValue> 2618 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2619 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2620 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2621 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); 2622 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); 2623 return {Mask, VL}; 2624 } 2625 2626 // Gets the two common "VL" operands: an all-ones mask and the vector length. 2627 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 2628 // the vector type that the fixed-length vector is contained in. Otherwise if 2629 // VecVT is scalable, then ContainerVT should be the same as VecVT. 2630 static std::pair<SDValue, SDValue> 2631 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, 2632 const RISCVSubtarget &Subtarget) { 2633 if (VecVT.isFixedLengthVector()) 2634 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG, 2635 Subtarget); 2636 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2637 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget); 2638 } 2639 2640 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, 2641 SelectionDAG &DAG) const { 2642 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2643 return DAG.getElementCount(DL, Subtarget.getXLenVT(), 2644 VecVT.getVectorElementCount()); 2645 } 2646 2647 std::pair<unsigned, unsigned> 2648 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, 2649 const RISCVSubtarget &Subtarget) { 2650 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2651 2652 unsigned EltSize = VecVT.getScalarSizeInBits(); 2653 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 2654 2655 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 2656 unsigned MaxVLMAX = 2657 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 2658 2659 unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 2660 unsigned MinVLMAX = 2661 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); 2662 2663 return std::make_pair(MinVLMAX, MaxVLMAX); 2664 } 2665 2666 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few 2667 // of either is (currently) supported. This can get us into an infinite loop 2668 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR 2669 // as a ..., etc. 2670 // Until either (or both) of these can reliably lower any node, reporting that 2671 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks 2672 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, 2673 // which is not desirable. 
2674 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( 2675 EVT VT, unsigned DefinedValues) const { 2676 return false; 2677 } 2678 2679 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { 2680 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is 2681 // implementation-defined. 2682 if (!VT.isVector()) 2683 return InstructionCost::getInvalid(); 2684 unsigned DLenFactor = Subtarget.getDLenFactor(); 2685 unsigned Cost; 2686 if (VT.isScalableVector()) { 2687 unsigned LMul; 2688 bool Fractional; 2689 std::tie(LMul, Fractional) = 2690 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); 2691 if (Fractional) 2692 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; 2693 else 2694 Cost = (LMul * DLenFactor); 2695 } else { 2696 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor); 2697 } 2698 return Cost; 2699 } 2700 2701 2702 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv 2703 /// is generally quadratic in the number of vreg implied by LMUL. Note that 2704 /// operand (index and possibly mask) are handled separately. 2705 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { 2706 return getLMULCost(VT) * getLMULCost(VT); 2707 } 2708 2709 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. 2710 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, 2711 /// or may track the vrgather.vv cost. It is implementation-dependent. 2712 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { 2713 return getLMULCost(VT); 2714 } 2715 2716 /// Return the cost of a vslidedown.vx or vslideup.vx instruction 2717 /// for the type VT. (This does not cover the vslide1up or vslide1down 2718 /// variants.) Slides may be linear in the number of vregs implied by LMUL, 2719 /// or may track the vrgather.vv cost. It is implementation-dependent. 2720 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { 2721 return getLMULCost(VT); 2722 } 2723 2724 /// Return the cost of a vslidedown.vi or vslideup.vi instruction 2725 /// for the type VT. (This does not cover the vslide1up or vslide1down 2726 /// variants.) Slides may be linear in the number of vregs implied by LMUL, 2727 /// or may track the vrgather.vv cost. It is implementation-dependent. 2728 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { 2729 return getLMULCost(VT); 2730 } 2731 2732 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, 2733 const RISCVSubtarget &Subtarget) { 2734 // RISC-V FP-to-int conversions saturate to the destination register size, but 2735 // don't produce 0 for nan. We can use a conversion instruction and fix the 2736 // nan case with a compare and a select. 2737 SDValue Src = Op.getOperand(0); 2738 2739 MVT DstVT = Op.getSimpleValueType(); 2740 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2741 2742 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; 2743 2744 if (!DstVT.isVector()) { 2745 // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate 2746 // the result. 2747 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || 2748 Src.getValueType() == MVT::bf16) { 2749 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); 2750 } 2751 2752 unsigned Opc; 2753 if (SatVT == DstVT) 2754 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 2755 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 2756 Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 2757 else 2758 return SDValue(); 2759 // FIXME: Support other SatVTs by clamping before or after the conversion. 2760 2761 SDLoc DL(Op); 2762 SDValue FpToInt = DAG.getNode( 2763 Opc, DL, DstVT, Src, 2764 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT())); 2765 2766 if (Opc == RISCVISD::FCVT_WU_RV64) 2767 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 2768 2769 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 2770 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, 2771 ISD::CondCode::SETUO); 2772 } 2773 2774 // Vectors. 2775 2776 MVT DstEltVT = DstVT.getVectorElementType(); 2777 MVT SrcVT = Src.getSimpleValueType(); 2778 MVT SrcEltVT = SrcVT.getVectorElementType(); 2779 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 2780 unsigned DstEltSize = DstEltVT.getSizeInBits(); 2781 2782 // Only handle saturating to the destination type. 2783 if (SatVT != DstEltVT) 2784 return SDValue(); 2785 2786 // FIXME: Don't support narrowing by more than 1 steps for now. 2787 if (SrcEltSize > (2 * DstEltSize)) 2788 return SDValue(); 2789 2790 MVT DstContainerVT = DstVT; 2791 MVT SrcContainerVT = SrcVT; 2792 if (DstVT.isFixedLengthVector()) { 2793 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget); 2794 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 2795 assert(DstContainerVT.getVectorElementCount() == 2796 SrcContainerVT.getVectorElementCount() && 2797 "Expected same element count"); 2798 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2799 } 2800 2801 SDLoc DL(Op); 2802 2803 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget); 2804 2805 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 2806 {Src, Src, DAG.getCondCode(ISD::SETNE), 2807 DAG.getUNDEF(Mask.getValueType()), Mask, VL}); 2808 2809 // Need to widen by more than 1 step, promote the FP type, then do a widening 2810 // convert. 2811 if (DstEltSize > (2 * SrcEltSize)) { 2812 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!"); 2813 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); 2814 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL); 2815 } 2816 2817 unsigned RVVOpc = 2818 IsSigned ? 
RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 2819 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL); 2820 2821 SDValue SplatZero = DAG.getNode( 2822 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), 2823 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 2824 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero, 2825 Res, DAG.getUNDEF(DstContainerVT), VL); 2826 2827 if (DstVT.isFixedLengthVector()) 2828 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); 2829 2830 return Res; 2831 } 2832 2833 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { 2834 switch (Opc) { 2835 case ISD::FROUNDEVEN: 2836 case ISD::STRICT_FROUNDEVEN: 2837 case ISD::VP_FROUNDEVEN: 2838 return RISCVFPRndMode::RNE; 2839 case ISD::FTRUNC: 2840 case ISD::STRICT_FTRUNC: 2841 case ISD::VP_FROUNDTOZERO: 2842 return RISCVFPRndMode::RTZ; 2843 case ISD::FFLOOR: 2844 case ISD::STRICT_FFLOOR: 2845 case ISD::VP_FFLOOR: 2846 return RISCVFPRndMode::RDN; 2847 case ISD::FCEIL: 2848 case ISD::STRICT_FCEIL: 2849 case ISD::VP_FCEIL: 2850 return RISCVFPRndMode::RUP; 2851 case ISD::FROUND: 2852 case ISD::STRICT_FROUND: 2853 case ISD::VP_FROUND: 2854 return RISCVFPRndMode::RMM; 2855 case ISD::FRINT: 2856 return RISCVFPRndMode::DYN; 2857 } 2858 2859 return RISCVFPRndMode::Invalid; 2860 } 2861 2862 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND 2863 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to 2864 // the integer domain and back. Taking care to avoid converting values that are 2865 // nan or already correct. 2866 static SDValue 2867 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2868 const RISCVSubtarget &Subtarget) { 2869 MVT VT = Op.getSimpleValueType(); 2870 assert(VT.isVector() && "Unexpected type"); 2871 2872 SDLoc DL(Op); 2873 2874 SDValue Src = Op.getOperand(0); 2875 2876 MVT ContainerVT = VT; 2877 if (VT.isFixedLengthVector()) { 2878 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2879 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2880 } 2881 2882 SDValue Mask, VL; 2883 if (Op->isVPOpcode()) { 2884 Mask = Op.getOperand(1); 2885 if (VT.isFixedLengthVector()) 2886 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 2887 Subtarget); 2888 VL = Op.getOperand(2); 2889 } else { 2890 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2891 } 2892 2893 // Freeze the source since we are increasing the number of uses. 2894 Src = DAG.getFreeze(Src); 2895 2896 // We do the conversion on the absolute value and fix the sign at the end. 2897 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 2898 2899 // Determine the largest integer that can be represented exactly. This and 2900 // values larger than it don't have any fractional bits so don't need to 2901 // be converted. 
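// For example, f32 has a 24-bit significand, so MaxVal is 2^23 = 8388608.0;
// every representable float with magnitude >= 2^23 is already an integer.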
2902 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 2903 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2904 APFloat MaxVal = APFloat(FltSem); 2905 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2906 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2907 SDValue MaxValNode = 2908 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 2909 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 2910 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 2911 2912 // If abs(Src) was larger than MaxVal or nan, keep it. 2913 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2914 Mask = 2915 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, 2916 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), 2917 Mask, Mask, VL}); 2918 2919 // Truncate to integer and convert back to FP. 2920 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 2921 MVT XLenVT = Subtarget.getXLenVT(); 2922 SDValue Truncated; 2923 2924 switch (Op.getOpcode()) { 2925 default: 2926 llvm_unreachable("Unexpected opcode"); 2927 case ISD::FCEIL: 2928 case ISD::VP_FCEIL: 2929 case ISD::FFLOOR: 2930 case ISD::VP_FFLOOR: 2931 case ISD::FROUND: 2932 case ISD::FROUNDEVEN: 2933 case ISD::VP_FROUND: 2934 case ISD::VP_FROUNDEVEN: 2935 case ISD::VP_FROUNDTOZERO: { 2936 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2937 assert(FRM != RISCVFPRndMode::Invalid); 2938 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, 2939 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 2940 break; 2941 } 2942 case ISD::FTRUNC: 2943 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, 2944 Mask, VL); 2945 break; 2946 case ISD::FRINT: 2947 case ISD::VP_FRINT: 2948 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); 2949 break; 2950 case ISD::FNEARBYINT: 2951 case ISD::VP_FNEARBYINT: 2952 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, 2953 Mask, VL); 2954 break; 2955 } 2956 2957 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 2958 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) 2959 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, 2960 Mask, VL); 2961 2962 // Restore the original sign so that -0.0 is preserved. 2963 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 2964 Src, Src, Mask, VL); 2965 2966 if (!VT.isFixedLengthVector()) 2967 return Truncated; 2968 2969 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 2970 } 2971 2972 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND 2973 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to 2974 // qNan and coverting the new source to integer and back to FP. 2975 static SDValue 2976 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2977 const RISCVSubtarget &Subtarget) { 2978 SDLoc DL(Op); 2979 MVT VT = Op.getSimpleValueType(); 2980 SDValue Chain = Op.getOperand(0); 2981 SDValue Src = Op.getOperand(1); 2982 2983 MVT ContainerVT = VT; 2984 if (VT.isFixedLengthVector()) { 2985 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2986 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2987 } 2988 2989 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2990 2991 // Freeze the source since we are increasing the number of uses. 
  Src = DAG.getFreeze(Src);

  // Convert sNaN to qNaN by executing x + x for every unordered element x in
  // Src.
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
                                DAG.getVTList(MaskVT, MVT::Other),
                                {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                                 DAG.getUNDEF(MaskVT), Mask, VL});
  Chain = Unorder.getValue(1);
  Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
                    DAG.getVTList(ContainerVT, MVT::Other),
                    {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or NaN, keep it.
  Mask = DAG.getNode(
      RISCVISD::SETCC_VL, DL, MaskVT,
      {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Truncated;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::STRICT_FCEIL:
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FROUND:
  case ISD::STRICT_FROUNDEVEN: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(
        RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
        {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
    break;
  }
  case ISD::STRICT_FTRUNC:
    Truncated =
        DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
                    DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
    break;
  case ISD::STRICT_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
                            Mask, VL);
    break;
  }
  Chain = Truncated.getValue(1);

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
    Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain,
                            Truncated, Mask, VL);
    Chain = Truncated.getValue(1);
  }

  // Restore the original sign so that -0.0 is preserved.
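  // (The integer round-trip through the conversions above drops the sign of
  // zero, so copy the sign bit back from the frozen source.)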
3068 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 3069 Src, Src, Mask, VL); 3070 3071 if (VT.isFixedLengthVector()) 3072 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3073 return DAG.getMergeValues({Truncated, Chain}, DL); 3074 } 3075 3076 static SDValue 3077 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 3078 const RISCVSubtarget &Subtarget) { 3079 MVT VT = Op.getSimpleValueType(); 3080 if (VT.isVector()) 3081 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 3082 3083 if (DAG.shouldOptForSize()) 3084 return SDValue(); 3085 3086 SDLoc DL(Op); 3087 SDValue Src = Op.getOperand(0); 3088 3089 // Create an integer the size of the mantissa with the MSB set. This and all 3090 // values larger than it don't have any fractional bits so don't need to be 3091 // converted. 3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 3093 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3094 APFloat MaxVal = APFloat(FltSem); 3095 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3097 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 3098 3099 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3100 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, 3101 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); 3102 } 3103 3104 // Expand vector LRINT and LLRINT by converting to the integer domain. 3105 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, 3106 const RISCVSubtarget &Subtarget) { 3107 MVT VT = Op.getSimpleValueType(); 3108 assert(VT.isVector() && "Unexpected type"); 3109 3110 SDLoc DL(Op); 3111 SDValue Src = Op.getOperand(0); 3112 MVT ContainerVT = VT; 3113 3114 if (VT.isFixedLengthVector()) { 3115 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3116 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3117 } 3118 3119 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3120 SDValue Truncated = 3121 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); 3122 3123 if (!VT.isFixedLengthVector()) 3124 return Truncated; 3125 3126 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3127 } 3128 3129 static SDValue 3130 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, 3131 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, 3132 SDValue Offset, SDValue Mask, SDValue VL, 3133 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3134 if (Merge.isUndef()) 3135 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3136 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3137 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3138 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); 3139 } 3140 3141 static SDValue 3142 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, 3143 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 3144 SDValue VL, 3145 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3146 if (Merge.isUndef()) 3147 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3148 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3149 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3150 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); 3151 } 3152 3153 static MVT getLMUL1VT(MVT VT) { 3154 assert(VT.getVectorElementType().getSizeInBits() 
<= 64 && 3155 "Unexpected vector MVT"); 3156 return MVT::getScalableVectorVT( 3157 VT.getVectorElementType(), 3158 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 3159 } 3160 3161 struct VIDSequence { 3162 int64_t StepNumerator; 3163 unsigned StepDenominator; 3164 int64_t Addend; 3165 }; 3166 3167 static std::optional<uint64_t> getExactInteger(const APFloat &APF, 3168 uint32_t BitWidth) { 3169 APSInt ValInt(BitWidth, !APF.isNegative()); 3170 // We use an arbitrary rounding mode here. If a floating-point is an exact 3171 // integer (e.g., 1.0), the rounding mode does not affect the output value. If 3172 // the rounding mode changes the output value, then it is not an exact 3173 // integer. 3174 RoundingMode ArbitraryRM = RoundingMode::TowardZero; 3175 bool IsExact; 3176 // If it is out of signed integer range, it will return an invalid operation. 3177 // If it is not an exact integer, IsExact is false. 3178 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == 3179 APFloatBase::opInvalidOp) || 3180 !IsExact) 3181 return std::nullopt; 3182 return ValInt.extractBitsAsZExtValue(BitWidth, 0); 3183 } 3184 3185 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 3186 // to the (non-zero) step S and start value X. This can be then lowered as the 3187 // RVV sequence (VID * S) + X, for example. 3188 // The step S is represented as an integer numerator divided by a positive 3189 // denominator. Note that the implementation currently only identifies 3190 // sequences in which either the numerator is +/- 1 or the denominator is 1. It 3191 // cannot detect 2/3, for example. 3192 // Note that this method will also match potentially unappealing index 3193 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to 3194 // determine whether this is worth generating code for. 3195 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op, 3196 unsigned EltSizeInBits) { 3197 unsigned NumElts = Op.getNumOperands(); 3198 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); 3199 bool IsInteger = Op.getValueType().isInteger(); 3200 3201 std::optional<unsigned> SeqStepDenom; 3202 std::optional<int64_t> SeqStepNum, SeqAddend; 3203 std::optional<std::pair<uint64_t, unsigned>> PrevElt; 3204 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits()); 3205 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 3206 // Assume undef elements match the sequence; we just have to be careful 3207 // when interpolating across them. 3208 if (Op.getOperand(Idx).isUndef()) 3209 continue; 3210 3211 uint64_t Val; 3212 if (IsInteger) { 3213 // The BUILD_VECTOR must be all constants. 3214 if (!isa<ConstantSDNode>(Op.getOperand(Idx))) 3215 return std::nullopt; 3216 Val = Op.getConstantOperandVal(Idx) & 3217 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits()); 3218 } else { 3219 // The BUILD_VECTOR must be all constants. 3220 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx))) 3221 return std::nullopt; 3222 if (auto ExactInteger = getExactInteger( 3223 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 3224 Op.getScalarValueSizeInBits())) 3225 Val = *ExactInteger; 3226 else 3227 return std::nullopt; 3228 } 3229 3230 if (PrevElt) { 3231 // Calculate the step since the last non-undef element, and ensure 3232 // it's consistent across the entire sequence. 
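      // E.g. for <0, 2, undef, 6> the pair (0 -> 2) gives ValDiff=2/IdxDiff=1
      // and the pair (2 -> 6) gives ValDiff=4/IdxDiff=2; both normalize to a
      // step of 2, so the sequence is accepted.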
3233 unsigned IdxDiff = Idx - PrevElt->second; 3234 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); 3235 3236 // A zero-value value difference means that we're somewhere in the middle 3237 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a 3238 // step change before evaluating the sequence. 3239 if (ValDiff == 0) 3240 continue; 3241 3242 int64_t Remainder = ValDiff % IdxDiff; 3243 // Normalize the step if it's greater than 1. 3244 if (Remainder != ValDiff) { 3245 // The difference must cleanly divide the element span. 3246 if (Remainder != 0) 3247 return std::nullopt; 3248 ValDiff /= IdxDiff; 3249 IdxDiff = 1; 3250 } 3251 3252 if (!SeqStepNum) 3253 SeqStepNum = ValDiff; 3254 else if (ValDiff != SeqStepNum) 3255 return std::nullopt; 3256 3257 if (!SeqStepDenom) 3258 SeqStepDenom = IdxDiff; 3259 else if (IdxDiff != *SeqStepDenom) 3260 return std::nullopt; 3261 } 3262 3263 // Record this non-undef element for later. 3264 if (!PrevElt || PrevElt->first != Val) 3265 PrevElt = std::make_pair(Val, Idx); 3266 } 3267 3268 // We need to have logged a step for this to count as a legal index sequence. 3269 if (!SeqStepNum || !SeqStepDenom) 3270 return std::nullopt; 3271 3272 // Loop back through the sequence and validate elements we might have skipped 3273 // while waiting for a valid step. While doing this, log any sequence addend. 3274 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 3275 if (Op.getOperand(Idx).isUndef()) 3276 continue; 3277 uint64_t Val; 3278 if (IsInteger) { 3279 Val = Op.getConstantOperandVal(Idx) & 3280 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits()); 3281 } else { 3282 Val = *getExactInteger( 3283 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 3284 Op.getScalarValueSizeInBits()); 3285 } 3286 uint64_t ExpectedVal = 3287 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 3288 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); 3289 if (!SeqAddend) 3290 SeqAddend = Addend; 3291 else if (Addend != SeqAddend) 3292 return std::nullopt; 3293 } 3294 3295 assert(SeqAddend && "Must have an addend if we have a step"); 3296 3297 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 3298 } 3299 3300 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT 3301 // and lower it as a VRGATHER_VX_VL from the source vector. 3302 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, 3303 SelectionDAG &DAG, 3304 const RISCVSubtarget &Subtarget) { 3305 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 3306 return SDValue(); 3307 SDValue Vec = SplatVal.getOperand(0); 3308 // Only perform this optimization on vectors of the same size for simplicity. 3309 // Don't perform this optimization for i1 vectors. 3310 // FIXME: Support i1 vectors, maybe by promoting to i8? 3311 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) 3312 return SDValue(); 3313 SDValue Idx = SplatVal.getOperand(1); 3314 // The index must be a legal type. 
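  // (vrgather.vx takes its index from a GPR, so only an XLenVT index can be
  // used directly here.)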
3315 if (Idx.getValueType() != Subtarget.getXLenVT()) 3316 return SDValue(); 3317 3318 MVT ContainerVT = VT; 3319 if (VT.isFixedLengthVector()) { 3320 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3321 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3322 } 3323 3324 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3325 3326 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, 3327 Idx, DAG.getUNDEF(ContainerVT), Mask, VL); 3328 3329 if (!VT.isFixedLengthVector()) 3330 return Gather; 3331 3332 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 3333 } 3334 3335 3336 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values 3337 /// which constitute a large proportion of the elements. In such cases we can 3338 /// splat a vector with the dominant element and make up the shortfall with 3339 /// INSERT_VECTOR_ELTs. Returns SDValue if not profitable. 3340 /// Note that this includes vectors of 2 elements by association. The 3341 /// upper-most element is the "dominant" one, allowing us to use a splat to 3342 /// "insert" the upper element, and an insert of the lower element at position 3343 /// 0, which improves codegen. 3344 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, 3345 const RISCVSubtarget &Subtarget) { 3346 MVT VT = Op.getSimpleValueType(); 3347 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3348 3349 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3350 3351 SDLoc DL(Op); 3352 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3353 3354 MVT XLenVT = Subtarget.getXLenVT(); 3355 unsigned NumElts = Op.getNumOperands(); 3356 3357 SDValue DominantValue; 3358 unsigned MostCommonCount = 0; 3359 DenseMap<SDValue, unsigned> ValueCounts; 3360 unsigned NumUndefElts = 3361 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); 3362 3363 // Track the number of scalar loads we know we'd be inserting, estimated as 3364 // any non-zero floating-point constant. Other kinds of element are either 3365 // already in registers or are materialized on demand. The threshold at which 3366 // a vector load is more desirable than several scalar materializion and 3367 // vector-insertion instructions is not known. 3368 unsigned NumScalarLoads = 0; 3369 3370 for (SDValue V : Op->op_values()) { 3371 if (V.isUndef()) 3372 continue; 3373 3374 ValueCounts.insert(std::make_pair(V, 0)); 3375 unsigned &Count = ValueCounts[V]; 3376 if (0 == Count) 3377 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) 3378 NumScalarLoads += !CFP->isExactlyValue(+0.0); 3379 3380 // Is this value dominant? In case of a tie, prefer the highest element as 3381 // it's cheaper to insert near the beginning of a vector than it is at the 3382 // end. 3383 if (++Count >= MostCommonCount) { 3384 DominantValue = V; 3385 MostCommonCount = Count; 3386 } 3387 } 3388 3389 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); 3390 unsigned NumDefElts = NumElts - NumUndefElts; 3391 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; 3392 3393 // Don't perform this optimization when optimizing for size, since 3394 // materializing elements and inserting them tends to cause code bloat. 
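  // The check below only fires when the dominant value covers all but at most
  // one defined element, or when the number of distinct values is at most
  // log2 of the defined element count.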
3395 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && 3396 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && 3397 ((MostCommonCount > DominantValueCountThreshold) || 3398 (ValueCounts.size() <= Log2_32(NumDefElts)))) { 3399 // Start by splatting the most common element. 3400 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); 3401 3402 DenseSet<SDValue> Processed{DominantValue}; 3403 3404 // We can handle an insert into the last element (of a splat) via 3405 // v(f)slide1down. This is slightly better than the vslideup insert 3406 // lowering as it avoids the need for a vector group temporary. It 3407 // is also better than using vmerge.vx as it avoids the need to 3408 // materialize the mask in a vector register. 3409 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1); 3410 !LastOp.isUndef() && ValueCounts[LastOp] == 1 && 3411 LastOp != DominantValue) { 3412 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3413 auto OpCode = 3414 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3415 if (!VT.isFloatingPoint()) 3416 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp); 3417 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3418 LastOp, Mask, VL); 3419 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget); 3420 Processed.insert(LastOp); 3421 } 3422 3423 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); 3424 for (const auto &OpIdx : enumerate(Op->ops())) { 3425 const SDValue &V = OpIdx.value(); 3426 if (V.isUndef() || !Processed.insert(V).second) 3427 continue; 3428 if (ValueCounts[V] == 1) { 3429 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, 3430 DAG.getConstant(OpIdx.index(), DL, XLenVT)); 3431 } else { 3432 // Blend in all instances of this value using a VSELECT, using a 3433 // mask where each bit signals whether that element is the one 3434 // we're after. 3435 SmallVector<SDValue> Ops; 3436 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { 3437 return DAG.getConstant(V == V1, DL, XLenVT); 3438 }); 3439 Vec = DAG.getNode(ISD::VSELECT, DL, VT, 3440 DAG.getBuildVector(SelMaskTy, DL, Ops), 3441 DAG.getSplatBuildVector(VT, DL, V), Vec); 3442 } 3443 } 3444 3445 return Vec; 3446 } 3447 3448 return SDValue(); 3449 } 3450 3451 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, 3452 const RISCVSubtarget &Subtarget) { 3453 MVT VT = Op.getSimpleValueType(); 3454 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3455 3456 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3457 3458 SDLoc DL(Op); 3459 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3460 3461 MVT XLenVT = Subtarget.getXLenVT(); 3462 unsigned NumElts = Op.getNumOperands(); 3463 3464 if (VT.getVectorElementType() == MVT::i1) { 3465 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 3466 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 3467 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 3468 } 3469 3470 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 3471 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 3472 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 3473 } 3474 3475 // Lower constant mask BUILD_VECTORs via an integer vector type, in 3476 // scalar integer chunks whose bit-width depends on the number of mask 3477 // bits and XLEN. 3478 // First, determine the most appropriate scalar integer type to use. 
This 3479 // is at most XLenVT, but may be shrunk to a smaller vector element type 3480 // according to the size of the final vector - use i8 chunks rather than 3481 // XLenVT if we're producing a v8i1. This results in more consistent 3482 // codegen across RV32 and RV64. 3483 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); 3484 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); 3485 // If we have to use more than one INSERT_VECTOR_ELT then this 3486 // optimization is likely to increase code size; avoid peforming it in 3487 // such a case. We can use a load from a constant pool in this case. 3488 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) 3489 return SDValue(); 3490 // Now we can create our integer vector type. Note that it may be larger 3491 // than the resulting mask type: v4i1 would use v1i8 as its integer type. 3492 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); 3493 MVT IntegerViaVecVT = 3494 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), 3495 IntegerViaVecElts); 3496 3497 uint64_t Bits = 0; 3498 unsigned BitPos = 0, IntegerEltIdx = 0; 3499 SmallVector<SDValue, 8> Elts(IntegerViaVecElts); 3500 3501 for (unsigned I = 0; I < NumElts;) { 3502 SDValue V = Op.getOperand(I); 3503 bool BitValue = !V.isUndef() && V->getAsZExtVal(); 3504 Bits |= ((uint64_t)BitValue << BitPos); 3505 ++BitPos; 3506 ++I; 3507 3508 // Once we accumulate enough bits to fill our scalar type or process the 3509 // last element, insert into our vector and clear our accumulated data. 3510 if (I % NumViaIntegerBits == 0 || I == NumElts) { 3511 if (NumViaIntegerBits <= 32) 3512 Bits = SignExtend64<32>(Bits); 3513 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); 3514 Elts[IntegerEltIdx] = Elt; 3515 Bits = 0; 3516 BitPos = 0; 3517 IntegerEltIdx++; 3518 } 3519 } 3520 3521 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); 3522 3523 if (NumElts < NumViaIntegerBits) { 3524 // If we're producing a smaller vector than our minimum legal integer 3525 // type, bitcast to the equivalent (known-legal) mask type, and extract 3526 // our final mask. 3527 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); 3528 Vec = DAG.getBitcast(MVT::v8i1, Vec); 3529 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, 3530 DAG.getConstant(0, DL, XLenVT)); 3531 } else { 3532 // Else we must have produced an integer type with the same size as the 3533 // mask type; bitcast for the final result. 3534 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); 3535 Vec = DAG.getBitcast(VT, Vec); 3536 } 3537 3538 return Vec; 3539 } 3540 3541 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3542 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 3543 : RISCVISD::VMV_V_X_VL; 3544 if (!VT.isFloatingPoint()) 3545 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 3546 Splat = 3547 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 3548 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3549 } 3550 3551 // Try and match index sequences, which we can lower to the vid instruction 3552 // with optional modifications. An all-undef vector is matched by 3553 // getSplatValue, above. 
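  // E.g. <0, 2, 4, 6> lowers to (vid.v << 1) and <1, 2, 3, 4> lowers to
  // (vid.v + 1).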
3554 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) { 3555 int64_t StepNumerator = SimpleVID->StepNumerator; 3556 unsigned StepDenominator = SimpleVID->StepDenominator; 3557 int64_t Addend = SimpleVID->Addend; 3558 3559 assert(StepNumerator != 0 && "Invalid step"); 3560 bool Negate = false; 3561 int64_t SplatStepVal = StepNumerator; 3562 unsigned StepOpcode = ISD::MUL; 3563 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it 3564 // anyway as the shift of 63 won't fit in uimm5. 3565 if (StepNumerator != 1 && StepNumerator != INT64_MIN && 3566 isPowerOf2_64(std::abs(StepNumerator))) { 3567 Negate = StepNumerator < 0; 3568 StepOpcode = ISD::SHL; 3569 SplatStepVal = Log2_64(std::abs(StepNumerator)); 3570 } 3571 3572 // Only emit VIDs with suitably-small steps/addends. We use imm5 is a 3573 // threshold since it's the immediate value many RVV instructions accept. 3574 // There is no vmul.vi instruction so ensure multiply constant can fit in 3575 // a single addi instruction. 3576 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || 3577 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && 3578 isPowerOf2_32(StepDenominator) && 3579 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { 3580 MVT VIDVT = 3581 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; 3582 MVT VIDContainerVT = 3583 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget); 3584 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL); 3585 // Convert right out of the scalable type so we can use standard ISD 3586 // nodes for the rest of the computation. If we used scalable types with 3587 // these, we'd lose the fixed-length vector info and generate worse 3588 // vsetvli code. 3589 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); 3590 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || 3591 (StepOpcode == ISD::SHL && SplatStepVal != 0)) { 3592 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT); 3593 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); 3594 } 3595 if (StepDenominator != 1) { 3596 SDValue SplatStep = 3597 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT); 3598 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); 3599 } 3600 if (Addend != 0 || Negate) { 3601 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT); 3602 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, 3603 VID); 3604 } 3605 if (VT.isFloatingPoint()) { 3606 // TODO: Use vfwcvt to reduce register pressure. 3607 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID); 3608 } 3609 return VID; 3610 } 3611 } 3612 3613 // For very small build_vectors, use a single scalar insert of a constant. 3614 // TODO: Base this on constant rematerialization cost, not size. 3615 const unsigned EltBitSize = VT.getScalarSizeInBits(); 3616 if (VT.getSizeInBits() <= 32 && 3617 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { 3618 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits()); 3619 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && 3620 "Unexpected sequence type"); 3621 // If we can use the original VL with the modified element type, this 3622 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3623 // be moved into InsertVSETVLI? 3624 unsigned ViaVecLen = 3625 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? 
NumElts : 1; 3626 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3627 3628 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3629 uint64_t SplatValue = 0; 3630 // Construct the amalgamated value at this larger vector type. 3631 for (const auto &OpIdx : enumerate(Op->op_values())) { 3632 const auto &SeqV = OpIdx.value(); 3633 if (!SeqV.isUndef()) 3634 SplatValue |= 3635 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); 3636 } 3637 3638 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3639 // achieve better constant materializion. 3640 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3641 SplatValue = SignExtend64<32>(SplatValue); 3642 3643 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT, 3644 DAG.getUNDEF(ViaVecVT), 3645 DAG.getConstant(SplatValue, DL, XLenVT), 3646 DAG.getConstant(0, DL, XLenVT)); 3647 if (ViaVecLen != 1) 3648 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3649 MVT::getVectorVT(ViaIntVT, 1), Vec, 3650 DAG.getConstant(0, DL, XLenVT)); 3651 return DAG.getBitcast(VT, Vec); 3652 } 3653 3654 3655 // Attempt to detect "hidden" splats, which only reveal themselves as splats 3656 // when re-interpreted as a vector with a larger element type. For example, 3657 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 3658 // could be instead splat as 3659 // v2i32 = build_vector i32 0x00010000, i32 0x00010000 3660 // TODO: This optimization could also work on non-constant splats, but it 3661 // would require bit-manipulation instructions to construct the splat value. 3662 SmallVector<SDValue> Sequence; 3663 const auto *BV = cast<BuildVectorSDNode>(Op); 3664 if (VT.isInteger() && EltBitSize < Subtarget.getELen() && 3665 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && 3666 BV->getRepeatedSequence(Sequence) && 3667 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { 3668 unsigned SeqLen = Sequence.size(); 3669 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); 3670 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || 3671 ViaIntVT == MVT::i64) && 3672 "Unexpected sequence type"); 3673 3674 // If we can use the original VL with the modified element type, this 3675 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3676 // be moved into InsertVSETVLI? 3677 const unsigned RequiredVL = NumElts / SeqLen; 3678 const unsigned ViaVecLen = 3679 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? 3680 NumElts : RequiredVL; 3681 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3682 3683 unsigned EltIdx = 0; 3684 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3685 uint64_t SplatValue = 0; 3686 // Construct the amalgamated value which can be splatted as this larger 3687 // vector type. 3688 for (const auto &SeqV : Sequence) { 3689 if (!SeqV.isUndef()) 3690 SplatValue |= 3691 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); 3692 EltIdx++; 3693 } 3694 3695 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3696 // achieve better constant materializion. 3697 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3698 SplatValue = SignExtend64<32>(SplatValue); 3699 3700 // Since we can't introduce illegal i64 types at this stage, we can only 3701 // perform an i64 splat on RV32 if it is its own sign-extended value. That 3702 // way we can use RVV instructions to splat. 
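    // E.g. on RV32 an i64 splat of -1 is fine because vmv.v.x sign-extends
    // the 32-bit scalar to SEW, whereas a value such as 0x100000000 is not.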
3703 assert((ViaIntVT.bitsLE(XLenVT) || 3704 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && 3705 "Unexpected bitcast sequence"); 3706 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { 3707 SDValue ViaVL = 3708 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); 3709 MVT ViaContainerVT = 3710 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); 3711 SDValue Splat = 3712 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, 3713 DAG.getUNDEF(ViaContainerVT), 3714 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); 3715 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); 3716 if (ViaVecLen != RequiredVL) 3717 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3718 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 3719 DAG.getConstant(0, DL, XLenVT)); 3720 return DAG.getBitcast(VT, Splat); 3721 } 3722 } 3723 3724 // If the number of signbits allows, see if we can lower as a <N x i8>. 3725 // Our main goal here is to reduce LMUL (and thus work) required to 3726 // build the constant, but we will also narrow if the resulting 3727 // narrow vector is known to materialize cheaply. 3728 // TODO: We really should be costing the smaller vector. There are 3729 // profitable cases this misses. 3730 if (EltBitSize > 8 && VT.isInteger() && 3731 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { 3732 unsigned SignBits = DAG.ComputeNumSignBits(Op); 3733 if (EltBitSize - SignBits < 8) { 3734 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), 3735 DL, Op->ops()); 3736 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), 3737 Source, DAG, Subtarget); 3738 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); 3739 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3740 } 3741 } 3742 3743 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 3744 return Res; 3745 3746 // For constant vectors, use generic constant pool lowering. Otherwise, 3747 // we'd have to materialize constants in GPRs just to move them into the 3748 // vector. 3749 return SDValue(); 3750 } 3751 3752 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3753 const RISCVSubtarget &Subtarget) { 3754 MVT VT = Op.getSimpleValueType(); 3755 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3756 3757 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 3758 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 3759 return lowerBuildVectorOfConstants(Op, DAG, Subtarget); 3760 3761 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3762 3763 SDLoc DL(Op); 3764 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3765 3766 MVT XLenVT = Subtarget.getXLenVT(); 3767 3768 if (VT.getVectorElementType() == MVT::i1) { 3769 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask 3770 // vector type, we have a legal equivalently-sized i8 type, so we can use 3771 // that. 3772 MVT WideVecVT = VT.changeVectorElementType(MVT::i8); 3773 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); 3774 3775 SDValue WideVec; 3776 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3777 // For a splat, perform a scalar truncate before creating the wider 3778 // vector. 
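      // (The AND with 1 below keeps only the low boolean bit, so the SETNE
      // against zero recovers the original i1 value.)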
3779 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat, 3780 DAG.getConstant(1, DL, Splat.getValueType())); 3781 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); 3782 } else { 3783 SmallVector<SDValue, 8> Ops(Op->op_values()); 3784 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); 3785 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); 3786 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); 3787 } 3788 3789 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); 3790 } 3791 3792 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3793 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) 3794 return Gather; 3795 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 3796 : RISCVISD::VMV_V_X_VL; 3797 if (!VT.isFloatingPoint()) 3798 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 3799 Splat = 3800 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 3801 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3802 } 3803 3804 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 3805 return Res; 3806 3807 // If we're compiling for an exact VLEN value, we can split our work per 3808 // register in the register group. 3809 const unsigned MinVLen = Subtarget.getRealMinVLen(); 3810 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 3811 if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) { 3812 MVT ElemVT = VT.getVectorElementType(); 3813 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 3814 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3815 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 3816 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 3817 assert(M1VT == getLMUL1VT(M1VT)); 3818 3819 // The following semantically builds up a fixed length concat_vector 3820 // of the component build_vectors. We eagerly lower to scalable and 3821 // insert_subvector here to avoid DAG combining it back to a large 3822 // build_vector. 3823 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); 3824 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 3825 SDValue Vec = DAG.getUNDEF(ContainerVT); 3826 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { 3827 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg); 3828 SDValue SubBV = 3829 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps); 3830 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget); 3831 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; 3832 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV, 3833 DAG.getVectorIdxConstant(InsertIdx, DL)); 3834 } 3835 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3836 } 3837 3838 // Cap the cost at a value linear to the number of elements in the vector. 3839 // The default lowering is to use the stack. The vector store + scalar loads 3840 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up 3841 // being (at least) linear in LMUL. As a result, using the vslidedown 3842 // lowering for every element ends up being VL*LMUL.. 3843 // TODO: Should we be directly costing the stack alternative? Doing so might 3844 // give us a more accurate upper bound. 3845 InstructionCost LinearBudget = VT.getVectorNumElements() * 2; 3846 3847 // TODO: unify with TTI getSlideCost. 
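  // A single vslide1down touches the whole register group, so its cost scales
  // with LMUL; reflect that in the per-slide cost below.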
3848 InstructionCost PerSlideCost = 1; 3849 switch (RISCVTargetLowering::getLMUL(ContainerVT)) { 3850 default: break; 3851 case RISCVII::VLMUL::LMUL_2: 3852 PerSlideCost = 2; 3853 break; 3854 case RISCVII::VLMUL::LMUL_4: 3855 PerSlideCost = 4; 3856 break; 3857 case RISCVII::VLMUL::LMUL_8: 3858 PerSlideCost = 8; 3859 break; 3860 } 3861 3862 // TODO: Should we be using the build instseq then cost + evaluate scheme 3863 // we use for integer constants here? 3864 unsigned UndefCount = 0; 3865 for (const SDValue &V : Op->ops()) { 3866 if (V.isUndef()) { 3867 UndefCount++; 3868 continue; 3869 } 3870 if (UndefCount) { 3871 LinearBudget -= PerSlideCost; 3872 UndefCount = 0; 3873 } 3874 LinearBudget -= PerSlideCost; 3875 } 3876 if (UndefCount) { 3877 LinearBudget -= PerSlideCost; 3878 } 3879 3880 if (LinearBudget < 0) 3881 return SDValue(); 3882 3883 assert((!VT.isFloatingPoint() || 3884 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && 3885 "Illegal type which will result in reserved encoding"); 3886 3887 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3888 3889 SDValue Vec; 3890 UndefCount = 0; 3891 for (SDValue V : Op->ops()) { 3892 if (V.isUndef()) { 3893 UndefCount++; 3894 continue; 3895 } 3896 3897 // Start our sequence with a TA splat in the hopes that hardware is able to 3898 // recognize there's no dependency on the prior value of our temporary 3899 // register. 3900 if (!Vec) { 3901 Vec = DAG.getSplatVector(VT, DL, V); 3902 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3903 UndefCount = 0; 3904 continue; 3905 } 3906 3907 if (UndefCount) { 3908 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3909 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3910 Vec, Offset, Mask, VL, Policy); 3911 UndefCount = 0; 3912 } 3913 auto OpCode = 3914 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3915 if (!VT.isFloatingPoint()) 3916 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); 3917 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3918 V, Mask, VL); 3919 } 3920 if (UndefCount) { 3921 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3922 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3923 Vec, Offset, Mask, VL, Policy); 3924 } 3925 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3926 } 3927 3928 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3929 SDValue Lo, SDValue Hi, SDValue VL, 3930 SelectionDAG &DAG) { 3931 if (!Passthru) 3932 Passthru = DAG.getUNDEF(VT); 3933 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 3934 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 3935 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 3936 // If Hi constant is all the same sign bit as Lo, lower this as a custom 3937 // node in order to try and match RVV vector/scalar instructions. 3938 if ((LoC >> 31) == HiC) 3939 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3940 3941 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, 3942 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use 3943 // vlmax vsetvli or vsetivli to change the VL. 3944 // FIXME: Support larger constants? 3945 // FIXME: Support non-constant VLs by saturating? 
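    // E.g. splatting the i64 value 0x0000000500000005 with VL=2 can instead
    // be emitted as a vmv.v.x of 5 with SEW=32 and VL=4.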
3946 if (LoC == HiC) { 3947 SDValue NewVL; 3948 if (isAllOnesConstant(VL) || 3949 (isa<RegisterSDNode>(VL) && 3950 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) 3951 NewVL = DAG.getRegister(RISCV::X0, MVT::i32); 3952 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal())) 3953 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); 3954 3955 if (NewVL) { 3956 MVT InterVT = 3957 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 3958 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, 3959 DAG.getUNDEF(InterVT), Lo, 3960 DAG.getRegister(RISCV::X0, MVT::i32)); 3961 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); 3962 } 3963 } 3964 } 3965 3966 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 3967 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 3968 isa<ConstantSDNode>(Hi.getOperand(1)) && 3969 Hi.getConstantOperandVal(1) == 31) 3970 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3971 3972 // If the hi bits of the splat are undefined, then it's fine to just splat Lo 3973 // even if it might be sign extended. 3974 if (Hi.isUndef()) 3975 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3976 3977 // Fall back to a stack store and stride x0 vector load. 3978 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, 3979 Hi, VL); 3980 } 3981 3982 // Called by type legalization to handle splat of i64 on RV32. 3983 // FIXME: We can optimize this when the type has sign or zero bits in one 3984 // of the halves. 3985 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3986 SDValue Scalar, SDValue VL, 3987 SelectionDAG &DAG) { 3988 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); 3989 SDValue Lo, Hi; 3990 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); 3991 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); 3992 } 3993 3994 // This function lowers a splat of a scalar operand Splat with the vector 3995 // length VL. It ensures the final sequence is type legal, which is useful when 3996 // lowering a splat after type legalization. 3997 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, 3998 MVT VT, const SDLoc &DL, SelectionDAG &DAG, 3999 const RISCVSubtarget &Subtarget) { 4000 bool HasPassthru = Passthru && !Passthru.isUndef(); 4001 if (!HasPassthru && !Passthru) 4002 Passthru = DAG.getUNDEF(VT); 4003 if (VT.isFloatingPoint()) 4004 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); 4005 4006 MVT XLenVT = Subtarget.getXLenVT(); 4007 4008 // Simplest case is that the operand needs to be promoted to XLenVT. 4009 if (Scalar.getValueType().bitsLE(XLenVT)) { 4010 // If the operand is a constant, sign extend to increase our chances 4011 // of being able to use a .vi instruction. ANY_EXTEND would become a 4012 // a zero extend and the simm5 check in isel would fail. 4013 // FIXME: Should we ignore the upper bits in isel instead? 4014 unsigned ExtOpc = 4015 isa<ConstantSDNode>(Scalar) ? 
ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4016 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4017 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 4018 } 4019 4020 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && 4021 "Unexpected scalar for splat lowering!"); 4022 4023 if (isOneConstant(VL) && isNullConstant(Scalar)) 4024 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, 4025 DAG.getConstant(0, DL, XLenVT), VL); 4026 4027 // Otherwise use the more complicated splatting algorithm. 4028 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); 4029 } 4030 4031 // This function lowers an insert of a scalar operand Scalar into lane 4032 // 0 of the vector regardless of the value of VL. The contents of the 4033 // remaining lanes of the result vector are unspecified. VL is assumed 4034 // to be non-zero. 4035 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, 4036 const SDLoc &DL, SelectionDAG &DAG, 4037 const RISCVSubtarget &Subtarget) { 4038 assert(VT.isScalableVector() && "Expect VT is scalable vector type."); 4039 4040 const MVT XLenVT = Subtarget.getXLenVT(); 4041 SDValue Passthru = DAG.getUNDEF(VT); 4042 4043 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4044 isNullConstant(Scalar.getOperand(1))) { 4045 SDValue ExtractedVal = Scalar.getOperand(0); 4046 // The element types must be the same. 4047 if (ExtractedVal.getValueType().getVectorElementType() == 4048 VT.getVectorElementType()) { 4049 MVT ExtractedVT = ExtractedVal.getSimpleValueType(); 4050 MVT ExtractedContainerVT = ExtractedVT; 4051 if (ExtractedContainerVT.isFixedLengthVector()) { 4052 ExtractedContainerVT = getContainerForFixedLengthVector( 4053 DAG, ExtractedContainerVT, Subtarget); 4054 ExtractedVal = convertToScalableVector(ExtractedContainerVT, 4055 ExtractedVal, DAG, Subtarget); 4056 } 4057 if (ExtractedContainerVT.bitsLE(VT)) 4058 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, 4059 ExtractedVal, DAG.getConstant(0, DL, XLenVT)); 4060 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, 4061 DAG.getConstant(0, DL, XLenVT)); 4062 } 4063 } 4064 4065 4066 if (VT.isFloatingPoint()) 4067 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, 4068 DAG.getUNDEF(VT), Scalar, VL); 4069 4070 // Avoid the tricky legalization cases by falling back to using the 4071 // splat code which already handles it gracefully. 4072 if (!Scalar.getValueType().bitsLE(XLenVT)) 4073 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar, 4074 DAG.getConstant(1, DL, XLenVT), 4075 VT, DL, DAG, Subtarget); 4076 4077 // If the operand is a constant, sign extend to increase our chances 4078 // of being able to use a .vi instruction. ANY_EXTEND would become a 4079 // a zero extend and the simm5 check in isel would fail. 4080 // FIXME: Should we ignore the upper bits in isel instead? 4081 unsigned ExtOpc = 4082 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4083 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4084 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, 4085 DAG.getUNDEF(VT), Scalar, VL); 4086 } 4087 4088 // Is this a shuffle extracts either the even or odd elements of a vector? 4089 // That is, specifically, either (a) or (b) below. 
4090 // t34: v8i8 = extract_subvector t11, Constant:i64<0> 4091 // t33: v8i8 = extract_subvector t11, Constant:i64<8> 4092 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 4093 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 4094 // Returns {Src Vector, Even Elements} om success 4095 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, 4096 SDValue V2, ArrayRef<int> Mask, 4097 const RISCVSubtarget &Subtarget) { 4098 // Need to be able to widen the vector. 4099 if (VT.getScalarSizeInBits() >= Subtarget.getELen()) 4100 return false; 4101 4102 // Both input must be extracts. 4103 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || 4104 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) 4105 return false; 4106 4107 // Extracting from the same source. 4108 SDValue Src = V1.getOperand(0); 4109 if (Src != V2.getOperand(0)) 4110 return false; 4111 4112 // Src needs to have twice the number of elements. 4113 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) 4114 return false; 4115 4116 // The extracts must extract the two halves of the source. 4117 if (V1.getConstantOperandVal(1) != 0 || 4118 V2.getConstantOperandVal(1) != Mask.size()) 4119 return false; 4120 4121 // First index must be the first even or odd element from V1. 4122 if (Mask[0] != 0 && Mask[0] != 1) 4123 return false; 4124 4125 // The others must increase by 2 each time. 4126 // TODO: Support undef elements? 4127 for (unsigned i = 1; i != Mask.size(); ++i) 4128 if (Mask[i] != Mask[i - 1] + 2) 4129 return false; 4130 4131 return true; 4132 } 4133 4134 /// Is this shuffle interleaving contiguous elements from one vector into the 4135 /// even elements and contiguous elements from another vector into the odd 4136 /// elements. \p EvenSrc will contain the element that should be in the first 4137 /// even element. \p OddSrc will contain the element that should be in the first 4138 /// odd element. These can be the first element in a source or the element half 4139 /// way through the source. 4140 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, 4141 int &OddSrc, const RISCVSubtarget &Subtarget) { 4142 // We need to be able to widen elements to the next larger integer type. 4143 if (VT.getScalarSizeInBits() >= Subtarget.getELen()) 4144 return false; 4145 4146 int Size = Mask.size(); 4147 int NumElts = VT.getVectorNumElements(); 4148 assert(Size == (int)NumElts && "Unexpected mask size"); 4149 4150 SmallVector<unsigned, 2> StartIndexes; 4151 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes)) 4152 return false; 4153 4154 EvenSrc = StartIndexes[0]; 4155 OddSrc = StartIndexes[1]; 4156 4157 // One source should be low half of first vector. 4158 if (EvenSrc != 0 && OddSrc != 0) 4159 return false; 4160 4161 // Subvectors will be subtracted from either at the start of the two input 4162 // vectors, or at the start and middle of the first vector if it's an unary 4163 // interleave. 4164 // In both cases, HalfNumElts will be extracted. 4165 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise 4166 // we'll create an illegal extract_subvector. 4167 // FIXME: We could support other values using a slidedown first. 4168 int HalfNumElts = NumElts / 2; 4169 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0); 4170 } 4171 4172 /// Match shuffles that concatenate two vectors, rotate the concatenation, 4173 /// and then extract the original number of elements from the rotated result. 
4174 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The 4175 /// returned rotation amount is for a rotate right, where elements move from 4176 /// higher elements to lower elements. \p LoSrc indicates the first source 4177 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 4178 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 4179 /// 0 or 1 if a rotation is found. 4180 /// 4181 /// NOTE: We talk about rotate to the right which matches how bit shift and 4182 /// rotate instructions are described where LSBs are on the right, but LLVM IR 4183 /// and the table below write vectors with the lowest elements on the left. 4184 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { 4185 int Size = Mask.size(); 4186 4187 // We need to detect various ways of spelling a rotation: 4188 // [11, 12, 13, 14, 15, 0, 1, 2] 4189 // [-1, 12, 13, 14, -1, -1, 1, -1] 4190 // [-1, -1, -1, -1, -1, -1, 1, 2] 4191 // [ 3, 4, 5, 6, 7, 8, 9, 10] 4192 // [-1, 4, 5, 6, -1, -1, 9, -1] 4193 // [-1, 4, 5, 6, -1, -1, -1, -1] 4194 int Rotation = 0; 4195 LoSrc = -1; 4196 HiSrc = -1; 4197 for (int i = 0; i != Size; ++i) { 4198 int M = Mask[i]; 4199 if (M < 0) 4200 continue; 4201 4202 // Determine where a rotate vector would have started. 4203 int StartIdx = i - (M % Size); 4204 // The identity rotation isn't interesting, stop. 4205 if (StartIdx == 0) 4206 return -1; 4207 4208 // If we found the tail of a vector the rotation must be the missing 4209 // front. If we found the head of a vector, it must be how much of the 4210 // head. 4211 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; 4212 4213 if (Rotation == 0) 4214 Rotation = CandidateRotation; 4215 else if (Rotation != CandidateRotation) 4216 // The rotations don't match, so we can't match this mask. 4217 return -1; 4218 4219 // Compute which value this mask is pointing at. 4220 int MaskSrc = M < Size ? 0 : 1; 4221 4222 // Compute which of the two target values this index should be assigned to. 4223 // This reflects whether the high elements are remaining or the low elemnts 4224 // are remaining. 4225 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; 4226 4227 // Either set up this value if we've not encountered it before, or check 4228 // that it remains consistent. 4229 if (TargetSrc < 0) 4230 TargetSrc = MaskSrc; 4231 else if (TargetSrc != MaskSrc) 4232 // This may be a rotation, but it pulls from the inputs in some 4233 // unsupported interleaving. 4234 return -1; 4235 } 4236 4237 // Check that we successfully analyzed the mask, and normalize the results. 4238 assert(Rotation != 0 && "Failed to locate a viable rotation!"); 4239 assert((LoSrc >= 0 || HiSrc >= 0) && 4240 "Failed to find a rotated input vector!"); 4241 4242 return Rotation; 4243 } 4244 4245 // Lower a deinterleave shuffle to vnsrl. 
4246 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) 4247 // -> [p, q, r, s] (EvenElts == false) 4248 // VT is the type of the vector to return, <[vscale x ]n x ty> 4249 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> 4250 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, 4251 bool EvenElts, 4252 const RISCVSubtarget &Subtarget, 4253 SelectionDAG &DAG) { 4254 // The result is a vector of type <m x n x ty> 4255 MVT ContainerVT = VT; 4256 // Convert fixed vectors to scalable if needed 4257 if (ContainerVT.isFixedLengthVector()) { 4258 assert(Src.getSimpleValueType().isFixedLengthVector()); 4259 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 4260 4261 // The source is a vector of type <m x n*2 x ty> 4262 MVT SrcContainerVT = 4263 MVT::getVectorVT(ContainerVT.getVectorElementType(), 4264 ContainerVT.getVectorElementCount() * 2); 4265 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 4266 } 4267 4268 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4269 4270 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> 4271 // This also converts FP to int. 4272 unsigned EltBits = ContainerVT.getScalarSizeInBits(); 4273 MVT WideSrcContainerVT = MVT::getVectorVT( 4274 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount()); 4275 Src = DAG.getBitcast(WideSrcContainerVT, Src); 4276 4277 // The integer version of the container type. 4278 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); 4279 4280 // If we want even elements, then the shift amount is 0. Otherwise, shift by 4281 // the original element size. 4282 unsigned Shift = EvenElts ? 0 : EltBits; 4283 SDValue SplatShift = DAG.getNode( 4284 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), 4285 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); 4286 SDValue Res = 4287 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, 4288 DAG.getUNDEF(IntContainerVT), TrueMask, VL); 4289 // Cast back to FP if needed. 4290 Res = DAG.getBitcast(ContainerVT, Res); 4291 4292 if (VT.isFixedLengthVector()) 4293 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 4294 return Res; 4295 } 4296 4297 // Lower the following shuffle to vslidedown. 4298 // a) 4299 // t49: v8i8 = extract_subvector t13, Constant:i64<0> 4300 // t109: v8i8 = extract_subvector t13, Constant:i64<8> 4301 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 4302 // b) 4303 // t69: v16i16 = extract_subvector t68, Constant:i64<0> 4304 // t23: v8i16 = extract_subvector t69, Constant:i64<0> 4305 // t29: v4i16 = extract_subvector t23, Constant:i64<4> 4306 // t26: v8i16 = extract_subvector t69, Constant:i64<8> 4307 // t30: v4i16 = extract_subvector t26, Constant:i64<0> 4308 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 4309 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, 4310 SDValue V1, SDValue V2, 4311 ArrayRef<int> Mask, 4312 const RISCVSubtarget &Subtarget, 4313 SelectionDAG &DAG) { 4314 auto findNonEXTRACT_SUBVECTORParent = 4315 [](SDValue Parent) -> std::pair<SDValue, uint64_t> { 4316 uint64_t Offset = 0; 4317 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && 4318 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 4319 // a scalable vector. But we don't want to match the case. 
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask must also be contiguous (each index one greater than the last).
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}

// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in-place vector, i.e.
inserting right 4408 // up to the very end of it, then we don't actually care about the tail. 4409 if (NumSubElts + Index >= (int)NumElts) 4410 Policy |= RISCVII::TAIL_AGNOSTIC; 4411 4412 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget); 4413 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget); 4414 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT); 4415 4416 SDValue Res; 4417 // If we're inserting into the lowest elements, use a tail undisturbed 4418 // vmv.v.v. 4419 if (Index == 0) 4420 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert, 4421 VL); 4422 else 4423 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert, 4424 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy); 4425 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4426 } 4427 4428 /// Match v(f)slide1up/down idioms. These operations involve sliding 4429 /// N-1 elements to make room for an inserted scalar at one end. 4430 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, 4431 SDValue V1, SDValue V2, 4432 ArrayRef<int> Mask, 4433 const RISCVSubtarget &Subtarget, 4434 SelectionDAG &DAG) { 4435 bool OpsSwapped = false; 4436 if (!isa<BuildVectorSDNode>(V1)) { 4437 if (!isa<BuildVectorSDNode>(V2)) 4438 return SDValue(); 4439 std::swap(V1, V2); 4440 OpsSwapped = true; 4441 } 4442 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue(); 4443 if (!Splat) 4444 return SDValue(); 4445 4446 // Return true if the mask could describe a slide of Mask.size() - 1 4447 // elements from concat_vector(V1, V2)[Base:] to [Offset:]. 4448 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { 4449 const unsigned S = (Offset > 0) ? 0 : -Offset; 4450 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); 4451 for (unsigned i = S; i != E; ++i) 4452 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) 4453 return false; 4454 return true; 4455 }; 4456 4457 const unsigned NumElts = VT.getVectorNumElements(); 4458 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); 4459 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) 4460 return SDValue(); 4461 4462 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; 4463 // Inserted lane must come from splat, undef scalar is legal but not profitable. 4464 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) 4465 return SDValue(); 4466 4467 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4468 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4469 auto OpCode = IsVSlidedown ? 4470 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : 4471 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); 4472 if (!VT.isFloatingPoint()) 4473 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat); 4474 auto Vec = DAG.getNode(OpCode, DL, ContainerVT, 4475 DAG.getUNDEF(ContainerVT), 4476 convertToScalableVector(ContainerVT, V2, DAG, Subtarget), 4477 Splat, TrueMask, VL); 4478 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4479 } 4480 4481 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 4482 // to create an interleaved vector of <[vscale x] n*2 x ty>. 4483 // This requires that the size of ty is less than the subtarget's maximum ELEN. 
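// The even lanes of the result come from EvenV and the odd lanes from OddV,
// i.e. Result[2*i] = EvenV[i] and Result[2*i+1] = OddV[i].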
4484 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4485 const SDLoc &DL, SelectionDAG &DAG,
4486 const RISCVSubtarget &Subtarget) {
4487 MVT VecVT = EvenV.getSimpleValueType();
4488 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4489 // Convert fixed vectors to scalable if needed
4490 if (VecContainerVT.isFixedLengthVector()) {
4491 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4492 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4493 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4494 }
4495
4496 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4497
4498 // We're working with a vector of the same size as the resulting
4499 // interleaved vector, but with half the number of elements and
4500 // twice the SEW (hence the restriction on not using the maximum
4501 // ELEN).
4502 MVT WideVT =
4503 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4504 VecVT.getVectorElementCount());
4505 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4506 if (WideContainerVT.isFixedLengthVector())
4507 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4508
4509 // Bitcast the input vectors to integers in case they are FP
4510 VecContainerVT = VecContainerVT.changeTypeToInteger();
4511 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4512 OddV = DAG.getBitcast(VecContainerVT, OddV);
4513
4514 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4515 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4516
4517 SDValue Interleaved;
4518 if (Subtarget.hasStdExtZvbb()) {
4519 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4520 SDValue OffsetVec =
4521 DAG.getSplatVector(VecContainerVT, DL,
4522 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4523 Subtarget.getXLenVT()));
4524 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4525 OffsetVec, Passthru, Mask, VL);
4526 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4527 Interleaved, EvenV, Passthru, Mask, VL);
4528 } else {
4529 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4530 // vwaddu.vv
4531 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4532 OddV, Passthru, Mask, VL);
4533
4534 // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1)
4535 SDValue AllOnesVec = DAG.getSplatVector(
4536 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4537 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4538 OddV, AllOnesVec, Passthru, Mask, VL);
4539
4540 // Add the two together so we get
4541 // (OddV * 0xff...ff) + (OddV + EvenV)
4542 // = (OddV * 0x100...00) + EvenV
4543 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4544 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4545 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4546 Interleaved, OddsMul, Passthru, Mask, VL);
4547 }
4548
4549 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4550 MVT ResultContainerVT = MVT::getVectorVT(
4551 VecVT.getVectorElementType(), // Make sure to use original type
4552 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4553 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4554
4555 // Convert back to a fixed vector if needed
4556 MVT ResultVT =
4557 MVT::getVectorVT(VecVT.getVectorElementType(),
4558 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4559 if
(ResultVT.isFixedLengthVector()) 4560 Interleaved = 4561 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget); 4562 4563 return Interleaved; 4564 } 4565 4566 // If we have a vector of bits that we want to reverse, we can use a vbrev on a 4567 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. 4568 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, 4569 SelectionDAG &DAG, 4570 const RISCVSubtarget &Subtarget) { 4571 SDLoc DL(SVN); 4572 MVT VT = SVN->getSimpleValueType(0); 4573 SDValue V = SVN->getOperand(0); 4574 unsigned NumElts = VT.getVectorNumElements(); 4575 4576 assert(VT.getVectorElementType() == MVT::i1); 4577 4578 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(), 4579 SVN->getMask().size()) || 4580 !SVN->getOperand(1).isUndef()) 4581 return SDValue(); 4582 4583 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts)); 4584 EVT ViaVT = EVT::getVectorVT( 4585 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1); 4586 EVT ViaBitVT = 4587 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits()); 4588 4589 // If we don't have zvbb or the larger element type > ELEN, the operation will 4590 // be illegal. 4591 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE, 4592 ViaVT) || 4593 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT)) 4594 return SDValue(); 4595 4596 // If the bit vector doesn't fit exactly into the larger element type, we need 4597 // to insert it into the larger vector and then shift up the reversed bits 4598 // afterwards to get rid of the gap introduced. 4599 if (ViaEltSize > NumElts) 4600 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT), 4601 V, DAG.getVectorIdxConstant(0, DL)); 4602 4603 SDValue Res = 4604 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V)); 4605 4606 // Shift up the reversed bits if the vector didn't exactly fit into the larger 4607 // element type. 4608 if (ViaEltSize > NumElts) 4609 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res, 4610 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT)); 4611 4612 Res = DAG.getBitcast(ViaBitVT, Res); 4613 4614 if (ViaEltSize > NumElts) 4615 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, 4616 DAG.getVectorIdxConstant(0, DL)); 4617 return Res; 4618 } 4619 4620 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can 4621 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this 4622 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. 4623 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, 4624 SelectionDAG &DAG, 4625 const RISCVSubtarget &Subtarget) { 4626 SDLoc DL(SVN); 4627 4628 EVT VT = SVN->getValueType(0); 4629 unsigned NumElts = VT.getVectorNumElements(); 4630 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 4631 unsigned NumSubElts, RotateAmt; 4632 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, 4633 NumElts, NumSubElts, RotateAmt)) 4634 return SDValue(); 4635 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts), 4636 NumElts / NumSubElts); 4637 4638 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. 4639 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT)) 4640 return SDValue(); 4641 4642 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0)); 4643 4644 SDValue Rotate; 4645 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, 4646 // so canonicalize to vrev8. 
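// (Swapping the two bytes of an i16 element is exactly a rotate by 8 bits.)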
4647 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) 4648 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op); 4649 else 4650 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op, 4651 DAG.getConstant(RotateAmt, DL, RotateVT)); 4652 4653 return DAG.getBitcast(VT, Rotate); 4654 } 4655 4656 // If compiling with an exactly known VLEN, see if we can split a 4657 // shuffle on m2 or larger into a small number of m1 sized shuffles 4658 // which write each destination registers exactly once. 4659 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, 4660 SelectionDAG &DAG, 4661 const RISCVSubtarget &Subtarget) { 4662 SDLoc DL(SVN); 4663 MVT VT = SVN->getSimpleValueType(0); 4664 SDValue V1 = SVN->getOperand(0); 4665 SDValue V2 = SVN->getOperand(1); 4666 ArrayRef<int> Mask = SVN->getMask(); 4667 unsigned NumElts = VT.getVectorNumElements(); 4668 4669 // If we don't know exact data layout, not much we can do. If this 4670 // is already m1 or smaller, no point in splitting further. 4671 const unsigned MinVLen = Subtarget.getRealMinVLen(); 4672 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 4673 if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen) 4674 return SDValue(); 4675 4676 MVT ElemVT = VT.getVectorElementType(); 4677 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 4678 unsigned VRegsPerSrc = NumElts / ElemsPerVReg; 4679 4680 SmallVector<std::pair<int, SmallVector<int>>> 4681 OutMasks(VRegsPerSrc, {-1, {}}); 4682 4683 // Check if our mask can be done as a 1-to-1 mapping from source 4684 // to destination registers in the group without needing to 4685 // write each destination more than once. 4686 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { 4687 int DstVecIdx = DstIdx / ElemsPerVReg; 4688 int DstSubIdx = DstIdx % ElemsPerVReg; 4689 int SrcIdx = Mask[DstIdx]; 4690 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) 4691 continue; 4692 int SrcVecIdx = SrcIdx / ElemsPerVReg; 4693 int SrcSubIdx = SrcIdx % ElemsPerVReg; 4694 if (OutMasks[DstVecIdx].first == -1) 4695 OutMasks[DstVecIdx].first = SrcVecIdx; 4696 if (OutMasks[DstVecIdx].first != SrcVecIdx) 4697 // Note: This case could easily be handled by keeping track of a chain 4698 // of source values and generating two element shuffles below. This is 4699 // less an implementation question, and more a profitability one. 4700 return SDValue(); 4701 4702 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1); 4703 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; 4704 } 4705 4706 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4707 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 4708 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 4709 assert(M1VT == getLMUL1VT(M1VT)); 4710 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 4711 SDValue Vec = DAG.getUNDEF(ContainerVT); 4712 // The following semantically builds up a fixed length concat_vector 4713 // of the component shuffle_vectors. We eagerly lower to scalable here 4714 // to avoid DAG combining it back to a large shuffle_vector again. 4715 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4716 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 4717 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { 4718 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; 4719 if (SrcVecIdx == -1) 4720 continue; 4721 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; 4722 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? 
V2 : V1; 4723 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec, 4724 DAG.getVectorIdxConstant(ExtractIdx, DL)); 4725 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget); 4726 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask); 4727 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget); 4728 unsigned InsertIdx = DstVecIdx * NumOpElts; 4729 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec, 4730 DAG.getVectorIdxConstant(InsertIdx, DL)); 4731 } 4732 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4733 } 4734 4735 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 4736 const RISCVSubtarget &Subtarget) { 4737 SDValue V1 = Op.getOperand(0); 4738 SDValue V2 = Op.getOperand(1); 4739 SDLoc DL(Op); 4740 MVT XLenVT = Subtarget.getXLenVT(); 4741 MVT VT = Op.getSimpleValueType(); 4742 unsigned NumElts = VT.getVectorNumElements(); 4743 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 4744 4745 if (VT.getVectorElementType() == MVT::i1) { 4746 // Lower to a vror.vi of a larger element type if possible before we promote 4747 // i1s to i8s. 4748 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 4749 return V; 4750 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) 4751 return V; 4752 4753 // Promote i1 shuffle to i8 shuffle. 4754 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); 4755 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); 4756 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) 4757 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2); 4758 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask()); 4759 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT), 4760 ISD::SETNE); 4761 } 4762 4763 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4764 4765 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4766 4767 if (SVN->isSplat()) { 4768 const int Lane = SVN->getSplatIndex(); 4769 if (Lane >= 0) { 4770 MVT SVT = VT.getVectorElementType(); 4771 4772 // Turn splatted vector load into a strided load with an X0 stride. 4773 SDValue V = V1; 4774 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 4775 // with undef. 4776 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 4777 int Offset = Lane; 4778 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 4779 int OpElements = 4780 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 4781 V = V.getOperand(Offset / OpElements); 4782 Offset %= OpElements; 4783 } 4784 4785 // We need to ensure the load isn't atomic or volatile. 4786 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 4787 auto *Ld = cast<LoadSDNode>(V); 4788 Offset *= SVT.getStoreSize(); 4789 SDValue NewAddr = DAG.getMemBasePlusOffset( 4790 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL); 4791 4792 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 
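// A stride of x0 (zero) re-reads the same 64-bit element for every lane, so
// the splat is formed without needing a 64-bit scalar register on RV32.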
4793 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 4794 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 4795 SDValue IntID = 4796 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 4797 SDValue Ops[] = {Ld->getChain(), 4798 IntID, 4799 DAG.getUNDEF(ContainerVT), 4800 NewAddr, 4801 DAG.getRegister(RISCV::X0, XLenVT), 4802 VL}; 4803 SDValue NewLoad = DAG.getMemIntrinsicNode( 4804 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 4805 DAG.getMachineFunction().getMachineMemOperand( 4806 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 4807 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 4808 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 4809 } 4810 4811 // Otherwise use a scalar load and splat. This will give the best 4812 // opportunity to fold a splat into the operation. ISel can turn it into 4813 // the x0 strided load if we aren't able to fold away the select. 4814 if (SVT.isFloatingPoint()) 4815 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 4816 Ld->getPointerInfo().getWithOffset(Offset), 4817 Ld->getOriginalAlign(), 4818 Ld->getMemOperand()->getFlags()); 4819 else 4820 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 4821 Ld->getPointerInfo().getWithOffset(Offset), SVT, 4822 Ld->getOriginalAlign(), 4823 Ld->getMemOperand()->getFlags()); 4824 DAG.makeEquivalentMemoryOrdering(Ld, V); 4825 4826 unsigned Opc = 4827 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 4828 SDValue Splat = 4829 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); 4830 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 4831 } 4832 4833 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4834 assert(Lane < (int)NumElts && "Unexpected lane!"); 4835 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, 4836 V1, DAG.getConstant(Lane, DL, XLenVT), 4837 DAG.getUNDEF(ContainerVT), TrueMask, VL); 4838 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 4839 } 4840 } 4841 4842 // For exact VLEN m2 or greater, try to split to m1 operations if we 4843 // can split cleanly. 4844 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) 4845 return V; 4846 4847 ArrayRef<int> Mask = SVN->getMask(); 4848 4849 if (SDValue V = 4850 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4851 return V; 4852 4853 if (SDValue V = 4854 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4855 return V; 4856 4857 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if 4858 // available. 4859 if (Subtarget.hasStdExtZvkb()) 4860 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 4861 return V; 4862 4863 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 4864 // be undef which can be handled with a single SLIDEDOWN/UP. 4865 int LoSrc, HiSrc; 4866 int Rotation = isElementRotate(LoSrc, HiSrc, Mask); 4867 if (Rotation > 0) { 4868 SDValue LoV, HiV; 4869 if (LoSrc >= 0) { 4870 LoV = LoSrc == 0 ? V1 : V2; 4871 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); 4872 } 4873 if (HiSrc >= 0) { 4874 HiV = HiSrc == 0 ? V1 : V2; 4875 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); 4876 } 4877 4878 // We found a rotation. We need to slide HiV down by Rotation. Then we need 4879 // to slide LoV up by (NumElts - Rotation). 
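// The slideup only writes elements at and above its offset, so the elements
// already produced by the slidedown are left intact.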
4880 unsigned InvRotate = NumElts - Rotation; 4881 4882 SDValue Res = DAG.getUNDEF(ContainerVT); 4883 if (HiV) { 4884 // Even though we could use a smaller VL, don't to avoid a vsetivli 4885 // toggle. 4886 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, 4887 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL); 4888 } 4889 if (LoV) 4890 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, 4891 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, 4892 RISCVII::TAIL_AGNOSTIC); 4893 4894 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4895 } 4896 4897 // If this is a deinterleave and we can widen the vector, then we can use 4898 // vnsrl to deinterleave. 4899 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { 4900 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, 4901 Subtarget, DAG); 4902 } 4903 4904 if (SDValue V = 4905 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4906 return V; 4907 4908 // Detect an interleave shuffle and lower to 4909 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 4910 int EvenSrc, OddSrc; 4911 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { 4912 // Extract the halves of the vectors. 4913 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 4914 4915 int Size = Mask.size(); 4916 SDValue EvenV, OddV; 4917 assert(EvenSrc >= 0 && "Undef source?"); 4918 EvenV = (EvenSrc / Size) == 0 ? V1 : V2; 4919 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV, 4920 DAG.getConstant(EvenSrc % Size, DL, XLenVT)); 4921 4922 assert(OddSrc >= 0 && "Undef source?"); 4923 OddV = (OddSrc / Size) == 0 ? V1 : V2; 4924 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV, 4925 DAG.getConstant(OddSrc % Size, DL, XLenVT)); 4926 4927 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); 4928 } 4929 4930 // Detect shuffles which can be re-expressed as vector selects; these are 4931 // shuffles in which each element in the destination is taken from an element 4932 // at the corresponding index in either source vectors. 4933 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { 4934 int MaskIndex = MaskIdx.value(); 4935 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; 4936 }); 4937 4938 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 4939 4940 // By default we preserve the original operand order, and use a mask to 4941 // select LHS as true and RHS as false. However, since RVV vector selects may 4942 // feature splats but only on the LHS, we may choose to invert our mask and 4943 // instead select between RHS and LHS. 4944 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 4945 4946 if (IsSelect) { 4947 // Now construct the mask that will be used by the vselect operation. 4948 SmallVector<SDValue> MaskVals; 4949 for (int MaskIndex : Mask) { 4950 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; 4951 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 4952 } 4953 4954 if (SwapOps) 4955 std::swap(V1, V2); 4956 4957 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 4958 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 4959 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 4960 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 4961 } 4962 4963 // We might be able to express the shuffle as a bitrotate. But even if we 4964 // don't have Zvkb and have to expand, the expanded sequence of approx. 
2 4965 // shifts and a vor will have a higher throughput than a vrgather.
4966 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4967 return V;
4968
4969 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4970 // On such a large vector we're unable to use i8 as the index type.
4971 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4972 // may involve vector splitting if we're already at LMUL=8, or our
4973 // user-supplied maximum fixed-length LMUL.
4974 return SDValue();
4975 }
4976
4977 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4978 // merged with a second vrgather.
4979 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4980
4981 // Keep track of which non-undef indices are used by each LHS/RHS shuffle
4982 // half.
4983 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4984
4985 SmallVector<SDValue> MaskVals;
4986
4987 // Now construct the mask that will be used by the blended vrgather operation.
4988 // Also construct the appropriate indices into each source vector.
4989 for (int MaskIndex : Mask) {
4990 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
4991 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4992 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4993 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4994 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4995 : DAG.getUNDEF(XLenVT));
4996 GatherIndicesRHS.push_back(
4997 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4998 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4999 if (IsLHSOrUndefIndex && MaskIndex >= 0)
5000 ++LHSIndexCounts[MaskIndex];
5001 if (!IsLHSOrUndefIndex)
5002 ++RHSIndexCounts[MaskIndex - NumElts];
5003 }
5004
5005 if (SwapOps) {
5006 std::swap(V1, V2);
5007 std::swap(GatherIndicesLHS, GatherIndicesRHS);
5008 }
5009
5010 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5011 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5012 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5013
5014 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
5015 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5016 MVT IndexVT = VT.changeTypeToInteger();
5017 // Since we can't introduce illegal index types at this stage, use i16 and
5018 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5019 // than XLenVT.
5020 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5021 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5022 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5023 }
5024
5025 // If the mask allows, we can do all the index computation in 16 bits. This
5026 // requires less work and less register pressure at high LMUL, and creates
5027 // smaller constants which may be cheaper to materialize.
5028 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5029 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5030 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5031 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5032 }
5033
5034 MVT IndexContainerVT =
5035 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5036
5037 SDValue Gather;
5038 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
5039 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
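// If V1 is a splat, gathering from it with any in-range index yields the same
// value, so a plain splat of that value replaces the vrgather entirely.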
5040 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { 5041 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, 5042 Subtarget); 5043 } else { 5044 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 5045 // If only one index is used, we can use a "splat" vrgather. 5046 // TODO: We can splat the most-common index and fix-up any stragglers, if 5047 // that's beneficial. 5048 if (LHSIndexCounts.size() == 1) { 5049 int SplatIndex = LHSIndexCounts.begin()->getFirst(); 5050 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, 5051 DAG.getConstant(SplatIndex, DL, XLenVT), 5052 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5053 } else { 5054 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 5055 LHSIndices = 5056 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); 5057 5058 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 5059 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5060 } 5061 } 5062 5063 // If a second vector operand is used by this shuffle, blend it in with an 5064 // additional vrgather. 5065 if (!V2.isUndef()) { 5066 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 5067 5068 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 5069 SelectMask = 5070 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); 5071 5072 // If only one index is used, we can use a "splat" vrgather. 5073 // TODO: We can splat the most-common index and fix-up any stragglers, if 5074 // that's beneficial. 5075 if (RHSIndexCounts.size() == 1) { 5076 int SplatIndex = RHSIndexCounts.begin()->getFirst(); 5077 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, 5078 DAG.getConstant(SplatIndex, DL, XLenVT), Gather, 5079 SelectMask, VL); 5080 } else { 5081 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); 5082 RHSIndices = 5083 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); 5084 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, 5085 SelectMask, VL); 5086 } 5087 } 5088 5089 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 5090 } 5091 5092 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 5093 // Support splats for any type. These should type legalize well. 5094 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 5095 return true; 5096 5097 // Only support legal VTs for other shuffles for now. 5098 if (!isTypeLegal(VT)) 5099 return false; 5100 5101 MVT SVT = VT.getSimpleVT(); 5102 5103 // Not for i1 vectors. 5104 if (SVT.getScalarType() == MVT::i1) 5105 return false; 5106 5107 int Dummy1, Dummy2; 5108 return (isElementRotate(Dummy1, Dummy2, M) > 0) || 5109 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); 5110 } 5111 5112 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 5113 // the exponent. 5114 SDValue 5115 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, 5116 SelectionDAG &DAG) const { 5117 MVT VT = Op.getSimpleValueType(); 5118 unsigned EltSize = VT.getScalarSizeInBits(); 5119 SDValue Src = Op.getOperand(0); 5120 SDLoc DL(Op); 5121 MVT ContainerVT = VT; 5122 5123 SDValue Mask, VL; 5124 if (Op->isVPOpcode()) { 5125 Mask = Op.getOperand(1); 5126 if (VT.isFixedLengthVector()) 5127 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5128 Subtarget); 5129 VL = Op.getOperand(2); 5130 } 5131 5132 // We choose FP type that can represent the value if possible. 
Otherwise, we 5133 // use rounding to zero conversion for correct exponent of the result. 5134 // TODO: Use f16 for i8 when possible? 5135 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; 5136 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) 5137 FloatEltVT = MVT::f32; 5138 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 5139 5140 // Legal types should have been checked in the RISCVTargetLowering 5141 // constructor. 5142 // TODO: Splitting may make sense in some cases. 5143 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 5144 "Expected legal float type!"); 5145 5146 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 5147 // The trailing zero count is equal to log2 of this single bit value. 5148 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 5149 SDValue Neg = DAG.getNegative(Src, DL, VT); 5150 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 5151 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { 5152 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), 5153 Src, Mask, VL); 5154 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL); 5155 } 5156 5157 // We have a legal FP type, convert to it. 5158 SDValue FloatVal; 5159 if (FloatVT.bitsGT(VT)) { 5160 if (Op->isVPOpcode()) 5161 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL); 5162 else 5163 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 5164 } else { 5165 // Use RTZ to avoid rounding influencing exponent of FloatVal. 5166 if (VT.isFixedLengthVector()) { 5167 ContainerVT = getContainerForFixedLengthVector(VT); 5168 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 5169 } 5170 if (!Op->isVPOpcode()) 5171 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5172 SDValue RTZRM = 5173 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); 5174 MVT ContainerFloatVT = 5175 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); 5176 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, 5177 Src, Mask, RTZRM, VL); 5178 if (VT.isFixedLengthVector()) 5179 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); 5180 } 5181 // Bitcast to integer and shift the exponent to the LSB. 5182 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 5183 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 5184 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 5185 5186 SDValue Exp; 5187 // Restore back to original type. Truncation after SRL is to generate vnsrl. 5188 if (Op->isVPOpcode()) { 5189 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast, 5190 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL); 5191 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL); 5192 } else { 5193 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 5194 DAG.getConstant(ShiftAmt, DL, IntVT)); 5195 if (IntVT.bitsLT(VT)) 5196 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); 5197 else if (IntVT.bitsGT(VT)) 5198 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); 5199 } 5200 5201 // The exponent contains log2 of the value in biased form. 5202 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 5203 // For trailing zeros, we just need to subtract the bias. 
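// E.g. for an i32 input of 8 the isolated low bit is 2^3; as an f32 its
// biased exponent is 130, and 130 - 127 = 3 trailing zeros.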
5204 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 5205 return DAG.getNode(ISD::SUB, DL, VT, Exp, 5206 DAG.getConstant(ExponentBias, DL, VT)); 5207 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) 5208 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp, 5209 DAG.getConstant(ExponentBias, DL, VT), Mask, VL); 5210 5211 // For leading zeros, we need to remove the bias and convert from log2 to 5212 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 5213 unsigned Adjust = ExponentBias + (EltSize - 1); 5214 SDValue Res; 5215 if (Op->isVPOpcode()) 5216 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp, 5217 Mask, VL); 5218 else 5219 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); 5220 5221 // The above result with zero input equals to Adjust which is greater than 5222 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 5223 if (Op.getOpcode() == ISD::CTLZ) 5224 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); 5225 else if (Op.getOpcode() == ISD::VP_CTLZ) 5226 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, 5227 DAG.getConstant(EltSize, DL, VT), Mask, VL); 5228 return Res; 5229 } 5230 5231 // While RVV has alignment restrictions, we should always be able to load as a 5232 // legal equivalently-sized byte-typed vector instead. This method is 5233 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 5234 // the load is already correctly-aligned, it returns SDValue(). 5235 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 5236 SelectionDAG &DAG) const { 5237 auto *Load = cast<LoadSDNode>(Op); 5238 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 5239 5240 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5241 Load->getMemoryVT(), 5242 *Load->getMemOperand())) 5243 return SDValue(); 5244 5245 SDLoc DL(Op); 5246 MVT VT = Op.getSimpleValueType(); 5247 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5248 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5249 "Unexpected unaligned RVV load type"); 5250 MVT NewVT = 5251 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5252 assert(NewVT.isValid() && 5253 "Expecting equally-sized RVV vector types to be legal"); 5254 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 5255 Load->getPointerInfo(), Load->getOriginalAlign(), 5256 Load->getMemOperand()->getFlags()); 5257 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 5258 } 5259 5260 // While RVV has alignment restrictions, we should always be able to store as a 5261 // legal equivalently-sized byte-typed vector instead. This method is 5262 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 5263 // returns SDValue() if the store is already correctly aligned. 
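// The byte-typed vector spans the same number of bytes, so only the value
// type (and with it the alignment requirement) changes, not the stored bytes.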
5264 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 5265 SelectionDAG &DAG) const { 5266 auto *Store = cast<StoreSDNode>(Op); 5267 assert(Store && Store->getValue().getValueType().isVector() && 5268 "Expected vector store"); 5269 5270 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5271 Store->getMemoryVT(), 5272 *Store->getMemOperand())) 5273 return SDValue(); 5274 5275 SDLoc DL(Op); 5276 SDValue StoredVal = Store->getValue(); 5277 MVT VT = StoredVal.getSimpleValueType(); 5278 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5279 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5280 "Unexpected unaligned RVV store type"); 5281 MVT NewVT = 5282 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5283 assert(NewVT.isValid() && 5284 "Expecting equally-sized RVV vector types to be legal"); 5285 StoredVal = DAG.getBitcast(NewVT, StoredVal); 5286 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 5287 Store->getPointerInfo(), Store->getOriginalAlign(), 5288 Store->getMemOperand()->getFlags()); 5289 } 5290 5291 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, 5292 const RISCVSubtarget &Subtarget) { 5293 assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); 5294 5295 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); 5296 5297 // All simm32 constants should be handled by isel. 5298 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 5299 // this check redundant, but small immediates are common so this check 5300 // should have better compile time. 5301 if (isInt<32>(Imm)) 5302 return Op; 5303 5304 // We only need to cost the immediate, if constant pool lowering is enabled. 5305 if (!Subtarget.useConstantPoolForLargeInts()) 5306 return Op; 5307 5308 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 5309 if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) 5310 return Op; 5311 5312 // Optimizations below are disabled for opt size. If we're optimizing for 5313 // size, use a constant pool. 5314 if (DAG.shouldOptForSize()) 5315 return SDValue(); 5316 5317 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do 5318 // that if it will avoid a constant pool. 5319 // It will require an extra temporary register though. 5320 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 5321 // low and high 32 bits are the same and bit 31 and 63 are set. 5322 unsigned ShiftAmt, AddOpc; 5323 RISCVMatInt::InstSeq SeqLo = 5324 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 5325 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) 5326 return Op; 5327 5328 return SDValue(); 5329 } 5330 5331 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 5332 const RISCVSubtarget &Subtarget) { 5333 SDLoc dl(Op); 5334 AtomicOrdering FenceOrdering = 5335 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 5336 SyncScope::ID FenceSSID = 5337 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 5338 5339 if (Subtarget.hasStdExtZtso()) { 5340 // The only fence that needs an instruction is a sequentially-consistent 5341 // cross-thread fence. 5342 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 5343 FenceSSID == SyncScope::System) 5344 return Op; 5345 5346 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
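// Under Ztso the hardware already provides total store ordering, so these
// weaker fences do not need an instruction.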
5347 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5348 } 5349 5350 // singlethread fences only synchronize with signal handlers on the same 5351 // thread and thus only need to preserve instruction order, not actually 5352 // enforce memory ordering. 5353 if (FenceSSID == SyncScope::SingleThread) 5354 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 5355 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5356 5357 return Op; 5358 } 5359 5360 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, 5361 SelectionDAG &DAG) const { 5362 SDLoc DL(Op); 5363 MVT VT = Op.getSimpleValueType(); 5364 MVT XLenVT = Subtarget.getXLenVT(); 5365 unsigned Check = Op.getConstantOperandVal(1); 5366 unsigned TDCMask = 0; 5367 if (Check & fcSNan) 5368 TDCMask |= RISCV::FPMASK_Signaling_NaN; 5369 if (Check & fcQNan) 5370 TDCMask |= RISCV::FPMASK_Quiet_NaN; 5371 if (Check & fcPosInf) 5372 TDCMask |= RISCV::FPMASK_Positive_Infinity; 5373 if (Check & fcNegInf) 5374 TDCMask |= RISCV::FPMASK_Negative_Infinity; 5375 if (Check & fcPosNormal) 5376 TDCMask |= RISCV::FPMASK_Positive_Normal; 5377 if (Check & fcNegNormal) 5378 TDCMask |= RISCV::FPMASK_Negative_Normal; 5379 if (Check & fcPosSubnormal) 5380 TDCMask |= RISCV::FPMASK_Positive_Subnormal; 5381 if (Check & fcNegSubnormal) 5382 TDCMask |= RISCV::FPMASK_Negative_Subnormal; 5383 if (Check & fcPosZero) 5384 TDCMask |= RISCV::FPMASK_Positive_Zero; 5385 if (Check & fcNegZero) 5386 TDCMask |= RISCV::FPMASK_Negative_Zero; 5387 5388 bool IsOneBitMask = isPowerOf2_32(TDCMask); 5389 5390 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT); 5391 5392 if (VT.isVector()) { 5393 SDValue Op0 = Op.getOperand(0); 5394 MVT VT0 = Op.getOperand(0).getSimpleValueType(); 5395 5396 if (VT.isScalableVector()) { 5397 MVT DstVT = VT0.changeVectorElementTypeToInteger(); 5398 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); 5399 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5400 Mask = Op.getOperand(2); 5401 VL = Op.getOperand(3); 5402 } 5403 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, 5404 VL, Op->getFlags()); 5405 if (IsOneBitMask) 5406 return DAG.getSetCC(DL, VT, FPCLASS, 5407 DAG.getConstant(TDCMask, DL, DstVT), 5408 ISD::CondCode::SETEQ); 5409 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS, 5410 DAG.getConstant(TDCMask, DL, DstVT)); 5411 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT), 5412 ISD::SETNE); 5413 } 5414 5415 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0); 5416 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5417 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); 5418 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); 5419 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5420 Mask = Op.getOperand(2); 5421 MVT MaskContainerVT = 5422 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 5423 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 5424 VL = Op.getOperand(3); 5425 } 5426 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); 5427 5428 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, 5429 Mask, VL, Op->getFlags()); 5430 5431 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 5432 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL); 5433 if (IsOneBitMask) { 5434 SDValue VMSEQ = 5435 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5436 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ), 5437 DAG.getUNDEF(ContainerVT), 
Mask, VL}); 5438 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget); 5439 } 5440 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, 5441 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); 5442 5443 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 5444 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 5445 DAG.getUNDEF(ContainerDstVT), SplatZero, VL); 5446 5447 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5448 {AND, SplatZero, DAG.getCondCode(ISD::SETNE), 5449 DAG.getUNDEF(ContainerVT), Mask, VL}); 5450 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); 5451 } 5452 5453 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0)); 5454 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV); 5455 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT), 5456 ISD::CondCode::SETNE); 5457 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 5458 } 5459 5460 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 5461 // operations propagate nans. 5462 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, 5463 const RISCVSubtarget &Subtarget) { 5464 SDLoc DL(Op); 5465 MVT VT = Op.getSimpleValueType(); 5466 5467 SDValue X = Op.getOperand(0); 5468 SDValue Y = Op.getOperand(1); 5469 5470 if (!VT.isVector()) { 5471 MVT XLenVT = Subtarget.getXLenVT(); 5472 5473 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 5474 // ensures that when one input is a nan, the other will also be a nan 5475 // allowing the nan to propagate. If both inputs are nan, this will swap the 5476 // inputs which is harmless. 5477 5478 SDValue NewY = Y; 5479 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { 5480 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); 5481 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); 5482 } 5483 5484 SDValue NewX = X; 5485 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { 5486 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); 5487 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); 5488 } 5489 5490 unsigned Opc = 5491 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; 5492 return DAG.getNode(Opc, DL, VT, NewX, NewY); 5493 } 5494 5495 // Check no NaNs before converting to fixed vector scalable. 
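// Knowing a side is NaN-free lets us skip the corresponding vmerge fix-up
// below.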
5496 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); 5497 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); 5498 5499 MVT ContainerVT = VT; 5500 if (VT.isFixedLengthVector()) { 5501 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 5502 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 5503 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); 5504 } 5505 5506 SDValue Mask, VL; 5507 if (Op->isVPOpcode()) { 5508 Mask = Op.getOperand(2); 5509 if (VT.isFixedLengthVector()) 5510 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5511 Subtarget); 5512 VL = Op.getOperand(3); 5513 } else { 5514 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5515 } 5516 5517 SDValue NewY = Y; 5518 if (!XIsNeverNan) { 5519 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5520 {X, X, DAG.getCondCode(ISD::SETOEQ), 5521 DAG.getUNDEF(ContainerVT), Mask, VL}); 5522 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X, 5523 DAG.getUNDEF(ContainerVT), VL); 5524 } 5525 5526 SDValue NewX = X; 5527 if (!YIsNeverNan) { 5528 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5529 {Y, Y, DAG.getCondCode(ISD::SETOEQ), 5530 DAG.getUNDEF(ContainerVT), Mask, VL}); 5531 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y, 5532 DAG.getUNDEF(ContainerVT), VL); 5533 } 5534 5535 unsigned Opc = 5536 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM 5537 ? RISCVISD::VFMAX_VL 5538 : RISCVISD::VFMIN_VL; 5539 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, 5540 DAG.getUNDEF(ContainerVT), Mask, VL); 5541 if (VT.isFixedLengthVector()) 5542 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 5543 return Res; 5544 } 5545 5546 /// Get a RISC-V target specified VL op for a given SDNode. 
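/// Maps generic ISD and VP opcodes to the corresponding VL-predicated
/// RISCVISD opcodes used when lowering to RVV operations.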
5547 static unsigned getRISCVVLOp(SDValue Op) { 5548 #define OP_CASE(NODE) \ 5549 case ISD::NODE: \ 5550 return RISCVISD::NODE##_VL; 5551 #define VP_CASE(NODE) \ 5552 case ISD::VP_##NODE: \ 5553 return RISCVISD::NODE##_VL; 5554 // clang-format off 5555 switch (Op.getOpcode()) { 5556 default: 5557 llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); 5558 OP_CASE(ADD) 5559 OP_CASE(SUB) 5560 OP_CASE(MUL) 5561 OP_CASE(MULHS) 5562 OP_CASE(MULHU) 5563 OP_CASE(SDIV) 5564 OP_CASE(SREM) 5565 OP_CASE(UDIV) 5566 OP_CASE(UREM) 5567 OP_CASE(SHL) 5568 OP_CASE(SRA) 5569 OP_CASE(SRL) 5570 OP_CASE(ROTL) 5571 OP_CASE(ROTR) 5572 OP_CASE(BSWAP) 5573 OP_CASE(CTTZ) 5574 OP_CASE(CTLZ) 5575 OP_CASE(CTPOP) 5576 OP_CASE(BITREVERSE) 5577 OP_CASE(SADDSAT) 5578 OP_CASE(UADDSAT) 5579 OP_CASE(SSUBSAT) 5580 OP_CASE(USUBSAT) 5581 OP_CASE(AVGFLOORU) 5582 OP_CASE(AVGCEILU) 5583 OP_CASE(FADD) 5584 OP_CASE(FSUB) 5585 OP_CASE(FMUL) 5586 OP_CASE(FDIV) 5587 OP_CASE(FNEG) 5588 OP_CASE(FABS) 5589 OP_CASE(FSQRT) 5590 OP_CASE(SMIN) 5591 OP_CASE(SMAX) 5592 OP_CASE(UMIN) 5593 OP_CASE(UMAX) 5594 OP_CASE(STRICT_FADD) 5595 OP_CASE(STRICT_FSUB) 5596 OP_CASE(STRICT_FMUL) 5597 OP_CASE(STRICT_FDIV) 5598 OP_CASE(STRICT_FSQRT) 5599 VP_CASE(ADD) // VP_ADD 5600 VP_CASE(SUB) // VP_SUB 5601 VP_CASE(MUL) // VP_MUL 5602 VP_CASE(SDIV) // VP_SDIV 5603 VP_CASE(SREM) // VP_SREM 5604 VP_CASE(UDIV) // VP_UDIV 5605 VP_CASE(UREM) // VP_UREM 5606 VP_CASE(SHL) // VP_SHL 5607 VP_CASE(FADD) // VP_FADD 5608 VP_CASE(FSUB) // VP_FSUB 5609 VP_CASE(FMUL) // VP_FMUL 5610 VP_CASE(FDIV) // VP_FDIV 5611 VP_CASE(FNEG) // VP_FNEG 5612 VP_CASE(FABS) // VP_FABS 5613 VP_CASE(SMIN) // VP_SMIN 5614 VP_CASE(SMAX) // VP_SMAX 5615 VP_CASE(UMIN) // VP_UMIN 5616 VP_CASE(UMAX) // VP_UMAX 5617 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN 5618 VP_CASE(SETCC) // VP_SETCC 5619 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP 5620 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP 5621 VP_CASE(BITREVERSE) // VP_BITREVERSE 5622 VP_CASE(BSWAP) // VP_BSWAP 5623 VP_CASE(CTLZ) // VP_CTLZ 5624 VP_CASE(CTTZ) // VP_CTTZ 5625 VP_CASE(CTPOP) // VP_CTPOP 5626 case ISD::CTLZ_ZERO_UNDEF: 5627 case ISD::VP_CTLZ_ZERO_UNDEF: 5628 return RISCVISD::CTLZ_VL; 5629 case ISD::CTTZ_ZERO_UNDEF: 5630 case ISD::VP_CTTZ_ZERO_UNDEF: 5631 return RISCVISD::CTTZ_VL; 5632 case ISD::FMA: 5633 case ISD::VP_FMA: 5634 return RISCVISD::VFMADD_VL; 5635 case ISD::STRICT_FMA: 5636 return RISCVISD::STRICT_VFMADD_VL; 5637 case ISD::AND: 5638 case ISD::VP_AND: 5639 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5640 return RISCVISD::VMAND_VL; 5641 return RISCVISD::AND_VL; 5642 case ISD::OR: 5643 case ISD::VP_OR: 5644 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5645 return RISCVISD::VMOR_VL; 5646 return RISCVISD::OR_VL; 5647 case ISD::XOR: 5648 case ISD::VP_XOR: 5649 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5650 return RISCVISD::VMXOR_VL; 5651 return RISCVISD::XOR_VL; 5652 case ISD::VP_SELECT: 5653 case ISD::VP_MERGE: 5654 return RISCVISD::VMERGE_VL; 5655 case ISD::VP_ASHR: 5656 return RISCVISD::SRA_VL; 5657 case ISD::VP_LSHR: 5658 return RISCVISD::SRL_VL; 5659 case ISD::VP_SQRT: 5660 return RISCVISD::FSQRT_VL; 5661 case ISD::VP_SIGN_EXTEND: 5662 return RISCVISD::VSEXT_VL; 5663 case ISD::VP_ZERO_EXTEND: 5664 return RISCVISD::VZEXT_VL; 5665 case ISD::VP_FP_TO_SINT: 5666 return RISCVISD::VFCVT_RTZ_X_F_VL; 5667 case ISD::VP_FP_TO_UINT: 5668 return RISCVISD::VFCVT_RTZ_XU_F_VL; 5669 case ISD::FMINNUM: 5670 case ISD::VP_FMINNUM: 5671 return RISCVISD::VFMIN_VL; 5672 case ISD::FMAXNUM: 5673 case 
ISD::VP_FMAXNUM: 5674 return RISCVISD::VFMAX_VL; 5675 } 5676 // clang-format on 5677 #undef OP_CASE 5678 #undef VP_CASE 5679 } 5680 5681 /// Return true if a RISC-V target specified op has a merge operand. 5682 static bool hasMergeOp(unsigned Opcode) { 5683 assert(Opcode > RISCVISD::FIRST_NUMBER && 5684 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 5685 "not a RISC-V target specific op"); 5686 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 5687 126 && 5688 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 5689 ISD::FIRST_TARGET_STRICTFP_OPCODE == 5690 21 && 5691 "adding target specific op should update this function"); 5692 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) 5693 return true; 5694 if (Opcode == RISCVISD::FCOPYSIGN_VL) 5695 return true; 5696 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) 5697 return true; 5698 if (Opcode == RISCVISD::SETCC_VL) 5699 return true; 5700 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) 5701 return true; 5702 if (Opcode == RISCVISD::VMERGE_VL) 5703 return true; 5704 return false; 5705 } 5706 5707 /// Return true if a RISC-V target specified op has a mask operand. 5708 static bool hasMaskOp(unsigned Opcode) { 5709 assert(Opcode > RISCVISD::FIRST_NUMBER && 5710 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 5711 "not a RISC-V target specific op"); 5712 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 5713 126 && 5714 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 5715 ISD::FIRST_TARGET_STRICTFP_OPCODE == 5716 21 && 5717 "adding target specific op should update this function"); 5718 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) 5719 return true; 5720 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) 5721 return true; 5722 if (Opcode >= RISCVISD::STRICT_FADD_VL && 5723 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) 5724 return true; 5725 return false; 5726 } 5727 5728 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { 5729 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 5730 SDLoc DL(Op); 5731 5732 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5733 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5734 5735 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5736 if (!Op.getOperand(j).getValueType().isVector()) { 5737 LoOperands[j] = Op.getOperand(j); 5738 HiOperands[j] = Op.getOperand(j); 5739 continue; 5740 } 5741 std::tie(LoOperands[j], HiOperands[j]) = 5742 DAG.SplitVector(Op.getOperand(j), DL); 5743 } 5744 5745 SDValue LoRes = 5746 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 5747 SDValue HiRes = 5748 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 5749 5750 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 5751 } 5752 5753 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { 5754 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op"); 5755 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 5756 SDLoc DL(Op); 5757 5758 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5759 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5760 5761 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5762 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) { 5763 std::tie(LoOperands[j], HiOperands[j]) = 5764 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL); 5765 continue; 5766 } 5767 if (!Op.getOperand(j).getValueType().isVector()) { 5768 
LoOperands[j] = Op.getOperand(j); 5769 HiOperands[j] = Op.getOperand(j); 5770 continue; 5771 } 5772 std::tie(LoOperands[j], HiOperands[j]) = 5773 DAG.SplitVector(Op.getOperand(j), DL); 5774 } 5775 5776 SDValue LoRes = 5777 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 5778 SDValue HiRes = 5779 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 5780 5781 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 5782 } 5783 5784 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { 5785 SDLoc DL(Op); 5786 5787 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL); 5788 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL); 5789 auto [EVLLo, EVLHi] = 5790 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL); 5791 5792 SDValue ResLo = 5793 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 5794 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags()); 5795 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 5796 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags()); 5797 } 5798 5799 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { 5800 5801 assert(Op->isStrictFPOpcode()); 5802 5803 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0)); 5804 5805 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1)); 5806 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1)); 5807 5808 SDLoc DL(Op); 5809 5810 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5811 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5812 5813 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5814 if (!Op.getOperand(j).getValueType().isVector()) { 5815 LoOperands[j] = Op.getOperand(j); 5816 HiOperands[j] = Op.getOperand(j); 5817 continue; 5818 } 5819 std::tie(LoOperands[j], HiOperands[j]) = 5820 DAG.SplitVector(Op.getOperand(j), DL); 5821 } 5822 5823 SDValue LoRes = 5824 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags()); 5825 HiOperands[0] = LoRes.getValue(1); 5826 SDValue HiRes = 5827 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags()); 5828 5829 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0), 5830 LoRes.getValue(0), HiRes.getValue(0)); 5831 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); 5832 } 5833 5834 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 5835 SelectionDAG &DAG) const { 5836 switch (Op.getOpcode()) { 5837 default: 5838 report_fatal_error("unimplemented operand"); 5839 case ISD::ATOMIC_FENCE: 5840 return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5841 case ISD::GlobalAddress: 5842 return lowerGlobalAddress(Op, DAG); 5843 case ISD::BlockAddress: 5844 return lowerBlockAddress(Op, DAG); 5845 case ISD::ConstantPool: 5846 return lowerConstantPool(Op, DAG); 5847 case ISD::JumpTable: 5848 return lowerJumpTable(Op, DAG); 5849 case ISD::GlobalTLSAddress: 5850 return lowerGlobalTLSAddress(Op, DAG); 5851 case ISD::Constant: 5852 return lowerConstant(Op, DAG, Subtarget); 5853 case ISD::SELECT: 5854 return lowerSELECT(Op, DAG); 5855 case ISD::BRCOND: 5856 return lowerBRCOND(Op, DAG); 5857 case ISD::VASTART: 5858 return lowerVASTART(Op, DAG); 5859 case ISD::FRAMEADDR: 5860 return lowerFRAMEADDR(Op, DAG); 5861 case ISD::RETURNADDR: 5862 return lowerRETURNADDR(Op, DAG); 5863 case ISD::SHL_PARTS: 5864 return lowerShiftLeftParts(Op, DAG); 5865 case ISD::SRA_PARTS: 5866 return lowerShiftRightParts(Op, DAG, true); 5867 case ISD::SRL_PARTS: 5868 return lowerShiftRightParts(Op, DAG, false); 5869 case ISD::ROTL: 5870 case ISD::ROTR: 
5871 if (Op.getValueType().isFixedLengthVector()) { 5872 assert(Subtarget.hasStdExtZvkb()); 5873 return lowerToScalableOp(Op, DAG); 5874 } 5875 assert(Subtarget.hasVendorXTHeadBb() && 5876 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 5877 "Unexpected custom legalization"); 5878 // XTHeadBb only supports rotate by constant. 5879 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5880 return SDValue(); 5881 return Op; 5882 case ISD::BITCAST: { 5883 SDLoc DL(Op); 5884 EVT VT = Op.getValueType(); 5885 SDValue Op0 = Op.getOperand(0); 5886 EVT Op0VT = Op0.getValueType(); 5887 MVT XLenVT = Subtarget.getXLenVT(); 5888 if (VT == MVT::f16 && Op0VT == MVT::i16 && 5889 Subtarget.hasStdExtZfhminOrZhinxmin()) { 5890 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 5891 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 5892 return FPConv; 5893 } 5894 if (VT == MVT::bf16 && Op0VT == MVT::i16 && 5895 Subtarget.hasStdExtZfbfmin()) { 5896 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 5897 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); 5898 return FPConv; 5899 } 5900 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 5901 Subtarget.hasStdExtFOrZfinx()) { 5902 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5903 SDValue FPConv = 5904 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 5905 return FPConv; 5906 } 5907 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 && 5908 Subtarget.hasStdExtZfa()) { 5909 SDValue Lo, Hi; 5910 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); 5911 SDValue RetReg = 5912 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 5913 return RetReg; 5914 } 5915 5916 // Consider other scalar<->scalar casts as legal if the types are legal. 5917 // Otherwise expand them. 5918 if (!VT.isVector() && !Op0VT.isVector()) { 5919 if (isTypeLegal(VT) && isTypeLegal(Op0VT)) 5920 return Op; 5921 return SDValue(); 5922 } 5923 5924 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && 5925 "Unexpected types"); 5926 5927 if (VT.isFixedLengthVector()) { 5928 // We can handle fixed length vector bitcasts with a simple replacement 5929 // in isel. 5930 if (Op0VT.isFixedLengthVector()) 5931 return Op; 5932 // When bitcasting from scalar to fixed-length vector, insert the scalar 5933 // into a one-element vector of the result type, and perform a vector 5934 // bitcast. 5935 if (!Op0VT.isVector()) { 5936 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 5937 if (!isTypeLegal(BVT)) 5938 return SDValue(); 5939 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 5940 DAG.getUNDEF(BVT), Op0, 5941 DAG.getConstant(0, DL, XLenVT))); 5942 } 5943 return SDValue(); 5944 } 5945 // Custom-legalize bitcasts from fixed-length vector types to scalar types 5946 // thus: bitcast the vector to a one-element vector type whose element type 5947 // is the same as the result type, and extract the first element. 
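// Illustrative sketch of the rule above (the values and types are examples
// only, and it assumes the one-element vector type is legal):
//   (i32 (bitcast (v4i8 X)))
//     -> (i32 (extract_vector_elt (v1i32 (bitcast (v4i8 X))), 0))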
5948 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 5949 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 5950 if (!isTypeLegal(BVT)) 5951 return SDValue(); 5952 SDValue BVec = DAG.getBitcast(BVT, Op0); 5953 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 5954 DAG.getConstant(0, DL, XLenVT)); 5955 } 5956 return SDValue(); 5957 } 5958 case ISD::INTRINSIC_WO_CHAIN: 5959 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5960 case ISD::INTRINSIC_W_CHAIN: 5961 return LowerINTRINSIC_W_CHAIN(Op, DAG); 5962 case ISD::INTRINSIC_VOID: 5963 return LowerINTRINSIC_VOID(Op, DAG); 5964 case ISD::IS_FPCLASS: 5965 return LowerIS_FPCLASS(Op, DAG); 5966 case ISD::BITREVERSE: { 5967 MVT VT = Op.getSimpleValueType(); 5968 if (VT.isFixedLengthVector()) { 5969 assert(Subtarget.hasStdExtZvbb()); 5970 return lowerToScalableOp(Op, DAG); 5971 } 5972 SDLoc DL(Op); 5973 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); 5974 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); 5975 // Expand bitreverse to a bswap(rev8) followed by brev8. 5976 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); 5977 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); 5978 } 5979 case ISD::TRUNCATE: 5980 // Only custom-lower vector truncates 5981 if (!Op.getSimpleValueType().isVector()) 5982 return Op; 5983 return lowerVectorTruncLike(Op, DAG); 5984 case ISD::ANY_EXTEND: 5985 case ISD::ZERO_EXTEND: 5986 if (Op.getOperand(0).getValueType().isVector() && 5987 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 5988 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 5989 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 5990 case ISD::SIGN_EXTEND: 5991 if (Op.getOperand(0).getValueType().isVector() && 5992 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 5993 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 5994 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 5995 case ISD::SPLAT_VECTOR_PARTS: 5996 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 5997 case ISD::INSERT_VECTOR_ELT: 5998 return lowerINSERT_VECTOR_ELT(Op, DAG); 5999 case ISD::EXTRACT_VECTOR_ELT: 6000 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6001 case ISD::SCALAR_TO_VECTOR: { 6002 MVT VT = Op.getSimpleValueType(); 6003 SDLoc DL(Op); 6004 SDValue Scalar = Op.getOperand(0); 6005 if (VT.getVectorElementType() == MVT::i1) { 6006 MVT WideVT = VT.changeVectorElementType(MVT::i8); 6007 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); 6008 return DAG.getNode(ISD::TRUNCATE, DL, VT, V); 6009 } 6010 MVT ContainerVT = VT; 6011 if (VT.isFixedLengthVector()) 6012 ContainerVT = getContainerForFixedLengthVector(VT); 6013 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 6014 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar); 6015 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, 6016 DAG.getUNDEF(ContainerVT), Scalar, VL); 6017 if (VT.isFixedLengthVector()) 6018 V = convertFromScalableVector(VT, V, DAG, Subtarget); 6019 return V; 6020 } 6021 case ISD::VSCALE: { 6022 MVT XLenVT = Subtarget.getXLenVT(); 6023 MVT VT = Op.getSimpleValueType(); 6024 SDLoc DL(Op); 6025 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); 6026 // We define our scalable vector types for lmul=1 to use a 64 bit known 6027 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 6028 // vscale as VLENB / 8. 
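// A worked sketch of the shift selection below: vscale * 4 becomes
// VLENB >> 1, vscale * 8 is VLENB itself, vscale * 16 becomes VLENB << 1,
// vscale * 24 (a multiple of 8) becomes VLENB * 3, and a multiplier such as
// 6 falls back to (VLENB >> 3) * 6.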
6029 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); 6030 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 6031 report_fatal_error("Support for VLEN==32 is incomplete."); 6032 // We assume VLENB is a multiple of 8. We manually choose the best shift 6033 // here because SimplifyDemandedBits isn't always able to simplify it. 6034 uint64_t Val = Op.getConstantOperandVal(0); 6035 if (isPowerOf2_64(Val)) { 6036 uint64_t Log2 = Log2_64(Val); 6037 if (Log2 < 3) 6038 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6039 DAG.getConstant(3 - Log2, DL, VT)); 6040 else if (Log2 > 3) 6041 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res, 6042 DAG.getConstant(Log2 - 3, DL, XLenVT)); 6043 } else if ((Val % 8) == 0) { 6044 // If the multiplier is a multiple of 8, scale it down to avoid needing 6045 // to shift the VLENB value. 6046 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res, 6047 DAG.getConstant(Val / 8, DL, XLenVT)); 6048 } else { 6049 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6050 DAG.getConstant(3, DL, XLenVT)); 6051 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale, 6052 DAG.getConstant(Val, DL, XLenVT)); 6053 } 6054 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 6055 } 6056 case ISD::FPOWI: { 6057 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 6058 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 6059 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 6060 Op.getOperand(1).getValueType() == MVT::i32) { 6061 SDLoc DL(Op); 6062 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6063 SDValue Powi = 6064 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 6065 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 6066 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6067 } 6068 return SDValue(); 6069 } 6070 case ISD::FMAXIMUM: 6071 case ISD::FMINIMUM: 6072 if (Op.getValueType() == MVT::nxv32f16 && 6073 (Subtarget.hasVInstructionsF16Minimal() && 6074 !Subtarget.hasVInstructionsF16())) 6075 return SplitVectorOp(Op, DAG); 6076 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 6077 case ISD::FP_EXTEND: { 6078 SDLoc DL(Op); 6079 EVT VT = Op.getValueType(); 6080 SDValue Op0 = Op.getOperand(0); 6081 EVT Op0VT = Op0.getValueType(); 6082 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) 6083 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6084 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { 6085 SDValue FloatVal = 6086 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6087 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); 6088 } 6089 6090 if (!Op.getValueType().isVector()) 6091 return Op; 6092 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6093 } 6094 case ISD::FP_ROUND: { 6095 SDLoc DL(Op); 6096 EVT VT = Op.getValueType(); 6097 SDValue Op0 = Op.getOperand(0); 6098 EVT Op0VT = Op0.getValueType(); 6099 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) 6100 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); 6101 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && 6102 Subtarget.hasStdExtDOrZdinx()) { 6103 SDValue FloatVal = 6104 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, 6105 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6106 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); 6107 } 6108 6109 if (!Op.getValueType().isVector()) 6110 return Op; 6111 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6112 } 6113 case 
ISD::STRICT_FP_ROUND: 6114 case ISD::STRICT_FP_EXTEND: 6115 return lowerStrictFPExtendOrRoundLike(Op, DAG); 6116 case ISD::SINT_TO_FP: 6117 case ISD::UINT_TO_FP: 6118 if (Op.getValueType().isVector() && 6119 Op.getValueType().getScalarType() == MVT::f16 && 6120 (Subtarget.hasVInstructionsF16Minimal() && 6121 !Subtarget.hasVInstructionsF16())) { 6122 if (Op.getValueType() == MVT::nxv32f16) 6123 return SplitVectorOp(Op, DAG); 6124 // int -> f32 6125 SDLoc DL(Op); 6126 MVT NVT = 6127 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 6128 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 6129 // f32 -> f16 6130 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 6131 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6132 } 6133 [[fallthrough]]; 6134 case ISD::FP_TO_SINT: 6135 case ISD::FP_TO_UINT: 6136 if (SDValue Op1 = Op.getOperand(0); 6137 Op1.getValueType().isVector() && 6138 Op1.getValueType().getScalarType() == MVT::f16 && 6139 (Subtarget.hasVInstructionsF16Minimal() && 6140 !Subtarget.hasVInstructionsF16())) { 6141 if (Op1.getValueType() == MVT::nxv32f16) 6142 return SplitVectorOp(Op, DAG); 6143 // f16 -> f32 6144 SDLoc DL(Op); 6145 MVT NVT = MVT::getVectorVT(MVT::f32, 6146 Op1.getValueType().getVectorElementCount()); 6147 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 6148 // f32 -> int 6149 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec); 6150 } 6151 [[fallthrough]]; 6152 case ISD::STRICT_FP_TO_SINT: 6153 case ISD::STRICT_FP_TO_UINT: 6154 case ISD::STRICT_SINT_TO_FP: 6155 case ISD::STRICT_UINT_TO_FP: { 6156 // RVV can only do fp<->int conversions to types half/double the size as 6157 // the source. We custom-lower any conversions that do two hops into 6158 // sequences. 6159 MVT VT = Op.getSimpleValueType(); 6160 if (!VT.isVector()) 6161 return Op; 6162 SDLoc DL(Op); 6163 bool IsStrict = Op->isStrictFPOpcode(); 6164 SDValue Src = Op.getOperand(0 + IsStrict); 6165 MVT EltVT = VT.getVectorElementType(); 6166 MVT SrcVT = Src.getSimpleValueType(); 6167 MVT SrcEltVT = SrcVT.getVectorElementType(); 6168 unsigned EltSize = EltVT.getSizeInBits(); 6169 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 6170 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 6171 "Unexpected vector element types"); 6172 6173 bool IsInt2FP = SrcEltVT.isInteger(); 6174 // Widening conversions 6175 if (EltSize > (2 * SrcEltSize)) { 6176 if (IsInt2FP) { 6177 // Do a regular integer sign/zero extension then convert to float. 6178 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), 6179 VT.getVectorElementCount()); 6180 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || 6181 Op.getOpcode() == ISD::STRICT_UINT_TO_FP) 6182 ? ISD::ZERO_EXTEND 6183 : ISD::SIGN_EXTEND; 6184 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 6185 if (IsStrict) 6186 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), 6187 Op.getOperand(0), Ext); 6188 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 6189 } 6190 // FP2Int 6191 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 6192 // Do one doubling fp_extend then complete the operation by converting 6193 // to int. 
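// For example (a sketch), (nxv2i64 (fp_to_sint (nxv2f16 X))) becomes an
// fp_extend of X to nxv2f32 followed by an fp_to_sint from nxv2f32 to
// nxv2i64.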
6194 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6195 if (IsStrict) { 6196 auto [FExt, Chain] = 6197 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT); 6198 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt); 6199 } 6200 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 6201 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 6202 } 6203 6204 // Narrowing conversions 6205 if (SrcEltSize > (2 * EltSize)) { 6206 if (IsInt2FP) { 6207 // One narrowing int_to_fp, then an fp_round. 6208 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 6209 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6210 if (IsStrict) { 6211 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, 6212 DAG.getVTList(InterimFVT, MVT::Other), 6213 Op.getOperand(0), Src); 6214 SDValue Chain = Int2FP.getValue(1); 6215 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first; 6216 } 6217 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 6218 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 6219 } 6220 // FP2Int 6221 // One narrowing fp_to_int, then truncate the integer. If the float isn't 6222 // representable by the integer, the result is poison. 6223 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 6224 VT.getVectorElementCount()); 6225 if (IsStrict) { 6226 SDValue FP2Int = 6227 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), 6228 Op.getOperand(0), Src); 6229 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6230 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL); 6231 } 6232 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 6233 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6234 } 6235 6236 // Scalable vectors can exit here. Patterns will handle equally-sized 6237 // conversions halving/doubling ones. 6238 if (!VT.isFixedLengthVector()) 6239 return Op; 6240 6241 // For fixed-length vectors we lower to a custom "VL" node. 
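// As an illustration, (v4i32 (fp_to_sint (v4f32 X))) is rewritten as a
// VFCVT_RTZ_X_F_VL on the corresponding scalable container type, with the
// mask and VL covering the four elements, before being converted back to
// the fixed-length v4i32 result.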
6242 unsigned RVVOpc = 0; 6243 switch (Op.getOpcode()) { 6244 default: 6245 llvm_unreachable("Impossible opcode"); 6246 case ISD::FP_TO_SINT: 6247 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; 6248 break; 6249 case ISD::FP_TO_UINT: 6250 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; 6251 break; 6252 case ISD::SINT_TO_FP: 6253 RVVOpc = RISCVISD::SINT_TO_FP_VL; 6254 break; 6255 case ISD::UINT_TO_FP: 6256 RVVOpc = RISCVISD::UINT_TO_FP_VL; 6257 break; 6258 case ISD::STRICT_FP_TO_SINT: 6259 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; 6260 break; 6261 case ISD::STRICT_FP_TO_UINT: 6262 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; 6263 break; 6264 case ISD::STRICT_SINT_TO_FP: 6265 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; 6266 break; 6267 case ISD::STRICT_UINT_TO_FP: 6268 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; 6269 break; 6270 } 6271 6272 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6273 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 6274 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && 6275 "Expected same element count"); 6276 6277 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6278 6279 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 6280 if (IsStrict) { 6281 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 6282 Op.getOperand(0), Src, Mask, VL); 6283 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget); 6284 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL); 6285 } 6286 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 6287 return convertFromScalableVector(VT, Src, DAG, Subtarget); 6288 } 6289 case ISD::FP_TO_SINT_SAT: 6290 case ISD::FP_TO_UINT_SAT: 6291 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); 6292 case ISD::FP_TO_BF16: { 6293 // Custom lower to ensure the libcall return is passed in an FPR on hard 6294 // float ABIs. 6295 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); 6296 SDLoc DL(Op); 6297 MakeLibCallOptions CallOptions; 6298 RTLIB::Libcall LC = 6299 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); 6300 SDValue Res = 6301 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6302 if (Subtarget.is64Bit() && !RV64LegalI32) 6303 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6304 return DAG.getBitcast(MVT::i32, Res); 6305 } 6306 case ISD::BF16_TO_FP: { 6307 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); 6308 MVT VT = Op.getSimpleValueType(); 6309 SDLoc DL(Op); 6310 Op = DAG.getNode( 6311 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), 6312 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); 6313 SDValue Res = Subtarget.is64Bit() 6314 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) 6315 : DAG.getBitcast(MVT::f32, Op); 6316 // fp_extend if the target VT is bigger than f32. 6317 if (VT != MVT::f32) 6318 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); 6319 return Res; 6320 } 6321 case ISD::FP_TO_FP16: { 6322 // Custom lower to ensure the libcall return is passed in an FPR on hard 6323 // float ABIs. 
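// Sketch of the intent: the fp-round libcall is made with an f32 result so
// the value is returned in an FPR; FMV_X_ANYEXTW_RV64 or a plain bitcast
// then moves the bits into the integer result expected for f16.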
6324 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6325 SDLoc DL(Op); 6326 MakeLibCallOptions CallOptions; 6327 RTLIB::Libcall LC = 6328 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); 6329 SDValue Res = 6330 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6331 if (Subtarget.is64Bit() && !RV64LegalI32) 6332 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6333 return DAG.getBitcast(MVT::i32, Res); 6334 } 6335 case ISD::FP16_TO_FP: { 6336 // Custom lower to ensure the libcall argument is passed in an FPR on hard 6337 // float ABIs. 6338 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6339 SDLoc DL(Op); 6340 MakeLibCallOptions CallOptions; 6341 SDValue Arg = Subtarget.is64Bit() 6342 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, 6343 Op.getOperand(0)) 6344 : DAG.getBitcast(MVT::f32, Op.getOperand(0)); 6345 SDValue Res = 6346 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) 6347 .first; 6348 return Res; 6349 } 6350 case ISD::FTRUNC: 6351 case ISD::FCEIL: 6352 case ISD::FFLOOR: 6353 case ISD::FNEARBYINT: 6354 case ISD::FRINT: 6355 case ISD::FROUND: 6356 case ISD::FROUNDEVEN: 6357 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6358 case ISD::LRINT: 6359 case ISD::LLRINT: 6360 return lowerVectorXRINT(Op, DAG, Subtarget); 6361 case ISD::VECREDUCE_ADD: 6362 case ISD::VECREDUCE_UMAX: 6363 case ISD::VECREDUCE_SMAX: 6364 case ISD::VECREDUCE_UMIN: 6365 case ISD::VECREDUCE_SMIN: 6366 return lowerVECREDUCE(Op, DAG); 6367 case ISD::VECREDUCE_AND: 6368 case ISD::VECREDUCE_OR: 6369 case ISD::VECREDUCE_XOR: 6370 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 6371 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 6372 return lowerVECREDUCE(Op, DAG); 6373 case ISD::VECREDUCE_FADD: 6374 case ISD::VECREDUCE_SEQ_FADD: 6375 case ISD::VECREDUCE_FMIN: 6376 case ISD::VECREDUCE_FMAX: 6377 return lowerFPVECREDUCE(Op, DAG); 6378 case ISD::VP_REDUCE_ADD: 6379 case ISD::VP_REDUCE_UMAX: 6380 case ISD::VP_REDUCE_SMAX: 6381 case ISD::VP_REDUCE_UMIN: 6382 case ISD::VP_REDUCE_SMIN: 6383 case ISD::VP_REDUCE_FADD: 6384 case ISD::VP_REDUCE_SEQ_FADD: 6385 case ISD::VP_REDUCE_FMIN: 6386 case ISD::VP_REDUCE_FMAX: 6387 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && 6388 (Subtarget.hasVInstructionsF16Minimal() && 6389 !Subtarget.hasVInstructionsF16())) 6390 return SplitVectorReductionOp(Op, DAG); 6391 return lowerVPREDUCE(Op, DAG); 6392 case ISD::VP_REDUCE_AND: 6393 case ISD::VP_REDUCE_OR: 6394 case ISD::VP_REDUCE_XOR: 6395 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 6396 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 6397 return lowerVPREDUCE(Op, DAG); 6398 case ISD::UNDEF: { 6399 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType()); 6400 return convertFromScalableVector(Op.getSimpleValueType(), 6401 DAG.getUNDEF(ContainerVT), DAG, Subtarget); 6402 } 6403 case ISD::INSERT_SUBVECTOR: 6404 return lowerINSERT_SUBVECTOR(Op, DAG); 6405 case ISD::EXTRACT_SUBVECTOR: 6406 return lowerEXTRACT_SUBVECTOR(Op, DAG); 6407 case ISD::VECTOR_DEINTERLEAVE: 6408 return lowerVECTOR_DEINTERLEAVE(Op, DAG); 6409 case ISD::VECTOR_INTERLEAVE: 6410 return lowerVECTOR_INTERLEAVE(Op, DAG); 6411 case ISD::STEP_VECTOR: 6412 return lowerSTEP_VECTOR(Op, DAG); 6413 case ISD::VECTOR_REVERSE: 6414 return lowerVECTOR_REVERSE(Op, DAG); 6415 case ISD::VECTOR_SPLICE: 6416 return lowerVECTOR_SPLICE(Op, DAG); 6417 
case ISD::BUILD_VECTOR: 6418 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 6419 case ISD::SPLAT_VECTOR: 6420 if (Op.getValueType().getScalarType() == MVT::f16 && 6421 (Subtarget.hasVInstructionsF16Minimal() && 6422 !Subtarget.hasVInstructionsF16())) { 6423 if (Op.getValueType() == MVT::nxv32f16) 6424 return SplitVectorOp(Op, DAG); 6425 SDLoc DL(Op); 6426 SDValue NewScalar = 6427 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6428 SDValue NewSplat = DAG.getNode( 6429 ISD::SPLAT_VECTOR, DL, 6430 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), 6431 NewScalar); 6432 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat, 6433 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6434 } 6435 if (Op.getValueType().getVectorElementType() == MVT::i1) 6436 return lowerVectorMaskSplat(Op, DAG); 6437 return SDValue(); 6438 case ISD::VECTOR_SHUFFLE: 6439 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 6440 case ISD::CONCAT_VECTORS: { 6441 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 6442 // better than going through the stack, as the default expansion does. 6443 SDLoc DL(Op); 6444 MVT VT = Op.getSimpleValueType(); 6445 unsigned NumOpElts = 6446 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 6447 SDValue Vec = DAG.getUNDEF(VT); 6448 for (const auto &OpIdx : enumerate(Op->ops())) { 6449 SDValue SubVec = OpIdx.value(); 6450 // Don't insert undef subvectors. 6451 if (SubVec.isUndef()) 6452 continue; 6453 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, 6454 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 6455 } 6456 return Vec; 6457 } 6458 case ISD::LOAD: 6459 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 6460 return V; 6461 if (Op.getValueType().isFixedLengthVector()) 6462 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 6463 return Op; 6464 case ISD::STORE: 6465 if (auto V = expandUnalignedRVVStore(Op, DAG)) 6466 return V; 6467 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 6468 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 6469 return Op; 6470 case ISD::MLOAD: 6471 case ISD::VP_LOAD: 6472 return lowerMaskedLoad(Op, DAG); 6473 case ISD::MSTORE: 6474 case ISD::VP_STORE: 6475 return lowerMaskedStore(Op, DAG); 6476 case ISD::SELECT_CC: { 6477 // This occurs because we custom legalize SETGT and SETUGT for setcc. That 6478 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand 6479 // into separate SETCC+SELECT just like LegalizeDAG. 
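// Roughly (illustrative):
//   (select_cc lhs, rhs, setgt, truev, falsev)
//     -> (select (setcc lhs, rhs, setgt), truev, falsev)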
6480 SDValue Tmp1 = Op.getOperand(0); 6481 SDValue Tmp2 = Op.getOperand(1); 6482 SDValue True = Op.getOperand(2); 6483 SDValue False = Op.getOperand(3); 6484 EVT VT = Op.getValueType(); 6485 SDValue CC = Op.getOperand(4); 6486 EVT CmpVT = Tmp1.getValueType(); 6487 EVT CCVT = 6488 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); 6489 SDLoc DL(Op); 6490 SDValue Cond = 6491 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); 6492 return DAG.getSelect(DL, VT, Cond, True, False); 6493 } 6494 case ISD::SETCC: { 6495 MVT OpVT = Op.getOperand(0).getSimpleValueType(); 6496 if (OpVT.isScalarInteger()) { 6497 MVT VT = Op.getSimpleValueType(); 6498 SDValue LHS = Op.getOperand(0); 6499 SDValue RHS = Op.getOperand(1); 6500 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 6501 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && 6502 "Unexpected CondCode"); 6503 6504 SDLoc DL(Op); 6505 6506 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can 6507 // convert this to the equivalent of (set(u)ge X, C+1) by using 6508 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant 6509 // in a register. 6510 if (isa<ConstantSDNode>(RHS)) { 6511 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); 6512 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { 6513 // If this is an unsigned compare and the constant is -1, incrementing 6514 // the constant would change behavior. The result should be false. 6515 if (CCVal == ISD::SETUGT && Imm == -1) 6516 return DAG.getConstant(0, DL, VT); 6517 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. 6518 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6519 SDValue SetCC = DAG.getSetCC( 6520 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); 6521 return DAG.getLogicalNOT(DL, SetCC, VT); 6522 } 6523 } 6524 6525 // Not a constant we could handle, swap the operands and condition code to 6526 // SETLT/SETULT. 6527 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6528 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); 6529 } 6530 6531 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 6532 (Subtarget.hasVInstructionsF16Minimal() && 6533 !Subtarget.hasVInstructionsF16())) 6534 return SplitVectorOp(Op, DAG); 6535 6536 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 6537 } 6538 case ISD::ADD: 6539 case ISD::SUB: 6540 case ISD::MUL: 6541 case ISD::MULHS: 6542 case ISD::MULHU: 6543 case ISD::AND: 6544 case ISD::OR: 6545 case ISD::XOR: 6546 case ISD::SDIV: 6547 case ISD::SREM: 6548 case ISD::UDIV: 6549 case ISD::UREM: 6550 case ISD::BSWAP: 6551 case ISD::CTPOP: 6552 return lowerToScalableOp(Op, DAG); 6553 case ISD::SHL: 6554 case ISD::SRA: 6555 case ISD::SRL: 6556 if (Op.getSimpleValueType().isFixedLengthVector()) 6557 return lowerToScalableOp(Op, DAG); 6558 // This can be called for an i32 shift amount that needs to be promoted. 
6559 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 6560 "Unexpected custom legalisation"); 6561 return SDValue(); 6562 case ISD::FADD: 6563 case ISD::FSUB: 6564 case ISD::FMUL: 6565 case ISD::FDIV: 6566 case ISD::FNEG: 6567 case ISD::FABS: 6568 case ISD::FSQRT: 6569 case ISD::FMA: 6570 case ISD::FMINNUM: 6571 case ISD::FMAXNUM: 6572 if (Op.getValueType() == MVT::nxv32f16 && 6573 (Subtarget.hasVInstructionsF16Minimal() && 6574 !Subtarget.hasVInstructionsF16())) 6575 return SplitVectorOp(Op, DAG); 6576 [[fallthrough]]; 6577 case ISD::AVGFLOORU: 6578 case ISD::AVGCEILU: 6579 case ISD::SADDSAT: 6580 case ISD::UADDSAT: 6581 case ISD::SSUBSAT: 6582 case ISD::USUBSAT: 6583 case ISD::SMIN: 6584 case ISD::SMAX: 6585 case ISD::UMIN: 6586 case ISD::UMAX: 6587 return lowerToScalableOp(Op, DAG); 6588 case ISD::ABS: 6589 case ISD::VP_ABS: 6590 return lowerABS(Op, DAG); 6591 case ISD::CTLZ: 6592 case ISD::CTLZ_ZERO_UNDEF: 6593 case ISD::CTTZ: 6594 case ISD::CTTZ_ZERO_UNDEF: 6595 if (Subtarget.hasStdExtZvbb()) 6596 return lowerToScalableOp(Op, DAG); 6597 assert(Op.getOpcode() != ISD::CTTZ); 6598 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6599 case ISD::VSELECT: 6600 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 6601 case ISD::FCOPYSIGN: 6602 if (Op.getValueType() == MVT::nxv32f16 && 6603 (Subtarget.hasVInstructionsF16Minimal() && 6604 !Subtarget.hasVInstructionsF16())) 6605 return SplitVectorOp(Op, DAG); 6606 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 6607 case ISD::STRICT_FADD: 6608 case ISD::STRICT_FSUB: 6609 case ISD::STRICT_FMUL: 6610 case ISD::STRICT_FDIV: 6611 case ISD::STRICT_FSQRT: 6612 case ISD::STRICT_FMA: 6613 if (Op.getValueType() == MVT::nxv32f16 && 6614 (Subtarget.hasVInstructionsF16Minimal() && 6615 !Subtarget.hasVInstructionsF16())) 6616 return SplitStrictFPVectorOp(Op, DAG); 6617 return lowerToScalableOp(Op, DAG); 6618 case ISD::STRICT_FSETCC: 6619 case ISD::STRICT_FSETCCS: 6620 return lowerVectorStrictFSetcc(Op, DAG); 6621 case ISD::STRICT_FCEIL: 6622 case ISD::STRICT_FRINT: 6623 case ISD::STRICT_FFLOOR: 6624 case ISD::STRICT_FTRUNC: 6625 case ISD::STRICT_FNEARBYINT: 6626 case ISD::STRICT_FROUND: 6627 case ISD::STRICT_FROUNDEVEN: 6628 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6629 case ISD::MGATHER: 6630 case ISD::VP_GATHER: 6631 return lowerMaskedGather(Op, DAG); 6632 case ISD::MSCATTER: 6633 case ISD::VP_SCATTER: 6634 return lowerMaskedScatter(Op, DAG); 6635 case ISD::GET_ROUNDING: 6636 return lowerGET_ROUNDING(Op, DAG); 6637 case ISD::SET_ROUNDING: 6638 return lowerSET_ROUNDING(Op, DAG); 6639 case ISD::EH_DWARF_CFA: 6640 return lowerEH_DWARF_CFA(Op, DAG); 6641 case ISD::VP_SELECT: 6642 case ISD::VP_MERGE: 6643 case ISD::VP_ADD: 6644 case ISD::VP_SUB: 6645 case ISD::VP_MUL: 6646 case ISD::VP_SDIV: 6647 case ISD::VP_UDIV: 6648 case ISD::VP_SREM: 6649 case ISD::VP_UREM: 6650 return lowerVPOp(Op, DAG); 6651 case ISD::VP_AND: 6652 case ISD::VP_OR: 6653 case ISD::VP_XOR: 6654 return lowerLogicVPOp(Op, DAG); 6655 case ISD::VP_FADD: 6656 case ISD::VP_FSUB: 6657 case ISD::VP_FMUL: 6658 case ISD::VP_FDIV: 6659 case ISD::VP_FNEG: 6660 case ISD::VP_FABS: 6661 case ISD::VP_SQRT: 6662 case ISD::VP_FMA: 6663 case ISD::VP_FMINNUM: 6664 case ISD::VP_FMAXNUM: 6665 case ISD::VP_FCOPYSIGN: 6666 if (Op.getValueType() == MVT::nxv32f16 && 6667 (Subtarget.hasVInstructionsF16Minimal() && 6668 !Subtarget.hasVInstructionsF16())) 6669 return SplitVPOp(Op, DAG); 6670 [[fallthrough]]; 6671 case ISD::VP_ASHR: 6672 case ISD::VP_LSHR: 6673 
case ISD::VP_SHL: 6674 return lowerVPOp(Op, DAG); 6675 case ISD::VP_IS_FPCLASS: 6676 return LowerIS_FPCLASS(Op, DAG); 6677 case ISD::VP_SIGN_EXTEND: 6678 case ISD::VP_ZERO_EXTEND: 6679 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 6680 return lowerVPExtMaskOp(Op, DAG); 6681 return lowerVPOp(Op, DAG); 6682 case ISD::VP_TRUNCATE: 6683 return lowerVectorTruncLike(Op, DAG); 6684 case ISD::VP_FP_EXTEND: 6685 case ISD::VP_FP_ROUND: 6686 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6687 case ISD::VP_SINT_TO_FP: 6688 case ISD::VP_UINT_TO_FP: 6689 if (Op.getValueType().isVector() && 6690 Op.getValueType().getScalarType() == MVT::f16 && 6691 (Subtarget.hasVInstructionsF16Minimal() && 6692 !Subtarget.hasVInstructionsF16())) { 6693 if (Op.getValueType() == MVT::nxv32f16) 6694 return SplitVPOp(Op, DAG); 6695 // int -> f32 6696 SDLoc DL(Op); 6697 MVT NVT = 6698 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 6699 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 6700 // f32 -> f16 6701 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 6702 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6703 } 6704 [[fallthrough]]; 6705 case ISD::VP_FP_TO_SINT: 6706 case ISD::VP_FP_TO_UINT: 6707 if (SDValue Op1 = Op.getOperand(0); 6708 Op1.getValueType().isVector() && 6709 Op1.getValueType().getScalarType() == MVT::f16 && 6710 (Subtarget.hasVInstructionsF16Minimal() && 6711 !Subtarget.hasVInstructionsF16())) { 6712 if (Op1.getValueType() == MVT::nxv32f16) 6713 return SplitVPOp(Op, DAG); 6714 // f16 -> f32 6715 SDLoc DL(Op); 6716 MVT NVT = MVT::getVectorVT(MVT::f32, 6717 Op1.getValueType().getVectorElementCount()); 6718 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 6719 // f32 -> int 6720 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 6721 {WidenVec, Op.getOperand(1), Op.getOperand(2)}); 6722 } 6723 return lowerVPFPIntConvOp(Op, DAG); 6724 case ISD::VP_SETCC: 6725 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 6726 (Subtarget.hasVInstructionsF16Minimal() && 6727 !Subtarget.hasVInstructionsF16())) 6728 return SplitVPOp(Op, DAG); 6729 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 6730 return lowerVPSetCCMaskOp(Op, DAG); 6731 [[fallthrough]]; 6732 case ISD::VP_SMIN: 6733 case ISD::VP_SMAX: 6734 case ISD::VP_UMIN: 6735 case ISD::VP_UMAX: 6736 case ISD::VP_BITREVERSE: 6737 case ISD::VP_BSWAP: 6738 return lowerVPOp(Op, DAG); 6739 case ISD::VP_CTLZ: 6740 case ISD::VP_CTLZ_ZERO_UNDEF: 6741 if (Subtarget.hasStdExtZvbb()) 6742 return lowerVPOp(Op, DAG); 6743 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6744 case ISD::VP_CTTZ: 6745 case ISD::VP_CTTZ_ZERO_UNDEF: 6746 if (Subtarget.hasStdExtZvbb()) 6747 return lowerVPOp(Op, DAG); 6748 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6749 case ISD::VP_CTPOP: 6750 return lowerVPOp(Op, DAG); 6751 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 6752 return lowerVPStridedLoad(Op, DAG); 6753 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 6754 return lowerVPStridedStore(Op, DAG); 6755 case ISD::VP_FCEIL: 6756 case ISD::VP_FFLOOR: 6757 case ISD::VP_FRINT: 6758 case ISD::VP_FNEARBYINT: 6759 case ISD::VP_FROUND: 6760 case ISD::VP_FROUNDEVEN: 6761 case ISD::VP_FROUNDTOZERO: 6762 if (Op.getValueType() == MVT::nxv32f16 && 6763 (Subtarget.hasVInstructionsF16Minimal() && 6764 !Subtarget.hasVInstructionsF16())) 6765 return SplitVPOp(Op, DAG); 6766 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6767 case ISD::VP_FMAXIMUM: 6768 case ISD::VP_FMINIMUM: 6769 
if (Op.getValueType() == MVT::nxv32f16 && 6770 (Subtarget.hasVInstructionsF16Minimal() && 6771 !Subtarget.hasVInstructionsF16())) 6772 return SplitVPOp(Op, DAG); 6773 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 6774 case ISD::EXPERIMENTAL_VP_SPLICE: 6775 return lowerVPSpliceExperimental(Op, DAG); 6776 case ISD::EXPERIMENTAL_VP_REVERSE: 6777 return lowerVPReverseExperimental(Op, DAG); 6778 } 6779 } 6780 6781 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, 6782 SelectionDAG &DAG, unsigned Flags) { 6783 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 6784 } 6785 6786 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, 6787 SelectionDAG &DAG, unsigned Flags) { 6788 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 6789 Flags); 6790 } 6791 6792 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, 6793 SelectionDAG &DAG, unsigned Flags) { 6794 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 6795 N->getOffset(), Flags); 6796 } 6797 6798 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, 6799 SelectionDAG &DAG, unsigned Flags) { 6800 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 6801 } 6802 6803 template <class NodeTy> 6804 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 6805 bool IsLocal, bool IsExternWeak) const { 6806 SDLoc DL(N); 6807 EVT Ty = getPointerTy(DAG.getDataLayout()); 6808 6809 // When HWASAN is used and tagging of global variables is enabled 6810 // they should be accessed via the GOT, since the tagged address of a global 6811 // is incompatible with existing code models. This also applies to non-pic 6812 // mode. 6813 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { 6814 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 6815 if (IsLocal && !Subtarget.allowTaggedGlobals()) 6816 // Use PC-relative addressing to access the symbol. This generates the 6817 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 6818 // %pcrel_lo(auipc)). 6819 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 6820 6821 // Use PC-relative addressing to access the GOT for this symbol, then load 6822 // the address from the GOT. This generates the pattern (PseudoLGA sym), 6823 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 6824 SDValue Load = 6825 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 6826 MachineFunction &MF = DAG.getMachineFunction(); 6827 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6828 MachinePointerInfo::getGOT(MF), 6829 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6830 MachineMemOperand::MOInvariant, 6831 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6832 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6833 return Load; 6834 } 6835 6836 switch (getTargetMachine().getCodeModel()) { 6837 default: 6838 report_fatal_error("Unsupported code model for lowering"); 6839 case CodeModel::Small: { 6840 // Generate a sequence for accessing addresses within the first 2 GiB of 6841 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
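// In assembly this is roughly (illustrative only):
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)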
6842 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 6843 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 6844 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 6845 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); 6846 } 6847 case CodeModel::Medium: { 6848 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 6849 if (IsExternWeak) { 6850 // An extern weak symbol may be undefined, i.e. have value 0, which may 6851 // not be within 2GiB of PC, so use GOT-indirect addressing to access the 6852 // symbol. This generates the pattern (PseudoLGA sym), which expands to 6853 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 6854 SDValue Load = 6855 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 6856 MachineFunction &MF = DAG.getMachineFunction(); 6857 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6858 MachinePointerInfo::getGOT(MF), 6859 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6860 MachineMemOperand::MOInvariant, 6861 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6862 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6863 return Load; 6864 } 6865 6866 // Generate a sequence for accessing addresses within any 2GiB range within 6867 // the address space. This generates the pattern (PseudoLLA sym), which 6868 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 6869 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 6870 } 6871 } 6872 } 6873 6874 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 6875 SelectionDAG &DAG) const { 6876 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 6877 assert(N->getOffset() == 0 && "unexpected offset in global node"); 6878 const GlobalValue *GV = N->getGlobal(); 6879 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage()); 6880 } 6881 6882 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 6883 SelectionDAG &DAG) const { 6884 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 6885 6886 return getAddr(N, DAG); 6887 } 6888 6889 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 6890 SelectionDAG &DAG) const { 6891 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 6892 6893 return getAddr(N, DAG); 6894 } 6895 6896 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 6897 SelectionDAG &DAG) const { 6898 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 6899 6900 return getAddr(N, DAG); 6901 } 6902 6903 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 6904 SelectionDAG &DAG, 6905 bool UseGOT) const { 6906 SDLoc DL(N); 6907 EVT Ty = getPointerTy(DAG.getDataLayout()); 6908 const GlobalValue *GV = N->getGlobal(); 6909 MVT XLenVT = Subtarget.getXLenVT(); 6910 6911 if (UseGOT) { 6912 // Use PC-relative addressing to access the GOT for this TLS symbol, then 6913 // load the address from the GOT and add the thread pointer. This generates 6914 // the pattern (PseudoLA_TLS_IE sym), which expands to 6915 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 
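// Once the pseudo is expanded this is roughly (a sketch; the label name is
// illustrative):
//   .Lpcrel_hi0: auipc a0, %tls_ie_pcrel_hi(sym)
//                ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)   # lw on RV32
// with the add of the thread pointer emitted explicitly below.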
6916 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6917 SDValue Load = 6918 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 6919 MachineFunction &MF = DAG.getMachineFunction(); 6920 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6921 MachinePointerInfo::getGOT(MF), 6922 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6923 MachineMemOperand::MOInvariant, 6924 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6925 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6926 6927 // Add the thread pointer. 6928 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 6929 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 6930 } 6931 6932 // Generate a sequence for accessing the address relative to the thread 6933 // pointer, with the appropriate adjustment for the thread pointer offset. 6934 // This generates the pattern 6935 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 6936 SDValue AddrHi = 6937 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 6938 SDValue AddrAdd = 6939 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 6940 SDValue AddrLo = 6941 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 6942 6943 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 6944 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 6945 SDValue MNAdd = 6946 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); 6947 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); 6948 } 6949 6950 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 6951 SelectionDAG &DAG) const { 6952 SDLoc DL(N); 6953 EVT Ty = getPointerTy(DAG.getDataLayout()); 6954 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 6955 const GlobalValue *GV = N->getGlobal(); 6956 6957 // Use a PC-relative addressing mode to access the global dynamic GOT address. 6958 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 6959 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 6960 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6961 SDValue Load = 6962 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 6963 6964 // Prepare argument list to generate call. 6965 ArgListTy Args; 6966 ArgListEntry Entry; 6967 Entry.Node = Load; 6968 Entry.Ty = CallTy; 6969 Args.push_back(Entry); 6970 6971 // Setup call to __tls_get_addr. 6972 TargetLowering::CallLoweringInfo CLI(DAG); 6973 CLI.setDebugLoc(DL) 6974 .setChain(DAG.getEntryNode()) 6975 .setLibCallee(CallingConv::C, CallTy, 6976 DAG.getExternalSymbol("__tls_get_addr", Ty), 6977 std::move(Args)); 6978 6979 return LowerCallTo(CLI).first; 6980 } 6981 6982 SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, 6983 SelectionDAG &DAG) const { 6984 SDLoc DL(N); 6985 EVT Ty = getPointerTy(DAG.getDataLayout()); 6986 const GlobalValue *GV = N->getGlobal(); 6987 6988 // Use a PC-relative addressing mode to access the global dynamic GOT address. 
6989 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to 6990 // 6991 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) 6992 // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label) 6993 // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label) 6994 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) 6995 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6996 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); 6997 } 6998 6999 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 7000 SelectionDAG &DAG) const { 7001 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 7002 assert(N->getOffset() == 0 && "unexpected offset in global node"); 7003 7004 if (DAG.getTarget().useEmulatedTLS()) 7005 return LowerToTLSEmulatedModel(N, DAG); 7006 7007 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 7008 7009 if (DAG.getMachineFunction().getFunction().getCallingConv() == 7010 CallingConv::GHC) 7011 report_fatal_error("In GHC calling convention TLS is not supported"); 7012 7013 SDValue Addr; 7014 switch (Model) { 7015 case TLSModel::LocalExec: 7016 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 7017 break; 7018 case TLSModel::InitialExec: 7019 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 7020 break; 7021 case TLSModel::LocalDynamic: 7022 case TLSModel::GeneralDynamic: 7023 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) 7024 : getDynamicTLSAddr(N, DAG); 7025 break; 7026 } 7027 7028 return Addr; 7029 } 7030 7031 // Return true if Val is equal to (setcc LHS, RHS, CC). 7032 // Return false if Val is the inverse of (setcc LHS, RHS, CC). 7033 // Otherwise, return std::nullopt. 7034 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, 7035 ISD::CondCode CC, SDValue Val) { 7036 assert(Val->getOpcode() == ISD::SETCC); 7037 SDValue LHS2 = Val.getOperand(0); 7038 SDValue RHS2 = Val.getOperand(1); 7039 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get(); 7040 7041 if (LHS == LHS2 && RHS == RHS2) { 7042 if (CC == CC2) 7043 return true; 7044 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7045 return false; 7046 } else if (LHS == RHS2 && RHS == LHS2) { 7047 CC2 = ISD::getSetCCSwappedOperands(CC2); 7048 if (CC == CC2) 7049 return true; 7050 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7051 return false; 7052 } 7053 7054 return std::nullopt; 7055 } 7056 7057 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, 7058 const RISCVSubtarget &Subtarget) { 7059 SDValue CondV = N->getOperand(0); 7060 SDValue TrueV = N->getOperand(1); 7061 SDValue FalseV = N->getOperand(2); 7062 MVT VT = N->getSimpleValueType(0); 7063 SDLoc DL(N); 7064 7065 if (!Subtarget.hasConditionalMoveFusion()) { 7066 // (select c, -1, y) -> -c | y 7067 if (isAllOnesConstant(TrueV)) { 7068 SDValue Neg = DAG.getNegative(CondV, DL, VT); 7069 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 7070 } 7071 // (select c, y, -1) -> (c-1) | y 7072 if (isAllOnesConstant(FalseV)) { 7073 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7074 DAG.getAllOnesConstant(DL, VT)); 7075 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 7076 } 7077 7078 // (select c, 0, y) -> (c-1) & y 7079 if (isNullConstant(TrueV)) { 7080 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7081 DAG.getAllOnesConstant(DL, VT)); 7082 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 7083 } 7084 // (select c, y, 0) -> -c & y 7085 if (isNullConstant(FalseV)) { 7086 SDValue Neg 
= DAG.getNegative(CondV, DL, VT); 7087 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 7088 } 7089 } 7090 7091 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops 7092 // when both truev and falsev are also setcc. 7093 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && 7094 FalseV.getOpcode() == ISD::SETCC) { 7095 SDValue LHS = CondV.getOperand(0); 7096 SDValue RHS = CondV.getOperand(1); 7097 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7098 7099 // (select x, x, y) -> x | y 7100 // (select !x, x, y) -> x & y 7101 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { 7102 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, 7103 FalseV); 7104 } 7105 // (select x, y, x) -> x & y 7106 // (select !x, y, x) -> x | y 7107 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { 7108 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, 7109 FalseV); 7110 } 7111 } 7112 7113 return SDValue(); 7114 } 7115 7116 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants 7117 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. 7118 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up 7119 // being `0` or `-1`. In such cases we can replace `select` with `and`. 7120 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize 7121 // than `c0`? 7122 static SDValue 7123 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, 7124 const RISCVSubtarget &Subtarget) { 7125 if (Subtarget.hasShortForwardBranchOpt()) 7126 return SDValue(); 7127 7128 unsigned SelOpNo = 0; 7129 SDValue Sel = BO->getOperand(0); 7130 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { 7131 SelOpNo = 1; 7132 Sel = BO->getOperand(1); 7133 } 7134 7135 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) 7136 return SDValue(); 7137 7138 unsigned ConstSelOpNo = 1; 7139 unsigned OtherSelOpNo = 2; 7140 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) { 7141 ConstSelOpNo = 2; 7142 OtherSelOpNo = 1; 7143 } 7144 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo); 7145 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp); 7146 if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) 7147 return SDValue(); 7148 7149 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1); 7150 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp); 7151 if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) 7152 return SDValue(); 7153 7154 SDLoc DL(Sel); 7155 EVT VT = BO->getValueType(0); 7156 7157 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; 7158 if (SelOpNo == 1) 7159 std::swap(NewConstOps[0], NewConstOps[1]); 7160 7161 SDValue NewConstOp = 7162 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps); 7163 if (!NewConstOp) 7164 return SDValue(); 7165 7166 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); 7167 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) 7168 return SDValue(); 7169 7170 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo); 7171 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; 7172 if (SelOpNo == 1) 7173 std::swap(NewNonConstOps[0], NewNonConstOps[1]); 7174 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps); 7175 7176 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; 7177 SDValue NewF = (ConstSelOpNo == 1) ? 
NewNonConstOp : NewConstOp; 7178 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF); 7179 } 7180 7181 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 7182 SDValue CondV = Op.getOperand(0); 7183 SDValue TrueV = Op.getOperand(1); 7184 SDValue FalseV = Op.getOperand(2); 7185 SDLoc DL(Op); 7186 MVT VT = Op.getSimpleValueType(); 7187 MVT XLenVT = Subtarget.getXLenVT(); 7188 7189 // Lower vector SELECTs to VSELECTs by splatting the condition. 7190 if (VT.isVector()) { 7191 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 7192 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); 7193 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 7194 } 7195 7196 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ 7197 // nodes to implement the SELECT. Performing the lowering here allows for 7198 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless 7199 // sequence or RISCVISD::SELECT_CC node (branch-based select). 7200 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && 7201 VT.isScalarInteger()) { 7202 // (select c, t, 0) -> (czero_eqz t, c) 7203 if (isNullConstant(FalseV)) 7204 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); 7205 // (select c, 0, f) -> (czero_nez f, c) 7206 if (isNullConstant(TrueV)) 7207 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV); 7208 7209 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) 7210 if (TrueV.getOpcode() == ISD::AND && 7211 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) 7212 return DAG.getNode( 7213 ISD::OR, DL, VT, TrueV, 7214 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7215 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) 7216 if (FalseV.getOpcode() == ISD::AND && 7217 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) 7218 return DAG.getNode( 7219 ISD::OR, DL, VT, FalseV, 7220 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); 7221 7222 // Try some other optimizations before falling back to generic lowering. 7223 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7224 return V; 7225 7226 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) 7227 // Unless we have the short forward branch optimization. 
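// With Zicond this is roughly (illustrative):
//   czero.eqz tmp1, t, c   # t if c != 0, else 0
//   czero.nez tmp2, f, c   # f if c == 0, else 0
//   or        res, tmp1, tmp2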
7228 if (!Subtarget.hasConditionalMoveFusion()) 7229 return DAG.getNode( 7230 ISD::OR, DL, VT, 7231 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), 7232 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7233 } 7234 7235 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7236 return V; 7237 7238 if (Op.hasOneUse()) { 7239 unsigned UseOpc = Op->use_begin()->getOpcode(); 7240 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { 7241 SDNode *BinOp = *Op->use_begin(); 7242 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(), 7243 DAG, Subtarget)) { 7244 DAG.ReplaceAllUsesWith(BinOp, &NewSel); 7245 return lowerSELECT(NewSel, DAG); 7246 } 7247 } 7248 } 7249 7250 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) 7251 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) 7252 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV); 7253 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV); 7254 if (FPTV && FPFV) { 7255 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0)) 7256 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV); 7257 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) { 7258 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV, 7259 DAG.getConstant(1, DL, XLenVT)); 7260 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR); 7261 } 7262 } 7263 7264 // If the condition is not an integer SETCC which operates on XLenVT, we need 7265 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: 7266 // (select condv, truev, falsev) 7267 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 7268 if (CondV.getOpcode() != ISD::SETCC || 7269 CondV.getOperand(0).getSimpleValueType() != XLenVT) { 7270 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 7271 SDValue SetNE = DAG.getCondCode(ISD::SETNE); 7272 7273 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 7274 7275 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 7276 } 7277 7278 // If the CondV is the output of a SETCC node which operates on XLenVT inputs, 7279 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take 7280 // advantage of the integer compare+branch instructions. i.e.: 7281 // (select (setcc lhs, rhs, cc), truev, falsev) 7282 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 7283 SDValue LHS = CondV.getOperand(0); 7284 SDValue RHS = CondV.getOperand(1); 7285 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7286 7287 // Special case for a select of 2 constants that have a difference of 1. 7288 // Normally this is done by DAGCombine, but if the select is introduced by 7289 // type legalization or op legalization, we miss it. Restricting to SETLT 7290 // case for now because that is what signed saturating add/sub need. 7291 // FIXME: We don't need the condition to be SETLT or even a SETCC, 7292 // but we would probably want to swap the true/false values if the condition 7293 // is SETGE/SETLE to avoid an XORI. 7294 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && 7295 CCVal == ISD::SETLT) { 7296 const APInt &TrueVal = TrueV->getAsAPIntVal(); 7297 const APInt &FalseVal = FalseV->getAsAPIntVal(); 7298 if (TrueVal - 1 == FalseVal) 7299 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); 7300 if (TrueVal + 1 == FalseVal) 7301 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV); 7302 } 7303 7304 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7305 // 1 < x ? x : 1 -> 0 < x ?
x : 1 7306 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && 7307 RHS == TrueV && LHS == FalseV) { 7308 LHS = DAG.getConstant(0, DL, VT); 7309 // 0 <u x is the same as x != 0. 7310 if (CCVal == ISD::SETULT) { 7311 std::swap(LHS, RHS); 7312 CCVal = ISD::SETNE; 7313 } 7314 } 7315 7316 // x <s -1 ? x : -1 -> x <s 0 ? x : -1 7317 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && 7318 RHS == FalseV) { 7319 RHS = DAG.getConstant(0, DL, VT); 7320 } 7321 7322 SDValue TargetCC = DAG.getCondCode(CCVal); 7323 7324 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { 7325 // (select (setcc lhs, rhs, CC), constant, falsev) 7326 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) 7327 std::swap(TrueV, FalseV); 7328 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); 7329 } 7330 7331 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 7332 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 7333 } 7334 7335 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 7336 SDValue CondV = Op.getOperand(1); 7337 SDLoc DL(Op); 7338 MVT XLenVT = Subtarget.getXLenVT(); 7339 7340 if (CondV.getOpcode() == ISD::SETCC && 7341 CondV.getOperand(0).getValueType() == XLenVT) { 7342 SDValue LHS = CondV.getOperand(0); 7343 SDValue RHS = CondV.getOperand(1); 7344 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7345 7346 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7347 7348 SDValue TargetCC = DAG.getCondCode(CCVal); 7349 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7350 LHS, RHS, TargetCC, Op.getOperand(2)); 7351 } 7352 7353 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7354 CondV, DAG.getConstant(0, DL, XLenVT), 7355 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 7356 } 7357 7358 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 7359 MachineFunction &MF = DAG.getMachineFunction(); 7360 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 7361 7362 SDLoc DL(Op); 7363 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 7364 getPointerTy(MF.getDataLayout())); 7365 7366 // vastart just stores the address of the VarArgsFrameIndex slot into the 7367 // memory location argument. 
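// i.e. for va_start(ap, ...) this emits a single pointer-sized store of the
// frame-index address through the incoming pointer operand.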
7368 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 7369 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 7370 MachinePointerInfo(SV)); 7371 } 7372 7373 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 7374 SelectionDAG &DAG) const { 7375 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7376 MachineFunction &MF = DAG.getMachineFunction(); 7377 MachineFrameInfo &MFI = MF.getFrameInfo(); 7378 MFI.setFrameAddressIsTaken(true); 7379 Register FrameReg = RI.getFrameRegister(MF); 7380 int XLenInBytes = Subtarget.getXLen() / 8; 7381 7382 EVT VT = Op.getValueType(); 7383 SDLoc DL(Op); 7384 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 7385 unsigned Depth = Op.getConstantOperandVal(0); 7386 while (Depth--) { 7387 int Offset = -(XLenInBytes * 2); 7388 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 7389 DAG.getIntPtrConstant(Offset, DL)); 7390 FrameAddr = 7391 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 7392 } 7393 return FrameAddr; 7394 } 7395 7396 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 7397 SelectionDAG &DAG) const { 7398 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7399 MachineFunction &MF = DAG.getMachineFunction(); 7400 MachineFrameInfo &MFI = MF.getFrameInfo(); 7401 MFI.setReturnAddressIsTaken(true); 7402 MVT XLenVT = Subtarget.getXLenVT(); 7403 int XLenInBytes = Subtarget.getXLen() / 8; 7404 7405 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 7406 return SDValue(); 7407 7408 EVT VT = Op.getValueType(); 7409 SDLoc DL(Op); 7410 unsigned Depth = Op.getConstantOperandVal(0); 7411 if (Depth) { 7412 int Off = -XLenInBytes; 7413 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 7414 SDValue Offset = DAG.getConstant(Off, DL, VT); 7415 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 7416 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 7417 MachinePointerInfo()); 7418 } 7419 7420 // Return the value of the return address register, marking it an implicit 7421 // live-in. 
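  // In other words, __builtin_return_address(0) lowers to a plain copy from
  // the return address register (ra), with no stack access.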
7422 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 7423 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 7424 } 7425 7426 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 7427 SelectionDAG &DAG) const { 7428 SDLoc DL(Op); 7429 SDValue Lo = Op.getOperand(0); 7430 SDValue Hi = Op.getOperand(1); 7431 SDValue Shamt = Op.getOperand(2); 7432 EVT VT = Lo.getValueType(); 7433 7434 // if Shamt-XLEN < 0: // Shamt < XLEN 7435 // Lo = Lo << Shamt 7436 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 7437 // else: 7438 // Lo = 0 7439 // Hi = Lo << (Shamt-XLEN) 7440 7441 SDValue Zero = DAG.getConstant(0, DL, VT); 7442 SDValue One = DAG.getConstant(1, DL, VT); 7443 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 7444 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 7445 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 7446 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 7447 7448 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 7449 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 7450 SDValue ShiftRightLo = 7451 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 7452 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 7453 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 7454 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 7455 7456 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 7457 7458 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 7459 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 7460 7461 SDValue Parts[2] = {Lo, Hi}; 7462 return DAG.getMergeValues(Parts, DL); 7463 } 7464 7465 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 7466 bool IsSRA) const { 7467 SDLoc DL(Op); 7468 SDValue Lo = Op.getOperand(0); 7469 SDValue Hi = Op.getOperand(1); 7470 SDValue Shamt = Op.getOperand(2); 7471 EVT VT = Lo.getValueType(); 7472 7473 // SRA expansion: 7474 // if Shamt-XLEN < 0: // Shamt < XLEN 7475 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 7476 // Hi = Hi >>s Shamt 7477 // else: 7478 // Lo = Hi >>s (Shamt-XLEN); 7479 // Hi = Hi >>s (XLEN-1) 7480 // 7481 // SRL expansion: 7482 // if Shamt-XLEN < 0: // Shamt < XLEN 7483 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 7484 // Hi = Hi >>u Shamt 7485 // else: 7486 // Lo = Hi >>u (Shamt-XLEN); 7487 // Hi = 0; 7488 7489 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 7490 7491 SDValue Zero = DAG.getConstant(0, DL, VT); 7492 SDValue One = DAG.getConstant(1, DL, VT); 7493 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 7494 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 7495 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 7496 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 7497 7498 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 7499 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 7500 SDValue ShiftLeftHi = 7501 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 7502 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 7503 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 7504 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 7505 SDValue HiFalse = 7506 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 7507 7508 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 7509 7510 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 7511 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 7512 7513 SDValue Parts[2] = {Lo, Hi}; 7514 return DAG.getMergeValues(Parts, DL); 7515 } 7516 7517 // Lower splats of i1 types to SETCC. For each mask vector type, we have a 7518 // legal equivalently-sized i8 type, so we can use that as a go-between. 7519 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 7520 SelectionDAG &DAG) const { 7521 SDLoc DL(Op); 7522 MVT VT = Op.getSimpleValueType(); 7523 SDValue SplatVal = Op.getOperand(0); 7524 // All-zeros or all-ones splats are handled specially. 7525 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 7526 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 7527 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 7528 } 7529 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 7530 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 7531 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 7532 } 7533 MVT InterVT = VT.changeVectorElementType(MVT::i8); 7534 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal, 7535 DAG.getConstant(1, DL, SplatVal.getValueType())); 7536 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 7537 SDValue Zero = DAG.getConstant(0, DL, InterVT); 7538 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 7539 } 7540 7541 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 7542 // illegal (currently only vXi64 RV32). 7543 // FIXME: We could also catch non-constant sign-extended i32 values and lower 7544 // them to VMV_V_X_VL. 7545 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 7546 SelectionDAG &DAG) const { 7547 SDLoc DL(Op); 7548 MVT VecVT = Op.getSimpleValueType(); 7549 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 7550 "Unexpected SPLAT_VECTOR_PARTS lowering"); 7551 7552 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 7553 SDValue Lo = Op.getOperand(0); 7554 SDValue Hi = Op.getOperand(1); 7555 7556 MVT ContainerVT = VecVT; 7557 if (VecVT.isFixedLengthVector()) 7558 ContainerVT = getContainerForFixedLengthVector(VecVT); 7559 7560 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 7561 7562 SDValue Res = 7563 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); 7564 7565 if (VecVT.isFixedLengthVector()) 7566 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget); 7567 7568 return Res; 7569 } 7570 7571 // Custom-lower extensions from mask vectors by using a vselect either with 1 7572 // for zero/any-extension or -1 for sign-extension: 7573 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 7574 // Note that any-extension is lowered identically to zero-extension. 
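// For example, (nxv2i32 = sext nxv2i1 %vm) becomes
//   (nxv2i32 = vselect %vm, (splat -1), (splat 0))
// which is typically selected as a vmv.v.i of zero followed by a vmerge.vim.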
7575 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 7576 int64_t ExtTrueVal) const { 7577 SDLoc DL(Op); 7578 MVT VecVT = Op.getSimpleValueType(); 7579 SDValue Src = Op.getOperand(0); 7580 // Only custom-lower extensions from mask types 7581 assert(Src.getValueType().isVector() && 7582 Src.getValueType().getVectorElementType() == MVT::i1); 7583 7584 if (VecVT.isScalableVector()) { 7585 SDValue SplatZero = DAG.getConstant(0, DL, VecVT); 7586 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); 7587 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 7588 } 7589 7590 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 7591 MVT I1ContainerVT = 7592 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 7593 7594 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 7595 7596 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 7597 7598 MVT XLenVT = Subtarget.getXLenVT(); 7599 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 7600 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 7601 7602 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7603 DAG.getUNDEF(ContainerVT), SplatZero, VL); 7604 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7605 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); 7606 SDValue Select = 7607 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal, 7608 SplatZero, DAG.getUNDEF(ContainerVT), VL); 7609 7610 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 7611 } 7612 7613 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 7614 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 7615 MVT ExtVT = Op.getSimpleValueType(); 7616 // Only custom-lower extensions from fixed-length vector types. 7617 if (!ExtVT.isFixedLengthVector()) 7618 return Op; 7619 MVT VT = Op.getOperand(0).getSimpleValueType(); 7620 // Grab the canonical container type for the extended type. Infer the smaller 7621 // type from that to ensure the same number of vector elements, as we know 7622 // the LMUL will be sufficient to hold the smaller type. 7623 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 7624 // Get the extended container type manually to ensure the same number of 7625 // vector elements between source and dest. 
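  // E.g. a (v8i32 = zext v8i8) extend keeps an element count of 8 for both the
  // source and destination containers; only the element type (and hence the
  // LMUL) differs.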
7626 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 7627 ContainerExtVT.getVectorElementCount()); 7628 7629 SDValue Op1 = 7630 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 7631 7632 SDLoc DL(Op); 7633 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 7634 7635 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 7636 7637 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 7638 } 7639 7640 // Custom-lower truncations from vectors to mask vectors by using a mask and a 7641 // setcc operation: 7642 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 7643 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, 7644 SelectionDAG &DAG) const { 7645 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 7646 SDLoc DL(Op); 7647 EVT MaskVT = Op.getValueType(); 7648 // Only expect to custom-lower truncations to mask types 7649 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 7650 "Unexpected type for vector mask lowering"); 7651 SDValue Src = Op.getOperand(0); 7652 MVT VecVT = Src.getSimpleValueType(); 7653 SDValue Mask, VL; 7654 if (IsVPTrunc) { 7655 Mask = Op.getOperand(1); 7656 VL = Op.getOperand(2); 7657 } 7658 // If this is a fixed vector, we need to convert it to a scalable vector. 7659 MVT ContainerVT = VecVT; 7660 7661 if (VecVT.isFixedLengthVector()) { 7662 ContainerVT = getContainerForFixedLengthVector(VecVT); 7663 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 7664 if (IsVPTrunc) { 7665 MVT MaskContainerVT = 7666 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 7667 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 7668 } 7669 } 7670 7671 if (!IsVPTrunc) { 7672 std::tie(Mask, VL) = 7673 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7674 } 7675 7676 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 7677 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 7678 7679 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7680 DAG.getUNDEF(ContainerVT), SplatOne, VL); 7681 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7682 DAG.getUNDEF(ContainerVT), SplatZero, VL); 7683 7684 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 7685 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, 7686 DAG.getUNDEF(ContainerVT), Mask, VL); 7687 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, 7688 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), 7689 DAG.getUNDEF(MaskContainerVT), Mask, VL}); 7690 if (MaskVT.isFixedLengthVector()) 7691 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 7692 return Trunc; 7693 } 7694 7695 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, 7696 SelectionDAG &DAG) const { 7697 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 7698 SDLoc DL(Op); 7699 7700 MVT VT = Op.getSimpleValueType(); 7701 // Only custom-lower vector truncates 7702 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 7703 7704 // Truncates to mask types are handled differently 7705 if (VT.getVectorElementType() == MVT::i1) 7706 return lowerVectorMaskTruncLike(Op, DAG); 7707 7708 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 7709 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 7710 // truncate by one power of two at a time. 
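  // For example, a truncate from nxv2i64 down to nxv2i8 is emitted as three
  // TRUNCATE_VECTOR_VL steps: i64->i32, i32->i16 and finally i16->i8.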
7711 MVT DstEltVT = VT.getVectorElementType(); 7712 7713 SDValue Src = Op.getOperand(0); 7714 MVT SrcVT = Src.getSimpleValueType(); 7715 MVT SrcEltVT = SrcVT.getVectorElementType(); 7716 7717 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && 7718 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 7719 "Unexpected vector truncate lowering"); 7720 7721 MVT ContainerVT = SrcVT; 7722 SDValue Mask, VL; 7723 if (IsVPTrunc) { 7724 Mask = Op.getOperand(1); 7725 VL = Op.getOperand(2); 7726 } 7727 if (SrcVT.isFixedLengthVector()) { 7728 ContainerVT = getContainerForFixedLengthVector(SrcVT); 7729 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 7730 if (IsVPTrunc) { 7731 MVT MaskVT = getMaskTypeFor(ContainerVT); 7732 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7733 } 7734 } 7735 7736 SDValue Result = Src; 7737 if (!IsVPTrunc) { 7738 std::tie(Mask, VL) = 7739 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7740 } 7741 7742 LLVMContext &Context = *DAG.getContext(); 7743 const ElementCount Count = ContainerVT.getVectorElementCount(); 7744 do { 7745 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 7746 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 7747 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 7748 Mask, VL); 7749 } while (SrcEltVT != DstEltVT); 7750 7751 if (SrcVT.isFixedLengthVector()) 7752 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 7753 7754 return Result; 7755 } 7756 7757 SDValue 7758 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, 7759 SelectionDAG &DAG) const { 7760 SDLoc DL(Op); 7761 SDValue Chain = Op.getOperand(0); 7762 SDValue Src = Op.getOperand(1); 7763 MVT VT = Op.getSimpleValueType(); 7764 MVT SrcVT = Src.getSimpleValueType(); 7765 MVT ContainerVT = VT; 7766 if (VT.isFixedLengthVector()) { 7767 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 7768 ContainerVT = 7769 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 7770 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 7771 } 7772 7773 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7774 7775 // RVV can only widen/truncate fp to types double/half the size as the source. 7776 if ((VT.getVectorElementType() == MVT::f64 && 7777 SrcVT.getVectorElementType() == MVT::f16) || 7778 (VT.getVectorElementType() == MVT::f16 && 7779 SrcVT.getVectorElementType() == MVT::f64)) { 7780 // For double rounding, the intermediate rounding should be round-to-odd. 7781 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 7782 ? RISCVISD::STRICT_FP_EXTEND_VL 7783 : RISCVISD::STRICT_VFNCVT_ROD_VL; 7784 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 7785 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), 7786 Chain, Src, Mask, VL); 7787 Chain = Src.getValue(1); 7788 } 7789 7790 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 7791 ? RISCVISD::STRICT_FP_EXTEND_VL 7792 : RISCVISD::STRICT_FP_ROUND_VL; 7793 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 7794 Chain, Src, Mask, VL); 7795 if (VT.isFixedLengthVector()) { 7796 // StrictFP operations have two result values. Their lowered result should 7797 // have same result count. 
7798 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 7799 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 7800 } 7801 return Res; 7802 } 7803 7804 SDValue 7805 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, 7806 SelectionDAG &DAG) const { 7807 bool IsVP = 7808 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; 7809 bool IsExtend = 7810 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; 7811 // RVV can only do truncate fp to types half the size as the source. We 7812 // custom-lower f64->f16 rounds via RVV's round-to-odd float 7813 // conversion instruction. 7814 SDLoc DL(Op); 7815 MVT VT = Op.getSimpleValueType(); 7816 7817 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 7818 7819 SDValue Src = Op.getOperand(0); 7820 MVT SrcVT = Src.getSimpleValueType(); 7821 7822 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || 7823 SrcVT.getVectorElementType() != MVT::f16); 7824 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || 7825 SrcVT.getVectorElementType() != MVT::f64); 7826 7827 bool IsDirectConv = IsDirectExtend || IsDirectTrunc; 7828 7829 // Prepare any fixed-length vector operands. 7830 MVT ContainerVT = VT; 7831 SDValue Mask, VL; 7832 if (IsVP) { 7833 Mask = Op.getOperand(1); 7834 VL = Op.getOperand(2); 7835 } 7836 if (VT.isFixedLengthVector()) { 7837 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 7838 ContainerVT = 7839 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 7840 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 7841 if (IsVP) { 7842 MVT MaskVT = getMaskTypeFor(ContainerVT); 7843 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7844 } 7845 } 7846 7847 if (!IsVP) 7848 std::tie(Mask, VL) = 7849 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7850 7851 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; 7852 7853 if (IsDirectConv) { 7854 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); 7855 if (VT.isFixedLengthVector()) 7856 Src = convertFromScalableVector(VT, Src, DAG, Subtarget); 7857 return Src; 7858 } 7859 7860 unsigned InterConvOpc = 7861 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; 7862 7863 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 7864 SDValue IntermediateConv = 7865 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); 7866 SDValue Result = 7867 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); 7868 if (VT.isFixedLengthVector()) 7869 return convertFromScalableVector(VT, Result, DAG, Subtarget); 7870 return Result; 7871 } 7872 7873 // Given a scalable vector type and an index into it, returns the type for the 7874 // smallest subvector that the index fits in. This can be used to reduce LMUL 7875 // for operations like vslidedown. 7876 // 7877 // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. 
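// Likewise, with Zvl128b an index of 5 into an nxv8i32 does not fit within the
// first nxv2i32 (MinVLMAX is 4), but does fit within the first nxv4i32.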
7878 static std::optional<MVT> 7879 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, 7880 const RISCVSubtarget &Subtarget) { 7881 assert(VecVT.isScalableVector()); 7882 const unsigned EltSize = VecVT.getScalarSizeInBits(); 7883 const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 7884 const unsigned MinVLMAX = VectorBitsMin / EltSize; 7885 MVT SmallerVT; 7886 if (MaxIdx < MinVLMAX) 7887 SmallerVT = getLMUL1VT(VecVT); 7888 else if (MaxIdx < MinVLMAX * 2) 7889 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT(); 7890 else if (MaxIdx < MinVLMAX * 4) 7891 SmallerVT = getLMUL1VT(VecVT) 7892 .getDoubleNumVectorElementsVT() 7893 .getDoubleNumVectorElementsVT(); 7894 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT)) 7895 return std::nullopt; 7896 return SmallerVT; 7897 } 7898 7899 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 7900 // first position of a vector, and that vector is slid up to the insert index. 7901 // By limiting the active vector length to index+1 and merging with the 7902 // original vector (with an undisturbed tail policy for elements >= VL), we 7903 // achieve the desired result of leaving all elements untouched except the one 7904 // at VL-1, which is replaced with the desired value. 7905 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 7906 SelectionDAG &DAG) const { 7907 SDLoc DL(Op); 7908 MVT VecVT = Op.getSimpleValueType(); 7909 SDValue Vec = Op.getOperand(0); 7910 SDValue Val = Op.getOperand(1); 7911 SDValue Idx = Op.getOperand(2); 7912 7913 if (VecVT.getVectorElementType() == MVT::i1) { 7914 // FIXME: For now we just promote to an i8 vector and insert into that, 7915 // but this is probably not optimal. 7916 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 7917 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 7918 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 7919 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 7920 } 7921 7922 MVT ContainerVT = VecVT; 7923 // If the operand is a fixed-length vector, convert to a scalable one. 7924 if (VecVT.isFixedLengthVector()) { 7925 ContainerVT = getContainerForFixedLengthVector(VecVT); 7926 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7927 } 7928 7929 // If we know the index we're going to insert at, we can shrink Vec so that 7930 // we're performing the scalar inserts and slideup on a smaller LMUL. 7931 MVT OrigContainerVT = ContainerVT; 7932 SDValue OrigVec = Vec; 7933 SDValue AlignedIdx; 7934 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) { 7935 const unsigned OrigIdx = IdxC->getZExtValue(); 7936 // Do we know an upper bound on LMUL? 7937 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx, 7938 DL, DAG, Subtarget)) { 7939 ContainerVT = *ShrunkVT; 7940 AlignedIdx = DAG.getVectorIdxConstant(0, DL); 7941 } 7942 7943 // If we're compiling for an exact VLEN value, we can always perform 7944 // the insert in m1 as we can determine the register corresponding to 7945 // the index in the register group. 
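    // For example (illustrative), with an exact VLEN of 128, an insert at
    // index 6 of a v8i32 is performed on the second register of the group
    // (ElemsPerVReg = 4) at element 2 within that register.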
7946 const unsigned MinVLen = Subtarget.getRealMinVLen(); 7947 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 7948 const MVT M1VT = getLMUL1VT(ContainerVT); 7949 if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) { 7950 EVT ElemVT = VecVT.getVectorElementType(); 7951 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 7952 unsigned RemIdx = OrigIdx % ElemsPerVReg; 7953 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 7954 unsigned ExtractIdx = 7955 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 7956 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL); 7957 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 7958 ContainerVT = M1VT; 7959 } 7960 7961 if (AlignedIdx) 7962 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 7963 AlignedIdx); 7964 } 7965 7966 MVT XLenVT = Subtarget.getXLenVT(); 7967 7968 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 7969 // Even i64-element vectors on RV32 can be lowered without scalar 7970 // legalization if the most-significant 32 bits of the value are not affected 7971 // by the sign-extension of the lower 32 bits. 7972 // TODO: We could also catch sign extensions of a 32-bit value. 7973 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 7974 const auto *CVal = cast<ConstantSDNode>(Val); 7975 if (isInt<32>(CVal->getSExtValue())) { 7976 IsLegalInsert = true; 7977 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 7978 } 7979 } 7980 7981 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7982 7983 SDValue ValInVec; 7984 7985 if (IsLegalInsert) { 7986 unsigned Opc = 7987 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 7988 if (isNullConstant(Idx)) { 7989 if (!VecVT.isFloatingPoint()) 7990 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val); 7991 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 7992 7993 if (AlignedIdx) 7994 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 7995 Vec, AlignedIdx); 7996 if (!VecVT.isFixedLengthVector()) 7997 return Vec; 7998 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 7999 } 8000 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); 8001 } else { 8002 // On RV32, i64-element vectors must be specially handled to place the 8003 // value at element 0, by using two vslide1down instructions in sequence on 8004 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 8005 // this. 8006 SDValue ValLo, ValHi; 8007 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); 8008 MVT I32ContainerVT = 8009 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 8010 SDValue I32Mask = 8011 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 8012 // Limit the active VL to two. 8013 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 8014 // If the Idx is 0 we can insert directly into the vector. 8015 if (isNullConstant(Idx)) { 8016 // First slide in the lo value, then the hi in above it. We use slide1down 8017 // to avoid the register group overlap constraint of vslide1up. 8018 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8019 Vec, Vec, ValLo, I32Mask, InsertI64VL); 8020 // If the source vector is undef don't pass along the tail elements from 8021 // the previous slide1down. 8022 SDValue Tail = Vec.isUndef() ? 
Vec : ValInVec; 8023 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8024 Tail, ValInVec, ValHi, I32Mask, InsertI64VL); 8025 // Bitcast back to the right container type. 8026 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8027 8028 if (AlignedIdx) 8029 ValInVec = 8030 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8031 ValInVec, AlignedIdx); 8032 if (!VecVT.isFixedLengthVector()) 8033 return ValInVec; 8034 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); 8035 } 8036 8037 // First slide in the lo value, then the hi in above it. We use slide1down 8038 // to avoid the register group overlap constraint of vslide1up. 8039 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8040 DAG.getUNDEF(I32ContainerVT), 8041 DAG.getUNDEF(I32ContainerVT), ValLo, 8042 I32Mask, InsertI64VL); 8043 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8044 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, 8045 I32Mask, InsertI64VL); 8046 // Bitcast back to the right container type. 8047 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8048 } 8049 8050 // Now that the value is in a vector, slide it into position. 8051 SDValue InsertVL = 8052 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 8053 8054 // Use tail agnostic policy if Idx is the last index of Vec. 8055 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 8056 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && 8057 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) 8058 Policy = RISCVII::TAIL_AGNOSTIC; 8059 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, 8060 Idx, Mask, InsertVL, Policy); 8061 8062 if (AlignedIdx) 8063 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8064 Slideup, AlignedIdx); 8065 if (!VecVT.isFixedLengthVector()) 8066 return Slideup; 8067 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 8068 } 8069 8070 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 8071 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 8072 // types this is done using VMV_X_S to allow us to glean information about the 8073 // sign bits of the result. 8074 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 8075 SelectionDAG &DAG) const { 8076 SDLoc DL(Op); 8077 SDValue Idx = Op.getOperand(1); 8078 SDValue Vec = Op.getOperand(0); 8079 EVT EltVT = Op.getValueType(); 8080 MVT VecVT = Vec.getSimpleValueType(); 8081 MVT XLenVT = Subtarget.getXLenVT(); 8082 8083 if (VecVT.getVectorElementType() == MVT::i1) { 8084 // Use vfirst.m to extract the first bit. 
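    // I.e. (extractelt vXi1 %vm, 0) is computed as (vfirst.m %vm) == 0, which
    // is 1 exactly when the first mask bit is set.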
8085 if (isNullConstant(Idx)) { 8086 MVT ContainerVT = VecVT; 8087 if (VecVT.isFixedLengthVector()) { 8088 ContainerVT = getContainerForFixedLengthVector(VecVT); 8089 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8090 } 8091 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 8092 SDValue Vfirst = 8093 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); 8094 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst, 8095 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); 8096 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8097 } 8098 if (VecVT.isFixedLengthVector()) { 8099 unsigned NumElts = VecVT.getVectorNumElements(); 8100 if (NumElts >= 8) { 8101 MVT WideEltVT; 8102 unsigned WidenVecLen; 8103 SDValue ExtractElementIdx; 8104 SDValue ExtractBitIdx; 8105 unsigned MaxEEW = Subtarget.getELen(); 8106 MVT LargestEltVT = MVT::getIntegerVT( 8107 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); 8108 if (NumElts <= LargestEltVT.getSizeInBits()) { 8109 assert(isPowerOf2_32(NumElts) && 8110 "the number of elements should be power of 2"); 8111 WideEltVT = MVT::getIntegerVT(NumElts); 8112 WidenVecLen = 1; 8113 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); 8114 ExtractBitIdx = Idx; 8115 } else { 8116 WideEltVT = LargestEltVT; 8117 WidenVecLen = NumElts / WideEltVT.getSizeInBits(); 8118 // extract element index = index / element width 8119 ExtractElementIdx = DAG.getNode( 8120 ISD::SRL, DL, XLenVT, Idx, 8121 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); 8122 // mask bit index = index % element width 8123 ExtractBitIdx = DAG.getNode( 8124 ISD::AND, DL, XLenVT, Idx, 8125 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); 8126 } 8127 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); 8128 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); 8129 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, 8130 Vec, ExtractElementIdx); 8131 // Extract the bit from GPR. 8132 SDValue ShiftRight = 8133 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); 8134 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, 8135 DAG.getConstant(1, DL, XLenVT)); 8136 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8137 } 8138 } 8139 // Otherwise, promote to an i8 vector and extract from that. 8140 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 8141 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 8142 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 8143 } 8144 8145 // If this is a fixed vector, we need to convert it to a scalable vector. 8146 MVT ContainerVT = VecVT; 8147 if (VecVT.isFixedLengthVector()) { 8148 ContainerVT = getContainerForFixedLengthVector(VecVT); 8149 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8150 } 8151 8152 // If we're compiling for an exact VLEN value and we have a known 8153 // constant index, we can always perform the extract in m1 (or 8154 // smaller) as we can determine the register corresponding to 8155 // the index in the register group. 
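  // For example (illustrative), with an exact VLEN of 128, extracting element
  // 9 of a v16i32 reduces to extracting element 1 of the third register in the
  // group (ElemsPerVReg = 4).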
8156 const unsigned MinVLen = Subtarget.getRealMinVLen(); 8157 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 8158 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx); 8159 IdxC && MinVLen == MaxVLen && 8160 VecVT.getSizeInBits().getKnownMinValue() > MinVLen) { 8161 MVT M1VT = getLMUL1VT(ContainerVT); 8162 unsigned OrigIdx = IdxC->getZExtValue(); 8163 EVT ElemVT = VecVT.getVectorElementType(); 8164 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 8165 unsigned RemIdx = OrigIdx % ElemsPerVReg; 8166 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 8167 unsigned ExtractIdx = 8168 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 8169 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 8170 DAG.getVectorIdxConstant(ExtractIdx, DL)); 8171 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 8172 ContainerVT = M1VT; 8173 } 8174 8175 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which 8176 // contains our index. 8177 std::optional<uint64_t> MaxIdx; 8178 if (VecVT.isFixedLengthVector()) 8179 MaxIdx = VecVT.getVectorNumElements() - 1; 8180 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) 8181 MaxIdx = IdxC->getZExtValue(); 8182 if (MaxIdx) { 8183 if (auto SmallerVT = 8184 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) { 8185 ContainerVT = *SmallerVT; 8186 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 8187 DAG.getConstant(0, DL, XLenVT)); 8188 } 8189 } 8190 8191 // If after narrowing, the required slide is still greater than LMUL2, 8192 // fallback to generic expansion and go through the stack. This is done 8193 // for a subtle reason: extracting *all* elements out of a vector is 8194 // widely expected to be linear in vector size, but because vslidedown 8195 // is linear in LMUL, performing N extracts using vslidedown becomes 8196 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack 8197 // seems to have the same problem (the store is linear in LMUL), but the 8198 // generic expansion *memoizes* the store, and thus for many extracts of 8199 // the same vector we end up with one store and a bunch of loads. 8200 // TODO: We don't have the same code for insert_vector_elt because we 8201 // have BUILD_VECTOR and handle the degenerate case there. Should we 8202 // consider adding an inverse BUILD_VECTOR node? 8203 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT(); 8204 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector()) 8205 return SDValue(); 8206 8207 // If the index is 0, the vector is already in the right position. 8208 if (!isNullConstant(Idx)) { 8209 // Use a VL of 1 to avoid processing more elements than we need. 8210 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 8211 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 8212 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 8213 } 8214 8215 if (!EltVT.isInteger()) { 8216 // Floating-point extracts are handled in TableGen. 8217 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 8218 DAG.getConstant(0, DL, XLenVT)); 8219 } 8220 8221 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 8222 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 8223 } 8224 8225 // Some RVV intrinsics may claim that they want an integer operand to be 8226 // promoted or expanded. 
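// For example (illustrative), an i8 scalar operand of riscv_vadd on RV64 is
// promoted to i64 (XLenVT), while an i64 scalar operand of a vXi64 operation
// on RV32 has to be expanded, e.g. by splatting the split lo/hi halves.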
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT, we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
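    // Illustrative shape of the expansion for vslide1up with an i64 scalar on
    // RV32 (vslide1down mirrors it with the lo/hi order swapped):
    //   (nxv2i64 vslide1up vec, x)
    //     -> (bitcast (nxv4i32 vslide1up
    //                    (nxv4i32 vslide1up (bitcast vec), x.hi), x.lo))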
8302 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 8303 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); 8304 SDValue ScalarLo, ScalarHi; 8305 std::tie(ScalarLo, ScalarHi) = 8306 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32); 8307 8308 // Double the VL since we halved SEW. 8309 SDValue AVL = getVLOperand(Op); 8310 SDValue I32VL; 8311 8312 // Optimize for constant AVL 8313 if (isa<ConstantSDNode>(AVL)) { 8314 const auto [MinVLMAX, MaxVLMAX] = 8315 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget); 8316 8317 uint64_t AVLInt = AVL->getAsZExtVal(); 8318 if (AVLInt <= MinVLMAX) { 8319 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); 8320 } else if (AVLInt >= 2 * MaxVLMAX) { 8321 // Just set vl to VLMAX in this situation 8322 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); 8323 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 8324 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); 8325 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 8326 SDValue SETVLMAX = DAG.getTargetConstant( 8327 Intrinsic::riscv_vsetvlimax, DL, MVT::i32); 8328 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, 8329 LMUL); 8330 } else { 8331 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl 8332 // is related to the hardware implementation. 8333 // So let the following code handle 8334 } 8335 } 8336 if (!I32VL) { 8337 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); 8338 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 8339 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); 8340 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 8341 SDValue SETVL = 8342 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32); 8343 // Using vsetvli instruction to get actually used length which related to 8344 // the hardware implementation 8345 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, 8346 SEW, LMUL); 8347 I32VL = 8348 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); 8349 } 8350 8351 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); 8352 8353 // Shift the two scalar parts in using SEW=32 slide1up/slide1down 8354 // instructions. 8355 SDValue Passthru; 8356 if (IsMasked) 8357 Passthru = DAG.getUNDEF(I32VT); 8358 else 8359 Passthru = DAG.getBitcast(I32VT, Operands[1]); 8360 8361 if (IntNo == Intrinsic::riscv_vslide1up || 8362 IntNo == Intrinsic::riscv_vslide1up_mask) { 8363 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 8364 ScalarHi, I32Mask, I32VL); 8365 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 8366 ScalarLo, I32Mask, I32VL); 8367 } else { 8368 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 8369 ScalarLo, I32Mask, I32VL); 8370 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 8371 ScalarHi, I32Mask, I32VL); 8372 } 8373 8374 // Convert back to nxvXi64. 8375 Vec = DAG.getBitcast(VT, Vec); 8376 8377 if (!IsMasked) 8378 return Vec; 8379 // Apply mask after the operation. 8380 SDValue Mask = Operands[NumOps - 3]; 8381 SDValue MaskedOff = Operands[1]; 8382 // Assume Policy operand is the last operand. 8383 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal(); 8384 // We don't need to select maskedoff if it's undef. 
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                         DAG.getUNDEF(VT), AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care about mask policy.
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       MaskedOff, AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();

  unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");
  (void)MinVF;

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ?
LMul1VF / VF : VF / LMul1VF; 8434 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); 8435 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); 8436 8437 SDLoc DL(N); 8438 8439 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); 8440 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); 8441 8442 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); 8443 8444 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); 8445 SDValue Res = 8446 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); 8447 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); 8448 } 8449 8450 static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG, 8451 SmallVector<SDValue> &Ops) { 8452 SDLoc DL(Op); 8453 8454 const RISCVSubtarget &Subtarget = 8455 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); 8456 for (const SDValue &V : Op->op_values()) { 8457 EVT ValType = V.getValueType(); 8458 if (ValType.isScalableVector() && ValType.isFloatingPoint()) { 8459 MVT InterimIVT = 8460 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()), 8461 ValType.getVectorElementCount()); 8462 Ops.push_back(DAG.getBitcast(InterimIVT, V)); 8463 } else if (ValType.isFixedLengthVector()) { 8464 MVT OpContainerVT = getContainerForFixedLengthVector( 8465 DAG, V.getSimpleValueType(), Subtarget); 8466 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget)); 8467 } else 8468 Ops.push_back(V); 8469 } 8470 } 8471 8472 // LMUL * VLEN should be greater than or equal to EGS * SEW 8473 static inline bool isValidEGW(int EGS, EVT VT, 8474 const RISCVSubtarget &Subtarget) { 8475 return (Subtarget.getRealMinVLen() * 8476 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= 8477 EGS * VT.getScalarSizeInBits(); 8478 } 8479 8480 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 8481 SelectionDAG &DAG) const { 8482 unsigned IntNo = Op.getConstantOperandVal(0); 8483 SDLoc DL(Op); 8484 MVT XLenVT = Subtarget.getXLenVT(); 8485 8486 switch (IntNo) { 8487 default: 8488 break; // Don't custom lower most intrinsics. 
8489 case Intrinsic::thread_pointer: { 8490 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8491 return DAG.getRegister(RISCV::X4, PtrVT); 8492 } 8493 case Intrinsic::riscv_orc_b: 8494 case Intrinsic::riscv_brev8: 8495 case Intrinsic::riscv_sha256sig0: 8496 case Intrinsic::riscv_sha256sig1: 8497 case Intrinsic::riscv_sha256sum0: 8498 case Intrinsic::riscv_sha256sum1: 8499 case Intrinsic::riscv_sm3p0: 8500 case Intrinsic::riscv_sm3p1: { 8501 unsigned Opc; 8502 switch (IntNo) { 8503 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 8504 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 8505 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 8506 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 8507 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 8508 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 8509 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 8510 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 8511 } 8512 8513 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8514 SDValue NewOp = 8515 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8516 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 8517 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8518 } 8519 8520 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 8521 } 8522 case Intrinsic::riscv_sm4ks: 8523 case Intrinsic::riscv_sm4ed: { 8524 unsigned Opc = 8525 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 8526 8527 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8528 SDValue NewOp0 = 8529 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8530 SDValue NewOp1 = 8531 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8532 SDValue Res = 8533 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); 8534 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8535 } 8536 8537 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), 8538 Op.getOperand(3)); 8539 } 8540 case Intrinsic::riscv_zip: 8541 case Intrinsic::riscv_unzip: { 8542 unsigned Opc = 8543 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; 8544 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 8545 } 8546 case Intrinsic::riscv_clmul: 8547 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8548 SDValue NewOp0 = 8549 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8550 SDValue NewOp1 = 8551 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8552 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 8553 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8554 } 8555 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), 8556 Op.getOperand(2)); 8557 case Intrinsic::riscv_clmulh: 8558 case Intrinsic::riscv_clmulr: { 8559 unsigned Opc = 8560 IntNo == Intrinsic::riscv_clmulh ? 
RISCVISD::CLMULH : RISCVISD::CLMULR; 8561 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8562 SDValue NewOp0 = 8563 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8564 SDValue NewOp1 = 8565 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8566 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 8567 DAG.getConstant(32, DL, MVT::i64)); 8568 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 8569 DAG.getConstant(32, DL, MVT::i64)); 8570 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 8571 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 8572 DAG.getConstant(32, DL, MVT::i64)); 8573 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8574 } 8575 8576 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 8577 } 8578 case Intrinsic::experimental_get_vector_length: 8579 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); 8580 case Intrinsic::riscv_vmv_x_s: { 8581 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); 8582 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); 8583 } 8584 case Intrinsic::riscv_vfmv_f_s: 8585 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 8586 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); 8587 case Intrinsic::riscv_vmv_v_x: 8588 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 8589 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, 8590 Subtarget); 8591 case Intrinsic::riscv_vfmv_v_f: 8592 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 8593 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 8594 case Intrinsic::riscv_vmv_s_x: { 8595 SDValue Scalar = Op.getOperand(2); 8596 8597 if (Scalar.getValueType().bitsLE(XLenVT)) { 8598 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 8599 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 8600 Op.getOperand(1), Scalar, Op.getOperand(3)); 8601 } 8602 8603 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 8604 8605 // This is an i64 value that lives in two scalar registers. We have to 8606 // insert this in a convoluted way. First we build vXi64 splat containing 8607 // the two values that we assemble using some bit math. Next we'll use 8608 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 8609 // to merge element 0 from our splat into the source vector. 8610 // FIXME: This is probably not the best way to do this, but it is 8611 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 8612 // point. 
8613 // sw lo, (a0) 8614 // sw hi, 4(a0) 8615 // vlse vX, (a0) 8616 // 8617 // vid.v vVid 8618 // vmseq.vx mMask, vVid, 0 8619 // vmerge.vvm vDest, vSrc, vVal, mMask 8620 MVT VT = Op.getSimpleValueType(); 8621 SDValue Vec = Op.getOperand(1); 8622 SDValue VL = getVLOperand(Op); 8623 8624 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); 8625 if (Op.getOperand(1).isUndef()) 8626 return SplattedVal; 8627 SDValue SplattedIdx = 8628 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 8629 DAG.getConstant(0, DL, MVT::i32), VL); 8630 8631 MVT MaskVT = getMaskTypeFor(VT); 8632 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); 8633 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 8634 SDValue SelectCond = 8635 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 8636 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), 8637 DAG.getUNDEF(MaskVT), Mask, VL}); 8638 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, 8639 Vec, DAG.getUNDEF(VT), VL); 8640 } 8641 case Intrinsic::riscv_vfmv_s_f: 8642 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), 8643 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 8644 // EGS * EEW >= 128 bits 8645 case Intrinsic::riscv_vaesdf_vv: 8646 case Intrinsic::riscv_vaesdf_vs: 8647 case Intrinsic::riscv_vaesdm_vv: 8648 case Intrinsic::riscv_vaesdm_vs: 8649 case Intrinsic::riscv_vaesef_vv: 8650 case Intrinsic::riscv_vaesef_vs: 8651 case Intrinsic::riscv_vaesem_vv: 8652 case Intrinsic::riscv_vaesem_vs: 8653 case Intrinsic::riscv_vaeskf1: 8654 case Intrinsic::riscv_vaeskf2: 8655 case Intrinsic::riscv_vaesz_vs: 8656 case Intrinsic::riscv_vsm4k: 8657 case Intrinsic::riscv_vsm4r_vv: 8658 case Intrinsic::riscv_vsm4r_vs: { 8659 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 8660 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 8661 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 8662 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 8663 return Op; 8664 } 8665 // EGS * EEW >= 256 bits 8666 case Intrinsic::riscv_vsm3c: 8667 case Intrinsic::riscv_vsm3me: { 8668 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || 8669 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) 8670 report_fatal_error("EGW should be greater than or equal to 8 * SEW."); 8671 return Op; 8672 } 8673 // zvknha(SEW=32)/zvknhb(SEW=[32|64]) 8674 case Intrinsic::riscv_vsha2ch: 8675 case Intrinsic::riscv_vsha2cl: 8676 case Intrinsic::riscv_vsha2ms: { 8677 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && 8678 !Subtarget.hasStdExtZvknhb()) 8679 report_fatal_error("SEW=64 needs Zvknhb to be enabled."); 8680 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 8681 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 8682 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 8683 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 8684 return Op; 8685 } 8686 case Intrinsic::riscv_sf_vc_v_x: 8687 case Intrinsic::riscv_sf_vc_v_i: 8688 case Intrinsic::riscv_sf_vc_v_xv: 8689 case Intrinsic::riscv_sf_vc_v_iv: 8690 case Intrinsic::riscv_sf_vc_v_vv: 8691 case Intrinsic::riscv_sf_vc_v_fv: 8692 case Intrinsic::riscv_sf_vc_v_xvv: 8693 case Intrinsic::riscv_sf_vc_v_ivv: 8694 case Intrinsic::riscv_sf_vc_v_vvv: 8695 case Intrinsic::riscv_sf_vc_v_fvv: 8696 case Intrinsic::riscv_sf_vc_v_xvw: 8697 case Intrinsic::riscv_sf_vc_v_ivw: 8698 case Intrinsic::riscv_sf_vc_v_vvw: 8699 case 
Intrinsic::riscv_sf_vc_v_fvw: {
    MVT VT = Op.getSimpleValueType();

    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    MVT RetVT = VT;
    if (VT.isFixedLengthVector())
      RetVT = getContainerForFixedLengthVector(VT);
    else if (VT.isFloatingPoint())
      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                               VT.getVectorElementCount());

    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);

    if (VT.isFixedLengthVector())
      NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
    else if (VT.isFloatingPoint())
      NewNode = DAG.getBitcast(VT, NewNode);

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector()) {
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
        PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
      }
    }

    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    SDValue Ptr = Op.getOperand(3);
    SDValue Stride = Op.getOperand(4);
    SDValue Result, Chain;

    // TODO: We restrict this to unmasked loads currently in consideration of
    // the complexity of handling all-false masks.
    if (IsUnmasked && isNullConstant(Stride)) {
      MVT ScalarVT = ContainerVT.getVectorElementType();
      SDValue ScalarLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
                         ScalarVT, Load->getMemOperand());
      Chain = ScalarLoad.getValue(1);
      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
                                Subtarget);
    } else {
      SDValue IntID = DAG.getTargetConstant(
          IsUnmasked ?
Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 8777 XLenVT); 8778 8779 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 8780 if (IsUnmasked) 8781 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8782 else 8783 Ops.push_back(PassThru); 8784 Ops.push_back(Ptr); 8785 Ops.push_back(Stride); 8786 if (!IsUnmasked) 8787 Ops.push_back(Mask); 8788 Ops.push_back(VL); 8789 if (!IsUnmasked) { 8790 SDValue Policy = 8791 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 8792 Ops.push_back(Policy); 8793 } 8794 8795 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 8796 Result = 8797 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 8798 Load->getMemoryVT(), Load->getMemOperand()); 8799 Chain = Result.getValue(1); 8800 } 8801 if (VT.isFixedLengthVector()) 8802 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 8803 return DAG.getMergeValues({Result, Chain}, DL); 8804 } 8805 case Intrinsic::riscv_seg2_load: 8806 case Intrinsic::riscv_seg3_load: 8807 case Intrinsic::riscv_seg4_load: 8808 case Intrinsic::riscv_seg5_load: 8809 case Intrinsic::riscv_seg6_load: 8810 case Intrinsic::riscv_seg7_load: 8811 case Intrinsic::riscv_seg8_load: { 8812 SDLoc DL(Op); 8813 static const Intrinsic::ID VlsegInts[7] = { 8814 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 8815 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 8816 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 8817 Intrinsic::riscv_vlseg8}; 8818 unsigned NF = Op->getNumValues() - 1; 8819 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 8820 MVT XLenVT = Subtarget.getXLenVT(); 8821 MVT VT = Op->getSimpleValueType(0); 8822 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8823 8824 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 8825 Subtarget); 8826 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); 8827 auto *Load = cast<MemIntrinsicSDNode>(Op); 8828 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); 8829 ContainerVTs.push_back(MVT::Other); 8830 SDVTList VTs = DAG.getVTList(ContainerVTs); 8831 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; 8832 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); 8833 Ops.push_back(Op.getOperand(2)); 8834 Ops.push_back(VL); 8835 SDValue Result = 8836 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 8837 Load->getMemoryVT(), Load->getMemOperand()); 8838 SmallVector<SDValue, 9> Results; 8839 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) 8840 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), 8841 DAG, Subtarget)); 8842 Results.push_back(Result.getValue(NF)); 8843 return DAG.getMergeValues(Results, DL); 8844 } 8845 case Intrinsic::riscv_sf_vc_v_x_se: 8846 case Intrinsic::riscv_sf_vc_v_i_se: 8847 case Intrinsic::riscv_sf_vc_v_xv_se: 8848 case Intrinsic::riscv_sf_vc_v_iv_se: 8849 case Intrinsic::riscv_sf_vc_v_vv_se: 8850 case Intrinsic::riscv_sf_vc_v_fv_se: 8851 case Intrinsic::riscv_sf_vc_v_xvv_se: 8852 case Intrinsic::riscv_sf_vc_v_ivv_se: 8853 case Intrinsic::riscv_sf_vc_v_vvv_se: 8854 case Intrinsic::riscv_sf_vc_v_fvv_se: 8855 case Intrinsic::riscv_sf_vc_v_xvw_se: 8856 case Intrinsic::riscv_sf_vc_v_ivw_se: 8857 case Intrinsic::riscv_sf_vc_v_vvw_se: 8858 case Intrinsic::riscv_sf_vc_v_fvw_se: { 8859 MVT VT = Op.getSimpleValueType(); 8860 SDLoc DL(Op); 8861 SmallVector<SDValue> Ops; 8862 getVCIXOperands(Op, DAG, Ops); 8863 8864 MVT RetVT = VT; 8865 if (VT.isFixedLengthVector()) 8866 RetVT = getContainerForFixedLengthVector(VT); 8867 else if (VT.isFloatingPoint()) 8868 RetVT 
= MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()), 8869 RetVT.getVectorElementCount()); 8870 8871 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); 8872 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops); 8873 8874 if (VT.isFixedLengthVector()) { 8875 SDValue FixedVector = 8876 convertFromScalableVector(VT, NewNode, DAG, Subtarget); 8877 NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL); 8878 } else if (VT.isFloatingPoint()) { 8879 SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0)); 8880 NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL); 8881 } 8882 8883 if (Op == NewNode) 8884 break; 8885 8886 return NewNode; 8887 } 8888 } 8889 8890 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 8891 } 8892 8893 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 8894 SelectionDAG &DAG) const { 8895 unsigned IntNo = Op.getConstantOperandVal(1); 8896 switch (IntNo) { 8897 default: 8898 break; 8899 case Intrinsic::riscv_masked_strided_store: { 8900 SDLoc DL(Op); 8901 MVT XLenVT = Subtarget.getXLenVT(); 8902 8903 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 8904 // the selection of the masked intrinsics doesn't do this for us. 8905 SDValue Mask = Op.getOperand(5); 8906 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 8907 8908 SDValue Val = Op.getOperand(2); 8909 MVT VT = Val.getSimpleValueType(); 8910 MVT ContainerVT = VT; 8911 if (VT.isFixedLengthVector()) { 8912 ContainerVT = getContainerForFixedLengthVector(VT); 8913 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 8914 } 8915 if (!IsUnmasked) { 8916 MVT MaskVT = getMaskTypeFor(ContainerVT); 8917 if (VT.isFixedLengthVector()) 8918 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8919 } 8920 8921 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 8922 8923 SDValue IntID = DAG.getTargetConstant( 8924 IsUnmasked ? 
Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 8925 XLenVT); 8926 8927 auto *Store = cast<MemIntrinsicSDNode>(Op); 8928 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 8929 Ops.push_back(Val); 8930 Ops.push_back(Op.getOperand(3)); // Ptr 8931 Ops.push_back(Op.getOperand(4)); // Stride 8932 if (!IsUnmasked) 8933 Ops.push_back(Mask); 8934 Ops.push_back(VL); 8935 8936 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 8937 Ops, Store->getMemoryVT(), 8938 Store->getMemOperand()); 8939 } 8940 case Intrinsic::riscv_seg2_store: 8941 case Intrinsic::riscv_seg3_store: 8942 case Intrinsic::riscv_seg4_store: 8943 case Intrinsic::riscv_seg5_store: 8944 case Intrinsic::riscv_seg6_store: 8945 case Intrinsic::riscv_seg7_store: 8946 case Intrinsic::riscv_seg8_store: { 8947 SDLoc DL(Op); 8948 static const Intrinsic::ID VssegInts[] = { 8949 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 8950 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 8951 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 8952 Intrinsic::riscv_vsseg8}; 8953 // Operands are (chain, int_id, vec*, ptr, vl) 8954 unsigned NF = Op->getNumOperands() - 4; 8955 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 8956 MVT XLenVT = Subtarget.getXLenVT(); 8957 MVT VT = Op->getOperand(2).getSimpleValueType(); 8958 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8959 8960 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 8961 Subtarget); 8962 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); 8963 SDValue Ptr = Op->getOperand(NF + 2); 8964 8965 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); 8966 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; 8967 for (unsigned i = 0; i < NF; i++) 8968 Ops.push_back(convertToScalableVector( 8969 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget)); 8970 Ops.append({Ptr, VL}); 8971 8972 return DAG.getMemIntrinsicNode( 8973 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, 8974 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); 8975 } 8976 case Intrinsic::riscv_sf_vc_x_se_e8mf8: 8977 case Intrinsic::riscv_sf_vc_x_se_e8mf4: 8978 case Intrinsic::riscv_sf_vc_x_se_e8mf2: 8979 case Intrinsic::riscv_sf_vc_x_se_e8m1: 8980 case Intrinsic::riscv_sf_vc_x_se_e8m2: 8981 case Intrinsic::riscv_sf_vc_x_se_e8m4: 8982 case Intrinsic::riscv_sf_vc_x_se_e8m8: 8983 case Intrinsic::riscv_sf_vc_x_se_e16mf4: 8984 case Intrinsic::riscv_sf_vc_x_se_e16mf2: 8985 case Intrinsic::riscv_sf_vc_x_se_e16m1: 8986 case Intrinsic::riscv_sf_vc_x_se_e16m2: 8987 case Intrinsic::riscv_sf_vc_x_se_e16m4: 8988 case Intrinsic::riscv_sf_vc_x_se_e16m8: 8989 case Intrinsic::riscv_sf_vc_x_se_e32mf2: 8990 case Intrinsic::riscv_sf_vc_x_se_e32m1: 8991 case Intrinsic::riscv_sf_vc_x_se_e32m2: 8992 case Intrinsic::riscv_sf_vc_x_se_e32m4: 8993 case Intrinsic::riscv_sf_vc_x_se_e32m8: 8994 case Intrinsic::riscv_sf_vc_x_se_e64m1: 8995 case Intrinsic::riscv_sf_vc_x_se_e64m2: 8996 case Intrinsic::riscv_sf_vc_x_se_e64m4: 8997 case Intrinsic::riscv_sf_vc_x_se_e64m8: 8998 case Intrinsic::riscv_sf_vc_i_se_e8mf8: 8999 case Intrinsic::riscv_sf_vc_i_se_e8mf4: 9000 case Intrinsic::riscv_sf_vc_i_se_e8mf2: 9001 case Intrinsic::riscv_sf_vc_i_se_e8m1: 9002 case Intrinsic::riscv_sf_vc_i_se_e8m2: 9003 case Intrinsic::riscv_sf_vc_i_se_e8m4: 9004 case Intrinsic::riscv_sf_vc_i_se_e8m8: 9005 case Intrinsic::riscv_sf_vc_i_se_e16mf4: 9006 case Intrinsic::riscv_sf_vc_i_se_e16mf2: 9007 case Intrinsic::riscv_sf_vc_i_se_e16m1: 9008 case 
Intrinsic::riscv_sf_vc_i_se_e16m2: 9009 case Intrinsic::riscv_sf_vc_i_se_e16m4: 9010 case Intrinsic::riscv_sf_vc_i_se_e16m8: 9011 case Intrinsic::riscv_sf_vc_i_se_e32mf2: 9012 case Intrinsic::riscv_sf_vc_i_se_e32m1: 9013 case Intrinsic::riscv_sf_vc_i_se_e32m2: 9014 case Intrinsic::riscv_sf_vc_i_se_e32m4: 9015 case Intrinsic::riscv_sf_vc_i_se_e32m8: 9016 case Intrinsic::riscv_sf_vc_i_se_e64m1: 9017 case Intrinsic::riscv_sf_vc_i_se_e64m2: 9018 case Intrinsic::riscv_sf_vc_i_se_e64m4: 9019 case Intrinsic::riscv_sf_vc_i_se_e64m8: 9020 case Intrinsic::riscv_sf_vc_xv_se: 9021 case Intrinsic::riscv_sf_vc_iv_se: 9022 case Intrinsic::riscv_sf_vc_vv_se: 9023 case Intrinsic::riscv_sf_vc_fv_se: 9024 case Intrinsic::riscv_sf_vc_xvv_se: 9025 case Intrinsic::riscv_sf_vc_ivv_se: 9026 case Intrinsic::riscv_sf_vc_vvv_se: 9027 case Intrinsic::riscv_sf_vc_fvv_se: 9028 case Intrinsic::riscv_sf_vc_xvw_se: 9029 case Intrinsic::riscv_sf_vc_ivw_se: 9030 case Intrinsic::riscv_sf_vc_vvw_se: 9031 case Intrinsic::riscv_sf_vc_fvw_se: { 9032 SmallVector<SDValue> Ops; 9033 getVCIXOperands(Op, DAG, Ops); 9034 9035 SDValue NewNode = 9036 DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops); 9037 9038 if (Op == NewNode) 9039 break; 9040 9041 return NewNode; 9042 } 9043 } 9044 9045 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 9046 } 9047 9048 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 9049 switch (ISDOpcode) { 9050 default: 9051 llvm_unreachable("Unhandled reduction"); 9052 case ISD::VP_REDUCE_ADD: 9053 case ISD::VECREDUCE_ADD: 9054 return RISCVISD::VECREDUCE_ADD_VL; 9055 case ISD::VP_REDUCE_UMAX: 9056 case ISD::VECREDUCE_UMAX: 9057 return RISCVISD::VECREDUCE_UMAX_VL; 9058 case ISD::VP_REDUCE_SMAX: 9059 case ISD::VECREDUCE_SMAX: 9060 return RISCVISD::VECREDUCE_SMAX_VL; 9061 case ISD::VP_REDUCE_UMIN: 9062 case ISD::VECREDUCE_UMIN: 9063 return RISCVISD::VECREDUCE_UMIN_VL; 9064 case ISD::VP_REDUCE_SMIN: 9065 case ISD::VECREDUCE_SMIN: 9066 return RISCVISD::VECREDUCE_SMIN_VL; 9067 case ISD::VP_REDUCE_AND: 9068 case ISD::VECREDUCE_AND: 9069 return RISCVISD::VECREDUCE_AND_VL; 9070 case ISD::VP_REDUCE_OR: 9071 case ISD::VECREDUCE_OR: 9072 return RISCVISD::VECREDUCE_OR_VL; 9073 case ISD::VP_REDUCE_XOR: 9074 case ISD::VECREDUCE_XOR: 9075 return RISCVISD::VECREDUCE_XOR_VL; 9076 case ISD::VP_REDUCE_FADD: 9077 return RISCVISD::VECREDUCE_FADD_VL; 9078 case ISD::VP_REDUCE_SEQ_FADD: 9079 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 9080 case ISD::VP_REDUCE_FMAX: 9081 return RISCVISD::VECREDUCE_FMAX_VL; 9082 case ISD::VP_REDUCE_FMIN: 9083 return RISCVISD::VECREDUCE_FMIN_VL; 9084 } 9085 9086 } 9087 9088 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 9089 SelectionDAG &DAG, 9090 bool IsVP) const { 9091 SDLoc DL(Op); 9092 SDValue Vec = Op.getOperand(IsVP ? 
1 : 0); 9093 MVT VecVT = Vec.getSimpleValueType(); 9094 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 9095 Op.getOpcode() == ISD::VECREDUCE_OR || 9096 Op.getOpcode() == ISD::VECREDUCE_XOR || 9097 Op.getOpcode() == ISD::VP_REDUCE_AND || 9098 Op.getOpcode() == ISD::VP_REDUCE_OR || 9099 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 9100 "Unexpected reduction lowering"); 9101 9102 MVT XLenVT = Subtarget.getXLenVT(); 9103 9104 MVT ContainerVT = VecVT; 9105 if (VecVT.isFixedLengthVector()) { 9106 ContainerVT = getContainerForFixedLengthVector(VecVT); 9107 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9108 } 9109 9110 SDValue Mask, VL; 9111 if (IsVP) { 9112 Mask = Op.getOperand(2); 9113 VL = Op.getOperand(3); 9114 } else { 9115 std::tie(Mask, VL) = 9116 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9117 } 9118 9119 unsigned BaseOpc; 9120 ISD::CondCode CC; 9121 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 9122 9123 switch (Op.getOpcode()) { 9124 default: 9125 llvm_unreachable("Unhandled reduction"); 9126 case ISD::VECREDUCE_AND: 9127 case ISD::VP_REDUCE_AND: { 9128 // vcpop ~x == 0 9129 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 9130 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 9131 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9132 CC = ISD::SETEQ; 9133 BaseOpc = ISD::AND; 9134 break; 9135 } 9136 case ISD::VECREDUCE_OR: 9137 case ISD::VP_REDUCE_OR: 9138 // vcpop x != 0 9139 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9140 CC = ISD::SETNE; 9141 BaseOpc = ISD::OR; 9142 break; 9143 case ISD::VECREDUCE_XOR: 9144 case ISD::VP_REDUCE_XOR: { 9145 // ((vcpop x) & 1) != 0 9146 SDValue One = DAG.getConstant(1, DL, XLenVT); 9147 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9148 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 9149 CC = ISD::SETNE; 9150 BaseOpc = ISD::XOR; 9151 break; 9152 } 9153 } 9154 9155 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 9156 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC); 9157 9158 if (!IsVP) 9159 return SetCC; 9160 9161 // Now include the start value in the operation. 9162 // Note that we must return the start value when no elements are operated 9163 // upon. The vcpop instructions we've emitted in each case above will return 9164 // 0 for an inactive vector, and so we've already received the neutral value: 9165 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 9166 // can simply include the start value. 
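  // For example, a @llvm.vp.reduce.or with start value %s is emitted roughly
  // as (%s | (vcpop.m of the masked source != 0)); with zero active lanes the
  // vcpop result is 0, so the reduction correctly yields %s (illustrative
  // sketch only).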
9167 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0)); 9168 } 9169 9170 static bool isNonZeroAVL(SDValue AVL) { 9171 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); 9172 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); 9173 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || 9174 (ImmAVL && ImmAVL->getZExtValue() >= 1); 9175 } 9176 9177 /// Helper to lower a reduction sequence of the form: 9178 /// scalar = reduce_op vec, scalar_start 9179 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, 9180 SDValue StartValue, SDValue Vec, SDValue Mask, 9181 SDValue VL, const SDLoc &DL, SelectionDAG &DAG, 9182 const RISCVSubtarget &Subtarget) { 9183 const MVT VecVT = Vec.getSimpleValueType(); 9184 const MVT M1VT = getLMUL1VT(VecVT); 9185 const MVT XLenVT = Subtarget.getXLenVT(); 9186 const bool NonZeroAVL = isNonZeroAVL(VL); 9187 9188 // The reduction needs an LMUL1 input; do the splat at either LMUL1 9189 // or the original VT if fractional. 9190 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; 9191 // We reuse the VL of the reduction to reduce vsetvli toggles if we can 9192 // prove it is non-zero. For the AVL=0 case, we need the scalar to 9193 // be the result of the reduction operation. 9194 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); 9195 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, 9196 DAG, Subtarget); 9197 if (M1VT != InnerVT) 9198 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, 9199 DAG.getUNDEF(M1VT), 9200 InitialValue, DAG.getConstant(0, DL, XLenVT)); 9201 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; 9202 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 9203 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; 9204 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops); 9205 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 9206 DAG.getConstant(0, DL, XLenVT)); 9207 } 9208 9209 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 9210 SelectionDAG &DAG) const { 9211 SDLoc DL(Op); 9212 SDValue Vec = Op.getOperand(0); 9213 EVT VecEVT = Vec.getValueType(); 9214 9215 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 9216 9217 // Due to ordering in legalize types we may have a vector type that needs to 9218 // be split. Do that manually so we can get down to a legal type. 9219 while (getTypeAction(*DAG.getContext(), VecEVT) == 9220 TargetLowering::TypeSplitVector) { 9221 auto [Lo, Hi] = DAG.SplitVector(Vec, DL); 9222 VecEVT = Lo.getValueType(); 9223 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 9224 } 9225 9226 // TODO: The type may need to be widened rather than split. Or widened before 9227 // it can be split. 
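  // For example, if v128i8 is not a legal type for the target, a VECREDUCE_ADD
  // of v128i8 is first split into two v64i8 halves that are ADDed together,
  // repeating until the vector type becomes legal (a sketch; the widths that
  // actually require splitting depend on the configured VLEN).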
9228 if (!isTypeLegal(VecEVT)) 9229 return SDValue(); 9230 9231 MVT VecVT = VecEVT.getSimpleVT(); 9232 MVT VecEltVT = VecVT.getVectorElementType(); 9233 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 9234 9235 MVT ContainerVT = VecVT; 9236 if (VecVT.isFixedLengthVector()) { 9237 ContainerVT = getContainerForFixedLengthVector(VecVT); 9238 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9239 } 9240 9241 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9242 9243 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 9244 switch (BaseOpc) { 9245 case ISD::AND: 9246 case ISD::OR: 9247 case ISD::UMAX: 9248 case ISD::UMIN: 9249 case ISD::SMAX: 9250 case ISD::SMIN: 9251 MVT XLenVT = Subtarget.getXLenVT(); 9252 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, 9253 DAG.getConstant(0, DL, XLenVT)); 9254 } 9255 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec, 9256 Mask, VL, DL, DAG, Subtarget); 9257 } 9258 9259 // Given a reduction op, this function returns the matching reduction opcode, 9260 // the vector SDValue and the scalar SDValue required to lower this to a 9261 // RISCVISD node. 9262 static std::tuple<unsigned, SDValue, SDValue> 9263 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, 9264 const RISCVSubtarget &Subtarget) { 9265 SDLoc DL(Op); 9266 auto Flags = Op->getFlags(); 9267 unsigned Opcode = Op.getOpcode(); 9268 switch (Opcode) { 9269 default: 9270 llvm_unreachable("Unhandled reduction"); 9271 case ISD::VECREDUCE_FADD: { 9272 // Use positive zero if we can. It is cheaper to materialize. 9273 SDValue Zero = 9274 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); 9275 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); 9276 } 9277 case ISD::VECREDUCE_SEQ_FADD: 9278 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 9279 Op.getOperand(0)); 9280 case ISD::VECREDUCE_FMIN: 9281 case ISD::VECREDUCE_FMAX: { 9282 MVT XLenVT = Subtarget.getXLenVT(); 9283 SDValue Front = 9284 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0), 9285 DAG.getConstant(0, DL, XLenVT)); 9286 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN) 9287 ? RISCVISD::VECREDUCE_FMIN_VL 9288 : RISCVISD::VECREDUCE_FMAX_VL; 9289 return std::make_tuple(RVVOpc, Op.getOperand(0), Front); 9290 } 9291 } 9292 } 9293 9294 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 9295 SelectionDAG &DAG) const { 9296 SDLoc DL(Op); 9297 MVT VecEltVT = Op.getSimpleValueType(); 9298 9299 unsigned RVVOpcode; 9300 SDValue VectorVal, ScalarVal; 9301 std::tie(RVVOpcode, VectorVal, ScalarVal) = 9302 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget); 9303 MVT VecVT = VectorVal.getSimpleValueType(); 9304 9305 MVT ContainerVT = VecVT; 9306 if (VecVT.isFixedLengthVector()) { 9307 ContainerVT = getContainerForFixedLengthVector(VecVT); 9308 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 9309 } 9310 9311 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9312 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal, 9313 VectorVal, Mask, VL, DL, DAG, Subtarget); 9314 } 9315 9316 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 9317 SelectionDAG &DAG) const { 9318 SDLoc DL(Op); 9319 SDValue Vec = Op.getOperand(1); 9320 EVT VecEVT = Vec.getValueType(); 9321 9322 // TODO: The type may need to be widened rather than split. Or widened before 9323 // it can be split. 
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                           Vec, Mask, VL, DL, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
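  // For example, inserting a v2i32 subvector at element index 2 of a larger
  // fixed-length vector is emitted roughly as a vsetvli with VL=4 (index plus
  // subvector length) followed by a vslideup.vi with an offset of 2
  // (illustrative only; the exact sequence depends on VLEN and LMUL).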
9395 if (SubVecVT.isFixedLengthVector()) { 9396 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) 9397 return Op; 9398 MVT ContainerVT = VecVT; 9399 if (VecVT.isFixedLengthVector()) { 9400 ContainerVT = getContainerForFixedLengthVector(VecVT); 9401 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9402 } 9403 9404 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { 9405 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 9406 DAG.getUNDEF(ContainerVT), SubVec, 9407 DAG.getConstant(0, DL, XLenVT)); 9408 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 9409 return DAG.getBitcast(Op.getValueType(), SubVec); 9410 } 9411 9412 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 9413 DAG.getUNDEF(ContainerVT), SubVec, 9414 DAG.getConstant(0, DL, XLenVT)); 9415 SDValue Mask = 9416 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 9417 // Set the vector length to only the number of elements we care about. Note 9418 // that for slideup this includes the offset. 9419 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); 9420 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget); 9421 9422 // Use tail agnostic policy if we're inserting over Vec's tail. 9423 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 9424 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) 9425 Policy = RISCVII::TAIL_AGNOSTIC; 9426 9427 // If we're inserting into the lowest elements, use a tail undisturbed 9428 // vmv.v.v. 9429 if (OrigIdx == 0) { 9430 SubVec = 9431 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL); 9432 } else { 9433 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 9434 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, 9435 SlideupAmt, Mask, VL, Policy); 9436 } 9437 9438 if (VecVT.isFixedLengthVector()) 9439 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 9440 return DAG.getBitcast(Op.getValueType(), SubVec); 9441 } 9442 9443 unsigned SubRegIdx, RemIdx; 9444 std::tie(SubRegIdx, RemIdx) = 9445 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 9446 VecVT, SubVecVT, OrigIdx, TRI); 9447 9448 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 9449 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 9450 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 9451 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 9452 9453 // 1. If the Idx has been completely eliminated and this subvector's size is 9454 // a vector register or a multiple thereof, or the surrounding elements are 9455 // undef, then this is a subvector insert which naturally aligns to a vector 9456 // register. These can easily be handled using subregister manipulation. 9457 // 2. If the subvector is smaller than a vector register, then the insertion 9458 // must preserve the undisturbed elements of the register. We do this by 9459 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 9460 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 9461 // subvector within the vector register, and an INSERT_SUBVECTOR of that 9462 // LMUL=1 type back into the larger vector (resolving to another subregister 9463 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 9464 // to avoid allocating a large register group to hold our subvector. 
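  // For example, inserting nxv2i32 into nxv8i32 at index 2 or 4 lines up
  // exactly with an LMUL=1 register boundary, so the insert is left as-is and
  // is later selected as a plain subregister insert (illustrative only).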
9465 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 9466 return Op; 9467 9468 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 9469 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 9470 // (in our case undisturbed). This means we can set up a subvector insertion 9471 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 9472 // size of the subvector. 9473 MVT InterSubVT = VecVT; 9474 SDValue AlignedExtract = Vec; 9475 unsigned AlignedIdx = OrigIdx - RemIdx; 9476 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 9477 InterSubVT = getLMUL1VT(VecVT); 9478 // Extract a subvector equal to the nearest full vector register type. This 9479 // should resolve to a EXTRACT_SUBREG instruction. 9480 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 9481 DAG.getConstant(AlignedIdx, DL, XLenVT)); 9482 } 9483 9484 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 9485 DAG.getUNDEF(InterSubVT), SubVec, 9486 DAG.getConstant(0, DL, XLenVT)); 9487 9488 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 9489 9490 VL = computeVLMax(SubVecVT, DL, DAG); 9491 9492 // If we're inserting into the lowest elements, use a tail undisturbed 9493 // vmv.v.v. 9494 if (RemIdx == 0) { 9495 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract, 9496 SubVec, VL); 9497 } else { 9498 SDValue SlideupAmt = 9499 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 9500 9501 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 9502 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 9503 9504 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec, 9505 SlideupAmt, Mask, VL); 9506 } 9507 9508 // If required, insert this subvector back into the correct vector register. 9509 // This should resolve to an INSERT_SUBREG instruction. 9510 if (VecVT.bitsGT(InterSubVT)) 9511 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec, 9512 DAG.getConstant(AlignedIdx, DL, XLenVT)); 9513 9514 // We might have bitcast from a mask type: cast back to the original type if 9515 // required. 9516 return DAG.getBitcast(Op.getSimpleValueType(), SubVec); 9517 } 9518 9519 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 9520 SelectionDAG &DAG) const { 9521 SDValue Vec = Op.getOperand(0); 9522 MVT SubVecVT = Op.getSimpleValueType(); 9523 MVT VecVT = Vec.getSimpleValueType(); 9524 9525 SDLoc DL(Op); 9526 MVT XLenVT = Subtarget.getXLenVT(); 9527 unsigned OrigIdx = Op.getConstantOperandVal(1); 9528 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 9529 9530 // We don't have the ability to slide mask vectors down indexed by their i1 9531 // elements; the smallest we can do is i8. Often we are able to bitcast to 9532 // equivalent i8 vectors. Note that when extracting a fixed-length vector 9533 // from a scalable one, we might not necessarily have enough scalable 9534 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 
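  // For example, extracting v8i1 at index 8 from nxv64i1 can instead be done
  // by bitcasting to nxv8i8 and extracting a v1i8 at index 1 (a sketch; the
  // code below only takes this path when both element counts are divisible
  // by 8).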
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain
      // fixed-length vectors from fixed-length vectors, where we can extract
      // as i8 and shift the correct element right to reach the desired
      // subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (OrigIdx == 0)
    return Op;

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }

    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
9604 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 9605 DAG.getConstant(0, DL, XLenVT)); 9606 return DAG.getBitcast(Op.getValueType(), Slidedown); 9607 } 9608 9609 unsigned SubRegIdx, RemIdx; 9610 std::tie(SubRegIdx, RemIdx) = 9611 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 9612 VecVT, SubVecVT, OrigIdx, TRI); 9613 9614 // If the Idx has been completely eliminated then this is a subvector extract 9615 // which naturally aligns to a vector register. These can easily be handled 9616 // using subregister manipulation. 9617 if (RemIdx == 0) 9618 return Op; 9619 9620 // Else SubVecVT is a fractional LMUL and may need to be slid down. 9621 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); 9622 9623 // If the vector type is an LMUL-group type, extract a subvector equal to the 9624 // nearest full vector register type. 9625 MVT InterSubVT = VecVT; 9626 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 9627 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and 9628 // we should have successfully decomposed the extract into a subregister. 9629 assert(SubRegIdx != RISCV::NoSubRegister); 9630 InterSubVT = getLMUL1VT(VecVT); 9631 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); 9632 } 9633 9634 // Slide this vector register down by the desired number of elements in order 9635 // to place the desired subvector starting at element 0. 9636 SDValue SlidedownAmt = 9637 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 9638 9639 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 9640 SDValue Slidedown = 9641 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), 9642 Vec, SlidedownAmt, Mask, VL); 9643 9644 // Now the vector is in the right position, extract our final subvector. This 9645 // should resolve to a COPY. 9646 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 9647 DAG.getConstant(0, DL, XLenVT)); 9648 9649 // We might have bitcast from a mask type: cast back to the original type if 9650 // required. 9651 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 9652 } 9653 9654 // Widen a vector's operands to i8, then truncate its results back to the 9655 // original type, typically i1. All operand and result types must be the same. 
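// For example, a two-result i1 vector_deinterleave is performed on
// zero-extended i8 vectors, and each i8 result is converted back to i1 with a
// setcc-ne against zero (illustrative description of the helper below).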
9656 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, 9657 SelectionDAG &DAG) { 9658 MVT VT = N.getSimpleValueType(); 9659 MVT WideVT = VT.changeVectorElementType(MVT::i8); 9660 SmallVector<SDValue, 4> WideOps; 9661 for (SDValue Op : N->ops()) { 9662 assert(Op.getSimpleValueType() == VT && 9663 "Operands and result must be same type"); 9664 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op)); 9665 } 9666 9667 unsigned NumVals = N->getNumValues(); 9668 9669 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( 9670 NumVals, N.getValueType().changeVectorElementType(MVT::i8))); 9671 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps); 9672 SmallVector<SDValue, 4> TruncVals; 9673 for (unsigned I = 0; I < NumVals; I++) { 9674 TruncVals.push_back( 9675 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I), 9676 DAG.getConstant(0, DL, WideVT), ISD::SETNE)); 9677 } 9678 9679 if (TruncVals.size() > 1) 9680 return DAG.getMergeValues(TruncVals, DL); 9681 return TruncVals.front(); 9682 } 9683 9684 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, 9685 SelectionDAG &DAG) const { 9686 SDLoc DL(Op); 9687 MVT VecVT = Op.getSimpleValueType(); 9688 MVT XLenVT = Subtarget.getXLenVT(); 9689 9690 assert(VecVT.isScalableVector() && 9691 "vector_interleave on non-scalable vector!"); 9692 9693 // 1 bit element vectors need to be widened to e8 9694 if (VecVT.getVectorElementType() == MVT::i1) 9695 return widenVectorOpsToi8(Op, DL, DAG); 9696 9697 // If the VT is LMUL=8, we need to split and reassemble. 9698 if (VecVT.getSizeInBits().getKnownMinValue() == 9699 (8 * RISCV::RVVBitsPerBlock)) { 9700 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 9701 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 9702 EVT SplitVT = Op0Lo.getValueType(); 9703 9704 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 9705 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi); 9706 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 9707 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi); 9708 9709 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9710 ResLo.getValue(0), ResHi.getValue(0)); 9711 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1), 9712 ResHi.getValue(1)); 9713 return DAG.getMergeValues({Even, Odd}, DL); 9714 } 9715 9716 // Concatenate the two vectors as one vector to deinterleave 9717 MVT ConcatVT = 9718 MVT::getVectorVT(VecVT.getVectorElementType(), 9719 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 9720 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 9721 Op.getOperand(0), Op.getOperand(1)); 9722 9723 // We want to operate on all lanes, so get the mask and VL and mask for it 9724 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); 9725 SDValue Passthru = DAG.getUNDEF(ConcatVT); 9726 9727 // We can deinterleave through vnsrl.wi if the element type is smaller than 9728 // ELEN 9729 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { 9730 SDValue Even = 9731 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); 9732 SDValue Odd = 9733 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG); 9734 return DAG.getMergeValues({Even, Odd}, DL); 9735 } 9736 9737 // For the indices, use the same SEW to avoid an extra vsetvli 9738 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); 9739 // Create a vector of even indices {0, 2, 4, ...} 9740 SDValue EvenIdx = 9741 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); 9742 
// Create a vector of odd indices {1, 3, 5, ... } 9743 SDValue OddIdx = 9744 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); 9745 9746 // Gather the even and odd elements into two separate vectors 9747 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 9748 Concat, EvenIdx, Passthru, Mask, VL); 9749 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 9750 Concat, OddIdx, Passthru, Mask, VL); 9751 9752 // Extract the result half of the gather for even and odd 9753 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, 9754 DAG.getConstant(0, DL, XLenVT)); 9755 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide, 9756 DAG.getConstant(0, DL, XLenVT)); 9757 9758 return DAG.getMergeValues({Even, Odd}, DL); 9759 } 9760 9761 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, 9762 SelectionDAG &DAG) const { 9763 SDLoc DL(Op); 9764 MVT VecVT = Op.getSimpleValueType(); 9765 9766 assert(VecVT.isScalableVector() && 9767 "vector_interleave on non-scalable vector!"); 9768 9769 // i1 vectors need to be widened to i8 9770 if (VecVT.getVectorElementType() == MVT::i1) 9771 return widenVectorOpsToi8(Op, DL, DAG); 9772 9773 MVT XLenVT = Subtarget.getXLenVT(); 9774 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); 9775 9776 // If the VT is LMUL=8, we need to split and reassemble. 9777 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) { 9778 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 9779 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 9780 EVT SplitVT = Op0Lo.getValueType(); 9781 9782 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 9783 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo); 9784 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 9785 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi); 9786 9787 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9788 ResLo.getValue(0), ResLo.getValue(1)); 9789 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9790 ResHi.getValue(0), ResHi.getValue(1)); 9791 return DAG.getMergeValues({Lo, Hi}, DL); 9792 } 9793 9794 SDValue Interleaved; 9795 9796 // If the element type is smaller than ELEN, then we can interleave with 9797 // vwaddu.vv and vwmaccu.vx 9798 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { 9799 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL, 9800 DAG, Subtarget); 9801 } else { 9802 // Otherwise, fallback to using vrgathere16.vv 9803 MVT ConcatVT = 9804 MVT::getVectorVT(VecVT.getVectorElementType(), 9805 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 9806 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 9807 Op.getOperand(0), Op.getOperand(1)); 9808 9809 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16); 9810 9811 // 0 1 2 3 4 5 6 7 ... 9812 SDValue StepVec = DAG.getStepVector(DL, IdxVT); 9813 9814 // 1 1 1 1 1 1 1 1 ... 9815 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT)); 9816 9817 // 1 0 1 0 1 0 1 0 ... 9818 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones); 9819 OddMask = DAG.getSetCC( 9820 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask, 9821 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)), 9822 ISD::CondCode::SETNE); 9823 9824 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG)); 9825 9826 // Build up the index vector for interleaving the concatenated vector 9827 // 0 0 1 1 2 2 3 3 ... 
    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
    // 0 n 1 n+1 2 n+2 3 n+3 ...
    Idx =
        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}

// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}

// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
  }
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
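  // For example, reversing nxv32i8 on a machine where VLEN may be 1024 gives a
  // possible VLMAX of 512, which cannot be represented in the i8 indices used
  // by vrgather.vv, so vrgatherei16.vv with i16 indices is used instead
  // (illustrative numbers only).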
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting, VLMAX no longer requires
    // vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
                                 computeVLMax(VecVT, DL, DAG),
                                 DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
                     DAG.getUNDEF(VecVT), Mask, VL);
}

SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VecVT = Op.getSimpleValueType();

  SDValue VLMax = computeVLMax(VecVT, DL, DAG);

  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
  }

  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
                    DownOffset, TrueMask, UpOffset);
  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
                     RISCVII::TAIL_AGNOSTIC);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Load = cast<LoadSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()) &&
         "Expecting a correctly-aligned load");

  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // If we know the exact VLEN and our fixed length vector completely fills
  // the container, use a whole register load instead.
  const auto [MinVLMAX, MaxVLMAX] =
      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
    SDValue NewLoad =
        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
                    Load->getMemOperand());
    SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
    return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
  }

  SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
  if (!IsMaskOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  Ops.push_back(Load->getBasePtr());
  Ops.push_back(VL);
  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Store = cast<StoreSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()) &&
         "Expecting a correctly-aligned store");

  SDValue StoreVal = Store->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If the size is less than a byte, we need to pad with zeros to make a byte.
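  // For example, a v4i1 store is widened to v8i1 by inserting it into a zero
  // v8i1 vector so that a whole byte is written by the mask store
  // (illustrative only).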
10053 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { 10054 VT = MVT::v8i1; 10055 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 10056 DAG.getConstant(0, DL, VT), StoreVal, 10057 DAG.getIntPtrConstant(0, DL)); 10058 } 10059 10060 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10061 10062 SDValue NewValue = 10063 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 10064 10065 10066 // If we know the exact VLEN and our fixed length vector completely fills 10067 // the container, use a whole register store instead. 10068 const auto [MinVLMAX, MaxVLMAX] = 10069 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 10070 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && 10071 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) 10072 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), 10073 Store->getMemOperand()); 10074 10075 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 10076 Subtarget); 10077 10078 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 10079 SDValue IntID = DAG.getTargetConstant( 10080 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); 10081 return DAG.getMemIntrinsicNode( 10082 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 10083 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, 10084 Store->getMemoryVT(), Store->getMemOperand()); 10085 } 10086 10087 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, 10088 SelectionDAG &DAG) const { 10089 SDLoc DL(Op); 10090 MVT VT = Op.getSimpleValueType(); 10091 10092 const auto *MemSD = cast<MemSDNode>(Op); 10093 EVT MemVT = MemSD->getMemoryVT(); 10094 MachineMemOperand *MMO = MemSD->getMemOperand(); 10095 SDValue Chain = MemSD->getChain(); 10096 SDValue BasePtr = MemSD->getBasePtr(); 10097 10098 SDValue Mask, PassThru, VL; 10099 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { 10100 Mask = VPLoad->getMask(); 10101 PassThru = DAG.getUNDEF(VT); 10102 VL = VPLoad->getVectorLength(); 10103 } else { 10104 const auto *MLoad = cast<MaskedLoadSDNode>(Op); 10105 Mask = MLoad->getMask(); 10106 PassThru = MLoad->getPassThru(); 10107 } 10108 10109 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10110 10111 MVT XLenVT = Subtarget.getXLenVT(); 10112 10113 MVT ContainerVT = VT; 10114 if (VT.isFixedLengthVector()) { 10115 ContainerVT = getContainerForFixedLengthVector(VT); 10116 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 10117 if (!IsUnmasked) { 10118 MVT MaskVT = getMaskTypeFor(ContainerVT); 10119 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10120 } 10121 } 10122 10123 if (!VL) 10124 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10125 10126 unsigned IntID = 10127 IsUnmasked ? 
Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 10128 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 10129 if (IsUnmasked) 10130 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10131 else 10132 Ops.push_back(PassThru); 10133 Ops.push_back(BasePtr); 10134 if (!IsUnmasked) 10135 Ops.push_back(Mask); 10136 Ops.push_back(VL); 10137 if (!IsUnmasked) 10138 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 10139 10140 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10141 10142 SDValue Result = 10143 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 10144 Chain = Result.getValue(1); 10145 10146 if (VT.isFixedLengthVector()) 10147 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 10148 10149 return DAG.getMergeValues({Result, Chain}, DL); 10150 } 10151 10152 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 10153 SelectionDAG &DAG) const { 10154 SDLoc DL(Op); 10155 10156 const auto *MemSD = cast<MemSDNode>(Op); 10157 EVT MemVT = MemSD->getMemoryVT(); 10158 MachineMemOperand *MMO = MemSD->getMemOperand(); 10159 SDValue Chain = MemSD->getChain(); 10160 SDValue BasePtr = MemSD->getBasePtr(); 10161 SDValue Val, Mask, VL; 10162 10163 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 10164 Val = VPStore->getValue(); 10165 Mask = VPStore->getMask(); 10166 VL = VPStore->getVectorLength(); 10167 } else { 10168 const auto *MStore = cast<MaskedStoreSDNode>(Op); 10169 Val = MStore->getValue(); 10170 Mask = MStore->getMask(); 10171 } 10172 10173 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10174 10175 MVT VT = Val.getSimpleValueType(); 10176 MVT XLenVT = Subtarget.getXLenVT(); 10177 10178 MVT ContainerVT = VT; 10179 if (VT.isFixedLengthVector()) { 10180 ContainerVT = getContainerForFixedLengthVector(VT); 10181 10182 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 10183 if (!IsUnmasked) { 10184 MVT MaskVT = getMaskTypeFor(ContainerVT); 10185 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10186 } 10187 } 10188 10189 if (!VL) 10190 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10191 10192 unsigned IntID = 10193 IsUnmasked ? 
Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; 10194 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 10195 Ops.push_back(Val); 10196 Ops.push_back(BasePtr); 10197 if (!IsUnmasked) 10198 Ops.push_back(Mask); 10199 Ops.push_back(VL); 10200 10201 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 10202 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 10203 } 10204 10205 SDValue 10206 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 10207 SelectionDAG &DAG) const { 10208 MVT InVT = Op.getOperand(0).getSimpleValueType(); 10209 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 10210 10211 MVT VT = Op.getSimpleValueType(); 10212 10213 SDValue Op1 = 10214 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 10215 SDValue Op2 = 10216 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 10217 10218 SDLoc DL(Op); 10219 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, 10220 DAG, Subtarget); 10221 MVT MaskVT = getMaskTypeFor(ContainerVT); 10222 10223 SDValue Cmp = 10224 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 10225 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); 10226 10227 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 10228 } 10229 10230 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, 10231 SelectionDAG &DAG) const { 10232 unsigned Opc = Op.getOpcode(); 10233 SDLoc DL(Op); 10234 SDValue Chain = Op.getOperand(0); 10235 SDValue Op1 = Op.getOperand(1); 10236 SDValue Op2 = Op.getOperand(2); 10237 SDValue CC = Op.getOperand(3); 10238 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 10239 MVT VT = Op.getSimpleValueType(); 10240 MVT InVT = Op1.getSimpleValueType(); 10241 10242 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE 10243 // condition code. 10244 if (Opc == ISD::STRICT_FSETCCS) { 10245 // Expand strict_fsetccs(x, oeq) to 10246 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole)) 10247 SDVTList VTList = Op->getVTList(); 10248 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { 10249 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE); 10250 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, 10251 Op2, OLECCVal); 10252 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2, 10253 Op1, OLECCVal); 10254 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 10255 Tmp1.getValue(1), Tmp2.getValue(1)); 10256 // Tmp1 and Tmp2 might be the same node. 
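// (E.g. when Op1 and Op2 are the same SDValue, the two swapped-operand SETOLE
// comparisons CSE to a single node, so the AND below would be redundant.)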
10257 if (Tmp1 != Tmp2) 10258 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2); 10259 return DAG.getMergeValues({Tmp1, OutChain}, DL); 10260 } 10261 10262 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)) 10263 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) { 10264 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ); 10265 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, 10266 Op2, OEQCCVal); 10267 SDValue Res = DAG.getNOT(DL, OEQ, VT); 10268 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL); 10269 } 10270 } 10271 10272 MVT ContainerInVT = InVT; 10273 if (InVT.isFixedLengthVector()) { 10274 ContainerInVT = getContainerForFixedLengthVector(InVT); 10275 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget); 10276 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget); 10277 } 10278 MVT MaskVT = getMaskTypeFor(ContainerInVT); 10279 10280 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget); 10281 10282 SDValue Res; 10283 if (Opc == ISD::STRICT_FSETCC && 10284 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE || 10285 CCVal == ISD::SETOLE)) { 10286 // VMFLT/VMFLE/VMFGT/VMFGE raise exceptions for qNaN. Generate a mask that is 10287 // only active when both input elements are ordered. 10288 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG); 10289 SDValue OrderMask1 = DAG.getNode( 10290 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), 10291 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), 10292 True, VL}); 10293 SDValue OrderMask2 = DAG.getNode( 10294 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), 10295 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), 10296 True, VL}); 10297 Mask = 10298 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL); 10299 // Use Mask as the merge operand to let the result be 0 if either of the 10300 // inputs is unordered. 10301 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL, 10302 DAG.getVTList(MaskVT, MVT::Other), 10303 {Chain, Op1, Op2, CC, Mask, Mask, VL}); 10304 } else { 10305 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL 10306 : RISCVISD::STRICT_FSETCCS_VL; 10307 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other), 10308 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL}); 10309 } 10310 10311 if (VT.isFixedLengthVector()) { 10312 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 10313 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 10314 } 10315 return Res; 10316 } 10317 10318 // Lower vector ABS to smax(X, sub(0, X)). 
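// For example, for X = <-3, 4>: sub(0, X) = <3, -4> and smax(X, sub(0, X)) =
// <3, 4>. INT_MIN maps to itself, matching the usual wrapping behaviour of
// integer abs.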
10319 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 10320 SDLoc DL(Op); 10321 MVT VT = Op.getSimpleValueType(); 10322 SDValue X = Op.getOperand(0); 10323 10324 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && 10325 "Unexpected type for ISD::ABS"); 10326 10327 MVT ContainerVT = VT; 10328 if (VT.isFixedLengthVector()) { 10329 ContainerVT = getContainerForFixedLengthVector(VT); 10330 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 10331 } 10332 10333 SDValue Mask, VL; 10334 if (Op->getOpcode() == ISD::VP_ABS) { 10335 Mask = Op->getOperand(1); 10336 if (VT.isFixedLengthVector()) 10337 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 10338 Subtarget); 10339 VL = Op->getOperand(2); 10340 } else 10341 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10342 10343 SDValue SplatZero = DAG.getNode( 10344 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 10345 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 10346 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, 10347 DAG.getUNDEF(ContainerVT), Mask, VL); 10348 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, 10349 DAG.getUNDEF(ContainerVT), Mask, VL); 10350 10351 if (VT.isFixedLengthVector()) 10352 Max = convertFromScalableVector(VT, Max, DAG, Subtarget); 10353 return Max; 10354 } 10355 10356 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 10357 SDValue Op, SelectionDAG &DAG) const { 10358 SDLoc DL(Op); 10359 MVT VT = Op.getSimpleValueType(); 10360 SDValue Mag = Op.getOperand(0); 10361 SDValue Sign = Op.getOperand(1); 10362 assert(Mag.getValueType() == Sign.getValueType() && 10363 "Can only handle COPYSIGN with matching types."); 10364 10365 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10366 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 10367 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 10368 10369 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10370 10371 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, 10372 Sign, DAG.getUNDEF(ContainerVT), Mask, VL); 10373 10374 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 10375 } 10376 10377 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 10378 SDValue Op, SelectionDAG &DAG) const { 10379 MVT VT = Op.getSimpleValueType(); 10380 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10381 10382 MVT I1ContainerVT = 10383 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 10384 10385 SDValue CC = 10386 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 10387 SDValue Op1 = 10388 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 10389 SDValue Op2 = 10390 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 10391 10392 SDLoc DL(Op); 10393 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10394 10395 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, 10396 Op2, DAG.getUNDEF(ContainerVT), VL); 10397 10398 return convertFromScalableVector(VT, Select, DAG, Subtarget); 10399 } 10400 10401 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, 10402 SelectionDAG &DAG) const { 10403 unsigned NewOpc = getRISCVVLOp(Op); 10404 bool HasMergeOp = hasMergeOp(NewOpc); 10405 bool HasMask = hasMaskOp(NewOpc); 10406 10407 MVT VT = Op.getSimpleValueType(); 10408 MVT 
ContainerVT = getContainerForFixedLengthVector(VT); 10409 10410 // Create list of operands by converting existing ones to scalable types. 10411 SmallVector<SDValue, 6> Ops; 10412 for (const SDValue &V : Op->op_values()) { 10413 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 10414 10415 // Pass through non-vector operands. 10416 if (!V.getValueType().isVector()) { 10417 Ops.push_back(V); 10418 continue; 10419 } 10420 10421 // "cast" fixed length vector to a scalable vector. 10422 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 10423 "Only fixed length vectors are supported!"); 10424 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 10425 } 10426 10427 SDLoc DL(Op); 10428 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10429 if (HasMergeOp) 10430 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10431 if (HasMask) 10432 Ops.push_back(Mask); 10433 Ops.push_back(VL); 10434 10435 // StrictFP operations have two result values. Their lowered result should 10436 // have same result count. 10437 if (Op->isStrictFPOpcode()) { 10438 SDValue ScalableRes = 10439 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, 10440 Op->getFlags()); 10441 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 10442 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL); 10443 } 10444 10445 SDValue ScalableRes = 10446 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); 10447 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 10448 } 10449 10450 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 10451 // * Operands of each node are assumed to be in the same order. 10452 // * The EVL operand is promoted from i32 to i64 on RV64. 10453 // * Fixed-length vectors are converted to their scalable-vector container 10454 // types. 10455 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { 10456 unsigned RISCVISDOpc = getRISCVVLOp(Op); 10457 bool HasMergeOp = hasMergeOp(RISCVISDOpc); 10458 10459 SDLoc DL(Op); 10460 MVT VT = Op.getSimpleValueType(); 10461 SmallVector<SDValue, 4> Ops; 10462 10463 MVT ContainerVT = VT; 10464 if (VT.isFixedLengthVector()) 10465 ContainerVT = getContainerForFixedLengthVector(VT); 10466 10467 for (const auto &OpIdx : enumerate(Op->ops())) { 10468 SDValue V = OpIdx.value(); 10469 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 10470 // Add dummy merge value before the mask. Or if there isn't a mask, before 10471 // EVL. 10472 if (HasMergeOp) { 10473 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode()); 10474 if (MaskIdx) { 10475 if (*MaskIdx == OpIdx.index()) 10476 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10477 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == 10478 OpIdx.index()) { 10479 if (Op.getOpcode() == ISD::VP_MERGE) { 10480 // For VP_MERGE, copy the false operand instead of an undef value. 10481 Ops.push_back(Ops.back()); 10482 } else { 10483 assert(Op.getOpcode() == ISD::VP_SELECT); 10484 // For VP_SELECT, add an undef value. 10485 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10486 } 10487 } 10488 } 10489 // Pass through operands which aren't fixed-length vectors. 10490 if (!V.getValueType().isFixedLengthVector()) { 10491 Ops.push_back(V); 10492 continue; 10493 } 10494 // "cast" fixed length vector to a scalable vector. 
10495 MVT OpVT = V.getSimpleValueType(); 10496 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 10497 assert(useRVVForFixedLengthVectorVT(OpVT) && 10498 "Only fixed length vectors are supported!"); 10499 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 10500 } 10501 10502 if (!VT.isFixedLengthVector()) 10503 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); 10504 10505 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); 10506 10507 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 10508 } 10509 10510 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, 10511 SelectionDAG &DAG) const { 10512 SDLoc DL(Op); 10513 MVT VT = Op.getSimpleValueType(); 10514 10515 SDValue Src = Op.getOperand(0); 10516 // NOTE: Mask is dropped. 10517 SDValue VL = Op.getOperand(2); 10518 10519 MVT ContainerVT = VT; 10520 if (VT.isFixedLengthVector()) { 10521 ContainerVT = getContainerForFixedLengthVector(VT); 10522 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 10523 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 10524 } 10525 10526 MVT XLenVT = Subtarget.getXLenVT(); 10527 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 10528 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10529 DAG.getUNDEF(ContainerVT), Zero, VL); 10530 10531 SDValue SplatValue = DAG.getConstant( 10532 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); 10533 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10534 DAG.getUNDEF(ContainerVT), SplatValue, VL); 10535 10536 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat, 10537 ZeroSplat, DAG.getUNDEF(ContainerVT), VL); 10538 if (!VT.isFixedLengthVector()) 10539 return Result; 10540 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10541 } 10542 10543 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, 10544 SelectionDAG &DAG) const { 10545 SDLoc DL(Op); 10546 MVT VT = Op.getSimpleValueType(); 10547 10548 SDValue Op1 = Op.getOperand(0); 10549 SDValue Op2 = Op.getOperand(1); 10550 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 10551 // NOTE: Mask is dropped. 
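// (Assumed safe for the same reason given in lowerLogicVPOp below: masked-off
// lanes of a VP compare are undefined, so computing them unmasked is fine.)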
10552 SDValue VL = Op.getOperand(4); 10553 10554 MVT ContainerVT = VT; 10555 if (VT.isFixedLengthVector()) { 10556 ContainerVT = getContainerForFixedLengthVector(VT); 10557 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10558 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10559 } 10560 10561 SDValue Result; 10562 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 10563 10564 switch (Condition) { 10565 default: 10566 break; 10567 // X != Y --> (X^Y) 10568 case ISD::SETNE: 10569 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 10570 break; 10571 // X == Y --> ~(X^Y) 10572 case ISD::SETEQ: { 10573 SDValue Temp = 10574 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 10575 Result = 10576 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); 10577 break; 10578 } 10579 // X >s Y --> X == 0 & Y == 1 --> ~X & Y 10580 // X <u Y --> X == 0 & Y == 1 --> ~X & Y 10581 case ISD::SETGT: 10582 case ISD::SETULT: { 10583 SDValue Temp = 10584 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 10585 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); 10586 break; 10587 } 10588 // X <s Y --> X == 1 & Y == 0 --> ~Y & X 10589 // X >u Y --> X == 1 & Y == 0 --> ~Y & X 10590 case ISD::SETLT: 10591 case ISD::SETUGT: { 10592 SDValue Temp = 10593 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 10594 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); 10595 break; 10596 } 10597 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 10598 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 10599 case ISD::SETGE: 10600 case ISD::SETULE: { 10601 SDValue Temp = 10602 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 10603 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); 10604 break; 10605 } 10606 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 10607 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 10608 case ISD::SETLE: 10609 case ISD::SETUGE: { 10610 SDValue Temp = 10611 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 10612 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); 10613 break; 10614 } 10615 } 10616 10617 if (!VT.isFixedLengthVector()) 10618 return Result; 10619 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10620 } 10621 10622 // Lower Floating-Point/Integer Type-Convert VP SDNodes 10623 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, 10624 SelectionDAG &DAG) const { 10625 SDLoc DL(Op); 10626 10627 SDValue Src = Op.getOperand(0); 10628 SDValue Mask = Op.getOperand(1); 10629 SDValue VL = Op.getOperand(2); 10630 unsigned RISCVISDOpc = getRISCVVLOp(Op); 10631 10632 MVT DstVT = Op.getSimpleValueType(); 10633 MVT SrcVT = Src.getSimpleValueType(); 10634 if (DstVT.isFixedLengthVector()) { 10635 DstVT = getContainerForFixedLengthVector(DstVT); 10636 SrcVT = getContainerForFixedLengthVector(SrcVT); 10637 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 10638 MVT MaskVT = getMaskTypeFor(DstVT); 10639 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10640 } 10641 10642 unsigned DstEltSize = DstVT.getScalarSizeInBits(); 10643 unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); 10644 10645 SDValue Result; 10646 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. 
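// Illustrative example: an i8 -> f64 conversion first extends the source to
// i32 (half the destination width) with VSEXT_VL/VZEXT_VL below, since the
// vector converts widen by at most a factor of two; the remaining i32 -> f64
// step is then a single widening convert.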
10647 if (SrcVT.isInteger()) { 10648 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 10649 10650 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL 10651 ? RISCVISD::VSEXT_VL 10652 : RISCVISD::VZEXT_VL; 10653 10654 // Do we need to do any pre-widening before converting? 10655 if (SrcEltSize == 1) { 10656 MVT IntVT = DstVT.changeVectorElementTypeToInteger(); 10657 MVT XLenVT = Subtarget.getXLenVT(); 10658 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 10659 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 10660 DAG.getUNDEF(IntVT), Zero, VL); 10661 SDValue One = DAG.getConstant( 10662 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); 10663 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 10664 DAG.getUNDEF(IntVT), One, VL); 10665 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat, 10666 ZeroSplat, DAG.getUNDEF(IntVT), VL); 10667 } else if (DstEltSize > (2 * SrcEltSize)) { 10668 // Widen before converting. 10669 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), 10670 DstVT.getVectorElementCount()); 10671 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); 10672 } 10673 10674 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 10675 } else { 10676 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 10677 "Wrong input/output vector types"); 10678 10679 // Convert f16 to f32 then convert f32 to i64. 10680 if (DstEltSize > (2 * SrcEltSize)) { 10681 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 10682 MVT InterimFVT = 10683 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 10684 Src = 10685 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); 10686 } 10687 10688 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 10689 } 10690 } else { // Narrowing + Conversion 10691 if (SrcVT.isInteger()) { 10692 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 10693 // First do a narrowing convert to an FP type half the size, then round 10694 // the FP type to a small FP type if needed. 10695 10696 MVT InterimFVT = DstVT; 10697 if (SrcEltSize > (2 * DstEltSize)) { 10698 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); 10699 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 10700 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 10701 } 10702 10703 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); 10704 10705 if (InterimFVT != DstVT) { 10706 Src = Result; 10707 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); 10708 } 10709 } else { 10710 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 10711 "Wrong input/output vector types"); 10712 // First do a narrowing conversion to an integer half the size, then 10713 // truncate if needed. 10714 10715 if (DstEltSize == 1) { 10716 // First convert to the same size integer, then convert to mask using 10717 // setcc. 10718 assert(SrcEltSize >= 16 && "Unexpected FP type!"); 10719 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), 10720 DstVT.getVectorElementCount()); 10721 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 10722 10723 // Compare the integer result to 0. The integer should be 0 or 1/-1, 10724 // otherwise the conversion was undefined. 
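// E.g. f32 -> i1 becomes: convert to an i32 vector with the chosen FP-to-int
// opcode, splat zero, then SETCC_VL with SETNE to form the mask.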
10725 MVT XLenVT = Subtarget.getXLenVT(); 10726 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 10727 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, 10728 DAG.getUNDEF(InterimIVT), SplatZero, VL); 10729 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, 10730 {Result, SplatZero, DAG.getCondCode(ISD::SETNE), 10731 DAG.getUNDEF(DstVT), Mask, VL}); 10732 } else { 10733 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 10734 DstVT.getVectorElementCount()); 10735 10736 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 10737 10738 while (InterimIVT != DstVT) { 10739 SrcEltSize /= 2; 10740 Src = Result; 10741 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 10742 DstVT.getVectorElementCount()); 10743 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, 10744 Src, Mask, VL); 10745 } 10746 } 10747 } 10748 } 10749 10750 MVT VT = Op.getSimpleValueType(); 10751 if (!VT.isFixedLengthVector()) 10752 return Result; 10753 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10754 } 10755 10756 SDValue 10757 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, 10758 SelectionDAG &DAG) const { 10759 SDLoc DL(Op); 10760 10761 SDValue Op1 = Op.getOperand(0); 10762 SDValue Op2 = Op.getOperand(1); 10763 SDValue Offset = Op.getOperand(2); 10764 SDValue Mask = Op.getOperand(3); 10765 SDValue EVL1 = Op.getOperand(4); 10766 SDValue EVL2 = Op.getOperand(5); 10767 10768 const MVT XLenVT = Subtarget.getXLenVT(); 10769 MVT VT = Op.getSimpleValueType(); 10770 MVT ContainerVT = VT; 10771 if (VT.isFixedLengthVector()) { 10772 ContainerVT = getContainerForFixedLengthVector(VT); 10773 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10774 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10775 MVT MaskVT = getMaskTypeFor(ContainerVT); 10776 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10777 } 10778 10779 bool IsMaskVector = VT.getVectorElementType() == MVT::i1; 10780 if (IsMaskVector) { 10781 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); 10782 10783 // Expand input operands 10784 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10785 DAG.getUNDEF(ContainerVT), 10786 DAG.getConstant(1, DL, XLenVT), EVL1); 10787 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10788 DAG.getUNDEF(ContainerVT), 10789 DAG.getConstant(0, DL, XLenVT), EVL1); 10790 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1, 10791 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1); 10792 10793 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10794 DAG.getUNDEF(ContainerVT), 10795 DAG.getConstant(1, DL, XLenVT), EVL2); 10796 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10797 DAG.getUNDEF(ContainerVT), 10798 DAG.getConstant(0, DL, XLenVT), EVL2); 10799 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2, 10800 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); 10801 } 10802 10803 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); 10804 SDValue DownOffset, UpOffset; 10805 if (ImmValue >= 0) { 10806 // The operand is a TargetConstant, we need to rebuild it as a regular 10807 // constant. 
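// Illustrative example: for offset = 2 and EVL1 = 8, the result is built as
// vslidedown(Op1, 2) followed by a vslideup of Op2 at UpOffset = 8 - 2 = 6,
// i.e. lanes [0, 6) come from Op1[2..7] and the remaining lanes from Op2.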
10808 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 10809 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset); 10810 } else { 10811 // The operand is a TargetConstant, we need to rebuild it as a regular 10812 // constant rather than negating the original operand. 10813 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 10814 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset); 10815 } 10816 10817 SDValue SlideDown = 10818 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 10819 Op1, DownOffset, Mask, UpOffset); 10820 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2, 10821 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC); 10822 10823 if (IsMaskVector) { 10824 // Truncate Result back to a mask vector (Result has same EVL as Op2) 10825 Result = DAG.getNode( 10826 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), 10827 {Result, DAG.getConstant(0, DL, ContainerVT), 10828 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), 10829 Mask, EVL2}); 10830 } 10831 10832 if (!VT.isFixedLengthVector()) 10833 return Result; 10834 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10835 } 10836 10837 SDValue 10838 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, 10839 SelectionDAG &DAG) const { 10840 SDLoc DL(Op); 10841 MVT VT = Op.getSimpleValueType(); 10842 MVT XLenVT = Subtarget.getXLenVT(); 10843 10844 SDValue Op1 = Op.getOperand(0); 10845 SDValue Mask = Op.getOperand(1); 10846 SDValue EVL = Op.getOperand(2); 10847 10848 MVT ContainerVT = VT; 10849 if (VT.isFixedLengthVector()) { 10850 ContainerVT = getContainerForFixedLengthVector(VT); 10851 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10852 MVT MaskVT = getMaskTypeFor(ContainerVT); 10853 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10854 } 10855 10856 MVT GatherVT = ContainerVT; 10857 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); 10858 // Check if we are working with mask vectors 10859 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; 10860 if (IsMaskVector) { 10861 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); 10862 10863 // Expand input operand 10864 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10865 DAG.getUNDEF(IndicesVT), 10866 DAG.getConstant(1, DL, XLenVT), EVL); 10867 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10868 DAG.getUNDEF(IndicesVT), 10869 DAG.getConstant(0, DL, XLenVT), EVL); 10870 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne, 10871 SplatZero, DAG.getUNDEF(IndicesVT), EVL); 10872 } 10873 10874 unsigned EltSize = GatherVT.getScalarSizeInBits(); 10875 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); 10876 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 10877 unsigned MaxVLMAX = 10878 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 10879 10880 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 10881 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 10882 // to use vrgatherei16.vv. 10883 // TODO: It's also possible to use vrgatherei16.vv for other types to 10884 // decrease register width for the index calculation. 10885 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 10886 if (MaxVLMAX > 256 && EltSize == 8) { 10887 // If this is LMUL=8, we have to split before using vrgatherei16.vv. 
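// (With SEW=8 data at LMUL=8, the 16-bit indices of vrgatherei16.vv would need
// an illegal index EMUL of 16; splitting into two LMUL=4 halves keeps the
// index EMUL at a legal 8.)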
10888 // Split the vector in half and reverse each half using a full register 10889 // reverse. 10890 // Swap the halves and concatenate them. 10891 // Slide the concatenated result by (VLMax - VL). 10892 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 10893 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); 10894 auto [Lo, Hi] = DAG.SplitVector(Op1, DL); 10895 10896 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 10897 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 10898 10899 // Reassemble the low and high pieces reversed. 10900 // NOTE: this Result is unmasked (because we do not need masks for 10901 // shuffles). If in the future this has to change, we can use a SELECT_VL 10902 // between Result and UNDEF using the mask originally passed to VP_REVERSE 10903 SDValue Result = 10904 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); 10905 10906 // Slide off any elements from past EVL that were reversed into the low 10907 // elements. 10908 unsigned MinElts = GatherVT.getVectorMinNumElements(); 10909 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 10910 DAG.getConstant(MinElts, DL, XLenVT)); 10911 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); 10912 10913 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, 10914 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); 10915 10916 if (IsMaskVector) { 10917 // Truncate Result back to a mask vector 10918 Result = 10919 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 10920 {Result, DAG.getConstant(0, DL, GatherVT), 10921 DAG.getCondCode(ISD::SETNE), 10922 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 10923 } 10924 10925 if (!VT.isFixedLengthVector()) 10926 return Result; 10927 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10928 } 10929 10930 // Just promote the int type to i16 which will double the LMUL. 10931 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); 10932 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 10933 } 10934 10935 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); 10936 SDValue VecLen = 10937 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); 10938 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10939 DAG.getUNDEF(IndicesVT), VecLen, EVL); 10940 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, 10941 DAG.getUNDEF(IndicesVT), Mask, EVL); 10942 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, 10943 DAG.getUNDEF(GatherVT), Mask, EVL); 10944 10945 if (IsMaskVector) { 10946 // Truncate Result back to a mask vector 10947 Result = DAG.getNode( 10948 RISCVISD::SETCC_VL, DL, ContainerVT, 10949 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), 10950 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 10951 } 10952 10953 if (!VT.isFixedLengthVector()) 10954 return Result; 10955 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10956 } 10957 10958 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, 10959 SelectionDAG &DAG) const { 10960 MVT VT = Op.getSimpleValueType(); 10961 if (VT.getVectorElementType() != MVT::i1) 10962 return lowerVPOp(Op, DAG); 10963 10964 // It is safe to drop mask parameter as masked-off elements are undef. 
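// E.g. a VP_AND on i1 vectors is expected to map to a single mask-register
// VMAND_VL node here, keeping only the EVL operand.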
10965 SDValue Op1 = Op->getOperand(0); 10966 SDValue Op2 = Op->getOperand(1); 10967 SDValue VL = Op->getOperand(3); 10968 10969 MVT ContainerVT = VT; 10970 const bool IsFixed = VT.isFixedLengthVector(); 10971 if (IsFixed) { 10972 ContainerVT = getContainerForFixedLengthVector(VT); 10973 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10974 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10975 } 10976 10977 SDLoc DL(Op); 10978 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL); 10979 if (!IsFixed) 10980 return Val; 10981 return convertFromScalableVector(VT, Val, DAG, Subtarget); 10982 } 10983 10984 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, 10985 SelectionDAG &DAG) const { 10986 SDLoc DL(Op); 10987 MVT XLenVT = Subtarget.getXLenVT(); 10988 MVT VT = Op.getSimpleValueType(); 10989 MVT ContainerVT = VT; 10990 if (VT.isFixedLengthVector()) 10991 ContainerVT = getContainerForFixedLengthVector(VT); 10992 10993 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10994 10995 auto *VPNode = cast<VPStridedLoadSDNode>(Op); 10996 // Check if the mask is known to be all ones 10997 SDValue Mask = VPNode->getMask(); 10998 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10999 11000 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse 11001 : Intrinsic::riscv_vlse_mask, 11002 DL, XLenVT); 11003 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, 11004 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), 11005 VPNode->getStride()}; 11006 if (!IsUnmasked) { 11007 if (VT.isFixedLengthVector()) { 11008 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11009 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11010 } 11011 Ops.push_back(Mask); 11012 } 11013 Ops.push_back(VPNode->getVectorLength()); 11014 if (!IsUnmasked) { 11015 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 11016 Ops.push_back(Policy); 11017 } 11018 11019 SDValue Result = 11020 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 11021 VPNode->getMemoryVT(), VPNode->getMemOperand()); 11022 SDValue Chain = Result.getValue(1); 11023 11024 if (VT.isFixedLengthVector()) 11025 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11026 11027 return DAG.getMergeValues({Result, Chain}, DL); 11028 } 11029 11030 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, 11031 SelectionDAG &DAG) const { 11032 SDLoc DL(Op); 11033 MVT XLenVT = Subtarget.getXLenVT(); 11034 11035 auto *VPNode = cast<VPStridedStoreSDNode>(Op); 11036 SDValue StoreVal = VPNode->getValue(); 11037 MVT VT = StoreVal.getSimpleValueType(); 11038 MVT ContainerVT = VT; 11039 if (VT.isFixedLengthVector()) { 11040 ContainerVT = getContainerForFixedLengthVector(VT); 11041 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 11042 } 11043 11044 // Check if the mask is known to be all ones 11045 SDValue Mask = VPNode->getMask(); 11046 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11047 11048 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
Intrinsic::riscv_vsse 11049 : Intrinsic::riscv_vsse_mask, 11050 DL, XLenVT); 11051 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, 11052 VPNode->getBasePtr(), VPNode->getStride()}; 11053 if (!IsUnmasked) { 11054 if (VT.isFixedLengthVector()) { 11055 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11056 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11057 } 11058 Ops.push_back(Mask); 11059 } 11060 Ops.push_back(VPNode->getVectorLength()); 11061 11062 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), 11063 Ops, VPNode->getMemoryVT(), 11064 VPNode->getMemOperand()); 11065 } 11066 11067 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 11068 // matched to a RVV indexed load. The RVV indexed load instructions only 11069 // support the "unsigned unscaled" addressing mode; indices are implicitly 11070 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 11071 // signed or scaled indexing is extended to the XLEN value type and scaled 11072 // accordingly. 11073 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 11074 SelectionDAG &DAG) const { 11075 SDLoc DL(Op); 11076 MVT VT = Op.getSimpleValueType(); 11077 11078 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11079 EVT MemVT = MemSD->getMemoryVT(); 11080 MachineMemOperand *MMO = MemSD->getMemOperand(); 11081 SDValue Chain = MemSD->getChain(); 11082 SDValue BasePtr = MemSD->getBasePtr(); 11083 11084 ISD::LoadExtType LoadExtType; 11085 SDValue Index, Mask, PassThru, VL; 11086 11087 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 11088 Index = VPGN->getIndex(); 11089 Mask = VPGN->getMask(); 11090 PassThru = DAG.getUNDEF(VT); 11091 VL = VPGN->getVectorLength(); 11092 // VP doesn't support extending loads. 11093 LoadExtType = ISD::NON_EXTLOAD; 11094 } else { 11095 // Else it must be a MGATHER. 11096 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 11097 Index = MGN->getIndex(); 11098 Mask = MGN->getMask(); 11099 PassThru = MGN->getPassThru(); 11100 LoadExtType = MGN->getExtensionType(); 11101 } 11102 11103 MVT IndexVT = Index.getSimpleValueType(); 11104 MVT XLenVT = Subtarget.getXLenVT(); 11105 11106 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 11107 "Unexpected VTs!"); 11108 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 11109 // Targets have to explicitly opt-in for extending vector loads. 11110 assert(LoadExtType == ISD::NON_EXTLOAD && 11111 "Unexpected extending MGATHER/VP_GATHER"); 11112 (void)LoadExtType; 11113 11114 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 11115 // the selection of the masked intrinsics doesn't do this for us. 
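// E.g. a gather whose mask is a constant all-ones splat is emitted as
// riscv_vluxei with an undef passthru instead of riscv_vluxei_mask, so the
// mask and policy operands below are omitted.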
11116 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11117 11118 MVT ContainerVT = VT; 11119 if (VT.isFixedLengthVector()) { 11120 ContainerVT = getContainerForFixedLengthVector(VT); 11121 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 11122 ContainerVT.getVectorElementCount()); 11123 11124 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 11125 11126 if (!IsUnmasked) { 11127 MVT MaskVT = getMaskTypeFor(ContainerVT); 11128 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11129 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 11130 } 11131 } 11132 11133 if (!VL) 11134 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11135 11136 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 11137 IndexVT = IndexVT.changeVectorElementType(XLenVT); 11138 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 11139 } 11140 11141 unsigned IntID = 11142 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 11143 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 11144 if (IsUnmasked) 11145 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11146 else 11147 Ops.push_back(PassThru); 11148 Ops.push_back(BasePtr); 11149 Ops.push_back(Index); 11150 if (!IsUnmasked) 11151 Ops.push_back(Mask); 11152 Ops.push_back(VL); 11153 if (!IsUnmasked) 11154 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 11155 11156 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 11157 SDValue Result = 11158 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 11159 Chain = Result.getValue(1); 11160 11161 if (VT.isFixedLengthVector()) 11162 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11163 11164 return DAG.getMergeValues({Result, Chain}, DL); 11165 } 11166 11167 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 11168 // matched to a RVV indexed store. The RVV indexed store instructions only 11169 // support the "unsigned unscaled" addressing mode; indices are implicitly 11170 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 11171 // signed or scaled indexing is extended to the XLEN value type and scaled 11172 // accordingly. 11173 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 11174 SelectionDAG &DAG) const { 11175 SDLoc DL(Op); 11176 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11177 EVT MemVT = MemSD->getMemoryVT(); 11178 MachineMemOperand *MMO = MemSD->getMemOperand(); 11179 SDValue Chain = MemSD->getChain(); 11180 SDValue BasePtr = MemSD->getBasePtr(); 11181 11182 bool IsTruncatingStore = false; 11183 SDValue Index, Mask, Val, VL; 11184 11185 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 11186 Index = VPSN->getIndex(); 11187 Mask = VPSN->getMask(); 11188 Val = VPSN->getValue(); 11189 VL = VPSN->getVectorLength(); 11190 // VP doesn't support truncating stores. 11191 IsTruncatingStore = false; 11192 } else { 11193 // Else it must be a MSCATTER. 
11194 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 11195 Index = MSN->getIndex(); 11196 Mask = MSN->getMask(); 11197 Val = MSN->getValue(); 11198 IsTruncatingStore = MSN->isTruncatingStore(); 11199 } 11200 11201 MVT VT = Val.getSimpleValueType(); 11202 MVT IndexVT = Index.getSimpleValueType(); 11203 MVT XLenVT = Subtarget.getXLenVT(); 11204 11205 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 11206 "Unexpected VTs!"); 11207 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 11208 // Targets have to explicitly opt-in for extending vector loads and 11209 // truncating vector stores. 11210 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 11211 (void)IsTruncatingStore; 11212 11213 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 11214 // the selection of the masked intrinsics doesn't do this for us. 11215 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11216 11217 MVT ContainerVT = VT; 11218 if (VT.isFixedLengthVector()) { 11219 ContainerVT = getContainerForFixedLengthVector(VT); 11220 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 11221 ContainerVT.getVectorElementCount()); 11222 11223 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 11224 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 11225 11226 if (!IsUnmasked) { 11227 MVT MaskVT = getMaskTypeFor(ContainerVT); 11228 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11229 } 11230 } 11231 11232 if (!VL) 11233 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11234 11235 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 11236 IndexVT = IndexVT.changeVectorElementType(XLenVT); 11237 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 11238 } 11239 11240 unsigned IntID = 11241 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 11242 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 11243 Ops.push_back(Val); 11244 Ops.push_back(BasePtr); 11245 Ops.push_back(Index); 11246 if (!IsUnmasked) 11247 Ops.push_back(Mask); 11248 Ops.push_back(VL); 11249 11250 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 11251 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 11252 } 11253 11254 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 11255 SelectionDAG &DAG) const { 11256 const MVT XLenVT = Subtarget.getXLenVT(); 11257 SDLoc DL(Op); 11258 SDValue Chain = Op->getOperand(0); 11259 SDValue SysRegNo = DAG.getTargetConstant( 11260 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 11261 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 11262 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 11263 11264 // Encoding used for rounding mode in RISC-V differs from that used in 11265 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 11266 // table, which consists of a sequence of 4-bit fields, each representing 11267 // corresponding FLT_ROUNDS mode. 
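// For example, FRM = RNE (encoding 0) selects the low 4-bit field, returning
// RoundingMode::NearestTiesToEven (1); FRM = RDN (encoding 2) selects the
// field holding RoundingMode::TowardNegative (3).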
11268 static const int Table = 11269 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 11270 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 11271 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 11272 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 11273 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 11274 11275 SDValue Shift = 11276 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 11277 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 11278 DAG.getConstant(Table, DL, XLenVT), Shift); 11279 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 11280 DAG.getConstant(7, DL, XLenVT)); 11281 11282 return DAG.getMergeValues({Masked, Chain}, DL); 11283 } 11284 11285 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 11286 SelectionDAG &DAG) const { 11287 const MVT XLenVT = Subtarget.getXLenVT(); 11288 SDLoc DL(Op); 11289 SDValue Chain = Op->getOperand(0); 11290 SDValue RMValue = Op->getOperand(1); 11291 SDValue SysRegNo = DAG.getTargetConstant( 11292 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 11293 11294 // Encoding used for rounding mode in RISC-V differs from that used in 11295 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in 11296 // a table, which consists of a sequence of 4-bit fields, each representing 11297 // corresponding RISC-V mode. 11298 static const unsigned Table = 11299 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | 11300 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | 11301 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | 11302 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | 11303 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); 11304 11305 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue); 11306 11307 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, 11308 DAG.getConstant(2, DL, XLenVT)); 11309 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 11310 DAG.getConstant(Table, DL, XLenVT), Shift); 11311 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 11312 DAG.getConstant(0x7, DL, XLenVT)); 11313 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, 11314 RMValue); 11315 } 11316 11317 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 11318 SelectionDAG &DAG) const { 11319 MachineFunction &MF = DAG.getMachineFunction(); 11320 11321 bool isRISCV64 = Subtarget.is64Bit(); 11322 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 11323 11324 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false); 11325 return DAG.getFrameIndex(FI, PtrVT); 11326 } 11327 11328 // Returns the opcode of the target-specific SDNode that implements the 32-bit 11329 // form of the given Opcode. 11330 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 11331 switch (Opcode) { 11332 default: 11333 llvm_unreachable("Unexpected opcode"); 11334 case ISD::SHL: 11335 return RISCVISD::SLLW; 11336 case ISD::SRA: 11337 return RISCVISD::SRAW; 11338 case ISD::SRL: 11339 return RISCVISD::SRLW; 11340 case ISD::SDIV: 11341 return RISCVISD::DIVW; 11342 case ISD::UDIV: 11343 return RISCVISD::DIVUW; 11344 case ISD::UREM: 11345 return RISCVISD::REMUW; 11346 case ISD::ROTL: 11347 return RISCVISD::ROLW; 11348 case ISD::ROTR: 11349 return RISCVISD::RORW; 11350 } 11351 } 11352 11353 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 11354 // node. 
Because i8/i16/i32 isn't a legal type for RV64, these operations would 11355 // otherwise be promoted to i64, making it difficult to select the 11356 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of 11357 // type i8/i16/i32 is lost. 11358 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 11359 unsigned ExtOpc = ISD::ANY_EXTEND) { 11360 SDLoc DL(N); 11361 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 11362 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 11363 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 11364 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 11365 // ReplaceNodeResults requires we maintain the same type for the return value. 11366 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 11367 } 11368 11369 // Converts the given 32-bit operation to a i64 operation with signed extension 11370 // semantic to reduce the signed extension instructions. 11371 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 11372 SDLoc DL(N); 11373 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11374 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11375 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 11376 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 11377 DAG.getValueType(MVT::i32)); 11378 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 11379 } 11380 11381 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 11382 SmallVectorImpl<SDValue> &Results, 11383 SelectionDAG &DAG) const { 11384 SDLoc DL(N); 11385 switch (N->getOpcode()) { 11386 default: 11387 llvm_unreachable("Don't know how to custom type legalize this operation!"); 11388 case ISD::STRICT_FP_TO_SINT: 11389 case ISD::STRICT_FP_TO_UINT: 11390 case ISD::FP_TO_SINT: 11391 case ISD::FP_TO_UINT: { 11392 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11393 "Unexpected custom legalisation"); 11394 bool IsStrict = N->isStrictFPOpcode(); 11395 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || 11396 N->getOpcode() == ISD::STRICT_FP_TO_SINT; 11397 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 11398 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 11399 TargetLowering::TypeSoftenFloat) { 11400 if (!isTypeLegal(Op0.getValueType())) 11401 return; 11402 if (IsStrict) { 11403 SDValue Chain = N->getOperand(0); 11404 // In absense of Zfh, promote f16 to f32, then convert. 11405 if (Op0.getValueType() == MVT::f16 && 11406 !Subtarget.hasStdExtZfhOrZhinx()) { 11407 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, 11408 {Chain, Op0}); 11409 Chain = Op0.getValue(1); 11410 } 11411 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64 11412 : RISCVISD::STRICT_FCVT_WU_RV64; 11413 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 11414 SDValue Res = DAG.getNode( 11415 Opc, DL, VTs, Chain, Op0, 11416 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 11417 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11418 Results.push_back(Res.getValue(1)); 11419 return; 11420 } 11421 // For bf16, or f16 in absense of Zfh, promote [b]f16 to f32 and then 11422 // convert. 11423 if ((Op0.getValueType() == MVT::f16 && 11424 !Subtarget.hasStdExtZfhOrZhinx()) || 11425 Op0.getValueType() == MVT::bf16) 11426 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 11427 11428 unsigned Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 11429 SDValue Res = 11430 DAG.getNode(Opc, DL, MVT::i64, Op0, 11431 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 11432 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11433 return; 11434 } 11435 // If the FP type needs to be softened, emit a library call using the 'si' 11436 // version. If we left it to default legalization we'd end up with 'di'. If 11437 // the FP type doesn't need to be softened just let generic type 11438 // legalization promote the result type. 11439 RTLIB::Libcall LC; 11440 if (IsSigned) 11441 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 11442 else 11443 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 11444 MakeLibCallOptions CallOptions; 11445 EVT OpVT = Op0.getValueType(); 11446 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 11447 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 11448 SDValue Result; 11449 std::tie(Result, Chain) = 11450 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 11451 Results.push_back(Result); 11452 if (IsStrict) 11453 Results.push_back(Chain); 11454 break; 11455 } 11456 case ISD::LROUND: { 11457 SDValue Op0 = N->getOperand(0); 11458 EVT Op0VT = Op0.getValueType(); 11459 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 11460 TargetLowering::TypeSoftenFloat) { 11461 if (!isTypeLegal(Op0VT)) 11462 return; 11463 11464 // In absense of Zfh, promote f16 to f32, then convert. 11465 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) 11466 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 11467 11468 SDValue Res = 11469 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, 11470 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); 11471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11472 return; 11473 } 11474 // If the FP type needs to be softened, emit a library call to lround. We'll 11475 // need to truncate the result. We assume any value that doesn't fit in i32 11476 // is allowed to return an unspecified value. 11477 RTLIB::Libcall LC = 11478 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; 11479 MakeLibCallOptions CallOptions; 11480 EVT OpVT = Op0.getValueType(); 11481 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); 11482 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; 11483 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); 11484 Results.push_back(Result); 11485 break; 11486 } 11487 case ISD::READCYCLECOUNTER: { 11488 assert(!Subtarget.is64Bit() && 11489 "READCYCLECOUNTER only has custom type legalization on riscv32"); 11490 11491 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 11492 SDValue RCW = 11493 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 11494 11495 Results.push_back( 11496 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 11497 Results.push_back(RCW.getValue(2)); 11498 break; 11499 } 11500 case ISD::LOAD: { 11501 if (!ISD::isNON_EXTLoad(N)) 11502 return; 11503 11504 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 11505 // sext_inreg we emit for ADD/SUB/MUL/SLLI. 
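// (On RV64 an i32 LW already sign-extends into the full register, so the
// sign-extending load is free and keeps the value in the sign-extended form
// the *W instructions expect.)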
11506 LoadSDNode *Ld = cast<LoadSDNode>(N); 11507 11508 SDLoc dl(N); 11509 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), 11510 Ld->getBasePtr(), Ld->getMemoryVT(), 11511 Ld->getMemOperand()); 11512 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); 11513 Results.push_back(Res.getValue(1)); 11514 return; 11515 } 11516 case ISD::MUL: { 11517 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 11518 unsigned XLen = Subtarget.getXLen(); 11519 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 11520 if (Size > XLen) { 11521 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 11522 SDValue LHS = N->getOperand(0); 11523 SDValue RHS = N->getOperand(1); 11524 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 11525 11526 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 11527 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 11528 // We need exactly one side to be unsigned. 11529 if (LHSIsU == RHSIsU) 11530 return; 11531 11532 auto MakeMULPair = [&](SDValue S, SDValue U) { 11533 MVT XLenVT = Subtarget.getXLenVT(); 11534 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 11535 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 11536 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 11537 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 11538 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 11539 }; 11540 11541 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 11542 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 11543 11544 // The other operand should be signed, but still prefer MULH when 11545 // possible. 11546 if (RHSIsU && LHSIsS && !RHSIsS) 11547 Results.push_back(MakeMULPair(LHS, RHS)); 11548 else if (LHSIsU && RHSIsS && !LHSIsS) 11549 Results.push_back(MakeMULPair(RHS, LHS)); 11550 11551 return; 11552 } 11553 [[fallthrough]]; 11554 } 11555 case ISD::ADD: 11556 case ISD::SUB: 11557 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11558 "Unexpected custom legalisation"); 11559 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 11560 break; 11561 case ISD::SHL: 11562 case ISD::SRA: 11563 case ISD::SRL: 11564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11565 "Unexpected custom legalisation"); 11566 if (N->getOperand(1).getOpcode() != ISD::Constant) { 11567 // If we can use a BSET instruction, allow default promotion to apply. 11568 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && 11569 isOneConstant(N->getOperand(0))) 11570 break; 11571 Results.push_back(customLegalizeToWOp(N, DAG)); 11572 break; 11573 } 11574 11575 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 11576 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 11577 // shift amount. 
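// I.e. (i32 (shl X, Y)) becomes
//   trunc(sext_inreg(shl(any_extend X, zero_extend Y), i32))
// which can then select to SLLW while the result stays known sign-extended.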
11578 if (N->getOpcode() == ISD::SHL) { 11579 SDLoc DL(N); 11580 SDValue NewOp0 = 11581 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11582 SDValue NewOp1 = 11583 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 11584 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 11585 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 11586 DAG.getValueType(MVT::i32)); 11587 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 11588 } 11589 11590 break; 11591 case ISD::ROTL: 11592 case ISD::ROTR: 11593 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11594 "Unexpected custom legalisation"); 11595 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 11596 Subtarget.hasVendorXTHeadBb()) && 11597 "Unexpected custom legalization"); 11598 if (!isa<ConstantSDNode>(N->getOperand(1)) && 11599 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) 11600 return; 11601 Results.push_back(customLegalizeToWOp(N, DAG)); 11602 break; 11603 case ISD::CTTZ: 11604 case ISD::CTTZ_ZERO_UNDEF: 11605 case ISD::CTLZ: 11606 case ISD::CTLZ_ZERO_UNDEF: { 11607 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11608 "Unexpected custom legalisation"); 11609 11610 SDValue NewOp0 = 11611 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11612 bool IsCTZ = 11613 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 11614 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 11615 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 11616 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11617 return; 11618 } 11619 case ISD::SDIV: 11620 case ISD::UDIV: 11621 case ISD::UREM: { 11622 MVT VT = N->getSimpleValueType(0); 11623 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 11624 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 11625 "Unexpected custom legalisation"); 11626 // Don't promote division/remainder by constant since we should expand those 11627 // to multiply by magic constant. 11628 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 11629 if (N->getOperand(1).getOpcode() == ISD::Constant && 11630 !isIntDivCheap(N->getValueType(0), Attr)) 11631 return; 11632 11633 // If the input is i32, use ANY_EXTEND since the W instructions don't read 11634 // the upper 32 bits. For other types we need to sign or zero extend 11635 // based on the opcode. 11636 unsigned ExtOpc = ISD::ANY_EXTEND; 11637 if (VT != MVT::i32) 11638 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 11639 : ISD::ZERO_EXTEND; 11640 11641 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 11642 break; 11643 } 11644 case ISD::SADDO: { 11645 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11646 "Unexpected custom legalisation"); 11647 11648 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 11649 // use the default legalization. 
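// The overflow check below is Overflow = (Res < LHS) xor (RHS < 0): e.g. for
// LHS = INT32_MAX and RHS = 1 the 32-bit result wraps below LHS while RHS is
// non-negative, so the xor is true.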
11650 if (!isa<ConstantSDNode>(N->getOperand(1))) 11651 return; 11652 11653 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 11654 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 11655 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); 11656 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 11657 DAG.getValueType(MVT::i32)); 11658 11659 SDValue Zero = DAG.getConstant(0, DL, MVT::i64); 11660 11661 // For an addition, the result should be less than one of the operands (LHS) 11662 // if and only if the other operand (RHS) is negative, otherwise there will 11663 // be overflow. 11664 // For a subtraction, the result should be less than one of the operands 11665 // (LHS) if and only if the other operand (RHS) is (non-zero) positive, 11666 // otherwise there will be overflow. 11667 EVT OType = N->getValueType(1); 11668 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT); 11669 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT); 11670 11671 SDValue Overflow = 11672 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS); 11673 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11674 Results.push_back(Overflow); 11675 return; 11676 } 11677 case ISD::UADDO: 11678 case ISD::USUBO: { 11679 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11680 "Unexpected custom legalisation"); 11681 bool IsAdd = N->getOpcode() == ISD::UADDO; 11682 // Create an ADDW or SUBW. 11683 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11684 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11685 SDValue Res = 11686 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 11687 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 11688 DAG.getValueType(MVT::i32)); 11689 11690 SDValue Overflow; 11691 if (IsAdd && isOneConstant(RHS)) { 11692 // Special case uaddo X, 1 overflowed if the addition result is 0. 11693 // The general case (X + C) < C is not necessarily beneficial. Although we 11694 // reduce the live range of X, we may introduce the materialization of 11695 // constant C, especially when the setcc result is used by branch. We have 11696 // no compare with constant and branch instructions. 11697 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, 11698 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); 11699 } else if (IsAdd && isAllOnesConstant(RHS)) { 11700 // Special case uaddo X, -1 overflowed if X != 0. 11701 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), 11702 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); 11703 } else { 11704 // Sign extend the LHS and perform an unsigned compare with the ADDW 11705 // result. Since the inputs are sign extended from i32, this is equivalent 11706 // to comparing the lower 32 bits. 11707 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 11708 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 11709 IsAdd ? ISD::SETULT : ISD::SETUGT); 11710 } 11711 11712 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11713 Results.push_back(Overflow); 11714 return; 11715 } 11716 case ISD::UADDSAT: 11717 case ISD::USUBSAT: { 11718 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11719 "Unexpected custom legalisation"); 11720 if (Subtarget.hasStdExtZbb()) { 11721 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. 
Using 11722 // sign extend allows overflow of the lower 32 bits to be detected on
11723 // the promoted size.
11724 SDValue LHS =
11725 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11726 SDValue RHS =
11727 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11728 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
11729 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11730 return;
11731 }
11732
11733 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11734 // promotion for UADDO/USUBO.
11735 Results.push_back(expandAddSubSat(N, DAG));
11736 return;
11737 }
11738 case ISD::ABS: {
11739 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11740 "Unexpected custom legalisation");
11741
11742 if (Subtarget.hasStdExtZbb()) {
11743 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11744 // This allows us to remember that the result is sign extended. Expanding
11745 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11746 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
11747 N->getOperand(0));
11748 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
11749 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
11750 return;
11751 }
11752
11753 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
11754 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11755
11756 // Freeze the source so we can increase its use count.
11757 Src = DAG.getFreeze(Src);
11758
11759 // Copy sign bit to all bits using the sraiw pattern.
11760 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
11761 DAG.getValueType(MVT::i32));
11762 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
11763 DAG.getConstant(31, DL, MVT::i64));
11764
11765 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
11766 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
11767
11768 // NOTE: The result is only required to be anyextended, but sext is
11769 // consistent with type legalization of sub.
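    // Put together, the expansion above corresponds roughly to this RV64
    // sequence for an i32 abs without Zbb (illustrative sketch only, register
    // choices are examples):
    //   sraiw a1, a0, 31    ; Y = sign-fill of X
    //   xor   a0, a0, a1    ; X ^ Y
    //   subw  a0, a0, a1    ; (X ^ Y) - Y == |X|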
11770 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, 11771 DAG.getValueType(MVT::i32)); 11772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 11773 return; 11774 } 11775 case ISD::BITCAST: { 11776 EVT VT = N->getValueType(0); 11777 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 11778 SDValue Op0 = N->getOperand(0); 11779 EVT Op0VT = Op0.getValueType(); 11780 MVT XLenVT = Subtarget.getXLenVT(); 11781 if (VT == MVT::i16 && Op0VT == MVT::f16 && 11782 Subtarget.hasStdExtZfhminOrZhinxmin()) { 11783 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 11784 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 11785 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && 11786 Subtarget.hasStdExtZfbfmin()) { 11787 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 11788 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 11789 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 11790 Subtarget.hasStdExtFOrZfinx()) { 11791 SDValue FPConv = 11792 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 11793 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 11794 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 && 11795 Subtarget.hasStdExtZfa()) { 11796 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, 11797 DAG.getVTList(MVT::i32, MVT::i32), Op0); 11798 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 11799 NewReg.getValue(0), NewReg.getValue(1)); 11800 Results.push_back(RetReg); 11801 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 11802 isTypeLegal(Op0VT)) { 11803 // Custom-legalize bitcasts from fixed-length vector types to illegal 11804 // scalar types in order to improve codegen. Bitcast the vector to a 11805 // one-element vector type whose element type is the same as the result 11806 // type, and extract the first element. 11807 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 11808 if (isTypeLegal(BVT)) { 11809 SDValue BVec = DAG.getBitcast(BVT, Op0); 11810 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 11811 DAG.getConstant(0, DL, XLenVT))); 11812 } 11813 } 11814 break; 11815 } 11816 case RISCVISD::BREV8: { 11817 MVT VT = N->getSimpleValueType(0); 11818 MVT XLenVT = Subtarget.getXLenVT(); 11819 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && 11820 "Unexpected custom legalisation"); 11821 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 11822 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 11823 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); 11824 // ReplaceNodeResults requires we maintain the same type for the return 11825 // value. 11826 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); 11827 break; 11828 } 11829 case ISD::EXTRACT_VECTOR_ELT: { 11830 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 11831 // type is illegal (currently only vXi64 RV32). 11832 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 11833 // transferred to the destination register. We issue two of these from the 11834 // upper- and lower- halves of the SEW-bit vector element, slid down to the 11835 // first element. 11836 SDValue Vec = N->getOperand(0); 11837 SDValue Idx = N->getOperand(1); 11838 11839 // The vector type hasn't been legalized yet so we can't issue target 11840 // specific nodes if it needs legalization. 
11841 // FIXME: We would manually legalize if it's important. 11842 if (!isTypeLegal(Vec.getValueType())) 11843 return; 11844 11845 MVT VecVT = Vec.getSimpleValueType(); 11846 11847 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 11848 VecVT.getVectorElementType() == MVT::i64 && 11849 "Unexpected EXTRACT_VECTOR_ELT legalization"); 11850 11851 // If this is a fixed vector, we need to convert it to a scalable vector. 11852 MVT ContainerVT = VecVT; 11853 if (VecVT.isFixedLengthVector()) { 11854 ContainerVT = getContainerForFixedLengthVector(VecVT); 11855 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 11856 } 11857 11858 MVT XLenVT = Subtarget.getXLenVT(); 11859 11860 // Use a VL of 1 to avoid processing more elements than we need. 11861 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 11862 11863 // Unless the index is known to be 0, we must slide the vector down to get 11864 // the desired element into index 0. 11865 if (!isNullConstant(Idx)) { 11866 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 11867 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 11868 } 11869 11870 // Extract the lower XLEN bits of the correct vector element. 11871 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 11872 11873 // To extract the upper XLEN bits of the vector element, shift the first 11874 // element right by 32 bits and re-extract the lower XLEN bits. 11875 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11876 DAG.getUNDEF(ContainerVT), 11877 DAG.getConstant(32, DL, XLenVT), VL); 11878 SDValue LShr32 = 11879 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, 11880 DAG.getUNDEF(ContainerVT), Mask, VL); 11881 11882 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 11883 11884 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 11885 break; 11886 } 11887 case ISD::INTRINSIC_WO_CHAIN: { 11888 unsigned IntNo = N->getConstantOperandVal(0); 11889 switch (IntNo) { 11890 default: 11891 llvm_unreachable( 11892 "Don't know how to custom type legalize this intrinsic!"); 11893 case Intrinsic::experimental_get_vector_length: { 11894 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); 11895 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11896 return; 11897 } 11898 case Intrinsic::riscv_orc_b: 11899 case Intrinsic::riscv_brev8: 11900 case Intrinsic::riscv_sha256sig0: 11901 case Intrinsic::riscv_sha256sig1: 11902 case Intrinsic::riscv_sha256sum0: 11903 case Intrinsic::riscv_sha256sum1: 11904 case Intrinsic::riscv_sm3p0: 11905 case Intrinsic::riscv_sm3p1: { 11906 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11907 return; 11908 unsigned Opc; 11909 switch (IntNo) { 11910 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 11911 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 11912 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 11913 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 11914 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 11915 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 11916 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 11917 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 11918 } 11919 11920 SDValue NewOp = 11921 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11922 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 11923 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, 
MVT::i32, Res)); 11924 return; 11925 } 11926 case Intrinsic::riscv_sm4ks: 11927 case Intrinsic::riscv_sm4ed: { 11928 unsigned Opc = 11929 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 11930 SDValue NewOp0 = 11931 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11932 SDValue NewOp1 = 11933 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11934 SDValue Res = 11935 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); 11936 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11937 return; 11938 } 11939 case Intrinsic::riscv_clmul: { 11940 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11941 return; 11942 11943 SDValue NewOp0 = 11944 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11945 SDValue NewOp1 = 11946 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11947 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 11948 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11949 return; 11950 } 11951 case Intrinsic::riscv_clmulh: 11952 case Intrinsic::riscv_clmulr: { 11953 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11954 return; 11955 11956 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 11957 // to the full 128-bit clmul result of multiplying two xlen values. 11958 // Perform clmulr or clmulh on the shifted values. Finally, extract the 11959 // upper 32 bits. 11960 // 11961 // The alternative is to mask the inputs to 32 bits and use clmul, but 11962 // that requires two shifts to mask each input without zext.w. 11963 // FIXME: If the inputs are known zero extended or could be freely 11964 // zero extended, the mask form would be better. 11965 SDValue NewOp0 = 11966 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11967 SDValue NewOp1 = 11968 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11969 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 11970 DAG.getConstant(32, DL, MVT::i64)); 11971 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 11972 DAG.getConstant(32, DL, MVT::i64)); 11973 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH 11974 : RISCVISD::CLMULR; 11975 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 11976 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 11977 DAG.getConstant(32, DL, MVT::i64)); 11978 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11979 return; 11980 } 11981 case Intrinsic::riscv_vmv_x_s: { 11982 EVT VT = N->getValueType(0); 11983 MVT XLenVT = Subtarget.getXLenVT(); 11984 if (VT.bitsLT(XLenVT)) { 11985 // Simple case just extract using vmv.x.s and truncate. 11986 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 11987 Subtarget.getXLenVT(), N->getOperand(1)); 11988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 11989 return; 11990 } 11991 11992 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 11993 "Unexpected custom legalization"); 11994 11995 // We need to do the move in two steps. 11996 SDValue Vec = N->getOperand(1); 11997 MVT VecVT = Vec.getSimpleValueType(); 11998 11999 // First extract the lower XLEN bits of the element. 12000 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 12001 12002 // To extract the upper XLEN bits of the vector element, shift the first 12003 // element right by 32 bits and re-extract the lower XLEN bits. 
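    // Illustrative sketch of the kind of RV32 code this produces (the exact
    // sequence depends on the surrounding lowering; register choices are
    // examples only):
    //   vsetivli zero, 1, e64, m1, ta, ma
    //   vmv.x.s  a0, v8        ; low 32 bits of element 0
    //   li       a2, 32
    //   vsrl.vx  v9, v8, a2
    //   vmv.x.s  a1, v9        ; high 32 bits of element 0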
12004 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12005
12006 SDValue ThirtyTwoV =
12007 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12008 DAG.getConstant(32, DL, XLenVT), VL);
12009 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12010 DAG.getUNDEF(VecVT), Mask, VL);
12011 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12012
12013 Results.push_back(
12014 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12015 break;
12016 }
12017 }
12018 break;
12019 }
12020 case ISD::VECREDUCE_ADD:
12021 case ISD::VECREDUCE_AND:
12022 case ISD::VECREDUCE_OR:
12023 case ISD::VECREDUCE_XOR:
12024 case ISD::VECREDUCE_SMAX:
12025 case ISD::VECREDUCE_UMAX:
12026 case ISD::VECREDUCE_SMIN:
12027 case ISD::VECREDUCE_UMIN:
12028 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12029 Results.push_back(V);
12030 break;
12031 case ISD::VP_REDUCE_ADD:
12032 case ISD::VP_REDUCE_AND:
12033 case ISD::VP_REDUCE_OR:
12034 case ISD::VP_REDUCE_XOR:
12035 case ISD::VP_REDUCE_SMAX:
12036 case ISD::VP_REDUCE_UMAX:
12037 case ISD::VP_REDUCE_SMIN:
12038 case ISD::VP_REDUCE_UMIN:
12039 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12040 Results.push_back(V);
12041 break;
12042 case ISD::GET_ROUNDING: {
12043 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12044 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12045 Results.push_back(Res.getValue(0));
12046 Results.push_back(Res.getValue(1));
12047 break;
12048 }
12049 }
12050 }
12051
12052 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12053 /// which corresponds to it.
12054 static unsigned getVecReduceOpcode(unsigned Opc) {
12055 switch (Opc) {
12056 default:
12057 llvm_unreachable("Unhandled binary to transform reduction");
12058 case ISD::ADD:
12059 return ISD::VECREDUCE_ADD;
12060 case ISD::UMAX:
12061 return ISD::VECREDUCE_UMAX;
12062 case ISD::SMAX:
12063 return ISD::VECREDUCE_SMAX;
12064 case ISD::UMIN:
12065 return ISD::VECREDUCE_UMIN;
12066 case ISD::SMIN:
12067 return ISD::VECREDUCE_SMIN;
12068 case ISD::AND:
12069 return ISD::VECREDUCE_AND;
12070 case ISD::OR:
12071 return ISD::VECREDUCE_OR;
12072 case ISD::XOR:
12073 return ISD::VECREDUCE_XOR;
12074 case ISD::FADD:
12075 // Note: This is the associative form of the generic reduction opcode.
12076 return ISD::VECREDUCE_FADD;
12077 }
12078 }
12079
12080 /// Perform two related transforms whose purpose is to incrementally recognize
12081 /// an explode_vector followed by scalar reduction as a vector reduction node.
12082 /// This exists to recover from a deficiency in SLP which can't handle
12083 /// forests with multiple roots sharing common nodes. In some cases, one
12084 /// of the trees will be vectorized, and the other will remain (unprofitably)
12085 /// scalarized.
12086 static SDValue
12087 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12088 const RISCVSubtarget &Subtarget) {
12089
12090 // This transform needs to run before all integer types have been legalized
12091 // to i64 (so that the vector element type matches the add type), and while
12092 // it's safe to introduce odd sized vector types.
12093 if (DAG.NewNodesMustHaveLegalTypes)
12094 return SDValue();
12095
12096 // Without V, this transform isn't useful. We could form the (illegal)
12097 // operations and let them be scalarized again, but there's really no point.
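  // Illustrative sketch of the incremental recognition performed below:
  //   add (extractelt v, 0), (extractelt v, 1)
  //     --> vecreduce_add (extract_subvector v, elements 0..1)
  //   add (vecreduce_add (extract_subvector v, elements 0..1)), (extractelt v, 2)
  //     --> vecreduce_add (extract_subvector v, elements 0..2)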
12098 if (!Subtarget.hasVInstructions()) 12099 return SDValue(); 12100 12101 const SDLoc DL(N); 12102 const EVT VT = N->getValueType(0); 12103 const unsigned Opc = N->getOpcode(); 12104 12105 // For FADD, we only handle the case with reassociation allowed. We 12106 // could handle strict reduction order, but at the moment, there's no 12107 // known reason to, and the complexity isn't worth it. 12108 // TODO: Handle fminnum and fmaxnum here 12109 if (!VT.isInteger() && 12110 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) 12111 return SDValue(); 12112 12113 const unsigned ReduceOpc = getVecReduceOpcode(Opc); 12114 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && 12115 "Inconsistent mappings"); 12116 SDValue LHS = N->getOperand(0); 12117 SDValue RHS = N->getOperand(1); 12118 12119 if (!LHS.hasOneUse() || !RHS.hasOneUse()) 12120 return SDValue(); 12121 12122 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12123 std::swap(LHS, RHS); 12124 12125 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12126 !isa<ConstantSDNode>(RHS.getOperand(1))) 12127 return SDValue(); 12128 12129 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue(); 12130 SDValue SrcVec = RHS.getOperand(0); 12131 EVT SrcVecVT = SrcVec.getValueType(); 12132 assert(SrcVecVT.getVectorElementType() == VT); 12133 if (SrcVecVT.isScalableVector()) 12134 return SDValue(); 12135 12136 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) 12137 return SDValue(); 12138 12139 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to 12140 // reduce_op (extract_subvector [2 x VT] from V). This will form the 12141 // root of our reduction tree. TODO: We could extend this to any two 12142 // adjacent aligned constant indices if desired. 12143 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 12144 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) { 12145 uint64_t LHSIdx = 12146 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue(); 12147 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) { 12148 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2); 12149 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 12150 DAG.getVectorIdxConstant(0, DL)); 12151 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags()); 12152 } 12153 } 12154 12155 // Match (binop (reduce (extract_subvector V, 0), 12156 // (extract_vector_elt V, sizeof(SubVec)))) 12157 // into a reduction of one more element from the original vector V. 12158 if (LHS.getOpcode() != ReduceOpc) 12159 return SDValue(); 12160 12161 SDValue ReduceVec = LHS.getOperand(0); 12162 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && 12163 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) && 12164 isNullConstant(ReduceVec.getOperand(1)) && 12165 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { 12166 // For illegal types (e.g. 3xi32), most will be combined again into a 12167 // wider (hopefully legal) type. If this is a terminal state, we are 12168 // relying on type legalization here to produce something reasonable 12169 // and this lowering quality could probably be improved. 
(TODO) 12170 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1); 12171 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 12172 DAG.getVectorIdxConstant(0, DL)); 12173 auto Flags = ReduceVec->getFlags(); 12174 Flags.intersectWith(N->getFlags()); 12175 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags); 12176 } 12177 12178 return SDValue(); 12179 } 12180 12181 12182 // Try to fold (<bop> x, (reduction.<bop> vec, start)) 12183 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, 12184 const RISCVSubtarget &Subtarget) { 12185 auto BinOpToRVVReduce = [](unsigned Opc) { 12186 switch (Opc) { 12187 default: 12188 llvm_unreachable("Unhandled binary to transfrom reduction"); 12189 case ISD::ADD: 12190 return RISCVISD::VECREDUCE_ADD_VL; 12191 case ISD::UMAX: 12192 return RISCVISD::VECREDUCE_UMAX_VL; 12193 case ISD::SMAX: 12194 return RISCVISD::VECREDUCE_SMAX_VL; 12195 case ISD::UMIN: 12196 return RISCVISD::VECREDUCE_UMIN_VL; 12197 case ISD::SMIN: 12198 return RISCVISD::VECREDUCE_SMIN_VL; 12199 case ISD::AND: 12200 return RISCVISD::VECREDUCE_AND_VL; 12201 case ISD::OR: 12202 return RISCVISD::VECREDUCE_OR_VL; 12203 case ISD::XOR: 12204 return RISCVISD::VECREDUCE_XOR_VL; 12205 case ISD::FADD: 12206 return RISCVISD::VECREDUCE_FADD_VL; 12207 case ISD::FMAXNUM: 12208 return RISCVISD::VECREDUCE_FMAX_VL; 12209 case ISD::FMINNUM: 12210 return RISCVISD::VECREDUCE_FMIN_VL; 12211 } 12212 }; 12213 12214 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { 12215 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 12216 isNullConstant(V.getOperand(1)) && 12217 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); 12218 }; 12219 12220 unsigned Opc = N->getOpcode(); 12221 unsigned ReduceIdx; 12222 if (IsReduction(N->getOperand(0), Opc)) 12223 ReduceIdx = 0; 12224 else if (IsReduction(N->getOperand(1), Opc)) 12225 ReduceIdx = 1; 12226 else 12227 return SDValue(); 12228 12229 // Skip if FADD disallows reassociation but the combiner needs. 12230 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) 12231 return SDValue(); 12232 12233 SDValue Extract = N->getOperand(ReduceIdx); 12234 SDValue Reduce = Extract.getOperand(0); 12235 if (!Extract.hasOneUse() || !Reduce.hasOneUse()) 12236 return SDValue(); 12237 12238 SDValue ScalarV = Reduce.getOperand(2); 12239 EVT ScalarVT = ScalarV.getValueType(); 12240 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && 12241 ScalarV.getOperand(0)->isUndef() && 12242 isNullConstant(ScalarV.getOperand(2))) 12243 ScalarV = ScalarV.getOperand(1); 12244 12245 // Make sure that ScalarV is a splat with VL=1. 12246 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && 12247 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && 12248 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) 12249 return SDValue(); 12250 12251 if (!isNonZeroAVL(ScalarV.getOperand(2))) 12252 return SDValue(); 12253 12254 // Check the scalar of ScalarV is neutral element 12255 // TODO: Deal with value other than neutral element. 12256 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), 12257 0)) 12258 return SDValue(); 12259 12260 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 12261 // FIXME: We might be able to improve this if operand 0 is undef. 
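  // Illustrative sketch of the overall fold: with a zero (neutral) start value,
  //   (add X, (extractelt (vecreduce_add_vl vec, splat(0), ...), 0))
  //     --> (extractelt (vecreduce_add_vl vec, splat(X), ...), 0)
  // i.e. X is folded in as the start value of the reduction instead of being
  // added back with a separate scalar instruction.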
12262 if (!isNonZeroAVL(Reduce.getOperand(5))) 12263 return SDValue(); 12264 12265 SDValue NewStart = N->getOperand(1 - ReduceIdx); 12266 12267 SDLoc DL(N); 12268 SDValue NewScalarV = 12269 lowerScalarInsert(NewStart, ScalarV.getOperand(2), 12270 ScalarV.getSimpleValueType(), DL, DAG, Subtarget); 12271 12272 // If we looked through an INSERT_SUBVECTOR we need to restore it. 12273 if (ScalarVT != ScalarV.getValueType()) 12274 NewScalarV = 12275 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), 12276 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 12277 12278 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1), 12279 NewScalarV, Reduce.getOperand(3), 12280 Reduce.getOperand(4), Reduce.getOperand(5)}; 12281 SDValue NewReduce = 12282 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops); 12283 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, 12284 Extract.getOperand(1)); 12285 } 12286 12287 // Optimize (add (shl x, c0), (shl y, c1)) -> 12288 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 12289 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 12290 const RISCVSubtarget &Subtarget) { 12291 // Perform this optimization only in the zba extension. 12292 if (!Subtarget.hasStdExtZba()) 12293 return SDValue(); 12294 12295 // Skip for vector types and larger types. 12296 EVT VT = N->getValueType(0); 12297 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 12298 return SDValue(); 12299 12300 // The two operand nodes must be SHL and have no other use. 12301 SDValue N0 = N->getOperand(0); 12302 SDValue N1 = N->getOperand(1); 12303 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 12304 !N0->hasOneUse() || !N1->hasOneUse()) 12305 return SDValue(); 12306 12307 // Check c0 and c1. 12308 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 12309 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 12310 if (!N0C || !N1C) 12311 return SDValue(); 12312 int64_t C0 = N0C->getSExtValue(); 12313 int64_t C1 = N1C->getSExtValue(); 12314 if (C0 <= 0 || C1 <= 0) 12315 return SDValue(); 12316 12317 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 12318 int64_t Bits = std::min(C0, C1); 12319 int64_t Diff = std::abs(C0 - C1); 12320 if (Diff != 1 && Diff != 2 && Diff != 3) 12321 return SDValue(); 12322 12323 // Build nodes. 12324 SDLoc DL(N); 12325 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 12326 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); 12327 SDValue NA0 = 12328 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); 12329 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); 12330 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); 12331 } 12332 12333 // Combine a constant select operand into its use: 12334 // 12335 // (and (select cond, -1, c), x) 12336 // -> (select cond, x, (and x, c)) [AllOnes=1] 12337 // (or (select cond, 0, c), x) 12338 // -> (select cond, x, (or x, c)) [AllOnes=0] 12339 // (xor (select cond, 0, c), x) 12340 // -> (select cond, x, (xor x, c)) [AllOnes=0] 12341 // (add (select cond, 0, c), x) 12342 // -> (select cond, x, (add x, c)) [AllOnes=0] 12343 // (sub x, (select cond, 0, c)) 12344 // -> (select cond, x, (sub x, c)) [AllOnes=0] 12345 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 12346 SelectionDAG &DAG, bool AllOnes, 12347 const RISCVSubtarget &Subtarget) { 12348 EVT VT = N->getValueType(0); 12349 12350 // Skip vectors. 
12351 if (VT.isVector())
12352 return SDValue();
12353
12354 if (!Subtarget.hasConditionalMoveFusion()) {
12355 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12356 if ((!Subtarget.hasStdExtZicond() &&
12357 !Subtarget.hasVendorXVentanaCondOps()) ||
12358 N->getOpcode() != ISD::AND)
12359 return SDValue();
12360
12361 // Maybe harmful when the condition code has multiple uses.
12362 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12363 return SDValue();
12364
12365 // Maybe harmful when VT is wider than XLen.
12366 if (VT.getSizeInBits() > Subtarget.getXLen())
12367 return SDValue();
12368 }
12369
12370 if ((Slct.getOpcode() != ISD::SELECT &&
12371 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12372 !Slct.hasOneUse())
12373 return SDValue();
12374
12375 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12376 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12377 };
12378
12379 bool SwapSelectOps;
12380 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12381 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12382 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12383 SDValue NonConstantVal;
12384 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12385 SwapSelectOps = false;
12386 NonConstantVal = FalseVal;
12387 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12388 SwapSelectOps = true;
12389 NonConstantVal = TrueVal;
12390 } else
12391 return SDValue();
12392
12393 // Slct is now known to be the desired identity constant when CC is true.
12394 TrueVal = OtherOp;
12395 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12396 // Unless SwapSelectOps says the condition should be false.
12397 if (SwapSelectOps)
12398 std::swap(TrueVal, FalseVal);
12399
12400 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12401 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12402 {Slct.getOperand(0), Slct.getOperand(1),
12403 Slct.getOperand(2), TrueVal, FalseVal});
12404
12405 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12406 {Slct.getOperand(0), TrueVal, FalseVal});
12407 }
12408
12409 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12410 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12411 bool AllOnes,
12412 const RISCVSubtarget &Subtarget) {
12413 SDValue N0 = N->getOperand(0);
12414 SDValue N1 = N->getOperand(1);
12415 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12416 return Result;
12417 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12418 return Result;
12419 return SDValue();
12420 }
12421
12422 // Transform (add (mul x, c0), c1) ->
12423 // (add (mul (add x, c1/c0), c0), c1%c0).
12424 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12425 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
12426 // to an infinite loop in DAGCombine if transformed.
12427 // Or transform (add (mul x, c0), c1) ->
12428 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12429 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12430 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12431 // lead to an infinite loop in DAGCombine if transformed.
12432 // Or transform (add (mul x, c0), c1) ->
12433 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12434 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not.
A special corner 12435 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will 12436 // lead to an infinite loop in DAGCombine if transformed. 12437 // Or transform (add (mul x, c0), c1) -> 12438 // (mul (add x, c1/c0), c0). 12439 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 12440 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 12441 const RISCVSubtarget &Subtarget) { 12442 // Skip for vector types and larger types. 12443 EVT VT = N->getValueType(0); 12444 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 12445 return SDValue(); 12446 // The first operand node must be a MUL and has no other use. 12447 SDValue N0 = N->getOperand(0); 12448 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 12449 return SDValue(); 12450 // Check if c0 and c1 match above conditions. 12451 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 12452 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12453 if (!N0C || !N1C) 12454 return SDValue(); 12455 // If N0C has multiple uses it's possible one of the cases in 12456 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result 12457 // in an infinite loop. 12458 if (!N0C->hasOneUse()) 12459 return SDValue(); 12460 int64_t C0 = N0C->getSExtValue(); 12461 int64_t C1 = N1C->getSExtValue(); 12462 int64_t CA, CB; 12463 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 12464 return SDValue(); 12465 // Search for proper CA (non-zero) and CB that both are simm12. 12466 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 12467 !isInt<12>(C0 * (C1 / C0))) { 12468 CA = C1 / C0; 12469 CB = C1 % C0; 12470 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 12471 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 12472 CA = C1 / C0 + 1; 12473 CB = C1 % C0 - C0; 12474 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 12475 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 12476 CA = C1 / C0 - 1; 12477 CB = C1 % C0 + C0; 12478 } else 12479 return SDValue(); 12480 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 12481 SDLoc DL(N); 12482 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 12483 DAG.getConstant(CA, DL, VT)); 12484 SDValue New1 = 12485 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 12486 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 12487 } 12488 12489 // Try to turn (add (xor bool, 1) -1) into (neg bool). 12490 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { 12491 SDValue N0 = N->getOperand(0); 12492 SDValue N1 = N->getOperand(1); 12493 EVT VT = N->getValueType(0); 12494 SDLoc DL(N); 12495 12496 // RHS should be -1. 12497 if (!isAllOnesConstant(N1)) 12498 return SDValue(); 12499 12500 // Look for (xor X, 1). 12501 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1))) 12502 return SDValue(); 12503 12504 // First xor input should be 0 or 1. 12505 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 12506 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask)) 12507 return SDValue(); 12508 12509 // Emit a negate of the setcc. 
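  // Illustrative: when X is known to be 0 or 1,
  //   (add (xor X, 1), -1) == (1 - X) - 1 == -X
  // so the whole expression collapses to (sub 0, X) below.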
12510 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), 12511 N0.getOperand(0)); 12512 } 12513 12514 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, 12515 const RISCVSubtarget &Subtarget) { 12516 if (SDValue V = combineAddOfBooleanXor(N, DAG)) 12517 return V; 12518 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 12519 return V; 12520 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 12521 return V; 12522 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12523 return V; 12524 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12525 return V; 12526 12527 // fold (add (select lhs, rhs, cc, 0, y), x) -> 12528 // (select lhs, rhs, cc, x, (add x, y)) 12529 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 12530 } 12531 12532 // Try to turn a sub boolean RHS and constant LHS into an addi. 12533 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { 12534 SDValue N0 = N->getOperand(0); 12535 SDValue N1 = N->getOperand(1); 12536 EVT VT = N->getValueType(0); 12537 SDLoc DL(N); 12538 12539 // Require a constant LHS. 12540 auto *N0C = dyn_cast<ConstantSDNode>(N0); 12541 if (!N0C) 12542 return SDValue(); 12543 12544 // All our optimizations involve subtracting 1 from the immediate and forming 12545 // an ADDI. Make sure the new immediate is valid for an ADDI. 12546 APInt ImmValMinus1 = N0C->getAPIntValue() - 1; 12547 if (!ImmValMinus1.isSignedIntN(12)) 12548 return SDValue(); 12549 12550 SDValue NewLHS; 12551 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { 12552 // (sub constant, (setcc x, y, eq/neq)) -> 12553 // (add (setcc x, y, neq/eq), constant - 1) 12554 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 12555 EVT SetCCOpVT = N1.getOperand(0).getValueType(); 12556 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) 12557 return SDValue(); 12558 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 12559 NewLHS = 12560 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); 12561 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && 12562 N1.getOperand(0).getOpcode() == ISD::SETCC) { 12563 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 12564 // Since setcc returns a bool the xor is equivalent to 1-setcc. 
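    // Illustrative: (sub 5, (xor (setcc a, b, eq), 1))
    //   == 5 - (1 - setcc) == (add (setcc a, b, eq), 4)
    // and the 4 fits in an ADDI immediate.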
12565 NewLHS = N1.getOperand(0); 12566 } else 12567 return SDValue(); 12568 12569 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); 12570 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); 12571 } 12572 12573 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, 12574 const RISCVSubtarget &Subtarget) { 12575 if (SDValue V = combineSubOfBoolean(N, DAG)) 12576 return V; 12577 12578 SDValue N0 = N->getOperand(0); 12579 SDValue N1 = N->getOperand(1); 12580 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 12581 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && 12582 isNullConstant(N1.getOperand(1))) { 12583 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 12584 if (CCVal == ISD::SETLT) { 12585 EVT VT = N->getValueType(0); 12586 SDLoc DL(N); 12587 unsigned ShAmt = N0.getValueSizeInBits() - 1; 12588 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), 12589 DAG.getConstant(ShAmt, DL, VT)); 12590 } 12591 } 12592 12593 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 12594 // (select lhs, rhs, cc, x, (sub x, y)) 12595 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); 12596 } 12597 12598 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 12599 // Legalizing setcc can introduce xors like this. Doing this transform reduces 12600 // the number of xors and may allow the xor to fold into a branch condition. 12601 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { 12602 SDValue N0 = N->getOperand(0); 12603 SDValue N1 = N->getOperand(1); 12604 bool IsAnd = N->getOpcode() == ISD::AND; 12605 12606 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) 12607 return SDValue(); 12608 12609 if (!N0.hasOneUse() || !N1.hasOneUse()) 12610 return SDValue(); 12611 12612 SDValue N01 = N0.getOperand(1); 12613 SDValue N11 = N1.getOperand(1); 12614 12615 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 12616 // (xor X, -1) based on the upper bits of the other operand being 0. If the 12617 // operation is And, allow one of the Xors to use -1. 12618 if (isOneConstant(N01)) { 12619 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) 12620 return SDValue(); 12621 } else if (isOneConstant(N11)) { 12622 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 12623 if (!(IsAnd && isAllOnesConstant(N01))) 12624 return SDValue(); 12625 } else 12626 return SDValue(); 12627 12628 EVT VT = N->getValueType(0); 12629 12630 SDValue N00 = N0.getOperand(0); 12631 SDValue N10 = N1.getOperand(0); 12632 12633 // The LHS of the xors needs to be 0/1. 12634 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 12635 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) 12636 return SDValue(); 12637 12638 // Invert the opcode and insert a new xor. 12639 SDLoc DL(N); 12640 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 12641 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); 12642 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); 12643 } 12644 12645 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, 12646 const RISCVSubtarget &Subtarget) { 12647 SDValue N0 = N->getOperand(0); 12648 EVT VT = N->getValueType(0); 12649 12650 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 12651 // extending X. This is safe since we only need the LSB after the shift and 12652 // shift amounts larger than 31 would produce poison. 
If we wait until 12653 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12654 // to use a BEXT instruction.
12655 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
12656 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
12657 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12658 SDLoc DL(N0);
12659 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12660 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12661 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12662 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
12663 }
12664
12665 return SDValue();
12666 }
12667
12668 // Combines two comparison operations and a logic operation into one selection
12669 // operation (min, max) and a logic operation. Returns the newly constructed
12670 // node if the conditions for the optimization are satisfied.
12671 static SDValue performANDCombine(SDNode *N,
12672 TargetLowering::DAGCombinerInfo &DCI,
12673 const RISCVSubtarget &Subtarget) {
12674 SelectionDAG &DAG = DCI.DAG;
12675
12676 SDValue N0 = N->getOperand(0);
12677 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12678 // extending X. This is safe since we only need the LSB after the shift and
12679 // shift amounts larger than 31 would produce poison. If we wait until
12680 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12681 // to use a BEXT instruction.
12682 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12683 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
12684 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
12685 N0.hasOneUse()) {
12686 SDLoc DL(N);
12687 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12688 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12689 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12690 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
12691 DAG.getConstant(1, DL, MVT::i64));
12692 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12693 }
12694
12695 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12696 return V;
12697 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12698 return V;
12699
12700 if (DCI.isAfterLegalizeDAG())
12701 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12702 return V;
12703
12704 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12705 // (select lhs, rhs, cc, x, (and x, y))
12706 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
12707 }
12708
12709 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12710 // FIXME: Generalize to other binary operators with the same operand.
12711 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
12712 SelectionDAG &DAG) {
12713 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
12714
12715 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
12716 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
12717 !N0.hasOneUse() || !N1.hasOneUse())
12718 return SDValue();
12719
12720 // Should have the same condition.
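  // The overall fold being attempted here, as an illustrative sketch:
  //   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
  //     --> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
  // so the xor with 1 is applied once to the selected result rather than to
  // both arms of the select idiom.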
12721 SDValue Cond = N0.getOperand(1);
12722 if (Cond != N1.getOperand(1))
12723 return SDValue();
12724
12725 SDValue TrueV = N0.getOperand(0);
12726 SDValue FalseV = N1.getOperand(0);
12727
12728 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
12729 TrueV.getOperand(1) != FalseV.getOperand(1) ||
12730 !isOneConstant(TrueV.getOperand(1)) ||
12731 !TrueV.hasOneUse() || !FalseV.hasOneUse())
12732 return SDValue();
12733
12734 EVT VT = N->getValueType(0);
12735 SDLoc DL(N);
12736
12737 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
12738 Cond);
12739 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
12740 Cond);
12741 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
12742 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
12743 }
12744
12745 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12746 const RISCVSubtarget &Subtarget) {
12747 SelectionDAG &DAG = DCI.DAG;
12748
12749 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12750 return V;
12751 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12752 return V;
12753
12754 if (DCI.isAfterLegalizeDAG())
12755 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12756 return V;
12757
12758 // Look for Or of CZERO_EQZ/NEZ with the same condition, which is the select
12759 // idiom. We may be able to pull a common operation out of the true and false
12760 // values.
12760 SDValue N0 = N->getOperand(0);
12761 SDValue N1 = N->getOperand(1);
12762 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
12763 return V;
12764 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
12765 return V;
12766
12767 // fold (or (select cond, 0, y), x) ->
12768 // (select cond, x, (or x, y))
12769 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12770 }
12771
12772 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
12773 const RISCVSubtarget &Subtarget) {
12774 SDValue N0 = N->getOperand(0);
12775 SDValue N1 = N->getOperand(1);
12776
12777 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
12778 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
12779 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
12780 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12781 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
12782 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
12783 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12784 SDLoc DL(N);
12785 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12786 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12787 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
12788 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
12789 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12790 }
12791
12792 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12793 // NOTE: Assumes ROL being legal means ROLW is legal.
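  // Illustrative: over the low 32 bits, ~(1 << x) is the all-ones pattern with
  // only bit x clear, which is exactly rotl(~1, x). A single ROLW with the
  // constant ~1 therefore replaces the SLLW + XOR pair (sketch; assumes Zbb or
  // Zbkb provides ROLW, which is what makes ROTL legal here).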
12794 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 12795 if (N0.getOpcode() == RISCVISD::SLLW && 12796 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && 12797 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { 12798 SDLoc DL(N); 12799 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, 12800 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); 12801 } 12802 12803 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 12804 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { 12805 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 12806 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 12807 if (ConstN00 && CC == ISD::SETLT) { 12808 EVT VT = N0.getValueType(); 12809 SDLoc DL(N0); 12810 const APInt &Imm = ConstN00->getAPIntValue(); 12811 if ((Imm + 1).isSignedIntN(12)) 12812 return DAG.getSetCC(DL, VT, N0.getOperand(1), 12813 DAG.getConstant(Imm + 1, DL, VT), CC); 12814 } 12815 } 12816 12817 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12818 return V; 12819 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12820 return V; 12821 12822 // fold (xor (select cond, 0, y), x) -> 12823 // (select cond, x, (xor x, y)) 12824 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 12825 } 12826 12827 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) { 12828 EVT VT = N->getValueType(0); 12829 if (!VT.isVector()) 12830 return SDValue(); 12831 12832 SDLoc DL(N); 12833 SDValue N0 = N->getOperand(0); 12834 SDValue N1 = N->getOperand(1); 12835 SDValue MulOper; 12836 unsigned AddSubOpc; 12837 12838 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) 12839 // (mul x, add (y, 1)) -> (add x, (mul x, y)) 12840 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) 12841 // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) 12842 auto IsAddSubWith1 = [&](SDValue V) -> bool { 12843 AddSubOpc = V->getOpcode(); 12844 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { 12845 SDValue Opnd = V->getOperand(1); 12846 MulOper = V->getOperand(0); 12847 if (AddSubOpc == ISD::SUB) 12848 std::swap(Opnd, MulOper); 12849 if (isOneOrOneSplat(Opnd)) 12850 return true; 12851 } 12852 return false; 12853 }; 12854 12855 if (IsAddSubWith1(N0)) { 12856 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); 12857 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); 12858 } 12859 12860 if (IsAddSubWith1(N1)) { 12861 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); 12862 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); 12863 } 12864 12865 return SDValue(); 12866 } 12867 12868 /// According to the property that indexed load/store instructions zero-extend 12869 /// their indices, try to narrow the type of index operand. 12870 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { 12871 if (isIndexTypeSigned(IndexType)) 12872 return false; 12873 12874 if (!N->hasOneUse()) 12875 return false; 12876 12877 EVT VT = N.getValueType(); 12878 SDLoc DL(N); 12879 12880 // In general, what we're doing here is seeing if we can sink a truncate to 12881 // a smaller element type into the expression tree building our index. 12882 // TODO: We can generalize this and handle a bunch more cases if useful. 12883 12884 // Narrow a buildvector to the narrowest element type. This requires less 12885 // work and less register pressure at high LMUL, and creates smaller constants 12886 // which may be cheaper to materialize. 
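  // Illustrative sketch: an index build_vector of <i64 0, i64 8, i64 16,
  // i64 24> has at most 5 active bits, so it can be truncated to
  // <i8 0, i8 8, i8 16, i8 24> before feeding the indexed memory operation.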
12887 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) { 12888 KnownBits Known = DAG.computeKnownBits(N); 12889 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits()); 12890 LLVMContext &C = *DAG.getContext(); 12891 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C); 12892 if (ResultVT.bitsLT(VT.getVectorElementType())) { 12893 N = DAG.getNode(ISD::TRUNCATE, DL, 12894 VT.changeVectorElementType(ResultVT), N); 12895 return true; 12896 } 12897 } 12898 12899 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). 12900 if (N.getOpcode() != ISD::SHL) 12901 return false; 12902 12903 SDValue N0 = N.getOperand(0); 12904 if (N0.getOpcode() != ISD::ZERO_EXTEND && 12905 N0.getOpcode() != RISCVISD::VZEXT_VL) 12906 return false; 12907 if (!N0->hasOneUse()) 12908 return false; 12909 12910 APInt ShAmt; 12911 SDValue N1 = N.getOperand(1); 12912 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) 12913 return false; 12914 12915 SDValue Src = N0.getOperand(0); 12916 EVT SrcVT = Src.getValueType(); 12917 unsigned SrcElen = SrcVT.getScalarSizeInBits(); 12918 unsigned ShAmtV = ShAmt.getZExtValue(); 12919 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); 12920 NewElen = std::max(NewElen, 8U); 12921 12922 // Skip if NewElen is not narrower than the original extended type. 12923 if (NewElen >= N0.getValueType().getScalarSizeInBits()) 12924 return false; 12925 12926 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); 12927 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); 12928 12929 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); 12930 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); 12931 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); 12932 return true; 12933 } 12934 12935 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with 12936 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 12937 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 12938 // can become a sext.w instead of a shift pair. 12939 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 12940 const RISCVSubtarget &Subtarget) { 12941 SDValue N0 = N->getOperand(0); 12942 SDValue N1 = N->getOperand(1); 12943 EVT VT = N->getValueType(0); 12944 EVT OpVT = N0.getValueType(); 12945 12946 if (OpVT != MVT::i64 || !Subtarget.is64Bit()) 12947 return SDValue(); 12948 12949 // RHS needs to be a constant. 12950 auto *N1C = dyn_cast<ConstantSDNode>(N1); 12951 if (!N1C) 12952 return SDValue(); 12953 12954 // LHS needs to be (and X, 0xffffffff). 12955 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 12956 !isa<ConstantSDNode>(N0.getOperand(1)) || 12957 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) 12958 return SDValue(); 12959 12960 // Looking for an equality compare. 12961 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); 12962 if (!isIntEqualitySetCC(Cond)) 12963 return SDValue(); 12964 12965 // Don't do this if the sign bit is provably zero, it will be turned back into 12966 // an AND. 12967 APInt SignMask = APInt::getOneBitSet(64, 31); 12968 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) 12969 return SDValue(); 12970 12971 const APInt &C1 = N1C->getAPIntValue(); 12972 12973 SDLoc dl(N); 12974 // If the constant is larger than 2^32 - 1 it is impossible for both sides 12975 // to be equal. 
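  // Otherwise the compare is rewritten; a concrete instance (illustrative):
  //   (seteq (and X, 0xffffffff), 0x80000000)
  //     --> (seteq (sext_inreg X, i32), 0xffffffff80000000)
  // The sign-extended constant is typically no harder (often cheaper) to
  // materialize, and the sext_inreg can become a single sext.w.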
12976 if (C1.getActiveBits() > 32) 12977 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); 12978 12979 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, 12980 N0.getOperand(0), DAG.getValueType(MVT::i32)); 12981 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), 12982 dl, OpVT), Cond); 12983 } 12984 12985 static SDValue 12986 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 12987 const RISCVSubtarget &Subtarget) { 12988 SDValue Src = N->getOperand(0); 12989 EVT VT = N->getValueType(0); 12990 12991 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 12992 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && 12993 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) 12994 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, 12995 Src.getOperand(0)); 12996 12997 return SDValue(); 12998 } 12999 13000 namespace { 13001 // Forward declaration of the structure holding the necessary information to 13002 // apply a combine. 13003 struct CombineResult; 13004 13005 /// Helper class for folding sign/zero extensions. 13006 /// In particular, this class is used for the following combines: 13007 /// add | add_vl -> vwadd(u) | vwadd(u)_w 13008 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w 13009 /// mul | mul_vl -> vwmul(u) | vwmul_su 13010 /// 13011 /// An object of this class represents an operand of the operation we want to 13012 /// combine. 13013 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 13014 /// NodeExtensionHelper for `a` and one for `b`. 13015 /// 13016 /// This class abstracts away how the extension is materialized and 13017 /// how its Mask, VL, number of users affect the combines. 13018 /// 13019 /// In particular: 13020 /// - VWADD_W is conceptually == add(op0, sext(op1)) 13021 /// - VWADDU_W == add(op0, zext(op1)) 13022 /// - VWSUB_W == sub(op0, sext(op1)) 13023 /// - VWSUBU_W == sub(op0, zext(op1)) 13024 /// 13025 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 13026 /// zext|sext(smaller_value). 13027 struct NodeExtensionHelper { 13028 /// Records if this operand is like being zero extended. 13029 bool SupportsZExt; 13030 /// Records if this operand is like being sign extended. 13031 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 13032 /// instance, a splat constant (e.g., 3), would support being both sign and 13033 /// zero extended. 13034 bool SupportsSExt; 13035 /// This boolean captures whether we care if this operand would still be 13036 /// around after the folding happens. 13037 bool EnforceOneUse; 13038 /// Records if this operand's mask needs to match the mask of the operation 13039 /// that it will fold into. 13040 bool CheckMask; 13041 /// Value of the Mask for this operand. 13042 /// It may be SDValue(). 13043 SDValue Mask; 13044 /// Value of the vector length operand. 13045 /// It may be SDValue(). 13046 SDValue VL; 13047 /// Original value that this NodeExtensionHelper represents. 13048 SDValue OrigOperand; 13049 13050 /// Get the value feeding the extension or the value itself. 13051 /// E.g., for zext(a), this would return a. 13052 SDValue getSource() const { 13053 switch (OrigOperand.getOpcode()) { 13054 case ISD::ZERO_EXTEND: 13055 case ISD::SIGN_EXTEND: 13056 case RISCVISD::VSEXT_VL: 13057 case RISCVISD::VZEXT_VL: 13058 return OrigOperand.getOperand(0); 13059 default: 13060 return OrigOperand; 13061 } 13062 } 13063 13064 /// Check if this instance represents a splat. 
13065 bool isSplat() const { 13066 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL; 13067 } 13068 13069 /// Get or create a value that can feed \p Root with the given extension \p 13070 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. 13071 /// \see ::getSource(). 13072 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, 13073 const RISCVSubtarget &Subtarget, 13074 std::optional<bool> SExt) const { 13075 if (!SExt.has_value()) 13076 return OrigOperand; 13077 13078 MVT NarrowVT = getNarrowType(Root); 13079 13080 SDValue Source = getSource(); 13081 if (Source.getValueType() == NarrowVT) 13082 return Source; 13083 13084 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; 13085 13086 // If we need an extension, we should be changing the type. 13087 SDLoc DL(Root); 13088 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); 13089 switch (OrigOperand.getOpcode()) { 13090 case ISD::ZERO_EXTEND: 13091 case ISD::SIGN_EXTEND: 13092 case RISCVISD::VSEXT_VL: 13093 case RISCVISD::VZEXT_VL: 13094 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); 13095 case RISCVISD::VMV_V_X_VL: 13096 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, 13097 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); 13098 default: 13099 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL 13100 // and that operand should already have the right NarrowVT so no 13101 // extension should be required at this point. 13102 llvm_unreachable("Unsupported opcode"); 13103 } 13104 } 13105 13106 /// Helper function to get the narrow type for \p Root. 13107 /// The narrow type is the type of \p Root where we divided the size of each 13108 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. 13109 /// \pre The size of the type of the elements of Root must be a multiple of 2 13110 /// and be greater than 16. 13111 static MVT getNarrowType(const SDNode *Root) { 13112 MVT VT = Root->getSimpleValueType(0); 13113 13114 // Determine the narrow size. 13115 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 13116 assert(NarrowSize >= 8 && "Trying to extend something we can't represent"); 13117 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), 13118 VT.getVectorElementCount()); 13119 return NarrowVT; 13120 } 13121 13122 /// Return the opcode required to materialize the folding of the sign 13123 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for 13124 /// both operands for \p Opcode. 13125 /// Put differently, get the opcode to materialize: 13126 /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b) 13127 /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b) 13128 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). 13129 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { 13130 switch (Opcode) { 13131 case ISD::ADD: 13132 case RISCVISD::ADD_VL: 13133 case RISCVISD::VWADD_W_VL: 13134 case RISCVISD::VWADDU_W_VL: 13135 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; 13136 case ISD::MUL: 13137 case RISCVISD::MUL_VL: 13138 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; 13139 case ISD::SUB: 13140 case RISCVISD::SUB_VL: 13141 case RISCVISD::VWSUB_W_VL: 13142 case RISCVISD::VWSUBU_W_VL: 13143 return IsSExt ? 
RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL; 13144 default: 13145 llvm_unreachable("Unexpected opcode"); 13146 } 13147 } 13148 13149 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> 13150 /// newOpcode(a, b). 13151 static unsigned getSUOpcode(unsigned Opcode) { 13152 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && 13153 "SU is only supported for MUL"); 13154 return RISCVISD::VWMULSU_VL; 13155 } 13156 13157 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) -> 13158 /// newOpcode(a, b). 13159 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { 13160 switch (Opcode) { 13161 case ISD::ADD: 13162 case RISCVISD::ADD_VL: 13163 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; 13164 case ISD::SUB: 13165 case RISCVISD::SUB_VL: 13166 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; 13167 default: 13168 llvm_unreachable("Unexpected opcode"); 13169 } 13170 } 13171 13172 using CombineToTry = std::function<std::optional<CombineResult>( 13173 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, 13174 const NodeExtensionHelper & /*RHS*/, SelectionDAG &, 13175 const RISCVSubtarget &)>; 13176 13177 /// Check if this node needs to be fully folded or extended for all users. 13178 bool needToPromoteOtherUsers() const { return EnforceOneUse; } 13179 13180 /// Helper method to set the various fields of this struct based on the 13181 /// type of \p Root. 13182 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, 13183 const RISCVSubtarget &Subtarget) { 13184 SupportsZExt = false; 13185 SupportsSExt = false; 13186 EnforceOneUse = true; 13187 CheckMask = true; 13188 unsigned Opc = OrigOperand.getOpcode(); 13189 switch (Opc) { 13190 case ISD::ZERO_EXTEND: 13191 case ISD::SIGN_EXTEND: { 13192 MVT VT = OrigOperand.getSimpleValueType(); 13193 if (!VT.isVector()) 13194 break; 13195 13196 SDValue NarrowElt = OrigOperand.getOperand(0); 13197 MVT NarrowVT = NarrowElt.getSimpleValueType(); 13198 13199 unsigned ScalarBits = VT.getScalarSizeInBits(); 13200 unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits(); 13201 13202 // Ensure the narrowing element type is legal 13203 if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType())) 13204 break; 13205 13206 // Ensure the extension's semantic is equivalent to rvv vzext or vsext. 13207 if (ScalarBits != NarrowScalarBits * 2) 13208 break; 13209 13210 SupportsZExt = Opc == ISD::ZERO_EXTEND; 13211 SupportsSExt = Opc == ISD::SIGN_EXTEND; 13212 13213 SDLoc DL(Root); 13214 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 13215 break; 13216 } 13217 case RISCVISD::VZEXT_VL: 13218 SupportsZExt = true; 13219 Mask = OrigOperand.getOperand(1); 13220 VL = OrigOperand.getOperand(2); 13221 break; 13222 case RISCVISD::VSEXT_VL: 13223 SupportsSExt = true; 13224 Mask = OrigOperand.getOperand(1); 13225 VL = OrigOperand.getOperand(2); 13226 break; 13227 case RISCVISD::VMV_V_X_VL: { 13228 // Historically, we didn't care about splat values not disappearing during 13229 // combines. 13230 EnforceOneUse = false; 13231 CheckMask = false; 13232 VL = OrigOperand.getOperand(2); 13233 13234 // The operand is a splat of a scalar. 13235 13236 // The pasthru must be undef for tail agnostic. 13237 if (!OrigOperand.getOperand(0).isUndef()) 13238 break; 13239 13240 // Get the scalar value. 13241 SDValue Op = OrigOperand.getOperand(1); 13242 13243 // See if we have enough sign bits or zero bits in the scalar to use a 13244 // widening opcode by splatting to smaller element size. 
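      // For example, a vmv.v.x splat of the constant 3 into nxv2i64 can also
      // be expressed as an nxv2i32 splat that is then sign or zero extended,
      // which makes it a usable operand for the widening (vw*) node forms.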
13245 MVT VT = Root->getSimpleValueType(0); 13246 unsigned EltBits = VT.getScalarSizeInBits(); 13247 unsigned ScalarBits = Op.getValueSizeInBits(); 13248 // Make sure we're getting all element bits from the scalar register. 13249 // FIXME: Support implicit sign extension of vmv.v.x? 13250 if (ScalarBits < EltBits) 13251 break; 13252 13253 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 13254 // If the narrow type cannot be expressed with a legal VMV, 13255 // this is not a valid candidate. 13256 if (NarrowSize < 8) 13257 break; 13258 13259 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) 13260 SupportsSExt = true; 13261 if (DAG.MaskedValueIsZero(Op, 13262 APInt::getBitsSetFrom(ScalarBits, NarrowSize))) 13263 SupportsZExt = true; 13264 break; 13265 } 13266 default: 13267 break; 13268 } 13269 } 13270 13271 /// Check if \p Root supports any extension folding combines. 13272 static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { 13273 switch (Root->getOpcode()) { 13274 case ISD::ADD: 13275 case ISD::SUB: 13276 case ISD::MUL: { 13277 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13278 if (!TLI.isTypeLegal(Root->getValueType(0))) 13279 return false; 13280 return Root->getValueType(0).isScalableVector(); 13281 } 13282 case RISCVISD::ADD_VL: 13283 case RISCVISD::MUL_VL: 13284 case RISCVISD::VWADD_W_VL: 13285 case RISCVISD::VWADDU_W_VL: 13286 case RISCVISD::SUB_VL: 13287 case RISCVISD::VWSUB_W_VL: 13288 case RISCVISD::VWSUBU_W_VL: 13289 return true; 13290 default: 13291 return false; 13292 } 13293 } 13294 13295 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 13296 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, 13297 const RISCVSubtarget &Subtarget) { 13298 assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " 13299 "unsupported root"); 13300 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); 13301 OrigOperand = Root->getOperand(OperandIdx); 13302 13303 unsigned Opc = Root->getOpcode(); 13304 switch (Opc) { 13305 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were 13306 // <ADD|SUB>(LHS, S|ZEXT(RHS)) 13307 case RISCVISD::VWADD_W_VL: 13308 case RISCVISD::VWADDU_W_VL: 13309 case RISCVISD::VWSUB_W_VL: 13310 case RISCVISD::VWSUBU_W_VL: 13311 if (OperandIdx == 1) { 13312 SupportsZExt = 13313 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; 13314 SupportsSExt = !SupportsZExt; 13315 std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); 13316 CheckMask = true; 13317 // There's no existing extension here, so we don't have to worry about 13318 // making sure it gets removed. 13319 EnforceOneUse = false; 13320 break; 13321 } 13322 [[fallthrough]]; 13323 default: 13324 fillUpExtensionSupport(Root, DAG, Subtarget); 13325 break; 13326 } 13327 } 13328 13329 /// Check if this operand is compatible with the given vector length \p VL. 13330 bool isVLCompatible(SDValue VL) const { 13331 return this->VL != SDValue() && this->VL == VL; 13332 } 13333 13334 /// Check if this operand is compatible with the given \p Mask. 13335 bool isMaskCompatible(SDValue Mask) const { 13336 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask); 13337 } 13338 13339 /// Helper function to get the Mask and VL from \p Root. 
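  /// For plain ISD::ADD/SUB/MUL roots these are the default scalable-vector
  /// mask and VL; for the VL-based roots they are taken directly from the
  /// root's own mask and VL operands (operands 3 and 4).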
13340 static std::pair<SDValue, SDValue> 13341 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, 13342 const RISCVSubtarget &Subtarget) { 13343 assert(isSupportedRoot(Root, DAG) && "Unexpected root"); 13344 switch (Root->getOpcode()) { 13345 case ISD::ADD: 13346 case ISD::SUB: 13347 case ISD::MUL: { 13348 SDLoc DL(Root); 13349 MVT VT = Root->getSimpleValueType(0); 13350 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 13351 } 13352 default: 13353 return std::make_pair(Root->getOperand(3), Root->getOperand(4)); 13354 } 13355 } 13356 13357 /// Check if the Mask and VL of this operand are compatible with \p Root. 13358 bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, 13359 const RISCVSubtarget &Subtarget) const { 13360 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); 13361 return isMaskCompatible(Mask) && isVLCompatible(VL); 13362 } 13363 13364 /// Helper function to check if \p N is commutative with respect to the 13365 /// foldings that are supported by this class. 13366 static bool isCommutative(const SDNode *N) { 13367 switch (N->getOpcode()) { 13368 case ISD::ADD: 13369 case ISD::MUL: 13370 case RISCVISD::ADD_VL: 13371 case RISCVISD::MUL_VL: 13372 case RISCVISD::VWADD_W_VL: 13373 case RISCVISD::VWADDU_W_VL: 13374 return true; 13375 case ISD::SUB: 13376 case RISCVISD::SUB_VL: 13377 case RISCVISD::VWSUB_W_VL: 13378 case RISCVISD::VWSUBU_W_VL: 13379 return false; 13380 default: 13381 llvm_unreachable("Unexpected opcode"); 13382 } 13383 } 13384 13385 /// Get a list of combine to try for folding extensions in \p Root. 13386 /// Note that each returned CombineToTry function doesn't actually modify 13387 /// anything. Instead they produce an optional CombineResult that if not None, 13388 /// need to be materialized for the combine to be applied. 13389 /// \see CombineResult::materialize. 13390 /// If the related CombineToTry function returns std::nullopt, that means the 13391 /// combine didn't match. 13392 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); 13393 }; 13394 13395 /// Helper structure that holds all the necessary information to materialize a 13396 /// combine that does some extension folding. 13397 struct CombineResult { 13398 /// Opcode to be generated when materializing the combine. 13399 unsigned TargetOpcode; 13400 // No value means no extension is needed. If extension is needed, the value 13401 // indicates if it needs to be sign extended. 13402 std::optional<bool> SExtLHS; 13403 std::optional<bool> SExtRHS; 13404 /// Root of the combine. 13405 SDNode *Root; 13406 /// LHS of the TargetOpcode. 13407 NodeExtensionHelper LHS; 13408 /// RHS of the TargetOpcode. 13409 NodeExtensionHelper RHS; 13410 13411 CombineResult(unsigned TargetOpcode, SDNode *Root, 13412 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS, 13413 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS) 13414 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS), 13415 Root(Root), LHS(LHS), RHS(RHS) {} 13416 13417 /// Return a value that uses TargetOpcode and that can be used to replace 13418 /// Root. 13419 /// The actual replacement is *not* done in that method. 
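  /// For example, if Root is (add_vl (vsext_vl a), (vsext_vl b)), this builds
  /// the replacement node (vwadd_vl a, b, merge, mask, vl); updating Root's
  /// uses is left to the caller.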
13420 SDValue materialize(SelectionDAG &DAG, 13421 const RISCVSubtarget &Subtarget) const { 13422 SDValue Mask, VL, Merge; 13423 std::tie(Mask, VL) = 13424 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); 13425 switch (Root->getOpcode()) { 13426 default: 13427 Merge = Root->getOperand(2); 13428 break; 13429 case ISD::ADD: 13430 case ISD::SUB: 13431 case ISD::MUL: 13432 Merge = DAG.getUNDEF(Root->getValueType(0)); 13433 break; 13434 } 13435 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), 13436 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), 13437 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), 13438 Merge, Mask, VL); 13439 } 13440 }; 13441 13442 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 13443 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 13444 /// are zext) and LHS and RHS can be folded into Root. 13445 /// AllowSExt and AllozZExt define which form `ext` can take in this pattern. 13446 /// 13447 /// \note If the pattern can match with both zext and sext, the returned 13448 /// CombineResult will feature the zext result. 13449 /// 13450 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13451 /// can be used to apply the pattern. 13452 static std::optional<CombineResult> 13453 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, 13454 const NodeExtensionHelper &RHS, bool AllowSExt, 13455 bool AllowZExt, SelectionDAG &DAG, 13456 const RISCVSubtarget &Subtarget) { 13457 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); 13458 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || 13459 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) 13460 return std::nullopt; 13461 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) 13462 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 13463 Root->getOpcode(), /*IsSExt=*/false), 13464 Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); 13465 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) 13466 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 13467 Root->getOpcode(), /*IsSExt=*/true), 13468 Root, LHS, /*SExtLHS=*/true, RHS, 13469 /*SExtRHS=*/true); 13470 return std::nullopt; 13471 } 13472 13473 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 13474 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 13475 /// are zext) and LHS and RHS can be folded into Root. 13476 /// 13477 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13478 /// can be used to apply the pattern. 13479 static std::optional<CombineResult> 13480 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, 13481 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13482 const RISCVSubtarget &Subtarget) { 13483 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 13484 /*AllowZExt=*/true, DAG, Subtarget); 13485 } 13486 13487 /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 13488 /// 13489 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13490 /// can be used to apply the pattern. 
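/// This is the "_W" form where LHS already has the wide type, e.g.,
/// (add_vl X, (vzext_vl Y)) can become (vwaddu_w_vl X, Y).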
13491 static std::optional<CombineResult> 13492 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, 13493 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13494 const RISCVSubtarget &Subtarget) { 13495 if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) 13496 return std::nullopt; 13497 13498 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 13499 // sext/zext? 13500 // Control this behavior behind an option (AllowSplatInVW_W) for testing 13501 // purposes. 13502 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) 13503 return CombineResult( 13504 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), 13505 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); 13506 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) 13507 return CombineResult( 13508 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), 13509 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); 13510 return std::nullopt; 13511 } 13512 13513 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 13514 /// 13515 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13516 /// can be used to apply the pattern. 13517 static std::optional<CombineResult> 13518 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, 13519 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13520 const RISCVSubtarget &Subtarget) { 13521 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 13522 /*AllowZExt=*/false, DAG, Subtarget); 13523 } 13524 13525 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 13526 /// 13527 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13528 /// can be used to apply the pattern. 13529 static std::optional<CombineResult> 13530 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, 13531 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13532 const RISCVSubtarget &Subtarget) { 13533 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, 13534 /*AllowZExt=*/true, DAG, Subtarget); 13535 } 13536 13537 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 13538 /// 13539 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13540 /// can be used to apply the pattern. 
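/// This maps to vwmulsu, e.g., (mul_vl (vsext_vl a), (vzext_vl b)) can become
/// (vwmulsu_vl a, b); it is only supported for multiplies.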
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
               const NodeExtensionHelper &RHS, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {

  if (!LHS.SupportsSExt || !RHS.SupportsZExt)
    return std::nullopt;
  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
    return std::nullopt;
  return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
                       Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
}

SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
    // add|sub -> vwadd(u)|vwsub(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub -> vwadd(u)_w|vwsub(u)_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUBU_W_VL:
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
} // End anonymous namespace.

/// Combine a binary operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
    return SDValue();

  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();
    if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
      return SDValue();

    NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
    auto AppendUsersIfNeeded = [&Worklist,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode *TheUse : Op.OrigOperand->uses()) {
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
    };

    // Control the compile time by limiting the number of nodes we look at in
    // total.
13632 if (Inserted.size() > ExtensionMaxWebSize) 13633 return SDValue(); 13634 13635 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = 13636 NodeExtensionHelper::getSupportedFoldings(N); 13637 13638 assert(!FoldingStrategies.empty() && "Nothing to be folded"); 13639 bool Matched = false; 13640 for (int Attempt = 0; 13641 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched; 13642 ++Attempt) { 13643 13644 for (NodeExtensionHelper::CombineToTry FoldingStrategy : 13645 FoldingStrategies) { 13646 std::optional<CombineResult> Res = 13647 FoldingStrategy(N, LHS, RHS, DAG, Subtarget); 13648 if (Res) { 13649 Matched = true; 13650 CombinesToApply.push_back(*Res); 13651 // All the inputs that are extended need to be folded, otherwise 13652 // we would be leaving the old input (since it is may still be used), 13653 // and the new one. 13654 if (Res->SExtLHS.has_value()) 13655 AppendUsersIfNeeded(LHS); 13656 if (Res->SExtRHS.has_value()) 13657 AppendUsersIfNeeded(RHS); 13658 break; 13659 } 13660 } 13661 std::swap(LHS, RHS); 13662 } 13663 // Right now we do an all or nothing approach. 13664 if (!Matched) 13665 return SDValue(); 13666 } 13667 // Store the value for the replacement of the input node separately. 13668 SDValue InputRootReplacement; 13669 // We do the RAUW after we materialize all the combines, because some replaced 13670 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, 13671 // some of these nodes may appear in the NodeExtensionHelpers of some of the 13672 // yet-to-be-visited CombinesToApply roots. 13673 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; 13674 ValuesToReplace.reserve(CombinesToApply.size()); 13675 for (CombineResult Res : CombinesToApply) { 13676 SDValue NewValue = Res.materialize(DAG, Subtarget); 13677 if (!InputRootReplacement) { 13678 assert(Res.Root == N && 13679 "First element is expected to be the current node"); 13680 InputRootReplacement = NewValue; 13681 } else { 13682 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); 13683 } 13684 } 13685 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { 13686 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); 13687 DCI.AddToWorklist(OldNewValues.second.getNode()); 13688 } 13689 return InputRootReplacement; 13690 } 13691 13692 // Helper function for performMemPairCombine. 13693 // Try to combine the memory loads/stores LSNode1 and LSNode2 13694 // into a single memory pair operation. 13695 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, 13696 LSBaseSDNode *LSNode2, SDValue BasePtr, 13697 uint64_t Imm) { 13698 SmallPtrSet<const SDNode *, 32> Visited; 13699 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; 13700 13701 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) || 13702 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist)) 13703 return SDValue(); 13704 13705 MachineFunction &MF = DAG.getMachineFunction(); 13706 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 13707 13708 // The new operation has twice the width. 13709 MVT XLenVT = Subtarget.getXLenVT(); 13710 EVT MemVT = LSNode1->getMemoryVT(); 13711 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; 13712 MachineMemOperand *MMO = LSNode1->getMemOperand(); 13713 MachineMemOperand *NewMMO = MF.getMachineMemOperand( 13714 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 
8 : 16); 13715 13716 if (LSNode1->getOpcode() == ISD::LOAD) { 13717 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType(); 13718 unsigned Opcode; 13719 if (MemVT == MVT::i32) 13720 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; 13721 else 13722 Opcode = RISCVISD::TH_LDD; 13723 13724 SDValue Res = DAG.getMemIntrinsicNode( 13725 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), 13726 {LSNode1->getChain(), BasePtr, 13727 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 13728 NewMemVT, NewMMO); 13729 13730 SDValue Node1 = 13731 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1)); 13732 SDValue Node2 = 13733 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2)); 13734 13735 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode()); 13736 return Node1; 13737 } else { 13738 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; 13739 13740 SDValue Res = DAG.getMemIntrinsicNode( 13741 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), 13742 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), 13743 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 13744 NewMemVT, NewMMO); 13745 13746 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode()); 13747 return Res; 13748 } 13749 } 13750 13751 // Try to combine two adjacent loads/stores to a single pair instruction from 13752 // the XTHeadMemPair vendor extension. 13753 static SDValue performMemPairCombine(SDNode *N, 13754 TargetLowering::DAGCombinerInfo &DCI) { 13755 SelectionDAG &DAG = DCI.DAG; 13756 MachineFunction &MF = DAG.getMachineFunction(); 13757 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 13758 13759 // Target does not support load/store pair. 13760 if (!Subtarget.hasVendorXTHeadMemPair()) 13761 return SDValue(); 13762 13763 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N); 13764 EVT MemVT = LSNode1->getMemoryVT(); 13765 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; 13766 13767 // No volatile, indexed or atomic loads/stores. 13768 if (!LSNode1->isSimple() || LSNode1->isIndexed()) 13769 return SDValue(); 13770 13771 // Function to get a base + constant representation from a memory value. 13772 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { 13773 if (Ptr->getOpcode() == ISD::ADD) 13774 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) 13775 return {Ptr->getOperand(0), C1->getZExtValue()}; 13776 return {Ptr, 0}; 13777 }; 13778 13779 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum)); 13780 13781 SDValue Chain = N->getOperand(0); 13782 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); 13783 UI != UE; ++UI) { 13784 SDUse &Use = UI.getUse(); 13785 if (Use.getUser() != N && Use.getResNo() == 0 && 13786 Use.getUser()->getOpcode() == N->getOpcode()) { 13787 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser()); 13788 13789 // No volatile, indexed or atomic loads/stores. 13790 if (!LSNode2->isSimple() || LSNode2->isIndexed()) 13791 continue; 13792 13793 // Check if LSNode1 and LSNode2 have the same type and extension. 
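      // For example, a zero-extending i32 load can only be paired with another
      // zero-extending i32 load (forming th.lwud); mixing extension kinds or
      // memory types is rejected below.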
13794 if (LSNode1->getOpcode() == ISD::LOAD) 13795 if (cast<LoadSDNode>(LSNode2)->getExtensionType() != 13796 cast<LoadSDNode>(LSNode1)->getExtensionType()) 13797 continue; 13798 13799 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) 13800 continue; 13801 13802 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum)); 13803 13804 // Check if the base pointer is the same for both instruction. 13805 if (Base1 != Base2) 13806 continue; 13807 13808 // Check if the offsets match the XTHeadMemPair encoding contraints. 13809 bool Valid = false; 13810 if (MemVT == MVT::i32) { 13811 // Check for adjacent i32 values and a 2-bit index. 13812 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1)) 13813 Valid = true; 13814 } else if (MemVT == MVT::i64) { 13815 // Check for adjacent i64 values and a 2-bit index. 13816 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1)) 13817 Valid = true; 13818 } 13819 13820 if (!Valid) 13821 continue; 13822 13823 // Try to combine. 13824 if (SDValue Res = 13825 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1)) 13826 return Res; 13827 } 13828 } 13829 13830 return SDValue(); 13831 } 13832 13833 // Fold 13834 // (fp_to_int (froundeven X)) -> fcvt X, rne 13835 // (fp_to_int (ftrunc X)) -> fcvt X, rtz 13836 // (fp_to_int (ffloor X)) -> fcvt X, rdn 13837 // (fp_to_int (fceil X)) -> fcvt X, rup 13838 // (fp_to_int (fround X)) -> fcvt X, rmm 13839 // (fp_to_int (frint X)) -> fcvt X 13840 static SDValue performFP_TO_INTCombine(SDNode *N, 13841 TargetLowering::DAGCombinerInfo &DCI, 13842 const RISCVSubtarget &Subtarget) { 13843 SelectionDAG &DAG = DCI.DAG; 13844 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13845 MVT XLenVT = Subtarget.getXLenVT(); 13846 13847 SDValue Src = N->getOperand(0); 13848 13849 // Don't do this for strict-fp Src. 13850 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 13851 return SDValue(); 13852 13853 // Ensure the FP type is legal. 13854 if (!TLI.isTypeLegal(Src.getValueType())) 13855 return SDValue(); 13856 13857 // Don't do this for f16 with Zfhmin and not Zfh. 13858 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 13859 return SDValue(); 13860 13861 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 13862 // If the result is invalid, we didn't find a foldable instruction. 13863 if (FRM == RISCVFPRndMode::Invalid) 13864 return SDValue(); 13865 13866 SDLoc DL(N); 13867 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; 13868 EVT VT = N->getValueType(0); 13869 13870 if (VT.isVector() && TLI.isTypeLegal(VT)) { 13871 MVT SrcVT = Src.getSimpleValueType(); 13872 MVT SrcContainerVT = SrcVT; 13873 MVT ContainerVT = VT.getSimpleVT(); 13874 SDValue XVal = Src.getOperand(0); 13875 13876 // For widening and narrowing conversions we just combine it into a 13877 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. 
They 13878 // end up getting lowered to their appropriate pseudo instructions based on 13879 // their operand types 13880 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || 13881 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) 13882 return SDValue(); 13883 13884 // Make fixed-length vectors scalable first 13885 if (SrcVT.isFixedLengthVector()) { 13886 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 13887 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); 13888 ContainerVT = 13889 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 13890 } 13891 13892 auto [Mask, VL] = 13893 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); 13894 13895 SDValue FpToInt; 13896 if (FRM == RISCVFPRndMode::RTZ) { 13897 // Use the dedicated trunc static rounding mode if we're truncating so we 13898 // don't need to generate calls to fsrmi/fsrm 13899 unsigned Opc = 13900 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 13901 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 13902 } else if (FRM == RISCVFPRndMode::DYN) { 13903 unsigned Opc = 13904 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; 13905 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 13906 } else { 13907 unsigned Opc = 13908 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; 13909 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, 13910 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 13911 } 13912 13913 // If converted from fixed-length to scalable, convert back 13914 if (VT.isFixedLengthVector()) 13915 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); 13916 13917 return FpToInt; 13918 } 13919 13920 // Only handle XLen or i32 types. Other types narrower than XLen will 13921 // eventually be legalized to XLenVT. 13922 if (VT != MVT::i32 && VT != XLenVT) 13923 return SDValue(); 13924 13925 unsigned Opc; 13926 if (VT == XLenVT) 13927 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 13928 else 13929 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 13930 13931 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), 13932 DAG.getTargetConstant(FRM, DL, XLenVT)); 13933 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); 13934 } 13935 13936 // Fold 13937 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 13938 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 13939 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 13940 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 13941 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 13942 // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) 13943 static SDValue performFP_TO_INT_SATCombine(SDNode *N, 13944 TargetLowering::DAGCombinerInfo &DCI, 13945 const RISCVSubtarget &Subtarget) { 13946 SelectionDAG &DAG = DCI.DAG; 13947 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13948 MVT XLenVT = Subtarget.getXLenVT(); 13949 13950 // Only handle XLen types. Other types narrower than XLen will eventually be 13951 // legalized to XLenVT. 13952 EVT DstVT = N->getValueType(0); 13953 if (DstVT != XLenVT) 13954 return SDValue(); 13955 13956 SDValue Src = N->getOperand(0); 13957 13958 // Don't do this for strict-fp Src. 13959 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 13960 return SDValue(); 13961 13962 // Ensure the FP type is also legal. 
13963 if (!TLI.isTypeLegal(Src.getValueType())) 13964 return SDValue(); 13965 13966 // Don't do this for f16 with Zfhmin and not Zfh. 13967 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 13968 return SDValue(); 13969 13970 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 13971 13972 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 13973 if (FRM == RISCVFPRndMode::Invalid) 13974 return SDValue(); 13975 13976 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; 13977 13978 unsigned Opc; 13979 if (SatVT == DstVT) 13980 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 13981 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 13982 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 13983 else 13984 return SDValue(); 13985 // FIXME: Support other SatVTs by clamping before or after the conversion. 13986 13987 Src = Src.getOperand(0); 13988 13989 SDLoc DL(N); 13990 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, 13991 DAG.getTargetConstant(FRM, DL, XLenVT)); 13992 13993 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 13994 // extend. 13995 if (Opc == RISCVISD::FCVT_WU_RV64) 13996 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 13997 13998 // RISC-V FP-to-int conversions saturate to the destination register size, but 13999 // don't produce 0 for nan. 14000 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 14001 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 14002 } 14003 14004 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 14005 // smaller than XLenVT. 14006 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, 14007 const RISCVSubtarget &Subtarget) { 14008 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 14009 14010 SDValue Src = N->getOperand(0); 14011 if (Src.getOpcode() != ISD::BSWAP) 14012 return SDValue(); 14013 14014 EVT VT = N->getValueType(0); 14015 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || 14016 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) 14017 return SDValue(); 14018 14019 SDLoc DL(N); 14020 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); 14021 } 14022 14023 // Convert from one FMA opcode to another based on whether we are negating the 14024 // multiply result and/or the accumulator. 14025 // NOTE: Only supports RVV operations with VL. 14026 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { 14027 // Negating the multiply result changes ADD<->SUB and toggles 'N'. 14028 if (NegMul) { 14029 // clang-format off 14030 switch (Opcode) { 14031 default: llvm_unreachable("Unexpected opcode"); 14032 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 14033 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 14034 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 14035 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 14036 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 14037 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 14038 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 14039 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 14040 } 14041 // clang-format on 14042 } 14043 14044 // Negating the accumulator changes ADD<->SUB. 
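  // For example, with the accumulator negated, vfmadd (a * b + c) becomes
  // vfmsub (a * b - c).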
14045 if (NegAcc) { 14046 // clang-format off 14047 switch (Opcode) { 14048 default: llvm_unreachable("Unexpected opcode"); 14049 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 14050 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 14051 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 14052 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 14053 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 14054 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 14055 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 14056 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 14057 } 14058 // clang-format on 14059 } 14060 14061 return Opcode; 14062 } 14063 14064 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { 14065 // Fold FNEG_VL into FMA opcodes. 14066 // The first operand of strict-fp is chain. 14067 unsigned Offset = N->isTargetStrictFPOpcode(); 14068 SDValue A = N->getOperand(0 + Offset); 14069 SDValue B = N->getOperand(1 + Offset); 14070 SDValue C = N->getOperand(2 + Offset); 14071 SDValue Mask = N->getOperand(3 + Offset); 14072 SDValue VL = N->getOperand(4 + Offset); 14073 14074 auto invertIfNegative = [&Mask, &VL](SDValue &V) { 14075 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && 14076 V.getOperand(2) == VL) { 14077 // Return the negated input. 14078 V = V.getOperand(0); 14079 return true; 14080 } 14081 14082 return false; 14083 }; 14084 14085 bool NegA = invertIfNegative(A); 14086 bool NegB = invertIfNegative(B); 14087 bool NegC = invertIfNegative(C); 14088 14089 // If no operands are negated, we're done. 14090 if (!NegA && !NegB && !NegC) 14091 return SDValue(); 14092 14093 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); 14094 if (N->isTargetStrictFPOpcode()) 14095 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(), 14096 {N->getOperand(0), A, B, C, Mask, VL}); 14097 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, 14098 VL); 14099 } 14100 14101 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, 14102 const RISCVSubtarget &Subtarget) { 14103 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) 14104 return V; 14105 14106 if (N->getValueType(0).isScalableVector() && 14107 N->getValueType(0).getVectorElementType() == MVT::f32 && 14108 (Subtarget.hasVInstructionsF16Minimal() && 14109 !Subtarget.hasVInstructionsF16())) { 14110 return SDValue(); 14111 } 14112 14113 // FIXME: Ignore strict opcodes for now. 14114 if (N->isTargetStrictFPOpcode()) 14115 return SDValue(); 14116 14117 // Try to form widening FMA. 14118 SDValue Op0 = N->getOperand(0); 14119 SDValue Op1 = N->getOperand(1); 14120 SDValue Mask = N->getOperand(3); 14121 SDValue VL = N->getOperand(4); 14122 14123 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 14124 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 14125 return SDValue(); 14126 14127 // TODO: Refactor to handle more complex cases similar to 14128 // combineBinOp_VLToVWBinOp_VL. 14129 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 14130 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 14131 return SDValue(); 14132 14133 // Check the mask and VL are the same. 
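  // The FP_EXTEND_VL inputs must be governed by the same mask and VL as the
  // FMA itself so that folding them into a single widening FMA is
  // lane-for-lane equivalent.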
14134 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 14135 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 14136 return SDValue(); 14137 14138 unsigned NewOpc; 14139 switch (N->getOpcode()) { 14140 default: 14141 llvm_unreachable("Unexpected opcode"); 14142 case RISCVISD::VFMADD_VL: 14143 NewOpc = RISCVISD::VFWMADD_VL; 14144 break; 14145 case RISCVISD::VFNMSUB_VL: 14146 NewOpc = RISCVISD::VFWNMSUB_VL; 14147 break; 14148 case RISCVISD::VFNMADD_VL: 14149 NewOpc = RISCVISD::VFWNMADD_VL; 14150 break; 14151 case RISCVISD::VFMSUB_VL: 14152 NewOpc = RISCVISD::VFWMSUB_VL; 14153 break; 14154 } 14155 14156 Op0 = Op0.getOperand(0); 14157 Op1 = Op1.getOperand(0); 14158 14159 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1, 14160 N->getOperand(2), Mask, VL); 14161 } 14162 14163 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG, 14164 const RISCVSubtarget &Subtarget) { 14165 if (N->getValueType(0).isScalableVector() && 14166 N->getValueType(0).getVectorElementType() == MVT::f32 && 14167 (Subtarget.hasVInstructionsF16Minimal() && 14168 !Subtarget.hasVInstructionsF16())) { 14169 return SDValue(); 14170 } 14171 14172 // FIXME: Ignore strict opcodes for now. 14173 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode"); 14174 14175 // Try to form widening multiply. 14176 SDValue Op0 = N->getOperand(0); 14177 SDValue Op1 = N->getOperand(1); 14178 SDValue Merge = N->getOperand(2); 14179 SDValue Mask = N->getOperand(3); 14180 SDValue VL = N->getOperand(4); 14181 14182 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 14183 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 14184 return SDValue(); 14185 14186 // TODO: Refactor to handle more complex cases similar to 14187 // combineBinOp_VLToVWBinOp_VL. 14188 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 14189 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 14190 return SDValue(); 14191 14192 // Check the mask and VL are the same. 14193 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 14194 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 14195 return SDValue(); 14196 14197 Op0 = Op0.getOperand(0); 14198 Op1 = Op1.getOperand(0); 14199 14200 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0, 14201 Op1, Merge, Mask, VL); 14202 } 14203 14204 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG, 14205 const RISCVSubtarget &Subtarget) { 14206 if (N->getValueType(0).isScalableVector() && 14207 N->getValueType(0).getVectorElementType() == MVT::f32 && 14208 (Subtarget.hasVInstructionsF16Minimal() && 14209 !Subtarget.hasVInstructionsF16())) { 14210 return SDValue(); 14211 } 14212 14213 SDValue Op0 = N->getOperand(0); 14214 SDValue Op1 = N->getOperand(1); 14215 SDValue Merge = N->getOperand(2); 14216 SDValue Mask = N->getOperand(3); 14217 SDValue VL = N->getOperand(4); 14218 14219 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL; 14220 14221 // Look for foldable FP_EXTENDS. 14222 bool Op0IsExtend = 14223 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL && 14224 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0))); 14225 bool Op1IsExtend = 14226 (Op0 == Op1 && Op0IsExtend) || 14227 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse()); 14228 14229 // Check the mask and VL. 14230 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)) 14231 Op0IsExtend = false; 14232 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)) 14233 Op1IsExtend = false; 14234 14235 // Canonicalize. 
14236 if (!Op1IsExtend) { 14237 // Sub requires at least operand 1 to be an extend. 14238 if (!IsAdd) 14239 return SDValue(); 14240 14241 // Add is commutable, if the other operand is foldable, swap them. 14242 if (!Op0IsExtend) 14243 return SDValue(); 14244 14245 std::swap(Op0, Op1); 14246 std::swap(Op0IsExtend, Op1IsExtend); 14247 } 14248 14249 // Op1 is a foldable extend. Op0 might be foldable. 14250 Op1 = Op1.getOperand(0); 14251 if (Op0IsExtend) 14252 Op0 = Op0.getOperand(0); 14253 14254 unsigned Opc; 14255 if (IsAdd) 14256 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL; 14257 else 14258 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL; 14259 14260 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask, 14261 VL); 14262 } 14263 14264 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 14265 const RISCVSubtarget &Subtarget) { 14266 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); 14267 14268 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) 14269 return SDValue(); 14270 14271 if (!isa<ConstantSDNode>(N->getOperand(1))) 14272 return SDValue(); 14273 uint64_t ShAmt = N->getConstantOperandVal(1); 14274 if (ShAmt > 32) 14275 return SDValue(); 14276 14277 SDValue N0 = N->getOperand(0); 14278 14279 // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> 14280 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of 14281 // SLLIW+SRAIW. SLLI+SRAI have compressed forms. 14282 if (ShAmt < 32 && 14283 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && 14284 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && 14285 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && 14286 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 14287 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); 14288 if (LShAmt < 32) { 14289 SDLoc ShlDL(N0.getOperand(0)); 14290 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, 14291 N0.getOperand(0).getOperand(0), 14292 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); 14293 SDLoc DL(N); 14294 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, 14295 DAG.getConstant(ShAmt + 32, DL, MVT::i64)); 14296 } 14297 } 14298 14299 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 14300 // FIXME: Should this be a generic combine? There's a similar combine on X86. 14301 // 14302 // Also try these folds where an add or sub is in the middle. 14303 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) 14304 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) 14305 SDValue Shl; 14306 ConstantSDNode *AddC = nullptr; 14307 14308 // We might have an ADD or SUB between the SRA and SHL. 14309 bool IsAdd = N0.getOpcode() == ISD::ADD; 14310 if ((IsAdd || N0.getOpcode() == ISD::SUB)) { 14311 // Other operand needs to be a constant we can modify. 14312 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0)); 14313 if (!AddC) 14314 return SDValue(); 14315 14316 // AddC needs to have at least 32 trailing zeros. 14317 if (AddC->getAPIntValue().countr_zero() < 32) 14318 return SDValue(); 14319 14320 // All users should be a shift by constant less than or equal to 32. This 14321 // ensures we'll do this optimization for each of them to produce an 14322 // add/sub+sext_inreg they can all share. 
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          U->getConstantOperandVal(1) > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}

// Invert (and/or (setcc X, Y, cc), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z)
// if the result is used as the condition of a br_cc or select_cc we can
// invert, inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}

// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // An arithmetic right shift always preserves the sign bit, so the shift can
  // be omitted here.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ?
ISD::SETGE : ISD::SETLT; 14488 CC = DAG.getCondCode(CCVal); 14489 14490 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; 14491 LHS = LHS0.getOperand(0); 14492 if (ShAmt != 0) 14493 LHS = 14494 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), 14495 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 14496 return true; 14497 } 14498 } 14499 } 14500 14501 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. 14502 // This can occur when legalizing some floating point comparisons. 14503 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 14504 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 14505 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 14506 CC = DAG.getCondCode(CCVal); 14507 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 14508 return true; 14509 } 14510 14511 if (isNullConstant(RHS)) { 14512 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { 14513 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 14514 CC = DAG.getCondCode(CCVal); 14515 LHS = NewCond; 14516 return true; 14517 } 14518 } 14519 14520 return false; 14521 } 14522 14523 // Fold 14524 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). 14525 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). 14526 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). 14527 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). 14528 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, 14529 SDValue TrueVal, SDValue FalseVal, 14530 bool Swapped) { 14531 bool Commutative = true; 14532 unsigned Opc = TrueVal.getOpcode(); 14533 switch (Opc) { 14534 default: 14535 return SDValue(); 14536 case ISD::SHL: 14537 case ISD::SRA: 14538 case ISD::SRL: 14539 case ISD::SUB: 14540 Commutative = false; 14541 break; 14542 case ISD::ADD: 14543 case ISD::OR: 14544 case ISD::XOR: 14545 break; 14546 } 14547 14548 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) 14549 return SDValue(); 14550 14551 unsigned OpToFold; 14552 if (FalseVal == TrueVal.getOperand(0)) 14553 OpToFold = 0; 14554 else if (Commutative && FalseVal == TrueVal.getOperand(1)) 14555 OpToFold = 1; 14556 else 14557 return SDValue(); 14558 14559 EVT VT = N->getValueType(0); 14560 SDLoc DL(N); 14561 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); 14562 EVT OtherOpVT = OtherOp->getValueType(0); 14563 SDValue IdentityOperand = 14564 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); 14565 if (!Commutative) 14566 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT); 14567 assert(IdentityOperand && "No identity operand!"); 14568 14569 if (Swapped) 14570 std::swap(OtherOp, IdentityOperand); 14571 SDValue NewSel = 14572 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand); 14573 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); 14574 } 14575 14576 // This tries to get rid of `select` and `icmp` that are being used to handle 14577 // `Targets` that do not support `cttz(0)`/`ctlz(0)`. 14578 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { 14579 SDValue Cond = N->getOperand(0); 14580 14581 // This represents either CTTZ or CTLZ instruction. 
14582 SDValue CountZeroes; 14583 14584 SDValue ValOnZero; 14585 14586 if (Cond.getOpcode() != ISD::SETCC) 14587 return SDValue(); 14588 14589 if (!isNullConstant(Cond->getOperand(1))) 14590 return SDValue(); 14591 14592 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); 14593 if (CCVal == ISD::CondCode::SETEQ) { 14594 CountZeroes = N->getOperand(2); 14595 ValOnZero = N->getOperand(1); 14596 } else if (CCVal == ISD::CondCode::SETNE) { 14597 CountZeroes = N->getOperand(1); 14598 ValOnZero = N->getOperand(2); 14599 } else { 14600 return SDValue(); 14601 } 14602 14603 if (CountZeroes.getOpcode() == ISD::TRUNCATE || 14604 CountZeroes.getOpcode() == ISD::ZERO_EXTEND) 14605 CountZeroes = CountZeroes.getOperand(0); 14606 14607 if (CountZeroes.getOpcode() != ISD::CTTZ && 14608 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && 14609 CountZeroes.getOpcode() != ISD::CTLZ && 14610 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) 14611 return SDValue(); 14612 14613 if (!isNullConstant(ValOnZero)) 14614 return SDValue(); 14615 14616 SDValue CountZeroesArgument = CountZeroes->getOperand(0); 14617 if (Cond->getOperand(0) != CountZeroesArgument) 14618 return SDValue(); 14619 14620 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 14621 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), 14622 CountZeroes.getValueType(), CountZeroesArgument); 14623 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { 14624 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes), 14625 CountZeroes.getValueType(), CountZeroesArgument); 14626 } 14627 14628 unsigned BitWidth = CountZeroes.getValueSizeInBits(); 14629 SDValue BitWidthMinusOne = 14630 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); 14631 14632 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(), 14633 CountZeroes, BitWidthMinusOne); 14634 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); 14635 } 14636 14637 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, 14638 const RISCVSubtarget &Subtarget) { 14639 SDValue Cond = N->getOperand(0); 14640 SDValue True = N->getOperand(1); 14641 SDValue False = N->getOperand(2); 14642 SDLoc DL(N); 14643 EVT VT = N->getValueType(0); 14644 EVT CondVT = Cond.getValueType(); 14645 14646 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 14647 return SDValue(); 14648 14649 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate 14650 // BEXTI, where C is power of 2. 
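// Illustrative sketch (added; not from the original comment): for
//   (select (seteq (and a0, 0x800), 0), a1, a2)
// the inverted compare lets isel key the conditional selects directly off the
// extracted bit, roughly:
//   bexti     a3, a0, 11
//   czero.nez a1, a1, a3    // keep a1 only when the bit is clear
//   czero.eqz a2, a2, a3    // keep a2 only when the bit is set
//   or        a0, a1, a2
// Register choices and scheduling here are hypothetical.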
14651 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && 14652 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { 14653 SDValue LHS = Cond.getOperand(0); 14654 SDValue RHS = Cond.getOperand(1); 14655 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 14656 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && 14657 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) { 14658 const APInt &MaskVal = LHS.getConstantOperandAPInt(1); 14659 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12)) 14660 return DAG.getSelect(DL, VT, 14661 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), 14662 False, True); 14663 } 14664 } 14665 return SDValue(); 14666 } 14667 14668 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, 14669 const RISCVSubtarget &Subtarget) { 14670 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) 14671 return Folded; 14672 14673 if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) 14674 return V; 14675 14676 if (Subtarget.hasConditionalMoveFusion()) 14677 return SDValue(); 14678 14679 SDValue TrueVal = N->getOperand(1); 14680 SDValue FalseVal = N->getOperand(2); 14681 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) 14682 return V; 14683 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); 14684 } 14685 14686 /// If we have a build_vector where each lane is binop X, C, where C 14687 /// is a constant (but not necessarily the same constant on all lanes), 14688 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). 14689 /// We assume that materializing a constant build vector will be no more 14690 /// expensive that performing O(n) binops. 14691 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, 14692 const RISCVSubtarget &Subtarget, 14693 const RISCVTargetLowering &TLI) { 14694 SDLoc DL(N); 14695 EVT VT = N->getValueType(0); 14696 14697 assert(!VT.isScalableVector() && "unexpected build vector"); 14698 14699 if (VT.getVectorNumElements() == 1) 14700 return SDValue(); 14701 14702 const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); 14703 if (!TLI.isBinOp(Opcode)) 14704 return SDValue(); 14705 14706 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT)) 14707 return SDValue(); 14708 14709 SmallVector<SDValue> LHSOps; 14710 SmallVector<SDValue> RHSOps; 14711 for (SDValue Op : N->ops()) { 14712 if (Op.isUndef()) { 14713 // We can't form a divide or remainder from undef. 14714 if (!DAG.isSafeToSpeculativelyExecute(Opcode)) 14715 return SDValue(); 14716 14717 LHSOps.push_back(Op); 14718 RHSOps.push_back(Op); 14719 continue; 14720 } 14721 14722 // TODO: We can handle operations which have an neutral rhs value 14723 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track 14724 // of profit in a more explicit manner. 14725 if (Op.getOpcode() != Opcode || !Op.hasOneUse()) 14726 return SDValue(); 14727 14728 LHSOps.push_back(Op.getOperand(0)); 14729 if (!isa<ConstantSDNode>(Op.getOperand(1)) && 14730 !isa<ConstantFPSDNode>(Op.getOperand(1))) 14731 return SDValue(); 14732 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 14733 // have different LHS and RHS types. 
14734 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) 14735 return SDValue(); 14736 RHSOps.push_back(Op.getOperand(1)); 14737 } 14738 14739 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps), 14740 DAG.getBuildVector(VT, DL, RHSOps)); 14741 } 14742 14743 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, 14744 const RISCVSubtarget &Subtarget, 14745 const RISCVTargetLowering &TLI) { 14746 SDValue InVec = N->getOperand(0); 14747 SDValue InVal = N->getOperand(1); 14748 SDValue EltNo = N->getOperand(2); 14749 SDLoc DL(N); 14750 14751 EVT VT = InVec.getValueType(); 14752 if (VT.isScalableVector()) 14753 return SDValue(); 14754 14755 if (!InVec.hasOneUse()) 14756 return SDValue(); 14757 14758 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt 14759 // move the insert_vector_elts into the arms of the binop. Note that 14760 // the new RHS must be a constant. 14761 const unsigned InVecOpcode = InVec->getOpcode(); 14762 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) && 14763 InVal.hasOneUse()) { 14764 SDValue InVecLHS = InVec->getOperand(0); 14765 SDValue InVecRHS = InVec->getOperand(1); 14766 SDValue InValLHS = InVal->getOperand(0); 14767 SDValue InValRHS = InVal->getOperand(1); 14768 14769 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode())) 14770 return SDValue(); 14771 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS)) 14772 return SDValue(); 14773 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 14774 // have different LHS and RHS types. 14775 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType()) 14776 return SDValue(); 14777 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 14778 InVecLHS, InValLHS, EltNo); 14779 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 14780 InVecRHS, InValRHS, EltNo); 14781 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS); 14782 } 14783 14784 // Given insert_vector_elt (concat_vectors ...), InVal, Elt 14785 // move the insert_vector_elt to the source operand of the concat_vector. 14786 if (InVec.getOpcode() != ISD::CONCAT_VECTORS) 14787 return SDValue(); 14788 14789 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); 14790 if (!IndexC) 14791 return SDValue(); 14792 unsigned Elt = IndexC->getZExtValue(); 14793 14794 EVT ConcatVT = InVec.getOperand(0).getValueType(); 14795 if (ConcatVT.getVectorElementType() != InVal.getValueType()) 14796 return SDValue(); 14797 unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); 14798 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL, 14799 EltNo.getValueType()); 14800 14801 unsigned ConcatOpIdx = Elt / ConcatNumElts; 14802 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx); 14803 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT, 14804 ConcatOp, InVal, NewIdx); 14805 14806 SmallVector<SDValue> ConcatOps; 14807 ConcatOps.append(InVec->op_begin(), InVec->op_end()); 14808 ConcatOps[ConcatOpIdx] = ConcatOp; 14809 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); 14810 } 14811 14812 // If we're concatenating a series of vector loads like 14813 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 
14814 // Then we can turn this into a strided load by widening the vector elements 14815 // vlse32 p, stride=n 14816 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, 14817 const RISCVSubtarget &Subtarget, 14818 const RISCVTargetLowering &TLI) { 14819 SDLoc DL(N); 14820 EVT VT = N->getValueType(0); 14821 14822 // Only perform this combine on legal MVTs. 14823 if (!TLI.isTypeLegal(VT)) 14824 return SDValue(); 14825 14826 // TODO: Potentially extend this to scalable vectors 14827 if (VT.isScalableVector()) 14828 return SDValue(); 14829 14830 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0)); 14831 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) || 14832 !SDValue(BaseLd, 0).hasOneUse()) 14833 return SDValue(); 14834 14835 EVT BaseLdVT = BaseLd->getValueType(0); 14836 14837 // Go through the loads and check that they're strided 14838 SmallVector<LoadSDNode *> Lds; 14839 Lds.push_back(BaseLd); 14840 Align Align = BaseLd->getAlign(); 14841 for (SDValue Op : N->ops().drop_front()) { 14842 auto *Ld = dyn_cast<LoadSDNode>(Op); 14843 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || 14844 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) || 14845 Ld->getValueType(0) != BaseLdVT) 14846 return SDValue(); 14847 14848 Lds.push_back(Ld); 14849 14850 // The common alignment is the most restrictive (smallest) of all the loads 14851 Align = std::min(Align, Ld->getAlign()); 14852 } 14853 14854 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; 14855 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, 14856 LoadSDNode *Ld2) -> std::optional<PtrDiff> { 14857 // If the load ptrs can be decomposed into a common (Base + Index) with a 14858 // common constant stride, then return the constant stride. 14859 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); 14860 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); 14861 if (BIO1.equalBaseIndex(BIO2, DAG)) 14862 return {{BIO2.getOffset() - BIO1.getOffset(), false}}; 14863 14864 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) 14865 SDValue P1 = Ld1->getBasePtr(); 14866 SDValue P2 = Ld2->getBasePtr(); 14867 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) 14868 return {{P2.getOperand(1), false}}; 14869 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) 14870 return {{P1.getOperand(1), true}}; 14871 14872 return std::nullopt; 14873 }; 14874 14875 // Get the distance between the first and second loads 14876 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); 14877 if (!BaseDiff) 14878 return SDValue(); 14879 14880 // Check all the loads are the same distance apart 14881 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) 14882 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) 14883 return SDValue(); 14884 14885 // TODO: At this point, we've successfully matched a generalized gather 14886 // load. Maybe we should emit that, and then move the specialized 14887 // matchers above and below into a DAG combine? 14888 14889 // Get the widened scalar type, e.g. v4i8 -> i64 14890 unsigned WideScalarBitWidth = 14891 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); 14892 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth); 14893 14894 // Get the vector type for the strided load, e.g. 
4 x v4i8 -> v4i64 14895 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands()); 14896 if (!TLI.isTypeLegal(WideVecVT)) 14897 return SDValue(); 14898 14899 // Check that the operation is legal 14900 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) 14901 return SDValue(); 14902 14903 auto [StrideVariant, MustNegateStride] = *BaseDiff; 14904 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant) 14905 ? std::get<SDValue>(StrideVariant) 14906 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL, 14907 Lds[0]->getOffset().getValueType()); 14908 if (MustNegateStride) 14909 Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); 14910 14911 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); 14912 SDValue IntID = 14913 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, 14914 Subtarget.getXLenVT()); 14915 14916 SDValue AllOneMask = 14917 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, 14918 DAG.getConstant(1, DL, MVT::i1)); 14919 14920 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT), 14921 BaseLd->getBasePtr(), Stride, AllOneMask}; 14922 14923 uint64_t MemSize; 14924 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); 14925 ConstStride && ConstStride->getSExtValue() >= 0) 14926 // total size = (elsize * n) + (stride - elsize) * (n-1) 14927 // = elsize + stride * (n-1) 14928 MemSize = WideScalarVT.getSizeInBits() + 14929 ConstStride->getSExtValue() * (N->getNumOperands() - 1); 14930 else 14931 // If Stride isn't constant, then we can't know how much it will load 14932 MemSize = MemoryLocation::UnknownSize; 14933 14934 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 14935 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize, 14936 Align); 14937 14938 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, 14939 Ops, WideVecVT, MMO); 14940 for (SDValue Ld : N->ops()) 14941 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); 14942 14943 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad); 14944 } 14945 14946 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, 14947 const RISCVSubtarget &Subtarget) { 14948 14949 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); 14950 14951 if (N->getValueType(0).isFixedLengthVector()) 14952 return SDValue(); 14953 14954 SDValue Addend = N->getOperand(0); 14955 SDValue MulOp = N->getOperand(1); 14956 14957 if (N->getOpcode() == RISCVISD::ADD_VL) { 14958 SDValue AddMergeOp = N->getOperand(2); 14959 if (!AddMergeOp.isUndef()) 14960 return SDValue(); 14961 } 14962 14963 auto IsVWMulOpc = [](unsigned Opc) { 14964 switch (Opc) { 14965 case RISCVISD::VWMUL_VL: 14966 case RISCVISD::VWMULU_VL: 14967 case RISCVISD::VWMULSU_VL: 14968 return true; 14969 default: 14970 return false; 14971 } 14972 }; 14973 14974 if (!IsVWMulOpc(MulOp.getOpcode())) 14975 std::swap(Addend, MulOp); 14976 14977 if (!IsVWMulOpc(MulOp.getOpcode())) 14978 return SDValue(); 14979 14980 SDValue MulMergeOp = MulOp.getOperand(2); 14981 14982 if (!MulMergeOp.isUndef()) 14983 return SDValue(); 14984 14985 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, 14986 const RISCVSubtarget &Subtarget) { 14987 if (N->getOpcode() == ISD::ADD) { 14988 SDLoc DL(N); 14989 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, 14990 Subtarget); 14991 } 14992 return std::make_pair(N->getOperand(3), N->getOperand(4)); 14993 }(N, DAG, Subtarget); 14994 14995 SDValue MulMask = MulOp.getOperand(3); 14996 SDValue 
MulVL = MulOp.getOperand(4); 14997 14998 if (AddMask != MulMask || AddVL != MulVL) 14999 return SDValue(); 15000 15001 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; 15002 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, 15003 "Unexpected opcode after VWMACC_VL"); 15004 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, 15005 "Unexpected opcode after VWMACC_VL!"); 15006 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, 15007 "Unexpected opcode after VWMUL_VL!"); 15008 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, 15009 "Unexpected opcode after VWMUL_VL!"); 15010 15011 SDLoc DL(N); 15012 EVT VT = N->getValueType(0); 15013 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask, 15014 AddVL}; 15015 return DAG.getNode(Opc, DL, VT, Ops); 15016 } 15017 15018 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, 15019 ISD::MemIndexType &IndexType, 15020 RISCVTargetLowering::DAGCombinerInfo &DCI) { 15021 if (!DCI.isBeforeLegalize()) 15022 return false; 15023 15024 SelectionDAG &DAG = DCI.DAG; 15025 const MVT XLenVT = 15026 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); 15027 15028 const EVT IndexVT = Index.getValueType(); 15029 15030 // RISC-V indexed loads only support the "unsigned unscaled" addressing 15031 // mode, so anything else must be manually legalized. 15032 if (!isIndexTypeSigned(IndexType)) 15033 return false; 15034 15035 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 15036 // Any index legalization should first promote to XLenVT, so we don't lose 15037 // bits when scaling. This may create an illegal index type so we let 15038 // LLVM's legalization take care of the splitting. 15039 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 15040 Index = DAG.getNode(ISD::SIGN_EXTEND, DL, 15041 IndexVT.changeVectorElementType(XLenVT), Index); 15042 } 15043 IndexType = ISD::UNSIGNED_SCALED; 15044 return true; 15045 } 15046 15047 /// Match the index vector of a scatter or gather node as the shuffle mask 15048 /// which performs the rearrangement if possible. Will only match if 15049 /// all lanes are touched, and thus replacing the scatter or gather with 15050 /// a unit strided access and shuffle is legal. 15051 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, 15052 SmallVector<int> &ShuffleMask) { 15053 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) 15054 return false; 15055 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) 15056 return false; 15057 15058 const unsigned ElementSize = VT.getScalarStoreSize(); 15059 const unsigned NumElems = VT.getVectorNumElements(); 15060 15061 // Create the shuffle mask and check all bits active 15062 assert(ShuffleMask.empty()); 15063 BitVector ActiveLanes(NumElems); 15064 for (unsigned i = 0; i < Index->getNumOperands(); i++) { 15065 // TODO: We've found an active bit of UB, and could be 15066 // more aggressive here if desired. 15067 if (Index->getOperand(i)->isUndef()) 15068 return false; 15069 uint64_t C = Index->getConstantOperandVal(i); 15070 if (C % ElementSize != 0) 15071 return false; 15072 C = C / ElementSize; 15073 if (C >= NumElems) 15074 return false; 15075 ShuffleMask.push_back(C); 15076 ActiveLanes.set(C); 15077 } 15078 return ActiveLanes.all(); 15079 } 15080 15081 /// Match the index of a gather or scatter operation as an operation 15082 /// with twice the element width and half the number of elements. 
This is
15083 /// generally profitable (if legal) because these operations are linear
15084 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15085 /// come out ahead.
15086 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15087 Align BaseAlign, const RISCVSubtarget &ST) {
15088 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15089 return false;
15090 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
15091 return false;
15092
15093 // Attempt a doubling. If we can use an element type 4x or 8x in
15094 // size, this will happen via multiple iterations of the transform.
15095 const unsigned NumElems = VT.getVectorNumElements();
15096 if (NumElems % 2 != 0)
15097 return false;
15098
15099 const unsigned ElementSize = VT.getScalarStoreSize();
15100 const unsigned WiderElementSize = ElementSize * 2;
15101 if (WiderElementSize > ST.getELen()/8)
15102 return false;
15103
15104 if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
15105 return false;
15106
15107 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15108 // TODO: We've found an active bit of UB, and could be
15109 // more aggressive here if desired.
15110 if (Index->getOperand(i)->isUndef())
15111 return false;
15112 // TODO: This offset check is too strict if we support fully
15113 // misaligned memory operations.
15114 uint64_t C = Index->getConstantOperandVal(i);
15115 if (i % 2 == 0) {
15116 if (C % WiderElementSize != 0)
15117 return false;
15118 continue;
15119 }
15120 uint64_t Last = Index->getConstantOperandVal(i-1);
15121 if (C != Last + ElementSize)
15122 return false;
15123 }
15124 return true;
15125 }
15126
15127
15128 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15129 DAGCombinerInfo &DCI) const {
15130 SelectionDAG &DAG = DCI.DAG;
15131 const MVT XLenVT = Subtarget.getXLenVT();
15132 SDLoc DL(N);
15133
15134 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15135 // bits are demanded. N will be added to the Worklist if it was not deleted.
15136 // Caller should return SDValue(N, 0) if this returns true.
15137 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15138 SDValue Op = N->getOperand(OpNo);
15139 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15140 if (!SimplifyDemandedBits(Op, Mask, DCI))
15141 return false;
15142
15143 if (N->getOpcode() != ISD::DELETED_NODE)
15144 DCI.AddToWorklist(N);
15145 return true;
15146 };
15147
15148 switch (N->getOpcode()) {
15149 default:
15150 break;
15151 case RISCVISD::SplitF64: {
15152 SDValue Op0 = N->getOperand(0);
15153 // If the input to SplitF64 is just BuildPairF64 then the operation is
15154 // redundant. Instead, use BuildPairF64's operands directly.
15155 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15156 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15157
15158 if (Op0->isUndef()) {
15159 SDValue Lo = DAG.getUNDEF(MVT::i32);
15160 SDValue Hi = DAG.getUNDEF(MVT::i32);
15161 return DCI.CombineTo(N, Lo, Hi);
15162 }
15163
15164 // It's cheaper to materialise two 32-bit integers than to load a double
15165 // from the constant pool and transfer it to integer registers through the
15166 // stack.
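// For example (added illustration): the double 1.0 has the bit pattern
// 0x3FF0000000000000, so Lo becomes 0x0 and Hi becomes 0x3FF00000 (a single
// lui) instead of a constant-pool load plus a stack round trip.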
15167 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 15168 APInt V = C->getValueAPF().bitcastToAPInt(); 15169 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 15170 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 15171 return DCI.CombineTo(N, Lo, Hi); 15172 } 15173 15174 // This is a target-specific version of a DAGCombine performed in 15175 // DAGCombiner::visitBITCAST. It performs the equivalent of: 15176 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 15177 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 15178 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 15179 !Op0.getNode()->hasOneUse()) 15180 break; 15181 SDValue NewSplitF64 = 15182 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 15183 Op0.getOperand(0)); 15184 SDValue Lo = NewSplitF64.getValue(0); 15185 SDValue Hi = NewSplitF64.getValue(1); 15186 APInt SignBit = APInt::getSignMask(32); 15187 if (Op0.getOpcode() == ISD::FNEG) { 15188 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 15189 DAG.getConstant(SignBit, DL, MVT::i32)); 15190 return DCI.CombineTo(N, Lo, NewHi); 15191 } 15192 assert(Op0.getOpcode() == ISD::FABS); 15193 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 15194 DAG.getConstant(~SignBit, DL, MVT::i32)); 15195 return DCI.CombineTo(N, Lo, NewHi); 15196 } 15197 case RISCVISD::SLLW: 15198 case RISCVISD::SRAW: 15199 case RISCVISD::SRLW: 15200 case RISCVISD::RORW: 15201 case RISCVISD::ROLW: { 15202 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 15203 if (SimplifyDemandedLowBitsHelper(0, 32) || 15204 SimplifyDemandedLowBitsHelper(1, 5)) 15205 return SDValue(N, 0); 15206 15207 break; 15208 } 15209 case RISCVISD::CLZW: 15210 case RISCVISD::CTZW: { 15211 // Only the lower 32 bits of the first operand are read 15212 if (SimplifyDemandedLowBitsHelper(0, 32)) 15213 return SDValue(N, 0); 15214 break; 15215 } 15216 case RISCVISD::FMV_W_X_RV64: { 15217 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the 15218 // conversion is unnecessary and can be replaced with the 15219 // FMV_X_ANYEXTW_RV64 operand. 15220 SDValue Op0 = N->getOperand(0); 15221 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) 15222 return Op0.getOperand(0); 15223 break; 15224 } 15225 case RISCVISD::FMV_X_ANYEXTH: 15226 case RISCVISD::FMV_X_ANYEXTW_RV64: { 15227 SDLoc DL(N); 15228 SDValue Op0 = N->getOperand(0); 15229 MVT VT = N->getSimpleValueType(0); 15230 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 15231 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 15232 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 15233 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && 15234 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || 15235 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && 15236 Op0->getOpcode() == RISCVISD::FMV_H_X)) { 15237 assert(Op0.getOperand(0).getValueType() == VT && 15238 "Unexpected value type!"); 15239 return Op0.getOperand(0); 15240 } 15241 15242 // This is a target-specific version of a DAGCombine performed in 15243 // DAGCombiner::visitBITCAST. 
It performs the equivalent of: 15244 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 15245 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 15246 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 15247 !Op0.getNode()->hasOneUse()) 15248 break; 15249 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 15250 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; 15251 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); 15252 if (Op0.getOpcode() == ISD::FNEG) 15253 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 15254 DAG.getConstant(SignBit, DL, VT)); 15255 15256 assert(Op0.getOpcode() == ISD::FABS); 15257 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 15258 DAG.getConstant(~SignBit, DL, VT)); 15259 } 15260 case ISD::ADD: { 15261 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15262 return V; 15263 if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) 15264 return V; 15265 return performADDCombine(N, DAG, Subtarget); 15266 } 15267 case ISD::SUB: { 15268 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15269 return V; 15270 return performSUBCombine(N, DAG, Subtarget); 15271 } 15272 case ISD::AND: 15273 return performANDCombine(N, DCI, Subtarget); 15274 case ISD::OR: 15275 return performORCombine(N, DCI, Subtarget); 15276 case ISD::XOR: 15277 return performXORCombine(N, DAG, Subtarget); 15278 case ISD::MUL: 15279 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15280 return V; 15281 return performMULCombine(N, DAG); 15282 case ISD::FADD: 15283 case ISD::UMAX: 15284 case ISD::UMIN: 15285 case ISD::SMAX: 15286 case ISD::SMIN: 15287 case ISD::FMAXNUM: 15288 case ISD::FMINNUM: { 15289 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 15290 return V; 15291 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 15292 return V; 15293 return SDValue(); 15294 } 15295 case ISD::SETCC: 15296 return performSETCCCombine(N, DAG, Subtarget); 15297 case ISD::SIGN_EXTEND_INREG: 15298 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); 15299 case ISD::ZERO_EXTEND: 15300 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 15301 // type legalization. This is safe because fp_to_uint produces poison if 15302 // it overflows. 15303 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { 15304 SDValue Src = N->getOperand(0); 15305 if (Src.getOpcode() == ISD::FP_TO_UINT && 15306 isTypeLegal(Src.getOperand(0).getValueType())) 15307 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 15308 Src.getOperand(0)); 15309 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && 15310 isTypeLegal(Src.getOperand(1).getValueType())) { 15311 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 15312 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, 15313 Src.getOperand(0), Src.getOperand(1)); 15314 DCI.CombineTo(N, Res); 15315 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); 15316 DCI.recursivelyDeleteUnusedNodes(Src.getNode()); 15317 return SDValue(N, 0); // Return N so it doesn't get rechecked. 15318 } 15319 } 15320 return SDValue(); 15321 case RISCVISD::TRUNCATE_VECTOR_VL: { 15322 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) 15323 // This would be benefit for the cases where X and Y are both the same value 15324 // type of low precision vectors. 
Since the truncate would be lowered into 15325 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate 15326 // restriction, such pattern would be expanded into a series of "vsetvli" 15327 // and "vnsrl" instructions later to reach this point. 15328 auto IsTruncNode = [](SDValue V) { 15329 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) 15330 return false; 15331 SDValue VL = V.getOperand(2); 15332 auto *C = dyn_cast<ConstantSDNode>(VL); 15333 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand 15334 bool IsVLMAXForVMSET = (C && C->isAllOnes()) || 15335 (isa<RegisterSDNode>(VL) && 15336 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); 15337 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && 15338 IsVLMAXForVMSET; 15339 }; 15340 15341 SDValue Op = N->getOperand(0); 15342 15343 // We need to first find the inner level of TRUNCATE_VECTOR_VL node 15344 // to distinguish such pattern. 15345 while (IsTruncNode(Op)) { 15346 if (!Op.hasOneUse()) 15347 return SDValue(); 15348 Op = Op.getOperand(0); 15349 } 15350 15351 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { 15352 SDValue N0 = Op.getOperand(0); 15353 SDValue N1 = Op.getOperand(1); 15354 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && 15355 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { 15356 SDValue N00 = N0.getOperand(0); 15357 SDValue N10 = N1.getOperand(0); 15358 if (N00.getValueType().isVector() && 15359 N00.getValueType() == N10.getValueType() && 15360 N->getValueType(0) == N10.getValueType()) { 15361 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; 15362 SDValue SMin = DAG.getNode( 15363 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, 15364 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); 15365 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); 15366 } 15367 } 15368 } 15369 break; 15370 } 15371 case ISD::TRUNCATE: 15372 return performTRUNCATECombine(N, DAG, Subtarget); 15373 case ISD::SELECT: 15374 return performSELECTCombine(N, DAG, Subtarget); 15375 case RISCVISD::CZERO_EQZ: 15376 case RISCVISD::CZERO_NEZ: 15377 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1. 15378 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1. 15379 if (N->getOperand(1).getOpcode() == ISD::XOR && 15380 isOneConstant(N->getOperand(1).getOperand(1))) { 15381 SDValue Cond = N->getOperand(1).getOperand(0); 15382 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1); 15383 if (DAG.MaskedValueIsZero(Cond, Mask)) { 15384 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ 15385 ? RISCVISD::CZERO_NEZ 15386 : RISCVISD::CZERO_EQZ; 15387 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), 15388 N->getOperand(0), Cond); 15389 } 15390 } 15391 return SDValue(); 15392 15393 case RISCVISD::SELECT_CC: { 15394 // Transform 15395 SDValue LHS = N->getOperand(0); 15396 SDValue RHS = N->getOperand(1); 15397 SDValue CC = N->getOperand(2); 15398 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 15399 SDValue TrueV = N->getOperand(3); 15400 SDValue FalseV = N->getOperand(4); 15401 SDLoc DL(N); 15402 EVT VT = N->getValueType(0); 15403 15404 // If the True and False values are the same, we don't need a select_cc. 
15405 if (TrueV == FalseV) 15406 return TrueV; 15407 15408 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 15409 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 15410 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && 15411 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && 15412 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { 15413 if (CCVal == ISD::CondCode::SETGE) 15414 std::swap(TrueV, FalseV); 15415 15416 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); 15417 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); 15418 // Only handle simm12, if it is not in this range, it can be considered as 15419 // register. 15420 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && 15421 isInt<12>(TrueSImm - FalseSImm)) { 15422 SDValue SRA = 15423 DAG.getNode(ISD::SRA, DL, VT, LHS, 15424 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); 15425 SDValue AND = 15426 DAG.getNode(ISD::AND, DL, VT, SRA, 15427 DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); 15428 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); 15429 } 15430 15431 if (CCVal == ISD::CondCode::SETGE) 15432 std::swap(TrueV, FalseV); 15433 } 15434 15435 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 15436 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 15437 {LHS, RHS, CC, TrueV, FalseV}); 15438 15439 if (!Subtarget.hasConditionalMoveFusion()) { 15440 // (select c, -1, y) -> -c | y 15441 if (isAllOnesConstant(TrueV)) { 15442 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 15443 SDValue Neg = DAG.getNegative(C, DL, VT); 15444 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 15445 } 15446 // (select c, y, -1) -> -!c | y 15447 if (isAllOnesConstant(FalseV)) { 15448 SDValue C = 15449 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 15450 SDValue Neg = DAG.getNegative(C, DL, VT); 15451 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 15452 } 15453 15454 // (select c, 0, y) -> -!c & y 15455 if (isNullConstant(TrueV)) { 15456 SDValue C = 15457 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 15458 SDValue Neg = DAG.getNegative(C, DL, VT); 15459 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 15460 } 15461 // (select c, y, 0) -> -c & y 15462 if (isNullConstant(FalseV)) { 15463 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 15464 SDValue Neg = DAG.getNegative(C, DL, VT); 15465 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 15466 } 15467 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 15468 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 15469 if (((isOneConstant(FalseV) && LHS == TrueV && 15470 CCVal == ISD::CondCode::SETNE) || 15471 (isOneConstant(TrueV) && LHS == FalseV && 15472 CCVal == ISD::CondCode::SETEQ)) && 15473 isNullConstant(RHS)) { 15474 // freeze it to be safe. 15475 LHS = DAG.getFreeze(LHS); 15476 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ); 15477 return DAG.getNode(ISD::ADD, DL, VT, LHS, C); 15478 } 15479 } 15480 15481 // If both true/false are an xor with 1, pull through the select. 15482 // This can occur after op legalization if both operands are setccs that 15483 // require an xor to invert. 15484 // FIXME: Generalize to other binary ops with identical operand? 
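// Restated (added for clarity), the fold below is:
//   (select_cc lhs, rhs, cc, (xor A, 1), (xor B, 1))
//     -> (xor (select_cc lhs, rhs, cc, A, B), 1)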
15485 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && 15486 TrueV.getOperand(1) == FalseV.getOperand(1) && 15487 isOneConstant(TrueV.getOperand(1)) && 15488 TrueV.hasOneUse() && FalseV.hasOneUse()) { 15489 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC, 15490 TrueV.getOperand(0), FalseV.getOperand(0)); 15491 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1)); 15492 } 15493 15494 return SDValue(); 15495 } 15496 case RISCVISD::BR_CC: { 15497 SDValue LHS = N->getOperand(1); 15498 SDValue RHS = N->getOperand(2); 15499 SDValue CC = N->getOperand(3); 15500 SDLoc DL(N); 15501 15502 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 15503 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 15504 N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); 15505 15506 return SDValue(); 15507 } 15508 case ISD::BITREVERSE: 15509 return performBITREVERSECombine(N, DAG, Subtarget); 15510 case ISD::FP_TO_SINT: 15511 case ISD::FP_TO_UINT: 15512 return performFP_TO_INTCombine(N, DCI, Subtarget); 15513 case ISD::FP_TO_SINT_SAT: 15514 case ISD::FP_TO_UINT_SAT: 15515 return performFP_TO_INT_SATCombine(N, DCI, Subtarget); 15516 case ISD::FCOPYSIGN: { 15517 EVT VT = N->getValueType(0); 15518 if (!VT.isVector()) 15519 break; 15520 // There is a form of VFSGNJ which injects the negated sign of its second 15521 // operand. Try and bubble any FNEG up after the extend/round to produce 15522 // this optimized pattern. Avoid modifying cases where FP_ROUND and 15523 // TRUNC=1. 15524 SDValue In2 = N->getOperand(1); 15525 // Avoid cases where the extend/round has multiple uses, as duplicating 15526 // those is typically more expensive than removing a fneg. 15527 if (!In2.hasOneUse()) 15528 break; 15529 if (In2.getOpcode() != ISD::FP_EXTEND && 15530 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 15531 break; 15532 In2 = In2.getOperand(0); 15533 if (In2.getOpcode() != ISD::FNEG) 15534 break; 15535 SDLoc DL(N); 15536 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 15537 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 15538 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 15539 } 15540 case ISD::MGATHER: { 15541 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N); 15542 const EVT VT = N->getValueType(0); 15543 SDValue Index = MGN->getIndex(); 15544 SDValue ScaleOp = MGN->getScale(); 15545 ISD::MemIndexType IndexType = MGN->getIndexType(); 15546 assert(!MGN->isIndexScaled() && 15547 "Scaled gather/scatter should not be formed"); 15548 15549 SDLoc DL(N); 15550 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15551 return DAG.getMaskedGather( 15552 N->getVTList(), MGN->getMemoryVT(), DL, 15553 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 15554 MGN->getBasePtr(), Index, ScaleOp}, 15555 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 15556 15557 if (narrowIndex(Index, IndexType, DAG)) 15558 return DAG.getMaskedGather( 15559 N->getVTList(), MGN->getMemoryVT(), DL, 15560 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 15561 MGN->getBasePtr(), Index, ScaleOp}, 15562 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 15563 15564 if (Index.getOpcode() == ISD::BUILD_VECTOR && 15565 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { 15566 // The sequence will be XLenVT, not the type of Index. Tell 15567 // isSimpleVIDSequence this so we avoid overflow. 
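// Illustrative example (added): an index build_vector such as
// <16, 24, 32, 40> matches a VID sequence with Addend 16 and StepNumerator 8,
// so the gather below becomes a riscv_masked_strided_load from BasePtr + 16
// with a constant stride of 8.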
15568 if (std::optional<VIDSequence> SimpleVID = 15569 isSimpleVIDSequence(Index, Subtarget.getXLen()); 15570 SimpleVID && SimpleVID->StepDenominator == 1) { 15571 const int64_t StepNumerator = SimpleVID->StepNumerator; 15572 const int64_t Addend = SimpleVID->Addend; 15573 15574 // Note: We don't need to check alignment here since (by assumption 15575 // from the existance of the gather), our offsets must be sufficiently 15576 // aligned. 15577 15578 const EVT PtrVT = getPointerTy(DAG.getDataLayout()); 15579 assert(MGN->getBasePtr()->getValueType(0) == PtrVT); 15580 assert(IndexType == ISD::UNSIGNED_SCALED); 15581 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(), 15582 DAG.getConstant(Addend, DL, PtrVT)); 15583 15584 SDVTList VTs = DAG.getVTList({VT, MVT::Other}); 15585 SDValue IntID = 15586 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, 15587 XLenVT); 15588 SDValue Ops[] = 15589 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr, 15590 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()}; 15591 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, 15592 Ops, VT, MGN->getMemOperand()); 15593 } 15594 } 15595 15596 SmallVector<int> ShuffleMask; 15597 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 15598 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) { 15599 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(), 15600 MGN->getBasePtr(), DAG.getUNDEF(XLenVT), 15601 MGN->getMask(), DAG.getUNDEF(VT), 15602 MGN->getMemoryVT(), MGN->getMemOperand(), 15603 ISD::UNINDEXED, ISD::NON_EXTLOAD); 15604 SDValue Shuffle = 15605 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask); 15606 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL); 15607 } 15608 15609 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 15610 matchIndexAsWiderOp(VT, Index, MGN->getMask(), 15611 MGN->getMemOperand()->getBaseAlign(), Subtarget)) { 15612 SmallVector<SDValue> NewIndices; 15613 for (unsigned i = 0; i < Index->getNumOperands(); i += 2) 15614 NewIndices.push_back(Index.getOperand(i)); 15615 EVT IndexVT = Index.getValueType() 15616 .getHalfNumVectorElementsVT(*DAG.getContext()); 15617 Index = DAG.getBuildVector(IndexVT, DL, NewIndices); 15618 15619 unsigned ElementSize = VT.getScalarStoreSize(); 15620 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2); 15621 auto EltCnt = VT.getVectorElementCount(); 15622 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); 15623 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT, 15624 EltCnt.divideCoefficientBy(2)); 15625 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru()); 15626 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 15627 EltCnt.divideCoefficientBy(2)); 15628 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); 15629 15630 SDValue Gather = 15631 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, 15632 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), 15633 Index, ScaleOp}, 15634 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); 15635 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0)); 15636 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL); 15637 } 15638 break; 15639 } 15640 case ISD::MSCATTER:{ 15641 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N); 15642 SDValue Index = MSN->getIndex(); 15643 SDValue ScaleOp = MSN->getScale(); 15644 ISD::MemIndexType IndexType = MSN->getIndexType(); 15645 assert(!MSN->isIndexScaled() && 15646 "Scaled gather/scatter should not be formed"); 
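// Note (added summary): the scatter handling below mirrors the MGATHER case
// above: first legalize or narrow the index type, then try to rewrite the
// scatter as a shuffle feeding a unit-strided masked store when the indices
// touch every lane exactly once.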
15647 15648 SDLoc DL(N); 15649 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15650 return DAG.getMaskedScatter( 15651 N->getVTList(), MSN->getMemoryVT(), DL, 15652 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 15653 Index, ScaleOp}, 15654 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 15655 15656 if (narrowIndex(Index, IndexType, DAG)) 15657 return DAG.getMaskedScatter( 15658 N->getVTList(), MSN->getMemoryVT(), DL, 15659 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 15660 Index, ScaleOp}, 15661 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 15662 15663 EVT VT = MSN->getValue()->getValueType(0); 15664 SmallVector<int> ShuffleMask; 15665 if (!MSN->isTruncatingStore() && 15666 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) { 15667 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(), 15668 DAG.getUNDEF(VT), ShuffleMask); 15669 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(), 15670 DAG.getUNDEF(XLenVT), MSN->getMask(), 15671 MSN->getMemoryVT(), MSN->getMemOperand(), 15672 ISD::UNINDEXED, false); 15673 } 15674 break; 15675 } 15676 case ISD::VP_GATHER: { 15677 const auto *VPGN = dyn_cast<VPGatherSDNode>(N); 15678 SDValue Index = VPGN->getIndex(); 15679 SDValue ScaleOp = VPGN->getScale(); 15680 ISD::MemIndexType IndexType = VPGN->getIndexType(); 15681 assert(!VPGN->isIndexScaled() && 15682 "Scaled gather/scatter should not be formed"); 15683 15684 SDLoc DL(N); 15685 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15686 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 15687 {VPGN->getChain(), VPGN->getBasePtr(), Index, 15688 ScaleOp, VPGN->getMask(), 15689 VPGN->getVectorLength()}, 15690 VPGN->getMemOperand(), IndexType); 15691 15692 if (narrowIndex(Index, IndexType, DAG)) 15693 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 15694 {VPGN->getChain(), VPGN->getBasePtr(), Index, 15695 ScaleOp, VPGN->getMask(), 15696 VPGN->getVectorLength()}, 15697 VPGN->getMemOperand(), IndexType); 15698 15699 break; 15700 } 15701 case ISD::VP_SCATTER: { 15702 const auto *VPSN = dyn_cast<VPScatterSDNode>(N); 15703 SDValue Index = VPSN->getIndex(); 15704 SDValue ScaleOp = VPSN->getScale(); 15705 ISD::MemIndexType IndexType = VPSN->getIndexType(); 15706 assert(!VPSN->isIndexScaled() && 15707 "Scaled gather/scatter should not be formed"); 15708 15709 SDLoc DL(N); 15710 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15711 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 15712 {VPSN->getChain(), VPSN->getValue(), 15713 VPSN->getBasePtr(), Index, ScaleOp, 15714 VPSN->getMask(), VPSN->getVectorLength()}, 15715 VPSN->getMemOperand(), IndexType); 15716 15717 if (narrowIndex(Index, IndexType, DAG)) 15718 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 15719 {VPSN->getChain(), VPSN->getValue(), 15720 VPSN->getBasePtr(), Index, ScaleOp, 15721 VPSN->getMask(), VPSN->getVectorLength()}, 15722 VPSN->getMemOperand(), IndexType); 15723 break; 15724 } 15725 case RISCVISD::SRA_VL: 15726 case RISCVISD::SRL_VL: 15727 case RISCVISD::SHL_VL: { 15728 SDValue ShAmt = N->getOperand(1); 15729 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 15730 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 
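// That is (added restatement), the SPLAT_VECTOR_SPLIT_I64_VL of (Lo, Hi)
// feeding the shift amount can be replaced by a plain VMV_V_X_VL splat of
// just the Lo half.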
15731 SDLoc DL(N); 15732 SDValue VL = N->getOperand(4); 15733 EVT VT = N->getValueType(0); 15734 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 15735 ShAmt.getOperand(1), VL); 15736 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 15737 N->getOperand(2), N->getOperand(3), N->getOperand(4)); 15738 } 15739 break; 15740 } 15741 case ISD::SRA: 15742 if (SDValue V = performSRACombine(N, DAG, Subtarget)) 15743 return V; 15744 [[fallthrough]]; 15745 case ISD::SRL: 15746 case ISD::SHL: { 15747 SDValue ShAmt = N->getOperand(1); 15748 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 15749 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 15750 SDLoc DL(N); 15751 EVT VT = N->getValueType(0); 15752 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 15753 ShAmt.getOperand(1), 15754 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); 15755 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 15756 } 15757 break; 15758 } 15759 case RISCVISD::ADD_VL: 15760 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15761 return V; 15762 return combineToVWMACC(N, DAG, Subtarget); 15763 case RISCVISD::SUB_VL: 15764 case RISCVISD::VWADD_W_VL: 15765 case RISCVISD::VWADDU_W_VL: 15766 case RISCVISD::VWSUB_W_VL: 15767 case RISCVISD::VWSUBU_W_VL: 15768 case RISCVISD::MUL_VL: 15769 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); 15770 case RISCVISD::VFMADD_VL: 15771 case RISCVISD::VFNMADD_VL: 15772 case RISCVISD::VFMSUB_VL: 15773 case RISCVISD::VFNMSUB_VL: 15774 case RISCVISD::STRICT_VFMADD_VL: 15775 case RISCVISD::STRICT_VFNMADD_VL: 15776 case RISCVISD::STRICT_VFMSUB_VL: 15777 case RISCVISD::STRICT_VFNMSUB_VL: 15778 return performVFMADD_VLCombine(N, DAG, Subtarget); 15779 case RISCVISD::FMUL_VL: 15780 return performVFMUL_VLCombine(N, DAG, Subtarget); 15781 case RISCVISD::FADD_VL: 15782 case RISCVISD::FSUB_VL: 15783 return performFADDSUB_VLCombine(N, DAG, Subtarget); 15784 case ISD::LOAD: 15785 case ISD::STORE: { 15786 if (DCI.isAfterLegalizeDAG()) 15787 if (SDValue V = performMemPairCombine(N, DCI)) 15788 return V; 15789 15790 if (N->getOpcode() != ISD::STORE) 15791 break; 15792 15793 auto *Store = cast<StoreSDNode>(N); 15794 SDValue Chain = Store->getChain(); 15795 EVT MemVT = Store->getMemoryVT(); 15796 SDValue Val = Store->getValue(); 15797 SDLoc DL(N); 15798 15799 bool IsScalarizable = 15800 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) && 15801 Store->isSimple() && 15802 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && 15803 isPowerOf2_64(MemVT.getSizeInBits()) && 15804 MemVT.getSizeInBits() <= Subtarget.getXLen(); 15805 15806 // If sufficiently aligned we can scalarize stores of constant vectors of 15807 // any power-of-two size up to XLen bits, provided that they aren't too 15808 // expensive to materialize. 
15809 // vsetivli zero, 2, e8, m1, ta, ma 15810 // vmv.v.i v8, 4 15811 // vse64.v v8, (a0) 15812 // -> 15813 // li a1, 1028 15814 // sh a1, 0(a0) 15815 if (DCI.isBeforeLegalize() && IsScalarizable && 15816 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { 15817 // Get the constant vector bits 15818 APInt NewC(Val.getValueSizeInBits(), 0); 15819 uint64_t EltSize = Val.getScalarValueSizeInBits(); 15820 for (unsigned i = 0; i < Val.getNumOperands(); i++) { 15821 if (Val.getOperand(i).isUndef()) 15822 continue; 15823 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize), 15824 i * EltSize); 15825 } 15826 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 15827 15828 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, 15829 true) <= 2 && 15830 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15831 NewVT, *Store->getMemOperand())) { 15832 SDValue NewV = DAG.getConstant(NewC, DL, NewVT); 15833 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), 15834 Store->getPointerInfo(), Store->getOriginalAlign(), 15835 Store->getMemOperand()->getFlags()); 15836 } 15837 } 15838 15839 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 15840 // vsetivli zero, 2, e16, m1, ta, ma 15841 // vle16.v v8, (a0) 15842 // vse16.v v8, (a1) 15843 if (auto *L = dyn_cast<LoadSDNode>(Val); 15844 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && 15845 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) && 15846 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) && 15847 L->getMemoryVT() == MemVT) { 15848 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 15849 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15850 NewVT, *Store->getMemOperand()) && 15851 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15852 NewVT, *L->getMemOperand())) { 15853 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(), 15854 L->getPointerInfo(), L->getOriginalAlign(), 15855 L->getMemOperand()->getFlags()); 15856 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(), 15857 Store->getPointerInfo(), Store->getOriginalAlign(), 15858 Store->getMemOperand()->getFlags()); 15859 } 15860 } 15861 15862 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 15863 // vfmv.f.s is represented as extract element from 0. Match it late to avoid 15864 // any illegal types. 15865 if (Val.getOpcode() == RISCVISD::VMV_X_S || 15866 (DCI.isAfterLegalizeDAG() && 15867 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15868 isNullConstant(Val.getOperand(1)))) { 15869 SDValue Src = Val.getOperand(0); 15870 MVT VecVT = Src.getSimpleValueType(); 15871 // VecVT should be scalable and memory VT should match the element type. 15872 if (!Store->isIndexed() && VecVT.isScalableVector() && 15873 MemVT == VecVT.getVectorElementType()) { 15874 SDLoc DL(N); 15875 MVT MaskVT = getMaskTypeFor(VecVT); 15876 return DAG.getStoreVP( 15877 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), 15878 DAG.getConstant(1, DL, MaskVT), 15879 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, 15880 Store->getMemOperand(), Store->getAddressingMode(), 15881 Store->isTruncatingStore(), /*IsCompress*/ false); 15882 } 15883 } 15884 15885 break; 15886 } 15887 case ISD::SPLAT_VECTOR: { 15888 EVT VT = N->getValueType(0); 15889 // Only perform this combine on legal MVT types. 
15890 if (!isTypeLegal(VT)) 15891 break; 15892 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, 15893 DAG, Subtarget)) 15894 return Gather; 15895 break; 15896 } 15897 case ISD::BUILD_VECTOR: 15898 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) 15899 return V; 15900 break; 15901 case ISD::CONCAT_VECTORS: 15902 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) 15903 return V; 15904 break; 15905 case ISD::INSERT_VECTOR_ELT: 15906 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this)) 15907 return V; 15908 break; 15909 case RISCVISD::VFMV_V_F_VL: { 15910 const MVT VT = N->getSimpleValueType(0); 15911 SDValue Passthru = N->getOperand(0); 15912 SDValue Scalar = N->getOperand(1); 15913 SDValue VL = N->getOperand(2); 15914 15915 // If VL is 1, we can use vfmv.s.f. 15916 if (isOneConstant(VL)) 15917 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); 15918 break; 15919 } 15920 case RISCVISD::VMV_V_X_VL: { 15921 const MVT VT = N->getSimpleValueType(0); 15922 SDValue Passthru = N->getOperand(0); 15923 SDValue Scalar = N->getOperand(1); 15924 SDValue VL = N->getOperand(2); 15925 15926 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the 15927 // scalar input. 15928 unsigned ScalarSize = Scalar.getValueSizeInBits(); 15929 unsigned EltWidth = VT.getScalarSizeInBits(); 15930 if (ScalarSize > EltWidth && Passthru.isUndef()) 15931 if (SimplifyDemandedLowBitsHelper(1, EltWidth)) 15932 return SDValue(N, 0); 15933 15934 // If VL is 1 and the scalar value won't benefit from immediate, we can 15935 // use vmv.s.x. 15936 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 15937 if (isOneConstant(VL) && 15938 (!Const || Const->isZero() || 15939 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5))) 15940 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); 15941 15942 break; 15943 } 15944 case RISCVISD::VFMV_S_F_VL: { 15945 SDValue Src = N->getOperand(1); 15946 // Try to remove vector->scalar->vector if the scalar->vector is inserting 15947 // into an undef vector. 15948 // TODO: Could use a vslide or vmv.v.v for non-undef. 15949 if (N->getOperand(0).isUndef() && 15950 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15951 isNullConstant(Src.getOperand(1)) && 15952 Src.getOperand(0).getValueType().isScalableVector()) { 15953 EVT VT = N->getValueType(0); 15954 EVT SrcVT = Src.getOperand(0).getValueType(); 15955 assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); 15956 // Widths match, just return the original vector. 15957 if (SrcVT == VT) 15958 return Src.getOperand(0); 15959 // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 15960 } 15961 [[fallthrough]]; 15962 } 15963 case RISCVISD::VMV_S_X_VL: { 15964 const MVT VT = N->getSimpleValueType(0); 15965 SDValue Passthru = N->getOperand(0); 15966 SDValue Scalar = N->getOperand(1); 15967 SDValue VL = N->getOperand(2); 15968 15969 // Use M1 or smaller to avoid over constraining register allocation 15970 const MVT M1VT = getLMUL1VT(VT); 15971 if (M1VT.bitsLT(VT)) { 15972 SDValue M1Passthru = 15973 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru, 15974 DAG.getVectorIdxConstant(0, DL)); 15975 SDValue Result = 15976 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL); 15977 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result, 15978 DAG.getConstant(0, DL, XLenVT)); 15979 return Result; 15980 } 15981 15982 // We use a vmv.v.i if possible. 
We limit this to LMUL1. LMUL2 or 15983 // higher would involve overly constraining the register allocator for 15984 // no purpose. 15985 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 15986 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) && 15987 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef()) 15988 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 15989 15990 break; 15991 } 15992 case RISCVISD::VMV_X_S: { 15993 SDValue Vec = N->getOperand(0); 15994 MVT VecVT = N->getOperand(0).getSimpleValueType(); 15995 const MVT M1VT = getLMUL1VT(VecVT); 15996 if (M1VT.bitsLT(VecVT)) { 15997 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 15998 DAG.getVectorIdxConstant(0, DL)); 15999 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); 16000 } 16001 break; 16002 } 16003 case ISD::INTRINSIC_VOID: 16004 case ISD::INTRINSIC_W_CHAIN: 16005 case ISD::INTRINSIC_WO_CHAIN: { 16006 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; 16007 unsigned IntNo = N->getConstantOperandVal(IntOpNo); 16008 switch (IntNo) { 16009 // By default we do not combine any intrinsic. 16010 default: 16011 return SDValue(); 16012 case Intrinsic::riscv_masked_strided_load: { 16013 MVT VT = N->getSimpleValueType(0); 16014 auto *Load = cast<MemIntrinsicSDNode>(N); 16015 SDValue PassThru = N->getOperand(2); 16016 SDValue Base = N->getOperand(3); 16017 SDValue Stride = N->getOperand(4); 16018 SDValue Mask = N->getOperand(5); 16019 16020 // If the stride is equal to the element size in bytes, we can use 16021 // a masked.load. 16022 const unsigned ElementSize = VT.getScalarStoreSize(); 16023 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); 16024 StrideC && StrideC->getZExtValue() == ElementSize) 16025 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, 16026 DAG.getUNDEF(XLenVT), Mask, PassThru, 16027 Load->getMemoryVT(), Load->getMemOperand(), 16028 ISD::UNINDEXED, ISD::NON_EXTLOAD); 16029 return SDValue(); 16030 } 16031 case Intrinsic::riscv_masked_strided_store: { 16032 auto *Store = cast<MemIntrinsicSDNode>(N); 16033 SDValue Value = N->getOperand(2); 16034 SDValue Base = N->getOperand(3); 16035 SDValue Stride = N->getOperand(4); 16036 SDValue Mask = N->getOperand(5); 16037 16038 // If the stride is equal to the element size in bytes, we can use 16039 // a masked.store. 16040 const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); 16041 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); 16042 StrideC && StrideC->getZExtValue() == ElementSize) 16043 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base, 16044 DAG.getUNDEF(XLenVT), Mask, 16045 Store->getMemoryVT(), Store->getMemOperand(), 16046 ISD::UNINDEXED, false); 16047 return SDValue(); 16048 } 16049 case Intrinsic::riscv_vcpop: 16050 case Intrinsic::riscv_vcpop_mask: 16051 case Intrinsic::riscv_vfirst: 16052 case Intrinsic::riscv_vfirst_mask: { 16053 SDValue VL = N->getOperand(2); 16054 if (IntNo == Intrinsic::riscv_vcpop_mask || 16055 IntNo == Intrinsic::riscv_vfirst_mask) 16056 VL = N->getOperand(3); 16057 if (!isNullConstant(VL)) 16058 return SDValue(); 16059 // If VL is 0, vcpop -> li 0, vfirst -> li -1. 
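// (Added note: with VL=0 no elements are active, so vcpop.m counts zero set
// bits and vfirst.m finds no set bit and returns -1.)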
16060 SDLoc DL(N); 16061 EVT VT = N->getValueType(0); 16062 if (IntNo == Intrinsic::riscv_vfirst || 16063 IntNo == Intrinsic::riscv_vfirst_mask) 16064 return DAG.getConstant(-1, DL, VT); 16065 return DAG.getConstant(0, DL, VT); 16066 } 16067 } 16068 } 16069 case ISD::BITCAST: { 16070 assert(Subtarget.useRVVForFixedLengthVectors()); 16071 SDValue N0 = N->getOperand(0); 16072 EVT VT = N->getValueType(0); 16073 EVT SrcVT = N0.getValueType(); 16074 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 16075 // type, widen both sides to avoid a trip through memory. 16076 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && 16077 VT.isScalarInteger()) { 16078 unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); 16079 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); 16080 Ops[0] = N0; 16081 SDLoc DL(N); 16082 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); 16083 N0 = DAG.getBitcast(MVT::i8, N0); 16084 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); 16085 } 16086 16087 return SDValue(); 16088 } 16089 } 16090 16091 return SDValue(); 16092 } 16093 16094 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( 16095 EVT XVT, unsigned KeptBits) const { 16096 // For vectors, we don't have a preference.. 16097 if (XVT.isVector()) 16098 return false; 16099 16100 if (XVT != MVT::i32 && XVT != MVT::i64) 16101 return false; 16102 16103 // We can use sext.w for RV64 or an srai 31 on RV32. 16104 if (KeptBits == 32 || KeptBits == 64) 16105 return true; 16106 16107 // With Zbb we can use sext.h/sext.b. 16108 return Subtarget.hasStdExtZbb() && 16109 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || 16110 KeptBits == 16); 16111 } 16112 16113 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 16114 const SDNode *N, CombineLevel Level) const { 16115 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 16116 N->getOpcode() == ISD::SRL) && 16117 "Expected shift op"); 16118 16119 // The following folds are only desirable if `(OP _, c1 << c2)` can be 16120 // materialised in fewer instructions than `(OP _, c1)`: 16121 // 16122 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 16123 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 16124 SDValue N0 = N->getOperand(0); 16125 EVT Ty = N0.getValueType(); 16126 if (Ty.isScalarInteger() && 16127 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 16128 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 16129 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 16130 if (C1 && C2) { 16131 const APInt &C1Int = C1->getAPIntValue(); 16132 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 16133 16134 // We can materialise `c1 << c2` into an add immediate, so it's "free", 16135 // and the combine should happen, to potentially allow further combines 16136 // later. 16137 if (ShiftedC1Int.getSignificantBits() <= 64 && 16138 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 16139 return true; 16140 16141 // We can materialise `c1` in an add immediate, so it's "free", and the 16142 // combine should be prevented. 16143 if (C1Int.getSignificantBits() <= 64 && 16144 isLegalAddImmediate(C1Int.getSExtValue())) 16145 return false; 16146 16147 // Neither constant will fit into an immediate, so find materialisation 16148 // costs. 
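// Illustrative example on RV64: for (shl (add x, 0x12345000), 8) the
// constant 0x12345000 is a single lui, while 0x1234500000 needs at least an
// extra shift to materialise, so the combine is blocked and the original
// (shl (add ...)) form is kept.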
16149 int C1Cost = 16150 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, 16151 /*CompressionCost*/ true); 16152 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 16153 ShiftedC1Int, Ty.getSizeInBits(), Subtarget, 16154 /*CompressionCost*/ true); 16155 16156 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 16157 // combine should be prevented. 16158 if (C1Cost < ShiftedC1Cost) 16159 return false; 16160 } 16161 } 16162 return true; 16163 } 16164 16165 bool RISCVTargetLowering::targetShrinkDemandedConstant( 16166 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 16167 TargetLoweringOpt &TLO) const { 16168 // Delay this optimization as late as possible. 16169 if (!TLO.LegalOps) 16170 return false; 16171 16172 EVT VT = Op.getValueType(); 16173 if (VT.isVector()) 16174 return false; 16175 16176 unsigned Opcode = Op.getOpcode(); 16177 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) 16178 return false; 16179 16180 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 16181 if (!C) 16182 return false; 16183 16184 const APInt &Mask = C->getAPIntValue(); 16185 16186 // Clear all non-demanded bits initially. 16187 APInt ShrunkMask = Mask & DemandedBits; 16188 16189 // Try to make a smaller immediate by setting undemanded bits. 16190 16191 APInt ExpandedMask = Mask | ~DemandedBits; 16192 16193 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 16194 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 16195 }; 16196 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { 16197 if (NewMask == Mask) 16198 return true; 16199 SDLoc DL(Op); 16200 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); 16201 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 16202 Op.getOperand(0), NewC); 16203 return TLO.CombineTo(Op, NewOp); 16204 }; 16205 16206 // If the shrunk mask fits in sign extended 12 bits, let the target 16207 // independent code apply it. 16208 if (ShrunkMask.isSignedIntN(12)) 16209 return false; 16210 16211 // And has a few special cases for zext. 16212 if (Opcode == ISD::AND) { 16213 // Preserve (and X, 0xffff), if zext.h exists use zext.h, 16214 // otherwise use SLLI + SRLI. 16215 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 16216 if (IsLegalMask(NewMask)) 16217 return UseMask(NewMask); 16218 16219 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 16220 if (VT == MVT::i64) { 16221 APInt NewMask = APInt(64, 0xffffffff); 16222 if (IsLegalMask(NewMask)) 16223 return UseMask(NewMask); 16224 } 16225 } 16226 16227 // For the remaining optimizations, we need to be able to make a negative 16228 // number through a combination of mask and undemanded bits. 16229 if (!ExpandedMask.isNegative()) 16230 return false; 16231 16232 // What is the fewest number of bits we need to represent the negative number. 16233 unsigned MinSignedBits = ExpandedMask.getSignificantBits(); 16234 16235 // Try to make a 12 bit negative immediate. If that fails try to make a 32 16236 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 16237 // If we can't create a simm12, we shouldn't change opaque constants. 
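// Worked example (illustrative, 32-bit): for (and X, 0x7fffff00) with only
// bits [11:0] demanded, ShrunkMask is 0xf00 and ExpandedMask is 0xffffff00.
// MinSignedBits is 9, so the new mask below becomes 0xffffff00 (-256),
// which folds into a single andi.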
16238 APInt NewMask = ShrunkMask; 16239 if (MinSignedBits <= 12) 16240 NewMask.setBitsFrom(11); 16241 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 16242 NewMask.setBitsFrom(31); 16243 else 16244 return false; 16245 16246 // Check that our new mask is a subset of the demanded mask. 16247 assert(IsLegalMask(NewMask)); 16248 return UseMask(NewMask); 16249 } 16250 16251 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { 16252 static const uint64_t GREVMasks[] = { 16253 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 16254 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 16255 16256 for (unsigned Stage = 0; Stage != 6; ++Stage) { 16257 unsigned Shift = 1 << Stage; 16258 if (ShAmt & Shift) { 16259 uint64_t Mask = GREVMasks[Stage]; 16260 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); 16261 if (IsGORC) 16262 Res |= x; 16263 x = Res; 16264 } 16265 } 16266 16267 return x; 16268 } 16269 16270 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 16271 KnownBits &Known, 16272 const APInt &DemandedElts, 16273 const SelectionDAG &DAG, 16274 unsigned Depth) const { 16275 unsigned BitWidth = Known.getBitWidth(); 16276 unsigned Opc = Op.getOpcode(); 16277 assert((Opc >= ISD::BUILTIN_OP_END || 16278 Opc == ISD::INTRINSIC_WO_CHAIN || 16279 Opc == ISD::INTRINSIC_W_CHAIN || 16280 Opc == ISD::INTRINSIC_VOID) && 16281 "Should use MaskedValueIsZero if you don't know whether Op" 16282 " is a target node!"); 16283 16284 Known.resetAll(); 16285 switch (Opc) { 16286 default: break; 16287 case RISCVISD::SELECT_CC: { 16288 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 16289 // If we don't know any bits, early out. 16290 if (Known.isUnknown()) 16291 break; 16292 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 16293 16294 // Only known if known in both the LHS and RHS. 16295 Known = Known.intersectWith(Known2); 16296 break; 16297 } 16298 case RISCVISD::CZERO_EQZ: 16299 case RISCVISD::CZERO_NEZ: 16300 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16301 // Result is either all zero or operand 0. We can propagate zeros, but not 16302 // ones. 16303 Known.One.clearAllBits(); 16304 break; 16305 case RISCVISD::REMUW: { 16306 KnownBits Known2; 16307 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16308 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16309 // We only care about the lower 32 bits. 16310 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 16311 // Restore the original width by sign extending. 16312 Known = Known.sext(BitWidth); 16313 break; 16314 } 16315 case RISCVISD::DIVUW: { 16316 KnownBits Known2; 16317 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16318 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16319 // We only care about the lower 32 bits. 16320 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 16321 // Restore the original width by sign extending. 16322 Known = Known.sext(BitWidth); 16323 break; 16324 } 16325 case RISCVISD::SLLW: { 16326 KnownBits Known2; 16327 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16328 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16329 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32)); 16330 // Restore the original width by sign extending. 
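// Like the other *W nodes, SLLW sign-extends its 32-bit result; e.g. if
// bit 31 of the shifted value is known zero, every higher bit becomes known
// zero after the sext below.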
16331 Known = Known.sext(BitWidth); 16332 break; 16333 } 16334 case RISCVISD::CTZW: { 16335 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16336 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 16337 unsigned LowBits = llvm::bit_width(PossibleTZ); 16338 Known.Zero.setBitsFrom(LowBits); 16339 break; 16340 } 16341 case RISCVISD::CLZW: { 16342 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16343 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 16344 unsigned LowBits = llvm::bit_width(PossibleLZ); 16345 Known.Zero.setBitsFrom(LowBits); 16346 break; 16347 } 16348 case RISCVISD::BREV8: 16349 case RISCVISD::ORC_B: { 16350 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a 16351 // control value of 7 is equivalent to brev8 and orc.b. 16352 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16353 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; 16354 // To compute zeros, we need to invert the value and invert it back after. 16355 Known.Zero = 16356 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); 16357 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); 16358 break; 16359 } 16360 case RISCVISD::READ_VLENB: { 16361 // We can use the minimum and maximum VLEN values to bound VLENB. We 16362 // know VLEN must be a power of two. 16363 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; 16364 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; 16365 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); 16366 Known.Zero.setLowBits(Log2_32(MinVLenB)); 16367 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); 16368 if (MaxVLenB == MinVLenB) 16369 Known.One.setBit(Log2_32(MinVLenB)); 16370 break; 16371 } 16372 case RISCVISD::FCLASS: { 16373 // fclass will only set one of the low 10 bits. 16374 Known.Zero.setBitsFrom(10); 16375 break; 16376 } 16377 case ISD::INTRINSIC_W_CHAIN: 16378 case ISD::INTRINSIC_WO_CHAIN: { 16379 unsigned IntNo = 16380 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); 16381 switch (IntNo) { 16382 default: 16383 // We can't do anything for most intrinsics. 16384 break; 16385 case Intrinsic::riscv_vsetvli: 16386 case Intrinsic::riscv_vsetvlimax: { 16387 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; 16388 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1); 16389 RISCVII::VLMUL VLMUL = 16390 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2)); 16391 unsigned SEW = RISCVVType::decodeVSEW(VSEW); 16392 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); 16393 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; 16394 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; 16395 16396 // Result of vsetvli must be not larger than AVL. 16397 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1))) 16398 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1)); 16399 16400 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; 16401 if (BitWidth > KnownZeroFirstBit) 16402 Known.Zero.setBitsFrom(KnownZeroFirstBit); 16403 break; 16404 } 16405 } 16406 break; 16407 } 16408 } 16409 } 16410 16411 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 16412 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 16413 unsigned Depth) const { 16414 switch (Op.getOpcode()) { 16415 default: 16416 break; 16417 case RISCVISD::SELECT_CC: { 16418 unsigned Tmp = 16419 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 16420 if (Tmp == 1) return 1; // Early out. 
16421 unsigned Tmp2 = 16422 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 16423 return std::min(Tmp, Tmp2); 16424 } 16425 case RISCVISD::CZERO_EQZ: 16426 case RISCVISD::CZERO_NEZ: 16427 // Output is either all zero or operand 0. We can propagate sign bit count 16428 // from operand 0. 16429 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 16430 case RISCVISD::ABSW: { 16431 // We expand this at isel to negw+max. The result will have 33 sign bits 16432 // if the input has at least 33 sign bits. 16433 unsigned Tmp = 16434 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 16435 if (Tmp < 33) return 1; 16436 return 33; 16437 } 16438 case RISCVISD::SLLW: 16439 case RISCVISD::SRAW: 16440 case RISCVISD::SRLW: 16441 case RISCVISD::DIVW: 16442 case RISCVISD::DIVUW: 16443 case RISCVISD::REMUW: 16444 case RISCVISD::ROLW: 16445 case RISCVISD::RORW: 16446 case RISCVISD::FCVT_W_RV64: 16447 case RISCVISD::FCVT_WU_RV64: 16448 case RISCVISD::STRICT_FCVT_W_RV64: 16449 case RISCVISD::STRICT_FCVT_WU_RV64: 16450 // TODO: As the result is sign-extended, this is conservatively correct. A 16451 // more precise answer could be calculated for SRAW depending on known 16452 // bits in the shift amount. 16453 return 33; 16454 case RISCVISD::VMV_X_S: { 16455 // The number of sign bits of the scalar result is computed by obtaining the 16456 // element type of the input vector operand, subtracting its width from the 16457 // XLEN, and then adding one (sign bit within the element type). If the 16458 // element type is wider than XLen, the least-significant XLEN bits are 16459 // taken. 16460 unsigned XLen = Subtarget.getXLen(); 16461 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); 16462 if (EltBits <= XLen) 16463 return XLen - EltBits + 1; 16464 break; 16465 } 16466 case ISD::INTRINSIC_W_CHAIN: { 16467 unsigned IntNo = Op.getConstantOperandVal(1); 16468 switch (IntNo) { 16469 default: 16470 break; 16471 case Intrinsic::riscv_masked_atomicrmw_xchg_i64: 16472 case Intrinsic::riscv_masked_atomicrmw_add_i64: 16473 case Intrinsic::riscv_masked_atomicrmw_sub_i64: 16474 case Intrinsic::riscv_masked_atomicrmw_nand_i64: 16475 case Intrinsic::riscv_masked_atomicrmw_max_i64: 16476 case Intrinsic::riscv_masked_atomicrmw_min_i64: 16477 case Intrinsic::riscv_masked_atomicrmw_umax_i64: 16478 case Intrinsic::riscv_masked_atomicrmw_umin_i64: 16479 case Intrinsic::riscv_masked_cmpxchg_i64: 16480 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated 16481 // narrow atomic operation. These are implemented using atomic 16482 // operations at the minimum supported atomicrmw/cmpxchg width whose 16483 // result is then sign extended to XLEN. With +A, the minimum width is 16484 // 32 for both 64 and 32. 16485 assert(Subtarget.getXLen() == 64); 16486 assert(getMinCmpXchgSizeInBits() == 32); 16487 assert(Subtarget.hasStdExtA()); 16488 return 33; 16489 } 16490 break; 16491 } 16492 } 16493 16494 return 1; 16495 } 16496 16497 const Constant * 16498 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { 16499 assert(Ld && "Unexpected null LoadSDNode"); 16500 if (!ISD::isNormalLoad(Ld)) 16501 return nullptr; 16502 16503 SDValue Ptr = Ld->getBasePtr(); 16504 16505 // Only constant pools with no offset are supported. 
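// The two address forms recognised below correspond to the usual lowerings
// of a constant-pool reference (illustrative):
//   (LLA cp)                    ; PC-relative auipc/addi
//   (ADD_LO (HI cp), cp)        ; absolute lui %hi(...) / %lo(...) pair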
16506 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { 16507 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); 16508 if (!CNode || CNode->isMachineConstantPoolEntry() || 16509 CNode->getOffset() != 0) 16510 return nullptr; 16511 16512 return CNode; 16513 }; 16514 16515 // Simple case, LLA. 16516 if (Ptr.getOpcode() == RISCVISD::LLA) { 16517 auto *CNode = GetSupportedConstantPool(Ptr); 16518 if (!CNode || CNode->getTargetFlags() != 0) 16519 return nullptr; 16520 16521 return CNode->getConstVal(); 16522 } 16523 16524 // Look for a HI and ADD_LO pair. 16525 if (Ptr.getOpcode() != RISCVISD::ADD_LO || 16526 Ptr.getOperand(0).getOpcode() != RISCVISD::HI) 16527 return nullptr; 16528 16529 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); 16530 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); 16531 16532 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || 16533 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) 16534 return nullptr; 16535 16536 if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) 16537 return nullptr; 16538 16539 return CNodeLo->getConstVal(); 16540 } 16541 16542 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 16543 MachineBasicBlock *BB) { 16544 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 16545 16546 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 16547 // Should the count have wrapped while it was being read, we need to try 16548 // again. 16549 // ... 16550 // read: 16551 // rdcycleh x3 # load high word of cycle 16552 // rdcycle x2 # load low word of cycle 16553 // rdcycleh x4 # load high word of cycle 16554 // bne x3, x4, read # check if high word reads match, otherwise try again 16555 // ... 16556 16557 MachineFunction &MF = *BB->getParent(); 16558 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 16559 MachineFunction::iterator It = ++BB->getIterator(); 16560 16561 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 16562 MF.insert(It, LoopMBB); 16563 16564 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 16565 MF.insert(It, DoneMBB); 16566 16567 // Transfer the remainder of BB and its successor edges to DoneMBB. 
16568 DoneMBB->splice(DoneMBB->begin(), BB, 16569 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 16570 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 16571 16572 BB->addSuccessor(LoopMBB); 16573 16574 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 16575 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 16576 Register LoReg = MI.getOperand(0).getReg(); 16577 Register HiReg = MI.getOperand(1).getReg(); 16578 DebugLoc DL = MI.getDebugLoc(); 16579 16580 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 16581 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 16582 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 16583 .addReg(RISCV::X0); 16584 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 16585 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 16586 .addReg(RISCV::X0); 16587 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 16588 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 16589 .addReg(RISCV::X0); 16590 16591 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 16592 .addReg(HiReg) 16593 .addReg(ReadAgainReg) 16594 .addMBB(LoopMBB); 16595 16596 LoopMBB->addSuccessor(LoopMBB); 16597 LoopMBB->addSuccessor(DoneMBB); 16598 16599 MI.eraseFromParent(); 16600 16601 return DoneMBB; 16602 } 16603 16604 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 16605 MachineBasicBlock *BB, 16606 const RISCVSubtarget &Subtarget) { 16607 assert((MI.getOpcode() == RISCV::SplitF64Pseudo || 16608 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) && 16609 "Unexpected instruction"); 16610 16611 MachineFunction &MF = *BB->getParent(); 16612 DebugLoc DL = MI.getDebugLoc(); 16613 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 16614 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 16615 Register LoReg = MI.getOperand(0).getReg(); 16616 Register HiReg = MI.getOperand(1).getReg(); 16617 Register SrcReg = MI.getOperand(2).getReg(); 16618 16619 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX 16620 ? &RISCV::GPRPairRegClass 16621 : &RISCV::FPR64RegClass; 16622 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 16623 16624 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 16625 RI, Register()); 16626 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 16627 MachineMemOperand *MMOLo = 16628 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 16629 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 16630 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 16631 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 16632 .addFrameIndex(FI) 16633 .addImm(0) 16634 .addMemOperand(MMOLo); 16635 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 16636 .addFrameIndex(FI) 16637 .addImm(4) 16638 .addMemOperand(MMOHi); 16639 MI.eraseFromParent(); // The pseudo instruction is gone now. 
16640 return BB; 16641 } 16642 16643 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 16644 MachineBasicBlock *BB, 16645 const RISCVSubtarget &Subtarget) { 16646 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo || 16647 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) && 16648 "Unexpected instruction"); 16649 16650 MachineFunction &MF = *BB->getParent(); 16651 DebugLoc DL = MI.getDebugLoc(); 16652 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 16653 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 16654 Register DstReg = MI.getOperand(0).getReg(); 16655 Register LoReg = MI.getOperand(1).getReg(); 16656 Register HiReg = MI.getOperand(2).getReg(); 16657 16658 const TargetRegisterClass *DstRC = 16659 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass 16660 : &RISCV::FPR64RegClass; 16661 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 16662 16663 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 16664 MachineMemOperand *MMOLo = 16665 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 16666 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 16667 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 16668 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 16669 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 16670 .addFrameIndex(FI) 16671 .addImm(0) 16672 .addMemOperand(MMOLo); 16673 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 16674 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 16675 .addFrameIndex(FI) 16676 .addImm(4) 16677 .addMemOperand(MMOHi); 16678 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); 16679 MI.eraseFromParent(); // The pseudo instruction is gone now. 16680 return BB; 16681 } 16682 16683 static bool isSelectPseudo(MachineInstr &MI) { 16684 switch (MI.getOpcode()) { 16685 default: 16686 return false; 16687 case RISCV::Select_GPR_Using_CC_GPR: 16688 case RISCV::Select_FPR16_Using_CC_GPR: 16689 case RISCV::Select_FPR16INX_Using_CC_GPR: 16690 case RISCV::Select_FPR32_Using_CC_GPR: 16691 case RISCV::Select_FPR32INX_Using_CC_GPR: 16692 case RISCV::Select_FPR64_Using_CC_GPR: 16693 case RISCV::Select_FPR64INX_Using_CC_GPR: 16694 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 16695 return true; 16696 } 16697 } 16698 16699 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, 16700 unsigned RelOpcode, unsigned EqOpcode, 16701 const RISCVSubtarget &Subtarget) { 16702 DebugLoc DL = MI.getDebugLoc(); 16703 Register DstReg = MI.getOperand(0).getReg(); 16704 Register Src1Reg = MI.getOperand(1).getReg(); 16705 Register Src2Reg = MI.getOperand(2).getReg(); 16706 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 16707 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); 16708 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 16709 16710 // Save the current FFLAGS. 16711 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); 16712 16713 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) 16714 .addReg(Src1Reg) 16715 .addReg(Src2Reg); 16716 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 16717 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 16718 16719 // Restore the FFLAGS. 16720 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 16721 .addReg(SavedFFlags, RegState::Kill); 16722 16723 // Issue a dummy FEQ opcode to raise exception for signaling NaNs. 
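// Taken together, a quiet compare such as PseudoQuietFLT_S expands to
// roughly:
//   frflags tmp              ; save FFLAGS
//   flt.s   dst, src1, src2  ; may set NV for any NaN operand
//   fsflags tmp              ; restore FFLAGS, discarding that NV
//   feq.s   x0, src1, src2   ; quiet compare: raises NV only for sNaNs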
16724 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) 16725 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) 16726 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); 16727 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 16728 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); 16729 16730 // Erase the pseudoinstruction. 16731 MI.eraseFromParent(); 16732 return BB; 16733 } 16734 16735 static MachineBasicBlock * 16736 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, 16737 MachineBasicBlock *ThisMBB, 16738 const RISCVSubtarget &Subtarget) { 16739 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) 16740 // Without this, custom-inserter would have generated: 16741 // 16742 // A 16743 // | \ 16744 // | B 16745 // | / 16746 // C 16747 // | \ 16748 // | D 16749 // | / 16750 // E 16751 // 16752 // A: X = ...; Y = ... 16753 // B: empty 16754 // C: Z = PHI [X, A], [Y, B] 16755 // D: empty 16756 // E: PHI [X, C], [Z, D] 16757 // 16758 // If we lower both Select_FPRX_ in a single step, we can instead generate: 16759 // 16760 // A 16761 // | \ 16762 // | C 16763 // | /| 16764 // |/ | 16765 // | | 16766 // | D 16767 // | / 16768 // E 16769 // 16770 // A: X = ...; Y = ... 16771 // D: empty 16772 // E: PHI [X, A], [X, C], [Y, D] 16773 16774 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 16775 const DebugLoc &DL = First.getDebugLoc(); 16776 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 16777 MachineFunction *F = ThisMBB->getParent(); 16778 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB); 16779 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB); 16780 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 16781 MachineFunction::iterator It = ++ThisMBB->getIterator(); 16782 F->insert(It, FirstMBB); 16783 F->insert(It, SecondMBB); 16784 F->insert(It, SinkMBB); 16785 16786 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. 16787 SinkMBB->splice(SinkMBB->begin(), ThisMBB, 16788 std::next(MachineBasicBlock::iterator(First)), 16789 ThisMBB->end()); 16790 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); 16791 16792 // Fallthrough block for ThisMBB. 16793 ThisMBB->addSuccessor(FirstMBB); 16794 // Fallthrough block for FirstMBB. 16795 FirstMBB->addSuccessor(SecondMBB); 16796 ThisMBB->addSuccessor(SinkMBB); 16797 FirstMBB->addSuccessor(SinkMBB); 16798 // This is fallthrough. 16799 SecondMBB->addSuccessor(SinkMBB); 16800 16801 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm()); 16802 Register FLHS = First.getOperand(1).getReg(); 16803 Register FRHS = First.getOperand(2).getReg(); 16804 // Insert appropriate branch. 16805 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) 16806 .addReg(FLHS) 16807 .addReg(FRHS) 16808 .addMBB(SinkMBB); 16809 16810 Register SLHS = Second.getOperand(1).getReg(); 16811 Register SRHS = Second.getOperand(2).getReg(); 16812 Register Op1Reg4 = First.getOperand(4).getReg(); 16813 Register Op1Reg5 = First.getOperand(5).getReg(); 16814 16815 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); 16816 // Insert appropriate branch. 
16817 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) 16818 .addReg(SLHS) 16819 .addReg(SRHS) 16820 .addMBB(SinkMBB); 16821 16822 Register DestReg = Second.getOperand(0).getReg(); 16823 Register Op2Reg4 = Second.getOperand(4).getReg(); 16824 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) 16825 .addReg(Op2Reg4) 16826 .addMBB(ThisMBB) 16827 .addReg(Op1Reg4) 16828 .addMBB(FirstMBB) 16829 .addReg(Op1Reg5) 16830 .addMBB(SecondMBB); 16831 16832 // Now remove the Select_FPRX_s. 16833 First.eraseFromParent(); 16834 Second.eraseFromParent(); 16835 return SinkMBB; 16836 } 16837 16838 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 16839 MachineBasicBlock *BB, 16840 const RISCVSubtarget &Subtarget) { 16841 // To "insert" Select_* instructions, we actually have to insert the triangle 16842 // control-flow pattern. The incoming instructions know the destination vreg 16843 // to set, the condition code register to branch on, the true/false values to 16844 // select between, and the condcode to use to select the appropriate branch. 16845 // 16846 // We produce the following control flow: 16847 // HeadMBB 16848 // | \ 16849 // | IfFalseMBB 16850 // | / 16851 // TailMBB 16852 // 16853 // When we find a sequence of selects we attempt to optimize their emission 16854 // by sharing the control flow. Currently we only handle cases where we have 16855 // multiple selects with the exact same condition (same LHS, RHS and CC). 16856 // The selects may be interleaved with other instructions if the other 16857 // instructions meet some requirements we deem safe: 16858 // - They are not pseudo instructions. 16859 // - They are debug instructions. Otherwise, 16860 // - They do not have side-effects, do not access memory and their inputs do 16861 // not depend on the results of the select pseudo-instructions. 16862 // The TrueV/FalseV operands of the selects cannot depend on the result of 16863 // previous selects in the sequence. 16864 // These conditions could be further relaxed. See the X86 target for a 16865 // related approach and more information. 16866 // 16867 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) 16868 // is checked here and handled by a separate function - 16869 // EmitLoweredCascadedSelect. 
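// For example, two selects that share the same condition, such as
//   %a = select i1 %c, i32 %x, i32 %y
//   %b = select i1 %c, i32 %p, i32 %q
// are emitted with a single conditional branch, and both results become
// PHIs in TailMBB.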
16870 Register LHS = MI.getOperand(1).getReg(); 16871 Register RHS = MI.getOperand(2).getReg(); 16872 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 16873 16874 SmallVector<MachineInstr *, 4> SelectDebugValues; 16875 SmallSet<Register, 4> SelectDests; 16876 SelectDests.insert(MI.getOperand(0).getReg()); 16877 16878 MachineInstr *LastSelectPseudo = &MI; 16879 auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); 16880 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && 16881 Next->getOpcode() == MI.getOpcode() && 16882 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && 16883 Next->getOperand(5).isKill()) { 16884 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); 16885 } 16886 16887 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 16888 SequenceMBBI != E; ++SequenceMBBI) { 16889 if (SequenceMBBI->isDebugInstr()) 16890 continue; 16891 if (isSelectPseudo(*SequenceMBBI)) { 16892 if (SequenceMBBI->getOperand(1).getReg() != LHS || 16893 SequenceMBBI->getOperand(2).getReg() != RHS || 16894 SequenceMBBI->getOperand(3).getImm() != CC || 16895 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 16896 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 16897 break; 16898 LastSelectPseudo = &*SequenceMBBI; 16899 SequenceMBBI->collectDebugValues(SelectDebugValues); 16900 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 16901 continue; 16902 } 16903 if (SequenceMBBI->hasUnmodeledSideEffects() || 16904 SequenceMBBI->mayLoadOrStore() || 16905 SequenceMBBI->usesCustomInsertionHook()) 16906 break; 16907 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 16908 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 16909 })) 16910 break; 16911 } 16912 16913 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 16914 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 16915 DebugLoc DL = MI.getDebugLoc(); 16916 MachineFunction::iterator I = ++BB->getIterator(); 16917 16918 MachineBasicBlock *HeadMBB = BB; 16919 MachineFunction *F = BB->getParent(); 16920 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 16921 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 16922 16923 F->insert(I, IfFalseMBB); 16924 F->insert(I, TailMBB); 16925 16926 // Transfer debug instructions associated with the selects to TailMBB. 16927 for (MachineInstr *DebugInstr : SelectDebugValues) { 16928 TailMBB->push_back(DebugInstr->removeFromParent()); 16929 } 16930 16931 // Move all instructions after the sequence to TailMBB. 16932 TailMBB->splice(TailMBB->end(), HeadMBB, 16933 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 16934 // Update machine-CFG edges by transferring all successors of the current 16935 // block to the new block which will contain the Phi nodes for the selects. 16936 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 16937 // Set the successors for HeadMBB. 16938 HeadMBB->addSuccessor(IfFalseMBB); 16939 HeadMBB->addSuccessor(TailMBB); 16940 16941 // Insert appropriate branch. 16942 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 16943 .addReg(LHS) 16944 .addReg(RHS) 16945 .addMBB(TailMBB); 16946 16947 // IfFalseMBB just falls through to TailMBB. 16948 IfFalseMBB->addSuccessor(TailMBB); 16949 16950 // Create PHIs for all of the select pseudo-instructions. 
16951 auto SelectMBBI = MI.getIterator(); 16952 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 16953 auto InsertionPoint = TailMBB->begin(); 16954 while (SelectMBBI != SelectEnd) { 16955 auto Next = std::next(SelectMBBI); 16956 if (isSelectPseudo(*SelectMBBI)) { 16957 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 16958 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 16959 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 16960 .addReg(SelectMBBI->getOperand(4).getReg()) 16961 .addMBB(HeadMBB) 16962 .addReg(SelectMBBI->getOperand(5).getReg()) 16963 .addMBB(IfFalseMBB); 16964 SelectMBBI->eraseFromParent(); 16965 } 16966 SelectMBBI = Next; 16967 } 16968 16969 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 16970 return TailMBB; 16971 } 16972 16973 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, 16974 MachineBasicBlock *BB, 16975 unsigned CVTXOpc, 16976 unsigned CVTFOpc) { 16977 DebugLoc DL = MI.getDebugLoc(); 16978 16979 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 16980 16981 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 16982 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); 16983 16984 // Save the old value of FFLAGS. 16985 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); 16986 16987 assert(MI.getNumOperands() == 7); 16988 16989 // Emit a VFCVT_X_F 16990 const TargetRegisterInfo *TRI = 16991 BB->getParent()->getSubtarget().getRegisterInfo(); 16992 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); 16993 Register Tmp = MRI.createVirtualRegister(RC); 16994 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) 16995 .add(MI.getOperand(1)) 16996 .add(MI.getOperand(2)) 16997 .add(MI.getOperand(3)) 16998 .add(MachineOperand::CreateImm(7)) // frm = DYN 16999 .add(MI.getOperand(4)) 17000 .add(MI.getOperand(5)) 17001 .add(MI.getOperand(6)) 17002 .add(MachineOperand::CreateReg(RISCV::FRM, 17003 /*IsDef*/ false, 17004 /*IsImp*/ true)); 17005 17006 // Emit a VFCVT_F_X 17007 BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) 17008 .add(MI.getOperand(0)) 17009 .add(MI.getOperand(1)) 17010 .addReg(Tmp) 17011 .add(MI.getOperand(3)) 17012 .add(MachineOperand::CreateImm(7)) // frm = DYN 17013 .add(MI.getOperand(4)) 17014 .add(MI.getOperand(5)) 17015 .add(MI.getOperand(6)) 17016 .add(MachineOperand::CreateReg(RISCV::FRM, 17017 /*IsDef*/ false, 17018 /*IsImp*/ true)); 17019 17020 // Restore FFLAGS. 17021 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 17022 .addReg(SavedFFLAGS, RegState::Kill); 17023 17024 // Erase the pseudoinstruction. 
17025 MI.eraseFromParent(); 17026 return BB; 17027 } 17028 17029 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, 17030 const RISCVSubtarget &Subtarget) { 17031 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; 17032 const TargetRegisterClass *RC; 17033 switch (MI.getOpcode()) { 17034 default: 17035 llvm_unreachable("Unexpected opcode"); 17036 case RISCV::PseudoFROUND_H: 17037 CmpOpc = RISCV::FLT_H; 17038 F2IOpc = RISCV::FCVT_W_H; 17039 I2FOpc = RISCV::FCVT_H_W; 17040 FSGNJOpc = RISCV::FSGNJ_H; 17041 FSGNJXOpc = RISCV::FSGNJX_H; 17042 RC = &RISCV::FPR16RegClass; 17043 break; 17044 case RISCV::PseudoFROUND_H_INX: 17045 CmpOpc = RISCV::FLT_H_INX; 17046 F2IOpc = RISCV::FCVT_W_H_INX; 17047 I2FOpc = RISCV::FCVT_H_W_INX; 17048 FSGNJOpc = RISCV::FSGNJ_H_INX; 17049 FSGNJXOpc = RISCV::FSGNJX_H_INX; 17050 RC = &RISCV::GPRF16RegClass; 17051 break; 17052 case RISCV::PseudoFROUND_S: 17053 CmpOpc = RISCV::FLT_S; 17054 F2IOpc = RISCV::FCVT_W_S; 17055 I2FOpc = RISCV::FCVT_S_W; 17056 FSGNJOpc = RISCV::FSGNJ_S; 17057 FSGNJXOpc = RISCV::FSGNJX_S; 17058 RC = &RISCV::FPR32RegClass; 17059 break; 17060 case RISCV::PseudoFROUND_S_INX: 17061 CmpOpc = RISCV::FLT_S_INX; 17062 F2IOpc = RISCV::FCVT_W_S_INX; 17063 I2FOpc = RISCV::FCVT_S_W_INX; 17064 FSGNJOpc = RISCV::FSGNJ_S_INX; 17065 FSGNJXOpc = RISCV::FSGNJX_S_INX; 17066 RC = &RISCV::GPRF32RegClass; 17067 break; 17068 case RISCV::PseudoFROUND_D: 17069 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 17070 CmpOpc = RISCV::FLT_D; 17071 F2IOpc = RISCV::FCVT_L_D; 17072 I2FOpc = RISCV::FCVT_D_L; 17073 FSGNJOpc = RISCV::FSGNJ_D; 17074 FSGNJXOpc = RISCV::FSGNJX_D; 17075 RC = &RISCV::FPR64RegClass; 17076 break; 17077 case RISCV::PseudoFROUND_D_INX: 17078 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 17079 CmpOpc = RISCV::FLT_D_INX; 17080 F2IOpc = RISCV::FCVT_L_D_INX; 17081 I2FOpc = RISCV::FCVT_D_L_INX; 17082 FSGNJOpc = RISCV::FSGNJ_D_INX; 17083 FSGNJXOpc = RISCV::FSGNJX_D_INX; 17084 RC = &RISCV::GPRRegClass; 17085 break; 17086 } 17087 17088 const BasicBlock *BB = MBB->getBasicBlock(); 17089 DebugLoc DL = MI.getDebugLoc(); 17090 MachineFunction::iterator I = ++MBB->getIterator(); 17091 17092 MachineFunction *F = MBB->getParent(); 17093 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); 17094 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); 17095 17096 F->insert(I, CvtMBB); 17097 F->insert(I, DoneMBB); 17098 // Move all instructions after the sequence to DoneMBB. 17099 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), 17100 MBB->end()); 17101 // Update machine-CFG edges by transferring all successors of the current 17102 // block to the new block which will contain the Phi nodes for the selects. 17103 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 17104 // Set the successors for MBB. 17105 MBB->addSuccessor(CvtMBB); 17106 MBB->addSuccessor(DoneMBB); 17107 17108 Register DstReg = MI.getOperand(0).getReg(); 17109 Register SrcReg = MI.getOperand(1).getReg(); 17110 Register MaxReg = MI.getOperand(2).getReg(); 17111 int64_t FRM = MI.getOperand(3).getImm(); 17112 17113 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 17114 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 17115 17116 Register FabsReg = MRI.createVirtualRegister(RC); 17117 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); 17118 17119 // Compare the FP value to the max value. 
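// Overall the FROUND pseudo expands to roughly (illustrative):
//   fabs = fsgnjx src, src
//   if (fabs < max)          ; max operand: magnitude beyond which the value
//                            ; is already integral; NaNs fail this compare
//                            ; and pass src through unchanged
//     i   = f2i(src, frm); f = i2f(i, frm)
//     res = fsgnj f, src     ; restore the original sign, e.g. for -0.5
//   else
//     res = src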
17120 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 17121 auto MIB = 17122 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); 17123 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17124 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17125 17126 // Insert branch. 17127 BuildMI(MBB, DL, TII.get(RISCV::BEQ)) 17128 .addReg(CmpReg) 17129 .addReg(RISCV::X0) 17130 .addMBB(DoneMBB); 17131 17132 CvtMBB->addSuccessor(DoneMBB); 17133 17134 // Convert to integer. 17135 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 17136 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); 17137 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17138 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17139 17140 // Convert back to FP. 17141 Register I2FReg = MRI.createVirtualRegister(RC); 17142 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); 17143 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17144 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17145 17146 // Restore the sign bit. 17147 Register CvtReg = MRI.createVirtualRegister(RC); 17148 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); 17149 17150 // Merge the results. 17151 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) 17152 .addReg(SrcReg) 17153 .addMBB(MBB) 17154 .addReg(CvtReg) 17155 .addMBB(CvtMBB); 17156 17157 MI.eraseFromParent(); 17158 return DoneMBB; 17159 } 17160 17161 MachineBasicBlock * 17162 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 17163 MachineBasicBlock *BB) const { 17164 switch (MI.getOpcode()) { 17165 default: 17166 llvm_unreachable("Unexpected instr type to insert"); 17167 case RISCV::ReadCycleWide: 17168 assert(!Subtarget.is64Bit() && 17169 "ReadCycleWrite is only to be used on riscv32"); 17170 return emitReadCycleWidePseudo(MI, BB); 17171 case RISCV::Select_GPR_Using_CC_GPR: 17172 case RISCV::Select_FPR16_Using_CC_GPR: 17173 case RISCV::Select_FPR16INX_Using_CC_GPR: 17174 case RISCV::Select_FPR32_Using_CC_GPR: 17175 case RISCV::Select_FPR32INX_Using_CC_GPR: 17176 case RISCV::Select_FPR64_Using_CC_GPR: 17177 case RISCV::Select_FPR64INX_Using_CC_GPR: 17178 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 17179 return emitSelectPseudo(MI, BB, Subtarget); 17180 case RISCV::BuildPairF64Pseudo: 17181 case RISCV::BuildPairF64Pseudo_INX: 17182 return emitBuildPairF64Pseudo(MI, BB, Subtarget); 17183 case RISCV::SplitF64Pseudo: 17184 case RISCV::SplitF64Pseudo_INX: 17185 return emitSplitF64Pseudo(MI, BB, Subtarget); 17186 case RISCV::PseudoQuietFLE_H: 17187 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); 17188 case RISCV::PseudoQuietFLE_H_INX: 17189 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); 17190 case RISCV::PseudoQuietFLT_H: 17191 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); 17192 case RISCV::PseudoQuietFLT_H_INX: 17193 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); 17194 case RISCV::PseudoQuietFLE_S: 17195 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); 17196 case RISCV::PseudoQuietFLE_S_INX: 17197 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); 17198 case RISCV::PseudoQuietFLT_S: 17199 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); 17200 case RISCV::PseudoQuietFLT_S_INX: 17201 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); 17202 case 
RISCV::PseudoQuietFLE_D: 17203 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); 17204 case RISCV::PseudoQuietFLE_D_INX: 17205 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); 17206 case RISCV::PseudoQuietFLE_D_IN32X: 17207 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, 17208 Subtarget); 17209 case RISCV::PseudoQuietFLT_D: 17210 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); 17211 case RISCV::PseudoQuietFLT_D_INX: 17212 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); 17213 case RISCV::PseudoQuietFLT_D_IN32X: 17214 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, 17215 Subtarget); 17216 17217 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: 17218 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, 17219 RISCV::PseudoVFCVT_F_X_V_M1_MASK); 17220 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: 17221 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK, 17222 RISCV::PseudoVFCVT_F_X_V_M2_MASK); 17223 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: 17224 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK, 17225 RISCV::PseudoVFCVT_F_X_V_M4_MASK); 17226 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: 17227 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK, 17228 RISCV::PseudoVFCVT_F_X_V_M8_MASK); 17229 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: 17230 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK, 17231 RISCV::PseudoVFCVT_F_X_V_MF2_MASK); 17232 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: 17233 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK, 17234 RISCV::PseudoVFCVT_F_X_V_MF4_MASK); 17235 case RISCV::PseudoFROUND_H: 17236 case RISCV::PseudoFROUND_H_INX: 17237 case RISCV::PseudoFROUND_S: 17238 case RISCV::PseudoFROUND_S_INX: 17239 case RISCV::PseudoFROUND_D: 17240 case RISCV::PseudoFROUND_D_INX: 17241 case RISCV::PseudoFROUND_D_IN32X: 17242 return emitFROUND(MI, BB, Subtarget); 17243 case TargetOpcode::STATEPOINT: 17244 case TargetOpcode::STACKMAP: 17245 case TargetOpcode::PATCHPOINT: 17246 if (!Subtarget.is64Bit()) 17247 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " 17248 "supported on 64-bit targets"); 17249 return emitPatchPoint(MI, BB); 17250 } 17251 } 17252 17253 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, 17254 SDNode *Node) const { 17255 // Add FRM dependency to any instructions with dynamic rounding mode. 17256 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); 17257 if (Idx < 0) { 17258 // Vector pseudos have FRM index indicated by TSFlags. 17259 Idx = RISCVII::getFRMOpNum(MI.getDesc()); 17260 if (Idx < 0) 17261 return; 17262 } 17263 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) 17264 return; 17265 // If the instruction already reads FRM, don't add another read. 17266 if (MI.readsRegister(RISCV::FRM)) 17267 return; 17268 MI.addOperand( 17269 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); 17270 } 17271 17272 // Calling Convention Implementation. 17273 // The expectations for frontend ABI lowering vary from target to target. 17274 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 17275 // details, but this is a longer term goal. For now, we simply try to keep the 17276 // role of the frontend as simple and well-defined as possible. 
The rules can 17277 // be summarised as: 17278 // * Never split up large scalar arguments. We handle them here. 17279 // * If a hardfloat calling convention is being used, and the struct may be 17280 // passed in a pair of registers (fp+fp, int+fp), and both registers are 17281 // available, then pass as two separate arguments. If either the GPRs or FPRs 17282 // are exhausted, then pass according to the rule below. 17283 // * If a struct could never be passed in registers or directly in a stack 17284 // slot (as it is larger than 2*XLEN and the floating point rules don't 17285 // apply), then pass it using a pointer with the byval attribute. 17286 // * If a struct is less than 2*XLEN, then coerce to either a two-element 17287 // word-sized array or a 2*XLEN scalar (depending on alignment). 17288 // * The frontend can determine whether a struct is returned by reference or 17289 // not based on its size and fields. If it will be returned by reference, the 17290 // frontend must modify the prototype so a pointer with the sret annotation is 17291 // passed as the first argument. This is not necessary for large scalar 17292 // returns. 17293 // * Struct return values and varargs should be coerced to structs containing 17294 // register-size fields in the same situations they would be for fixed 17295 // arguments. 17296 17297 static const MCPhysReg ArgFPR16s[] = { 17298 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 17299 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 17300 }; 17301 static const MCPhysReg ArgFPR32s[] = { 17302 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 17303 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 17304 }; 17305 static const MCPhysReg ArgFPR64s[] = { 17306 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 17307 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 17308 }; 17309 // This is an interim calling convention and it may be changed in the future. 17310 static const MCPhysReg ArgVRs[] = { 17311 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 17312 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 17313 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 17314 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 17315 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 17316 RISCV::V20M2, RISCV::V22M2}; 17317 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 17318 RISCV::V20M4}; 17319 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 17320 17321 ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { 17322 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except 17323 // the ILP32E ABI. 17324 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17325 RISCV::X13, RISCV::X14, RISCV::X15, 17326 RISCV::X16, RISCV::X17}; 17327 // The GPRs used for passing arguments in the ILP32E/ILP64E ABI. 17328 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17329 RISCV::X13, RISCV::X14, RISCV::X15}; 17330 17331 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 17332 return ArrayRef(ArgEGPRs); 17333 17334 return ArrayRef(ArgIGPRs); 17335 } 17336 17337 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { 17338 // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used 17339 // for save-restore libcall, so we don't use them. 
17340 static const MCPhysReg FastCCIGPRs[] = { 17341 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 17342 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 17343 RISCV::X29, RISCV::X30, RISCV::X31}; 17344 17345 // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. 17346 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17347 RISCV::X13, RISCV::X14, RISCV::X15, 17348 RISCV::X7}; 17349 17350 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 17351 return ArrayRef(FastCCEGPRs); 17352 17353 return ArrayRef(FastCCIGPRs); 17354 } 17355 17356 // Pass a 2*XLEN argument that has been split into two XLEN values through 17357 // registers or the stack as necessary. 17358 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 17359 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 17360 MVT ValVT2, MVT LocVT2, 17361 ISD::ArgFlagsTy ArgFlags2, bool EABI) { 17362 unsigned XLenInBytes = XLen / 8; 17363 const RISCVSubtarget &STI = 17364 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 17365 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); 17366 17367 if (Register Reg = State.AllocateReg(ArgGPRs)) { 17368 // At least one half can be passed via register. 17369 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 17370 VA1.getLocVT(), CCValAssign::Full)); 17371 } else { 17372 // Both halves must be passed on the stack, with proper alignment. 17373 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte 17374 // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 17375 Align StackAlign(XLenInBytes); 17376 if (!EABI || XLen != 32) 17377 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); 17378 State.addLoc( 17379 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 17380 State.AllocateStack(XLenInBytes, StackAlign), 17381 VA1.getLocVT(), CCValAssign::Full)); 17382 State.addLoc(CCValAssign::getMem( 17383 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 17384 LocVT2, CCValAssign::Full)); 17385 return false; 17386 } 17387 17388 if (Register Reg = State.AllocateReg(ArgGPRs)) { 17389 // The second half can also be passed via register. 17390 State.addLoc( 17391 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 17392 } else { 17393 // The second half is passed via the stack, without additional alignment. 17394 State.addLoc(CCValAssign::getMem( 17395 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 17396 LocVT2, CCValAssign::Full)); 17397 } 17398 17399 return false; 17400 } 17401 17402 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, 17403 std::optional<unsigned> FirstMaskArgument, 17404 CCState &State, const RISCVTargetLowering &TLI) { 17405 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 17406 if (RC == &RISCV::VRRegClass) { 17407 // Assign the first mask argument to V0. 17408 // This is an interim calling convention and it may be changed in the 17409 // future. 
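// For illustration: a first mask argument such as <vscale x 4 x i1> is
// assigned to v0 below, while an LMUL=2 value such as <vscale x 4 x i32> is
// drawn from the even-numbered ArgVRM2s list (v8m2, v10m2, ...).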
17410 if (FirstMaskArgument && ValNo == *FirstMaskArgument) 17411 return State.AllocateReg(RISCV::V0); 17412 return State.AllocateReg(ArgVRs); 17413 } 17414 if (RC == &RISCV::VRM2RegClass) 17415 return State.AllocateReg(ArgVRM2s); 17416 if (RC == &RISCV::VRM4RegClass) 17417 return State.AllocateReg(ArgVRM4s); 17418 if (RC == &RISCV::VRM8RegClass) 17419 return State.AllocateReg(ArgVRM8s); 17420 llvm_unreachable("Unhandled register class for ValueType"); 17421 } 17422 17423 // Implements the RISC-V calling convention. Returns true upon failure. 17424 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 17425 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 17426 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 17427 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 17428 std::optional<unsigned> FirstMaskArgument) { 17429 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 17430 assert(XLen == 32 || XLen == 64); 17431 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 17432 17433 // Static chain parameter must not be passed in normal argument registers, 17434 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain 17435 if (ArgFlags.isNest()) { 17436 if (unsigned Reg = State.AllocateReg(RISCV::X7)) { 17437 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17438 return false; 17439 } 17440 } 17441 17442 // Any return value split in to more than two values can't be returned 17443 // directly. Vectors are returned via the available vector registers. 17444 if (!LocVT.isVector() && IsRet && ValNo > 1) 17445 return true; 17446 17447 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 17448 // variadic argument, or if no F16/F32 argument registers are available. 17449 bool UseGPRForF16_F32 = true; 17450 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 17451 // variadic argument, or if no F64 argument registers are available. 17452 bool UseGPRForF64 = true; 17453 17454 switch (ABI) { 17455 default: 17456 llvm_unreachable("Unexpected ABI"); 17457 case RISCVABI::ABI_ILP32: 17458 case RISCVABI::ABI_ILP32E: 17459 case RISCVABI::ABI_LP64: 17460 case RISCVABI::ABI_LP64E: 17461 break; 17462 case RISCVABI::ABI_ILP32F: 17463 case RISCVABI::ABI_LP64F: 17464 UseGPRForF16_F32 = !IsFixed; 17465 break; 17466 case RISCVABI::ABI_ILP32D: 17467 case RISCVABI::ABI_LP64D: 17468 UseGPRForF16_F32 = !IsFixed; 17469 UseGPRForF64 = !IsFixed; 17470 break; 17471 } 17472 17473 // FPR16, FPR32, and FPR64 alias each other. 17474 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { 17475 UseGPRForF16_F32 = true; 17476 UseGPRForF64 = true; 17477 } 17478 17479 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 17480 // similar local variables rather than directly checking against the target 17481 // ABI. 17482 17483 if (UseGPRForF16_F32 && 17484 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { 17485 LocVT = XLenVT; 17486 LocInfo = CCValAssign::BCvt; 17487 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 17488 LocVT = MVT::i64; 17489 LocInfo = CCValAssign::BCvt; 17490 } 17491 17492 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); 17493 17494 // If this is a variadic argument, the RISC-V calling convention requires 17495 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 17496 // alignment (RV32) or 16-byte alignment (RV64). 
An aligned register should 17497 // be used regardless of whether the original argument was split during 17498 // legalisation or not. The argument will not be passed by registers if the 17499 // original type is larger than 2*XLEN, so the register alignment rule does 17500 // not apply. 17501 // TODO: To be compatible with GCC's behaviors, we don't align registers 17502 // currently if we are using ILP32E calling convention. This behavior may be 17503 // changed when RV32E/ILP32E is ratified. 17504 unsigned TwoXLenInBytes = (2 * XLen) / 8; 17505 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 17506 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && 17507 ABI != RISCVABI::ABI_ILP32E) { 17508 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 17509 // Skip 'odd' register if necessary. 17510 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 17511 State.AllocateReg(ArgGPRs); 17512 } 17513 17514 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 17515 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 17516 State.getPendingArgFlags(); 17517 17518 assert(PendingLocs.size() == PendingArgFlags.size() && 17519 "PendingLocs and PendingArgFlags out of sync"); 17520 17521 // Handle passing f64 on RV32D with a soft float ABI or when floating point 17522 // registers are exhausted. 17523 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 17524 assert(PendingLocs.empty() && "Can't lower f64 if it is split"); 17525 // Depending on available argument GPRS, f64 may be passed in a pair of 17526 // GPRs, split between a GPR and the stack, or passed completely on the 17527 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 17528 // cases. 17529 Register Reg = State.AllocateReg(ArgGPRs); 17530 if (!Reg) { 17531 unsigned StackOffset = State.AllocateStack(8, Align(8)); 17532 State.addLoc( 17533 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17534 return false; 17535 } 17536 LocVT = MVT::i32; 17537 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17538 Register HiReg = State.AllocateReg(ArgGPRs); 17539 if (HiReg) { 17540 State.addLoc( 17541 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); 17542 } else { 17543 unsigned StackOffset = State.AllocateStack(4, Align(4)); 17544 State.addLoc( 17545 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17546 } 17547 return false; 17548 } 17549 17550 // Fixed-length vectors are located in the corresponding scalable-vector 17551 // container types. 17552 if (ValVT.isFixedLengthVector()) 17553 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 17554 17555 // Split arguments might be passed indirectly, so keep track of the pending 17556 // values. Split vectors are passed via a mix of registers and indirectly, so 17557 // treat them as we would any other argument. 17558 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 17559 LocVT = XLenVT; 17560 LocInfo = CCValAssign::Indirect; 17561 PendingLocs.push_back( 17562 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 17563 PendingArgFlags.push_back(ArgFlags); 17564 if (!ArgFlags.isSplitEnd()) { 17565 return false; 17566 } 17567 } 17568 17569 // If the split argument only had two elements, it should be passed directly 17570 // in registers or on the stack. 
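// This covers the common 2*XLEN scalar case (e.g. i64 on RV32 or i128 on RV64), which CC_RISCVAssign2XLen handles.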
17571 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 17572 PendingLocs.size() <= 2) { 17573 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 17574 // Apply the normal calling convention rules to the first half of the 17575 // split argument. 17576 CCValAssign VA = PendingLocs[0]; 17577 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 17578 PendingLocs.clear(); 17579 PendingArgFlags.clear(); 17580 return CC_RISCVAssign2XLen( 17581 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, 17582 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); 17583 } 17584 17585 // Allocate to a register if possible, or else a stack slot. 17586 Register Reg; 17587 unsigned StoreSizeBytes = XLen / 8; 17588 Align StackAlign = Align(XLen / 8); 17589 17590 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) 17591 Reg = State.AllocateReg(ArgFPR16s); 17592 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 17593 Reg = State.AllocateReg(ArgFPR32s); 17594 else if (ValVT == MVT::f64 && !UseGPRForF64) 17595 Reg = State.AllocateReg(ArgFPR64s); 17596 else if (ValVT.isVector()) { 17597 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); 17598 if (!Reg) { 17599 // For return values, the vector must be passed fully via registers or 17600 // via the stack. 17601 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 17602 // but we're using all of them. 17603 if (IsRet) 17604 return true; 17605 // Try using a GPR to pass the address 17606 if ((Reg = State.AllocateReg(ArgGPRs))) { 17607 LocVT = XLenVT; 17608 LocInfo = CCValAssign::Indirect; 17609 } else if (ValVT.isScalableVector()) { 17610 LocVT = XLenVT; 17611 LocInfo = CCValAssign::Indirect; 17612 } else { 17613 // Pass fixed-length vectors on the stack. 17614 LocVT = ValVT; 17615 StoreSizeBytes = ValVT.getStoreSize(); 17616 // Align vectors to their element sizes, being careful for vXi1 17617 // vectors. 17618 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 17619 } 17620 } 17621 } else { 17622 Reg = State.AllocateReg(ArgGPRs); 17623 } 17624 17625 unsigned StackOffset = 17626 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 17627 17628 // If we reach this point and PendingLocs is non-empty, we must be at the 17629 // end of a split argument that must be passed indirectly. 17630 if (!PendingLocs.empty()) { 17631 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 17632 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 17633 17634 for (auto &It : PendingLocs) { 17635 if (Reg) 17636 It.convertToReg(Reg); 17637 else 17638 It.convertToMem(StackOffset); 17639 State.addLoc(It); 17640 } 17641 PendingLocs.clear(); 17642 PendingArgFlags.clear(); 17643 return false; 17644 } 17645 17646 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 17647 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 17648 "Expected an XLenVT or vector types at this stage"); 17649 17650 if (Reg) { 17651 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17652 return false; 17653 } 17654 17655 // When a scalar floating-point value is passed on the stack, no 17656 // bit-conversion is needed. 
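// The value is stored in its original floating-point representation, so any BCvt location chosen above is undone here.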
17657 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { 17658 assert(!ValVT.isVector()); 17659 LocVT = ValVT; 17660 LocInfo = CCValAssign::Full; 17661 } 17662 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17663 return false; 17664 } 17665 17666 template <typename ArgTy> 17667 static std::optional<unsigned> preAssignMask(const ArgTy &Args) { 17668 for (const auto &ArgIdx : enumerate(Args)) { 17669 MVT ArgVT = ArgIdx.value().VT; 17670 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 17671 return ArgIdx.index(); 17672 } 17673 return std::nullopt; 17674 } 17675 17676 void RISCVTargetLowering::analyzeInputArgs( 17677 MachineFunction &MF, CCState &CCInfo, 17678 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 17679 RISCVCCAssignFn Fn) const { 17680 unsigned NumArgs = Ins.size(); 17681 FunctionType *FType = MF.getFunction().getFunctionType(); 17682 17683 std::optional<unsigned> FirstMaskArgument; 17684 if (Subtarget.hasVInstructions()) 17685 FirstMaskArgument = preAssignMask(Ins); 17686 17687 for (unsigned i = 0; i != NumArgs; ++i) { 17688 MVT ArgVT = Ins[i].VT; 17689 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 17690 17691 Type *ArgTy = nullptr; 17692 if (IsRet) 17693 ArgTy = FType->getReturnType(); 17694 else if (Ins[i].isOrigArg()) 17695 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 17696 17697 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 17698 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 17699 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 17700 FirstMaskArgument)) { 17701 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 17702 << ArgVT << '\n'); 17703 llvm_unreachable(nullptr); 17704 } 17705 } 17706 } 17707 17708 void RISCVTargetLowering::analyzeOutputArgs( 17709 MachineFunction &MF, CCState &CCInfo, 17710 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 17711 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 17712 unsigned NumArgs = Outs.size(); 17713 17714 std::optional<unsigned> FirstMaskArgument; 17715 if (Subtarget.hasVInstructions()) 17716 FirstMaskArgument = preAssignMask(Outs); 17717 17718 for (unsigned i = 0; i != NumArgs; i++) { 17719 MVT ArgVT = Outs[i].VT; 17720 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 17721 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 17722 17723 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 17724 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 17725 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 17726 FirstMaskArgument)) { 17727 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 17728 << ArgVT << "\n"); 17729 llvm_unreachable(nullptr); 17730 } 17731 } 17732 } 17733 17734 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 17735 // values. 
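// In particular, this undoes the BCvt locations chosen by CC_RISCV, e.g. f16/bf16 or f32 values that were passed in integer registers.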
17736 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 17737 const CCValAssign &VA, const SDLoc &DL, 17738 const RISCVSubtarget &Subtarget) { 17739 switch (VA.getLocInfo()) { 17740 default: 17741 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17742 case CCValAssign::Full: 17743 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 17744 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 17745 break; 17746 case CCValAssign::BCvt: 17747 if (VA.getLocVT().isInteger() && 17748 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 17749 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); 17750 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 17751 if (RV64LegalI32) { 17752 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); 17753 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 17754 } else { 17755 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 17756 } 17757 } else { 17758 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 17759 } 17760 break; 17761 } 17762 return Val; 17763 } 17764 17765 // The caller is responsible for loading the full value if the argument is 17766 // passed with CCValAssign::Indirect. 17767 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 17768 const CCValAssign &VA, const SDLoc &DL, 17769 const ISD::InputArg &In, 17770 const RISCVTargetLowering &TLI) { 17771 MachineFunction &MF = DAG.getMachineFunction(); 17772 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 17773 EVT LocVT = VA.getLocVT(); 17774 SDValue Val; 17775 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 17776 Register VReg = RegInfo.createVirtualRegister(RC); 17777 RegInfo.addLiveIn(VA.getLocReg(), VReg); 17778 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 17779 17780 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. 17781 if (In.isOrigArg()) { 17782 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 17783 if (OrigArg->getType()->isIntegerTy()) { 17784 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 17785 // An input zero extended from i31 can also be considered sign extended. 
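// (Zero extension from fewer than 32 bits leaves bit 31 and everything above it clear, which satisfies the sign-extended-from-32-bits property.)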
17786 if ((BitWidth <= 32 && In.Flags.isSExt()) || 17787 (BitWidth < 32 && In.Flags.isZExt())) { 17788 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 17789 RVFI->addSExt32Register(VReg); 17790 } 17791 } 17792 } 17793 17794 if (VA.getLocInfo() == CCValAssign::Indirect) 17795 return Val; 17796 17797 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 17798 } 17799 17800 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 17801 const CCValAssign &VA, const SDLoc &DL, 17802 const RISCVSubtarget &Subtarget) { 17803 EVT LocVT = VA.getLocVT(); 17804 17805 switch (VA.getLocInfo()) { 17806 default: 17807 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17808 case CCValAssign::Full: 17809 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 17810 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 17811 break; 17812 case CCValAssign::BCvt: 17813 if (LocVT.isInteger() && 17814 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 17815 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); 17816 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { 17817 if (RV64LegalI32) { 17818 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); 17819 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); 17820 } else { 17821 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 17822 } 17823 } else { 17824 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 17825 } 17826 break; 17827 } 17828 return Val; 17829 } 17830 17831 // The caller is responsible for loading the full value if the argument is 17832 // passed with CCValAssign::Indirect. 17833 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 17834 const CCValAssign &VA, const SDLoc &DL) { 17835 MachineFunction &MF = DAG.getMachineFunction(); 17836 MachineFrameInfo &MFI = MF.getFrameInfo(); 17837 EVT LocVT = VA.getLocVT(); 17838 EVT ValVT = VA.getValVT(); 17839 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 17840 if (ValVT.isScalableVector()) { 17841 // When the value is a scalable vector, we save the pointer which points to 17842 // the scalable vector value in the stack. The ValVT will be the pointer 17843 // type, instead of the scalable vector type. 
17844 ValVT = LocVT; 17845 } 17846 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 17847 /*IsImmutable=*/true); 17848 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 17849 SDValue Val; 17850 17851 ISD::LoadExtType ExtType; 17852 switch (VA.getLocInfo()) { 17853 default: 17854 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17855 case CCValAssign::Full: 17856 case CCValAssign::Indirect: 17857 case CCValAssign::BCvt: 17858 ExtType = ISD::NON_EXTLOAD; 17859 break; 17860 } 17861 Val = DAG.getExtLoad( 17862 ExtType, DL, LocVT, Chain, FIN, 17863 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 17864 return Val; 17865 } 17866 17867 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 17868 const CCValAssign &VA, 17869 const CCValAssign &HiVA, 17870 const SDLoc &DL) { 17871 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 17872 "Unexpected VA"); 17873 MachineFunction &MF = DAG.getMachineFunction(); 17874 MachineFrameInfo &MFI = MF.getFrameInfo(); 17875 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 17876 17877 assert(VA.isRegLoc() && "Expected register VA assignment"); 17878 17879 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 17880 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 17881 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 17882 SDValue Hi; 17883 if (HiVA.isMemLoc()) { 17884 // Second half of f64 is passed on the stack. 17885 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), 17886 /*IsImmutable=*/true); 17887 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 17888 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 17889 MachinePointerInfo::getFixedStack(MF, FI)); 17890 } else { 17891 // Second half of f64 is passed in another GPR. 17892 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 17893 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); 17894 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 17895 } 17896 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 17897 } 17898 17899 // FastCC gives less than a 1% performance improvement on some particular 17900 // benchmarks, but it may theoretically benefit other cases.
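// In addition to the normal argument registers, FastCC may also use the temporary registers (t2-t6, and ft0-ft11 when the relevant FP extension is enabled) before falling back to the stack.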
17901 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, 17902 unsigned ValNo, MVT ValVT, MVT LocVT, 17903 CCValAssign::LocInfo LocInfo, 17904 ISD::ArgFlagsTy ArgFlags, CCState &State, 17905 bool IsFixed, bool IsRet, Type *OrigTy, 17906 const RISCVTargetLowering &TLI, 17907 std::optional<unsigned> FirstMaskArgument) { 17908 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 17909 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17910 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17911 return false; 17912 } 17913 } 17914 17915 const RISCVSubtarget &Subtarget = TLI.getSubtarget(); 17916 17917 if (LocVT == MVT::f16 && 17918 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { 17919 static const MCPhysReg FPR16List[] = { 17920 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 17921 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 17922 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 17923 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 17924 if (unsigned Reg = State.AllocateReg(FPR16List)) { 17925 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17926 return false; 17927 } 17928 } 17929 17930 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 17931 static const MCPhysReg FPR32List[] = { 17932 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 17933 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 17934 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 17935 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 17936 if (unsigned Reg = State.AllocateReg(FPR32List)) { 17937 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17938 return false; 17939 } 17940 } 17941 17942 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 17943 static const MCPhysReg FPR64List[] = { 17944 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 17945 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 17946 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 17947 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 17948 if (unsigned Reg = State.AllocateReg(FPR64List)) { 17949 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17950 return false; 17951 } 17952 } 17953 17954 // Check if there is an available GPR before hitting the stack. 
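// This handles the Zhinx/Zfinx/Zdinx configurations, where floating-point values live in the integer register file.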
17955 if ((LocVT == MVT::f16 && 17956 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || 17957 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 17958 (LocVT == MVT::f64 && Subtarget.is64Bit() && 17959 Subtarget.hasStdExtZdinx())) { 17960 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17961 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17962 return false; 17963 } 17964 } 17965 17966 if (LocVT == MVT::f16) { 17967 unsigned Offset2 = State.AllocateStack(2, Align(2)); 17968 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo)); 17969 return false; 17970 } 17971 17972 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 17973 unsigned Offset4 = State.AllocateStack(4, Align(4)); 17974 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 17975 return false; 17976 } 17977 17978 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 17979 unsigned Offset5 = State.AllocateStack(8, Align(8)); 17980 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 17981 return false; 17982 } 17983 17984 if (LocVT.isVector()) { 17985 if (unsigned Reg = 17986 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { 17987 // Fixed-length vectors are located in the corresponding scalable-vector 17988 // container types. 17989 if (ValVT.isFixedLengthVector()) 17990 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 17991 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17992 } else { 17993 // Try and pass the address via a "fast" GPR. 17994 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17995 LocInfo = CCValAssign::Indirect; 17996 LocVT = TLI.getSubtarget().getXLenVT(); 17997 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 17998 } else if (ValVT.isFixedLengthVector()) { 17999 auto StackAlign = 18000 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 18001 unsigned StackOffset = 18002 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 18003 State.addLoc( 18004 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 18005 } else { 18006 // Can't pass scalable vectors on the stack. 18007 return true; 18008 } 18009 } 18010 18011 return false; 18012 } 18013 18014 return true; // CC didn't match. 18015 } 18016 18017 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 18018 CCValAssign::LocInfo LocInfo, 18019 ISD::ArgFlagsTy ArgFlags, CCState &State) { 18020 if (ArgFlags.isNest()) { 18021 report_fatal_error( 18022 "Attribute 'nest' is not supported in GHC calling convention"); 18023 } 18024 18025 static const MCPhysReg GPRList[] = { 18026 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 18027 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 18028 18029 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 18030 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 18031 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 18032 if (unsigned Reg = State.AllocateReg(GPRList)) { 18033 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18034 return false; 18035 } 18036 } 18037 18038 const RISCVSubtarget &Subtarget = 18039 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 18040 18041 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 18042 // Pass in STG registers: F1, ..., F6 18043 // fs0 ... 
fs5 18044 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 18045 RISCV::F18_F, RISCV::F19_F, 18046 RISCV::F20_F, RISCV::F21_F}; 18047 if (unsigned Reg = State.AllocateReg(FPR32List)) { 18048 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18049 return false; 18050 } 18051 } 18052 18053 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 18054 // Pass in STG registers: D1, ..., D6 18055 // fs6 ... fs11 18056 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 18057 RISCV::F24_D, RISCV::F25_D, 18058 RISCV::F26_D, RISCV::F27_D}; 18059 if (unsigned Reg = State.AllocateReg(FPR64List)) { 18060 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18061 return false; 18062 } 18063 } 18064 18065 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 18066 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && 18067 Subtarget.is64Bit())) { 18068 if (unsigned Reg = State.AllocateReg(GPRList)) { 18069 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18070 return false; 18071 } 18072 } 18073 18074 report_fatal_error("No registers left in GHC calling convention"); 18075 return true; 18076 } 18077 18078 // Transform physical registers into virtual registers. 18079 SDValue RISCVTargetLowering::LowerFormalArguments( 18080 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 18081 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 18082 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 18083 18084 MachineFunction &MF = DAG.getMachineFunction(); 18085 18086 switch (CallConv) { 18087 default: 18088 report_fatal_error("Unsupported calling convention"); 18089 case CallingConv::C: 18090 case CallingConv::Fast: 18091 case CallingConv::SPIR_KERNEL: 18092 case CallingConv::GRAAL: 18093 break; 18094 case CallingConv::GHC: 18095 if (Subtarget.isRVE()) 18096 report_fatal_error("GHC calling convention is not supported on RVE!"); 18097 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) 18098 report_fatal_error("GHC calling convention requires the (Zfinx/F) and " 18099 "(Zdinx/D) instruction set extensions"); 18100 } 18101 18102 const Function &Func = MF.getFunction(); 18103 if (Func.hasFnAttribute("interrupt")) { 18104 if (!Func.arg_empty()) 18105 report_fatal_error( 18106 "Functions with the interrupt attribute cannot have arguments!"); 18107 18108 StringRef Kind = 18109 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 18110 18111 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 18112 report_fatal_error( 18113 "Function interrupt attribute argument not supported!"); 18114 } 18115 18116 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 18117 MVT XLenVT = Subtarget.getXLenVT(); 18118 unsigned XLenInBytes = Subtarget.getXLen() / 8; 18119 // Used with varargs to accumulate store chains. 18120 std::vector<SDValue> OutChains; 18121 18122 // Assign locations to all of the incoming arguments. 18123 SmallVector<CCValAssign, 16> ArgLocs; 18124 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 18125 18126 if (CallConv == CallingConv::GHC) 18127 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC); 18128 else 18129 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, 18130 CallConv == CallingConv::Fast ?
RISCV::CC_RISCV_FastCC 18131 : RISCV::CC_RISCV); 18132 18133 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { 18134 CCValAssign &VA = ArgLocs[i]; 18135 SDValue ArgValue; 18136 // Passing f64 on RV32D with a soft float ABI must be handled as a special 18137 // case. 18138 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18139 assert(VA.needsCustom()); 18140 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); 18141 } else if (VA.isRegLoc()) 18142 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); 18143 else 18144 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 18145 18146 if (VA.getLocInfo() == CCValAssign::Indirect) { 18147 // If the original argument was split and passed by reference (e.g. i128 18148 // on RV32), we need to load all parts of it here (using the same 18149 // address). Vectors may be partly split to registers and partly to the 18150 // stack, in which case the base address is partly offset and subsequent 18151 // stores are relative to that. 18152 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 18153 MachinePointerInfo())); 18154 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; 18155 unsigned ArgPartOffset = Ins[InsIdx].PartOffset; 18156 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 18157 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { 18158 CCValAssign &PartVA = ArgLocs[i + 1]; 18159 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; 18160 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 18161 if (PartVA.getValVT().isScalableVector()) 18162 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 18163 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 18164 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 18165 MachinePointerInfo())); 18166 ++i; 18167 ++InsIdx; 18168 } 18169 continue; 18170 } 18171 InVals.push_back(ArgValue); 18172 } 18173 18174 if (any_of(ArgLocs, 18175 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 18176 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 18177 18178 if (IsVarArg) { 18179 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); 18180 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 18181 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 18182 MachineFrameInfo &MFI = MF.getFrameInfo(); 18183 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 18184 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 18185 18186 // Size of the vararg save area. For now, the varargs save area is either 18187 // zero or large enough to hold a0-a7. 18188 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 18189 int FI; 18190 18191 // If all registers are allocated, then all varargs must be passed on the 18192 // stack and we don't need to save any argregs. 18193 if (VarArgsSaveSize == 0) { 18194 int VaArgOffset = CCInfo.getStackSize(); 18195 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 18196 } else { 18197 int VaArgOffset = -VarArgsSaveSize; 18198 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); 18199 18200 // If saving an odd number of registers, create an extra stack slot to 18201 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 18202 // that offsets to even-numbered registers remain 2*XLEN-aligned.
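// For example, if only a0 was consumed by named arguments (Idx == 1), a1-a7 are saved and the extra slot keeps the save area 2*XLEN-aligned.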
18203 if (Idx % 2) { 18204 MFI.CreateFixedObject( 18205 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true); 18206 VarArgsSaveSize += XLenInBytes; 18207 } 18208 18209 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 18210 18211 // Copy the integer registers that may have been used for passing varargs 18212 // to the vararg save area. 18213 for (unsigned I = Idx; I < ArgRegs.size(); ++I) { 18214 const Register Reg = RegInfo.createVirtualRegister(RC); 18215 RegInfo.addLiveIn(ArgRegs[I], Reg); 18216 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 18217 SDValue Store = DAG.getStore( 18218 Chain, DL, ArgValue, FIN, 18219 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes)); 18220 OutChains.push_back(Store); 18221 FIN = 18222 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL); 18223 } 18224 } 18225 18226 // Record the frame index of the first variable argument, 18227 // which is needed by VASTART. 18228 RVFI->setVarArgsFrameIndex(FI); 18229 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 18230 } 18231 18232 // All stores are grouped in one node to allow the matching between 18233 // the size of Ins and InVals. This only happens for vararg functions. 18234 if (!OutChains.empty()) { 18235 OutChains.push_back(Chain); 18236 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 18237 } 18238 18239 return Chain; 18240 } 18241 18242 /// isEligibleForTailCallOptimization - Check whether the call is eligible 18243 /// for tail call optimization. 18244 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 18245 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 18246 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 18247 const SmallVector<CCValAssign, 16> &ArgLocs) const { 18248 18249 auto CalleeCC = CLI.CallConv; 18250 auto &Outs = CLI.Outs; 18251 auto &Caller = MF.getFunction(); 18252 auto CallerCC = Caller.getCallingConv(); 18253 18254 // Exception-handling functions need a special set of instructions to 18255 // indicate a return to the hardware. Tail-calling another function would 18256 // probably break this. 18257 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 18258 // should be expanded as new function attributes are introduced. 18259 if (Caller.hasFnAttribute("interrupt")) 18260 return false; 18261 18262 // Do not tail call opt if the stack is used to pass parameters. 18263 if (CCInfo.getStackSize() != 0) 18264 return false; 18265 18266 // Do not tail call opt if any parameters need to be passed indirectly. 18267 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 18268 // passed indirectly. So the address of the value will be passed in a 18269 // register, or if not available, then the address is put on the stack. In 18270 // order to pass indirectly, space on the stack often needs to be allocated 18271 // in order to store the value. In this case the CCInfo.getStackSize() != 0 18272 // check is not enough; we also need to check whether any CCValAssign in 18273 // ArgLocs uses CCValAssign::Indirect. 18274 for (auto &VA : ArgLocs) 18275 if (VA.getLocInfo() == CCValAssign::Indirect) 18276 return false; 18277 18278 // Do not tail call opt if either caller or callee uses struct return 18279 // semantics. 18280 auto IsCallerStructRet = Caller.hasStructRetAttr(); 18281 auto IsCalleeStructRet = Outs.empty() ?
false : Outs[0].Flags.isSRet(); 18282 if (IsCallerStructRet || IsCalleeStructRet) 18283 return false; 18284 18285 // The callee has to preserve all registers the caller needs to preserve. 18286 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 18287 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 18288 if (CalleeCC != CallerCC) { 18289 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 18290 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 18291 return false; 18292 } 18293 18294 // Byval parameters hand the function a pointer directly into the stack area 18295 // we want to reuse during a tail call. Working around this *is* possible 18296 // but less efficient and uglier in LowerCall. 18297 for (auto &Arg : Outs) 18298 if (Arg.Flags.isByVal()) 18299 return false; 18300 18301 return true; 18302 } 18303 18304 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 18305 return DAG.getDataLayout().getPrefTypeAlign( 18306 VT.getTypeForEVT(*DAG.getContext())); 18307 } 18308 18309 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 18310 // and output parameter nodes. 18311 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 18312 SmallVectorImpl<SDValue> &InVals) const { 18313 SelectionDAG &DAG = CLI.DAG; 18314 SDLoc &DL = CLI.DL; 18315 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 18316 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 18317 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 18318 SDValue Chain = CLI.Chain; 18319 SDValue Callee = CLI.Callee; 18320 bool &IsTailCall = CLI.IsTailCall; 18321 CallingConv::ID CallConv = CLI.CallConv; 18322 bool IsVarArg = CLI.IsVarArg; 18323 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 18324 MVT XLenVT = Subtarget.getXLenVT(); 18325 18326 MachineFunction &MF = DAG.getMachineFunction(); 18327 18328 // Analyze the operands of the call, assigning locations to each operand. 18329 SmallVector<CCValAssign, 16> ArgLocs; 18330 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 18331 18332 if (CallConv == CallingConv::GHC) { 18333 if (Subtarget.isRVE()) 18334 report_fatal_error("GHC calling convention is not supported on RVE!"); 18335 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); 18336 } else 18337 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 18338 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC 18339 : RISCV::CC_RISCV); 18340 18341 // Check if it's really possible to do a tail call. 18342 if (IsTailCall) 18343 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 18344 18345 if (IsTailCall) 18346 ++NumTailCalls; 18347 else if (CLI.CB && CLI.CB->isMustTailCall()) 18348 report_fatal_error("failed to perform tail call elimination on a call " 18349 "site marked musttail"); 18350 18351 // Get a count of how many bytes are to be pushed on the stack. 
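// This is the stack size computed by the argument analysis above; it is also what CALLSEQ_START/CALLSEQ_END are emitted with below.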
18352 unsigned NumBytes = ArgCCInfo.getStackSize(); 18353 18354 // Create local copies for byval args 18355 SmallVector<SDValue, 8> ByValArgs; 18356 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 18357 ISD::ArgFlagsTy Flags = Outs[i].Flags; 18358 if (!Flags.isByVal()) 18359 continue; 18360 18361 SDValue Arg = OutVals[i]; 18362 unsigned Size = Flags.getByValSize(); 18363 Align Alignment = Flags.getNonZeroByValAlign(); 18364 18365 int FI = 18366 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 18367 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 18368 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 18369 18370 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 18371 /*IsVolatile=*/false, 18372 /*AlwaysInline=*/false, IsTailCall, 18373 MachinePointerInfo(), MachinePointerInfo()); 18374 ByValArgs.push_back(FIPtr); 18375 } 18376 18377 if (!IsTailCall) 18378 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 18379 18380 // Copy argument values to their designated locations. 18381 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 18382 SmallVector<SDValue, 8> MemOpChains; 18383 SDValue StackPtr; 18384 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; 18385 ++i, ++OutIdx) { 18386 CCValAssign &VA = ArgLocs[i]; 18387 SDValue ArgValue = OutVals[OutIdx]; 18388 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; 18389 18390 // Handle passing f64 on RV32D with a soft float ABI as a special case. 18391 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18392 assert(VA.isRegLoc() && "Expected register VA assignment"); 18393 assert(VA.needsCustom()); 18394 SDValue SplitF64 = DAG.getNode( 18395 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 18396 SDValue Lo = SplitF64.getValue(0); 18397 SDValue Hi = SplitF64.getValue(1); 18398 18399 Register RegLo = VA.getLocReg(); 18400 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 18401 18402 // Get the CCValAssign for the Hi part. 18403 CCValAssign &HiVA = ArgLocs[++i]; 18404 18405 if (HiVA.isMemLoc()) { 18406 // Second half of f64 is passed on the stack. 18407 if (!StackPtr.getNode()) 18408 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 18409 SDValue Address = 18410 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 18411 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); 18412 // Emit the store. 18413 MemOpChains.push_back( 18414 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); 18415 } else { 18416 // Second half of f64 is passed in another GPR. 18417 Register RegHigh = HiVA.getLocReg(); 18418 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 18419 } 18420 continue; 18421 } 18422 18423 // Promote the value if needed. 18424 // For now, only handle fully promoted and indirect arguments. 18425 if (VA.getLocInfo() == CCValAssign::Indirect) { 18426 // Store the argument in a stack slot and pass its address. 18427 Align StackAlign = 18428 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), 18429 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 18430 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 18431 // If the original argument was split (e.g. i128), we need 18432 // to store the required parts of it here (and pass just one address). 18433 // Vectors may be partly split to registers and partly to the stack, in 18434 // which case the base address is partly offset and subsequent stores are 18435 // relative to that. 
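// Gather all remaining parts of this argument so that they can be stored into a single stack temporary whose address is then passed.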
18436 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; 18437 unsigned ArgPartOffset = Outs[OutIdx].PartOffset; 18438 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 18439 // Calculate the total size to store. We don't have access to what we're 18440 // actually storing other than performing the loop and collecting the 18441 // info. 18442 SmallVector<std::pair<SDValue, SDValue>> Parts; 18443 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { 18444 SDValue PartValue = OutVals[OutIdx + 1]; 18445 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; 18446 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 18447 EVT PartVT = PartValue.getValueType(); 18448 if (PartVT.isScalableVector()) 18449 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 18450 StoredSize += PartVT.getStoreSize(); 18451 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 18452 Parts.push_back(std::make_pair(PartValue, Offset)); 18453 ++i; 18454 ++OutIdx; 18455 } 18456 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 18457 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 18458 MemOpChains.push_back( 18459 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 18460 MachinePointerInfo::getFixedStack(MF, FI))); 18461 for (const auto &Part : Parts) { 18462 SDValue PartValue = Part.first; 18463 SDValue PartOffset = Part.second; 18464 SDValue Address = 18465 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 18466 MemOpChains.push_back( 18467 DAG.getStore(Chain, DL, PartValue, Address, 18468 MachinePointerInfo::getFixedStack(MF, FI))); 18469 } 18470 ArgValue = SpillSlot; 18471 } else { 18472 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 18473 } 18474 18475 // Use local copy if it is a byval arg. 18476 if (Flags.isByVal()) 18477 ArgValue = ByValArgs[j++]; 18478 18479 if (VA.isRegLoc()) { 18480 // Queue up the argument copies and emit them at the end. 18481 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 18482 } else { 18483 assert(VA.isMemLoc() && "Argument not register or memory"); 18484 assert(!IsTailCall && "Tail call not allowed if stack is used " 18485 "for passing parameters"); 18486 18487 // Work out the address of the stack slot. 18488 if (!StackPtr.getNode()) 18489 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 18490 SDValue Address = 18491 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 18492 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 18493 18494 // Emit the store. 18495 MemOpChains.push_back( 18496 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 18497 } 18498 } 18499 18500 // Join the stores, which are independent of one another. 18501 if (!MemOpChains.empty()) 18502 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 18503 18504 SDValue Glue; 18505 18506 // Build a sequence of copy-to-reg nodes, chained and glued together. 18507 for (auto &Reg : RegsToPass) { 18508 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 18509 Glue = Chain.getValue(1); 18510 } 18511 18512 // Validate that none of the argument registers have been marked as 18513 // reserved, if so report an error. Do the same for the return address if this 18514 // is not a tailcall. 
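// A non-tail call clobbers the return address, so ra (x1) must not have been reserved either.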
18515 validateCCReservedRegs(RegsToPass, MF); 18516 if (!IsTailCall && 18517 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 18518 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18519 MF.getFunction(), 18520 "Return address register required, but has been reserved."}); 18521 18522 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 18523 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 18524 // split it and then direct call can be matched by PseudoCALL. 18525 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 18526 const GlobalValue *GV = S->getGlobal(); 18527 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); 18528 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 18529 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL); 18530 } 18531 18532 // The first call operand is the chain and the second is the target address. 18533 SmallVector<SDValue, 8> Ops; 18534 Ops.push_back(Chain); 18535 Ops.push_back(Callee); 18536 18537 // Add argument registers to the end of the list so that they are 18538 // known live into the call. 18539 for (auto &Reg : RegsToPass) 18540 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 18541 18542 if (!IsTailCall) { 18543 // Add a register mask operand representing the call-preserved registers. 18544 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 18545 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 18546 assert(Mask && "Missing call preserved mask for calling convention"); 18547 Ops.push_back(DAG.getRegisterMask(Mask)); 18548 } 18549 18550 // Glue the call to the argument copies, if any. 18551 if (Glue.getNode()) 18552 Ops.push_back(Glue); 18553 18554 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && 18555 "Unexpected CFI type for a direct call"); 18556 18557 // Emit the call. 18558 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 18559 18560 if (IsTailCall) { 18561 MF.getFrameInfo().setHasTailCall(); 18562 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 18563 if (CLI.CFIType) 18564 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 18565 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 18566 return Ret; 18567 } 18568 18569 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 18570 if (CLI.CFIType) 18571 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 18572 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 18573 Glue = Chain.getValue(1); 18574 18575 // Mark the end of the call, which is glued to the call itself. 18576 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 18577 Glue = Chain.getValue(1); 18578 18579 // Assign locations to each value returned by this call. 18580 SmallVector<CCValAssign, 16> RVLocs; 18581 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 18582 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); 18583 18584 // Copy all of the result registers out of their specified physreg. 
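// An f64 returned on RV32 with a soft-float ABI arrives as two i32 halves and is recombined with RISCVISD::BuildPairF64.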
18585 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 18586 auto &VA = RVLocs[i]; 18587 // Copy the value out 18588 SDValue RetValue = 18589 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 18590 // Glue the RetValue to the end of the call sequence 18591 Chain = RetValue.getValue(1); 18592 Glue = RetValue.getValue(2); 18593 18594 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18595 assert(VA.needsCustom()); 18596 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), 18597 MVT::i32, Glue); 18598 Chain = RetValue2.getValue(1); 18599 Glue = RetValue2.getValue(2); 18600 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 18601 RetValue2); 18602 } 18603 18604 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 18605 18606 InVals.push_back(RetValue); 18607 } 18608 18609 return Chain; 18610 } 18611 18612 bool RISCVTargetLowering::CanLowerReturn( 18613 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 18614 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 18615 SmallVector<CCValAssign, 16> RVLocs; 18616 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 18617 18618 std::optional<unsigned> FirstMaskArgument; 18619 if (Subtarget.hasVInstructions()) 18620 FirstMaskArgument = preAssignMask(Outs); 18621 18622 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 18623 MVT VT = Outs[i].VT; 18624 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 18625 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 18626 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 18627 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 18628 *this, FirstMaskArgument)) 18629 return false; 18630 } 18631 return true; 18632 } 18633 18634 SDValue 18635 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 18636 bool IsVarArg, 18637 const SmallVectorImpl<ISD::OutputArg> &Outs, 18638 const SmallVectorImpl<SDValue> &OutVals, 18639 const SDLoc &DL, SelectionDAG &DAG) const { 18640 MachineFunction &MF = DAG.getMachineFunction(); 18641 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 18642 18643 // Stores the assignment of the return value to a location. 18644 SmallVector<CCValAssign, 16> RVLocs; 18645 18646 // Info about the registers and stack slot. 18647 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 18648 *DAG.getContext()); 18649 18650 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 18651 nullptr, RISCV::CC_RISCV); 18652 18653 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 18654 report_fatal_error("GHC functions return void only"); 18655 18656 SDValue Glue; 18657 SmallVector<SDValue, 4> RetOps(1, Chain); 18658 18659 // Copy the result values into the output registers. 18660 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { 18661 SDValue Val = OutVals[OutIdx]; 18662 CCValAssign &VA = RVLocs[i]; 18663 assert(VA.isRegLoc() && "Can only return in registers!"); 18664 18665 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18666 // Handle returning f64 on RV32D with a soft float ABI. 
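// The f64 is split with RISCVISD::SplitF64 and returned in a pair of GPRs.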
18667 assert(VA.isRegLoc() && "Expected return via registers"); 18668 assert(VA.needsCustom()); 18669 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 18670 DAG.getVTList(MVT::i32, MVT::i32), Val); 18671 SDValue Lo = SplitF64.getValue(0); 18672 SDValue Hi = SplitF64.getValue(1); 18673 Register RegLo = VA.getLocReg(); 18674 Register RegHi = RVLocs[++i].getLocReg(); 18675 18676 if (STI.isRegisterReservedByUser(RegLo) || 18677 STI.isRegisterReservedByUser(RegHi)) 18678 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18679 MF.getFunction(), 18680 "Return value register required, but has been reserved."}); 18681 18682 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 18683 Glue = Chain.getValue(1); 18684 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 18685 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 18686 Glue = Chain.getValue(1); 18687 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 18688 } else { 18689 // Handle a 'normal' return. 18690 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 18691 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 18692 18693 if (STI.isRegisterReservedByUser(VA.getLocReg())) 18694 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18695 MF.getFunction(), 18696 "Return value register required, but has been reserved."}); 18697 18698 // Guarantee that all emitted copies are stuck together. 18699 Glue = Chain.getValue(1); 18700 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 18701 } 18702 } 18703 18704 RetOps[0] = Chain; // Update chain. 18705 18706 // Add the glue node if we have it. 18707 if (Glue.getNode()) { 18708 RetOps.push_back(Glue); 18709 } 18710 18711 if (any_of(RVLocs, 18712 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 18713 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 18714 18715 unsigned RetOpc = RISCVISD::RET_GLUE; 18716 // Interrupt service routines use different return instructions. 18717 const Function &Func = DAG.getMachineFunction().getFunction(); 18718 if (Func.hasFnAttribute("interrupt")) { 18719 if (!Func.getReturnType()->isVoidTy()) 18720 report_fatal_error( 18721 "Functions with the interrupt attribute must have void return type!"); 18722 18723 MachineFunction &MF = DAG.getMachineFunction(); 18724 StringRef Kind = 18725 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 18726 18727 if (Kind == "supervisor") 18728 RetOpc = RISCVISD::SRET_GLUE; 18729 else 18730 RetOpc = RISCVISD::MRET_GLUE; 18731 } 18732 18733 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 18734 } 18735 18736 void RISCVTargetLowering::validateCCReservedRegs( 18737 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 18738 MachineFunction &MF) const { 18739 const Function &F = MF.getFunction(); 18740 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 18741 18742 if (llvm::any_of(Regs, [&STI](auto Reg) { 18743 return STI.isRegisterReservedByUser(Reg.first); 18744 })) 18745 F.getContext().diagnose(DiagnosticInfoUnsupported{ 18746 F, "Argument register required, but has been reserved."}); 18747 } 18748 18749 // Check if the result of the node is only used as a return value, as 18750 // otherwise we can't perform a tail-call. 
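// This hook lets the generic lowering code tail-call libcalls whose result is only consumed by the return.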
18751 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 18752 if (N->getNumValues() != 1) 18753 return false; 18754 if (!N->hasNUsesOfValue(1, 0)) 18755 return false; 18756 18757 SDNode *Copy = *N->use_begin(); 18758 18759 if (Copy->getOpcode() == ISD::BITCAST) { 18760 return isUsedByReturnOnly(Copy, Chain); 18761 } 18762 18763 // TODO: Handle additional opcodes in order to support tail-calling libcalls 18764 // with soft float ABIs. 18765 if (Copy->getOpcode() != ISD::CopyToReg) { 18766 return false; 18767 } 18768 18769 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 18770 // isn't safe to perform a tail call. 18771 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) 18772 return false; 18773 18774 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. 18775 bool HasRet = false; 18776 for (SDNode *Node : Copy->uses()) { 18777 if (Node->getOpcode() != RISCVISD::RET_GLUE) 18778 return false; 18779 HasRet = true; 18780 } 18781 if (!HasRet) 18782 return false; 18783 18784 Chain = Copy->getOperand(0); 18785 return true; 18786 } 18787 18788 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 18789 return CI->isTailCall(); 18790 } 18791 18792 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 18793 #define NODE_NAME_CASE(NODE) \ 18794 case RISCVISD::NODE: \ 18795 return "RISCVISD::" #NODE; 18796 // clang-format off 18797 switch ((RISCVISD::NodeType)Opcode) { 18798 case RISCVISD::FIRST_NUMBER: 18799 break; 18800 NODE_NAME_CASE(RET_GLUE) 18801 NODE_NAME_CASE(SRET_GLUE) 18802 NODE_NAME_CASE(MRET_GLUE) 18803 NODE_NAME_CASE(CALL) 18804 NODE_NAME_CASE(SELECT_CC) 18805 NODE_NAME_CASE(BR_CC) 18806 NODE_NAME_CASE(BuildPairF64) 18807 NODE_NAME_CASE(SplitF64) 18808 NODE_NAME_CASE(TAIL) 18809 NODE_NAME_CASE(ADD_LO) 18810 NODE_NAME_CASE(HI) 18811 NODE_NAME_CASE(LLA) 18812 NODE_NAME_CASE(ADD_TPREL) 18813 NODE_NAME_CASE(MULHSU) 18814 NODE_NAME_CASE(SLLW) 18815 NODE_NAME_CASE(SRAW) 18816 NODE_NAME_CASE(SRLW) 18817 NODE_NAME_CASE(DIVW) 18818 NODE_NAME_CASE(DIVUW) 18819 NODE_NAME_CASE(REMUW) 18820 NODE_NAME_CASE(ROLW) 18821 NODE_NAME_CASE(RORW) 18822 NODE_NAME_CASE(CLZW) 18823 NODE_NAME_CASE(CTZW) 18824 NODE_NAME_CASE(ABSW) 18825 NODE_NAME_CASE(FMV_H_X) 18826 NODE_NAME_CASE(FMV_X_ANYEXTH) 18827 NODE_NAME_CASE(FMV_X_SIGNEXTH) 18828 NODE_NAME_CASE(FMV_W_X_RV64) 18829 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 18830 NODE_NAME_CASE(FCVT_X) 18831 NODE_NAME_CASE(FCVT_XU) 18832 NODE_NAME_CASE(FCVT_W_RV64) 18833 NODE_NAME_CASE(FCVT_WU_RV64) 18834 NODE_NAME_CASE(STRICT_FCVT_W_RV64) 18835 NODE_NAME_CASE(STRICT_FCVT_WU_RV64) 18836 NODE_NAME_CASE(FP_ROUND_BF16) 18837 NODE_NAME_CASE(FP_EXTEND_BF16) 18838 NODE_NAME_CASE(FROUND) 18839 NODE_NAME_CASE(FCLASS) 18840 NODE_NAME_CASE(FMAX) 18841 NODE_NAME_CASE(FMIN) 18842 NODE_NAME_CASE(READ_CYCLE_WIDE) 18843 NODE_NAME_CASE(BREV8) 18844 NODE_NAME_CASE(ORC_B) 18845 NODE_NAME_CASE(ZIP) 18846 NODE_NAME_CASE(UNZIP) 18847 NODE_NAME_CASE(CLMUL) 18848 NODE_NAME_CASE(CLMULH) 18849 NODE_NAME_CASE(CLMULR) 18850 NODE_NAME_CASE(SHA256SIG0) 18851 NODE_NAME_CASE(SHA256SIG1) 18852 NODE_NAME_CASE(SHA256SUM0) 18853 NODE_NAME_CASE(SHA256SUM1) 18854 NODE_NAME_CASE(SM4KS) 18855 NODE_NAME_CASE(SM4ED) 18856 NODE_NAME_CASE(SM3P0) 18857 NODE_NAME_CASE(SM3P1) 18858 NODE_NAME_CASE(TH_LWD) 18859 NODE_NAME_CASE(TH_LWUD) 18860 NODE_NAME_CASE(TH_LDD) 18861 NODE_NAME_CASE(TH_SWD) 18862 NODE_NAME_CASE(TH_SDD) 18863 NODE_NAME_CASE(VMV_V_V_VL) 18864 NODE_NAME_CASE(VMV_V_X_VL) 18865 
NODE_NAME_CASE(VFMV_V_F_VL) 18866 NODE_NAME_CASE(VMV_X_S) 18867 NODE_NAME_CASE(VMV_S_X_VL) 18868 NODE_NAME_CASE(VFMV_S_F_VL) 18869 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) 18870 NODE_NAME_CASE(READ_VLENB) 18871 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 18872 NODE_NAME_CASE(VSLIDEUP_VL) 18873 NODE_NAME_CASE(VSLIDE1UP_VL) 18874 NODE_NAME_CASE(VSLIDEDOWN_VL) 18875 NODE_NAME_CASE(VSLIDE1DOWN_VL) 18876 NODE_NAME_CASE(VFSLIDE1UP_VL) 18877 NODE_NAME_CASE(VFSLIDE1DOWN_VL) 18878 NODE_NAME_CASE(VID_VL) 18879 NODE_NAME_CASE(VFNCVT_ROD_VL) 18880 NODE_NAME_CASE(VECREDUCE_ADD_VL) 18881 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 18882 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 18883 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 18884 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 18885 NODE_NAME_CASE(VECREDUCE_AND_VL) 18886 NODE_NAME_CASE(VECREDUCE_OR_VL) 18887 NODE_NAME_CASE(VECREDUCE_XOR_VL) 18888 NODE_NAME_CASE(VECREDUCE_FADD_VL) 18889 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 18890 NODE_NAME_CASE(VECREDUCE_FMIN_VL) 18891 NODE_NAME_CASE(VECREDUCE_FMAX_VL) 18892 NODE_NAME_CASE(ADD_VL) 18893 NODE_NAME_CASE(AND_VL) 18894 NODE_NAME_CASE(MUL_VL) 18895 NODE_NAME_CASE(OR_VL) 18896 NODE_NAME_CASE(SDIV_VL) 18897 NODE_NAME_CASE(SHL_VL) 18898 NODE_NAME_CASE(SREM_VL) 18899 NODE_NAME_CASE(SRA_VL) 18900 NODE_NAME_CASE(SRL_VL) 18901 NODE_NAME_CASE(ROTL_VL) 18902 NODE_NAME_CASE(ROTR_VL) 18903 NODE_NAME_CASE(SUB_VL) 18904 NODE_NAME_CASE(UDIV_VL) 18905 NODE_NAME_CASE(UREM_VL) 18906 NODE_NAME_CASE(XOR_VL) 18907 NODE_NAME_CASE(AVGFLOORU_VL) 18908 NODE_NAME_CASE(AVGCEILU_VL) 18909 NODE_NAME_CASE(SADDSAT_VL) 18910 NODE_NAME_CASE(UADDSAT_VL) 18911 NODE_NAME_CASE(SSUBSAT_VL) 18912 NODE_NAME_CASE(USUBSAT_VL) 18913 NODE_NAME_CASE(FADD_VL) 18914 NODE_NAME_CASE(FSUB_VL) 18915 NODE_NAME_CASE(FMUL_VL) 18916 NODE_NAME_CASE(FDIV_VL) 18917 NODE_NAME_CASE(FNEG_VL) 18918 NODE_NAME_CASE(FABS_VL) 18919 NODE_NAME_CASE(FSQRT_VL) 18920 NODE_NAME_CASE(FCLASS_VL) 18921 NODE_NAME_CASE(VFMADD_VL) 18922 NODE_NAME_CASE(VFNMADD_VL) 18923 NODE_NAME_CASE(VFMSUB_VL) 18924 NODE_NAME_CASE(VFNMSUB_VL) 18925 NODE_NAME_CASE(VFWMADD_VL) 18926 NODE_NAME_CASE(VFWNMADD_VL) 18927 NODE_NAME_CASE(VFWMSUB_VL) 18928 NODE_NAME_CASE(VFWNMSUB_VL) 18929 NODE_NAME_CASE(FCOPYSIGN_VL) 18930 NODE_NAME_CASE(SMIN_VL) 18931 NODE_NAME_CASE(SMAX_VL) 18932 NODE_NAME_CASE(UMIN_VL) 18933 NODE_NAME_CASE(UMAX_VL) 18934 NODE_NAME_CASE(BITREVERSE_VL) 18935 NODE_NAME_CASE(BSWAP_VL) 18936 NODE_NAME_CASE(CTLZ_VL) 18937 NODE_NAME_CASE(CTTZ_VL) 18938 NODE_NAME_CASE(CTPOP_VL) 18939 NODE_NAME_CASE(VFMIN_VL) 18940 NODE_NAME_CASE(VFMAX_VL) 18941 NODE_NAME_CASE(MULHS_VL) 18942 NODE_NAME_CASE(MULHU_VL) 18943 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) 18944 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) 18945 NODE_NAME_CASE(VFCVT_RM_X_F_VL) 18946 NODE_NAME_CASE(VFCVT_RM_XU_F_VL) 18947 NODE_NAME_CASE(VFCVT_X_F_VL) 18948 NODE_NAME_CASE(VFCVT_XU_F_VL) 18949 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) 18950 NODE_NAME_CASE(SINT_TO_FP_VL) 18951 NODE_NAME_CASE(UINT_TO_FP_VL) 18952 NODE_NAME_CASE(VFCVT_RM_F_XU_VL) 18953 NODE_NAME_CASE(VFCVT_RM_F_X_VL) 18954 NODE_NAME_CASE(FP_EXTEND_VL) 18955 NODE_NAME_CASE(FP_ROUND_VL) 18956 NODE_NAME_CASE(STRICT_FADD_VL) 18957 NODE_NAME_CASE(STRICT_FSUB_VL) 18958 NODE_NAME_CASE(STRICT_FMUL_VL) 18959 NODE_NAME_CASE(STRICT_FDIV_VL) 18960 NODE_NAME_CASE(STRICT_FSQRT_VL) 18961 NODE_NAME_CASE(STRICT_VFMADD_VL) 18962 NODE_NAME_CASE(STRICT_VFNMADD_VL) 18963 NODE_NAME_CASE(STRICT_VFMSUB_VL) 18964 NODE_NAME_CASE(STRICT_VFNMSUB_VL) 18965 NODE_NAME_CASE(STRICT_FP_ROUND_VL) 18966 NODE_NAME_CASE(STRICT_FP_EXTEND_VL) 18967 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) 18968 
NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) 18969 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) 18970 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) 18971 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) 18972 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) 18973 NODE_NAME_CASE(STRICT_FSETCC_VL) 18974 NODE_NAME_CASE(STRICT_FSETCCS_VL) 18975 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) 18976 NODE_NAME_CASE(VWMUL_VL) 18977 NODE_NAME_CASE(VWMULU_VL) 18978 NODE_NAME_CASE(VWMULSU_VL) 18979 NODE_NAME_CASE(VWADD_VL) 18980 NODE_NAME_CASE(VWADDU_VL) 18981 NODE_NAME_CASE(VWSUB_VL) 18982 NODE_NAME_CASE(VWSUBU_VL) 18983 NODE_NAME_CASE(VWADD_W_VL) 18984 NODE_NAME_CASE(VWADDU_W_VL) 18985 NODE_NAME_CASE(VWSUB_W_VL) 18986 NODE_NAME_CASE(VWSUBU_W_VL) 18987 NODE_NAME_CASE(VWSLL_VL) 18988 NODE_NAME_CASE(VFWMUL_VL) 18989 NODE_NAME_CASE(VFWADD_VL) 18990 NODE_NAME_CASE(VFWSUB_VL) 18991 NODE_NAME_CASE(VFWADD_W_VL) 18992 NODE_NAME_CASE(VFWSUB_W_VL) 18993 NODE_NAME_CASE(VWMACC_VL) 18994 NODE_NAME_CASE(VWMACCU_VL) 18995 NODE_NAME_CASE(VWMACCSU_VL) 18996 NODE_NAME_CASE(VNSRL_VL) 18997 NODE_NAME_CASE(SETCC_VL) 18998 NODE_NAME_CASE(VMERGE_VL) 18999 NODE_NAME_CASE(VMAND_VL) 19000 NODE_NAME_CASE(VMOR_VL) 19001 NODE_NAME_CASE(VMXOR_VL) 19002 NODE_NAME_CASE(VMCLR_VL) 19003 NODE_NAME_CASE(VMSET_VL) 19004 NODE_NAME_CASE(VRGATHER_VX_VL) 19005 NODE_NAME_CASE(VRGATHER_VV_VL) 19006 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 19007 NODE_NAME_CASE(VSEXT_VL) 19008 NODE_NAME_CASE(VZEXT_VL) 19009 NODE_NAME_CASE(VCPOP_VL) 19010 NODE_NAME_CASE(VFIRST_VL) 19011 NODE_NAME_CASE(READ_CSR) 19012 NODE_NAME_CASE(WRITE_CSR) 19013 NODE_NAME_CASE(SWAP_CSR) 19014 NODE_NAME_CASE(CZERO_EQZ) 19015 NODE_NAME_CASE(CZERO_NEZ) 19016 } 19017 // clang-format on 19018 return nullptr; 19019 #undef NODE_NAME_CASE 19020 } 19021 19022 /// getConstraintType - Given a constraint letter, return the type of 19023 /// constraint it is for this target. 19024 RISCVTargetLowering::ConstraintType 19025 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 19026 if (Constraint.size() == 1) { 19027 switch (Constraint[0]) { 19028 default: 19029 break; 19030 case 'f': 19031 return C_RegisterClass; 19032 case 'I': 19033 case 'J': 19034 case 'K': 19035 return C_Immediate; 19036 case 'A': 19037 return C_Memory; 19038 case 'S': // A symbolic address 19039 return C_Other; 19040 } 19041 } else { 19042 if (Constraint == "vr" || Constraint == "vm") 19043 return C_RegisterClass; 19044 } 19045 return TargetLowering::getConstraintType(Constraint); 19046 } 19047 19048 std::pair<unsigned, const TargetRegisterClass *> 19049 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 19050 StringRef Constraint, 19051 MVT VT) const { 19052 // First, see if this is a constraint that directly corresponds to a RISC-V 19053 // register class. 19054 if (Constraint.size() == 1) { 19055 switch (Constraint[0]) { 19056 case 'r': 19057 // TODO: Support fixed vectors up to XLen for P extension? 
19058 if (VT.isVector()) 19059 break; 19060 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) 19061 return std::make_pair(0U, &RISCV::GPRF16RegClass); 19062 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) 19063 return std::make_pair(0U, &RISCV::GPRF32RegClass); 19064 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) 19065 return std::make_pair(0U, &RISCV::GPRPairRegClass); 19066 return std::make_pair(0U, &RISCV::GPRNoX0RegClass); 19067 case 'f': 19068 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) 19069 return std::make_pair(0U, &RISCV::FPR16RegClass); 19070 if (Subtarget.hasStdExtF() && VT == MVT::f32) 19071 return std::make_pair(0U, &RISCV::FPR32RegClass); 19072 if (Subtarget.hasStdExtD() && VT == MVT::f64) 19073 return std::make_pair(0U, &RISCV::FPR64RegClass); 19074 break; 19075 default: 19076 break; 19077 } 19078 } else if (Constraint == "vr") { 19079 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, 19080 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 19081 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 19082 return std::make_pair(0U, RC); 19083 } 19084 } else if (Constraint == "vm") { 19085 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) 19086 return std::make_pair(0U, &RISCV::VMV0RegClass); 19087 } 19088 19089 // Clang will correctly decode the usage of register name aliases into their 19090 // official names. However, other frontends like `rustc` do not. This allows 19091 // users of these frontends to use the ABI names for registers in LLVM-style 19092 // register constraints. 19093 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 19094 .Case("{zero}", RISCV::X0) 19095 .Case("{ra}", RISCV::X1) 19096 .Case("{sp}", RISCV::X2) 19097 .Case("{gp}", RISCV::X3) 19098 .Case("{tp}", RISCV::X4) 19099 .Case("{t0}", RISCV::X5) 19100 .Case("{t1}", RISCV::X6) 19101 .Case("{t2}", RISCV::X7) 19102 .Cases("{s0}", "{fp}", RISCV::X8) 19103 .Case("{s1}", RISCV::X9) 19104 .Case("{a0}", RISCV::X10) 19105 .Case("{a1}", RISCV::X11) 19106 .Case("{a2}", RISCV::X12) 19107 .Case("{a3}", RISCV::X13) 19108 .Case("{a4}", RISCV::X14) 19109 .Case("{a5}", RISCV::X15) 19110 .Case("{a6}", RISCV::X16) 19111 .Case("{a7}", RISCV::X17) 19112 .Case("{s2}", RISCV::X18) 19113 .Case("{s3}", RISCV::X19) 19114 .Case("{s4}", RISCV::X20) 19115 .Case("{s5}", RISCV::X21) 19116 .Case("{s6}", RISCV::X22) 19117 .Case("{s7}", RISCV::X23) 19118 .Case("{s8}", RISCV::X24) 19119 .Case("{s9}", RISCV::X25) 19120 .Case("{s10}", RISCV::X26) 19121 .Case("{s11}", RISCV::X27) 19122 .Case("{t3}", RISCV::X28) 19123 .Case("{t4}", RISCV::X29) 19124 .Case("{t5}", RISCV::X30) 19125 .Case("{t6}", RISCV::X31) 19126 .Default(RISCV::NoRegister); 19127 if (XRegFromAlias != RISCV::NoRegister) 19128 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 19129 19130 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 19131 // TableGen record rather than the AsmName to choose registers for InlineAsm 19132 // constraints, plus we want to match those names to the widest floating point 19133 // register type available, manually select floating point registers here. 19134 // 19135 // The second case is the ABI name of the register, so that frontends can also 19136 // use the ABI names in register constraint lists. 
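  // For example, a "={fa0}" or "={f10}" constraint resolves to F10_F below,
  // and with the D extension and an f64 operand it is then remapped to F10_D
  // in the FPR64 register class.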
19137 if (Subtarget.hasStdExtF()) { 19138 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 19139 .Cases("{f0}", "{ft0}", RISCV::F0_F) 19140 .Cases("{f1}", "{ft1}", RISCV::F1_F) 19141 .Cases("{f2}", "{ft2}", RISCV::F2_F) 19142 .Cases("{f3}", "{ft3}", RISCV::F3_F) 19143 .Cases("{f4}", "{ft4}", RISCV::F4_F) 19144 .Cases("{f5}", "{ft5}", RISCV::F5_F) 19145 .Cases("{f6}", "{ft6}", RISCV::F6_F) 19146 .Cases("{f7}", "{ft7}", RISCV::F7_F) 19147 .Cases("{f8}", "{fs0}", RISCV::F8_F) 19148 .Cases("{f9}", "{fs1}", RISCV::F9_F) 19149 .Cases("{f10}", "{fa0}", RISCV::F10_F) 19150 .Cases("{f11}", "{fa1}", RISCV::F11_F) 19151 .Cases("{f12}", "{fa2}", RISCV::F12_F) 19152 .Cases("{f13}", "{fa3}", RISCV::F13_F) 19153 .Cases("{f14}", "{fa4}", RISCV::F14_F) 19154 .Cases("{f15}", "{fa5}", RISCV::F15_F) 19155 .Cases("{f16}", "{fa6}", RISCV::F16_F) 19156 .Cases("{f17}", "{fa7}", RISCV::F17_F) 19157 .Cases("{f18}", "{fs2}", RISCV::F18_F) 19158 .Cases("{f19}", "{fs3}", RISCV::F19_F) 19159 .Cases("{f20}", "{fs4}", RISCV::F20_F) 19160 .Cases("{f21}", "{fs5}", RISCV::F21_F) 19161 .Cases("{f22}", "{fs6}", RISCV::F22_F) 19162 .Cases("{f23}", "{fs7}", RISCV::F23_F) 19163 .Cases("{f24}", "{fs8}", RISCV::F24_F) 19164 .Cases("{f25}", "{fs9}", RISCV::F25_F) 19165 .Cases("{f26}", "{fs10}", RISCV::F26_F) 19166 .Cases("{f27}", "{fs11}", RISCV::F27_F) 19167 .Cases("{f28}", "{ft8}", RISCV::F28_F) 19168 .Cases("{f29}", "{ft9}", RISCV::F29_F) 19169 .Cases("{f30}", "{ft10}", RISCV::F30_F) 19170 .Cases("{f31}", "{ft11}", RISCV::F31_F) 19171 .Default(RISCV::NoRegister); 19172 if (FReg != RISCV::NoRegister) { 19173 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 19174 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { 19175 unsigned RegNo = FReg - RISCV::F0_F; 19176 unsigned DReg = RISCV::F0_D + RegNo; 19177 return std::make_pair(DReg, &RISCV::FPR64RegClass); 19178 } 19179 if (VT == MVT::f32 || VT == MVT::Other) 19180 return std::make_pair(FReg, &RISCV::FPR32RegClass); 19181 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { 19182 unsigned RegNo = FReg - RISCV::F0_F; 19183 unsigned HReg = RISCV::F0_H + RegNo; 19184 return std::make_pair(HReg, &RISCV::FPR16RegClass); 19185 } 19186 } 19187 } 19188 19189 if (Subtarget.hasVInstructions()) { 19190 Register VReg = StringSwitch<Register>(Constraint.lower()) 19191 .Case("{v0}", RISCV::V0) 19192 .Case("{v1}", RISCV::V1) 19193 .Case("{v2}", RISCV::V2) 19194 .Case("{v3}", RISCV::V3) 19195 .Case("{v4}", RISCV::V4) 19196 .Case("{v5}", RISCV::V5) 19197 .Case("{v6}", RISCV::V6) 19198 .Case("{v7}", RISCV::V7) 19199 .Case("{v8}", RISCV::V8) 19200 .Case("{v9}", RISCV::V9) 19201 .Case("{v10}", RISCV::V10) 19202 .Case("{v11}", RISCV::V11) 19203 .Case("{v12}", RISCV::V12) 19204 .Case("{v13}", RISCV::V13) 19205 .Case("{v14}", RISCV::V14) 19206 .Case("{v15}", RISCV::V15) 19207 .Case("{v16}", RISCV::V16) 19208 .Case("{v17}", RISCV::V17) 19209 .Case("{v18}", RISCV::V18) 19210 .Case("{v19}", RISCV::V19) 19211 .Case("{v20}", RISCV::V20) 19212 .Case("{v21}", RISCV::V21) 19213 .Case("{v22}", RISCV::V22) 19214 .Case("{v23}", RISCV::V23) 19215 .Case("{v24}", RISCV::V24) 19216 .Case("{v25}", RISCV::V25) 19217 .Case("{v26}", RISCV::V26) 19218 .Case("{v27}", RISCV::V27) 19219 .Case("{v28}", RISCV::V28) 19220 .Case("{v29}", RISCV::V29) 19221 .Case("{v30}", RISCV::V30) 19222 .Case("{v31}", RISCV::V31) 19223 .Default(RISCV::NoRegister); 19224 if (VReg != RISCV::NoRegister) { 19225 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 19226 return 
std::make_pair(VReg, &RISCV::VMRegClass); 19227 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 19228 return std::make_pair(VReg, &RISCV::VRRegClass); 19229 for (const auto *RC : 19230 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 19231 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 19232 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 19233 return std::make_pair(VReg, RC); 19234 } 19235 } 19236 } 19237 } 19238 19239 std::pair<Register, const TargetRegisterClass *> Res = 19240 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 19241 19242 // If we picked one of the Zfinx register classes, remap it to the GPR class. 19243 // FIXME: When Zfinx is supported in CodeGen this will need to take the 19244 // Subtarget into account. 19245 if (Res.second == &RISCV::GPRF16RegClass || 19246 Res.second == &RISCV::GPRF32RegClass || 19247 Res.second == &RISCV::GPRPairRegClass) 19248 return std::make_pair(Res.first, &RISCV::GPRRegClass); 19249 19250 return Res; 19251 } 19252 19253 InlineAsm::ConstraintCode 19254 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 19255 // Currently only support length 1 constraints. 19256 if (ConstraintCode.size() == 1) { 19257 switch (ConstraintCode[0]) { 19258 case 'A': 19259 return InlineAsm::ConstraintCode::A; 19260 default: 19261 break; 19262 } 19263 } 19264 19265 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 19266 } 19267 19268 void RISCVTargetLowering::LowerAsmOperandForConstraint( 19269 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 19270 SelectionDAG &DAG) const { 19271 // Currently only support length 1 constraints. 19272 if (Constraint.size() == 1) { 19273 switch (Constraint[0]) { 19274 case 'I': 19275 // Validate & create a 12-bit signed immediate operand. 19276 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 19277 uint64_t CVal = C->getSExtValue(); 19278 if (isInt<12>(CVal)) 19279 Ops.push_back( 19280 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 19281 } 19282 return; 19283 case 'J': 19284 // Validate & create an integer zero operand. 19285 if (isNullConstant(Op)) 19286 Ops.push_back( 19287 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 19288 return; 19289 case 'K': 19290 // Validate & create a 5-bit unsigned immediate operand. 
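      // For illustration, inline asm along the lines of
      //   asm volatile("csrrci zero, fflags, %0" :: "K"(1));
      // reaches this path; a value outside [0, 31] adds no operand here and
      // is later diagnosed as an invalid constraint.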
19291 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 19292 uint64_t CVal = C->getZExtValue(); 19293 if (isUInt<5>(CVal)) 19294 Ops.push_back( 19295 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 19296 } 19297 return; 19298 case 'S': 19299 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 19300 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), 19301 GA->getValueType(0))); 19302 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { 19303 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), 19304 BA->getValueType(0))); 19305 } 19306 return; 19307 default: 19308 break; 19309 } 19310 } 19311 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 19312 } 19313 19314 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, 19315 Instruction *Inst, 19316 AtomicOrdering Ord) const { 19317 if (Subtarget.hasStdExtZtso()) { 19318 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19319 return Builder.CreateFence(Ord); 19320 return nullptr; 19321 } 19322 19323 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19324 return Builder.CreateFence(Ord); 19325 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 19326 return Builder.CreateFence(AtomicOrdering::Release); 19327 return nullptr; 19328 } 19329 19330 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 19331 Instruction *Inst, 19332 AtomicOrdering Ord) const { 19333 if (Subtarget.hasStdExtZtso()) { 19334 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19335 return Builder.CreateFence(Ord); 19336 return nullptr; 19337 } 19338 19339 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 19340 return Builder.CreateFence(AtomicOrdering::Acquire); 19341 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) && 19342 Ord == AtomicOrdering::SequentiallyConsistent) 19343 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); 19344 return nullptr; 19345 } 19346 19347 TargetLowering::AtomicExpansionKind 19348 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 19349 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 19350 // point operations can't be used in an lr/sc sequence without breaking the 19351 // forward-progress guarantee. 19352 if (AI->isFloatingPointOperation() || 19353 AI->getOperation() == AtomicRMWInst::UIncWrap || 19354 AI->getOperation() == AtomicRMWInst::UDecWrap) 19355 return AtomicExpansionKind::CmpXChg; 19356 19357 // Don't expand forced atomics, we want to have __sync libcalls instead. 
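  // For illustration: with +forced-atomics an i32 atomicrmw add is turned into
  // a __sync_fetch_and_add_4 libcall, while without it an i8/i16 RMW is
  // expanded below into a masked LR/SC loop on the containing aligned word.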
19358 if (Subtarget.hasForcedAtomics()) 19359 return AtomicExpansionKind::None; 19360 19361 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 19362 if (Size == 8 || Size == 16) 19363 return AtomicExpansionKind::MaskedIntrinsic; 19364 return AtomicExpansionKind::None; 19365 } 19366 19367 static Intrinsic::ID 19368 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 19369 if (XLen == 32) { 19370 switch (BinOp) { 19371 default: 19372 llvm_unreachable("Unexpected AtomicRMW BinOp"); 19373 case AtomicRMWInst::Xchg: 19374 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 19375 case AtomicRMWInst::Add: 19376 return Intrinsic::riscv_masked_atomicrmw_add_i32; 19377 case AtomicRMWInst::Sub: 19378 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 19379 case AtomicRMWInst::Nand: 19380 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 19381 case AtomicRMWInst::Max: 19382 return Intrinsic::riscv_masked_atomicrmw_max_i32; 19383 case AtomicRMWInst::Min: 19384 return Intrinsic::riscv_masked_atomicrmw_min_i32; 19385 case AtomicRMWInst::UMax: 19386 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 19387 case AtomicRMWInst::UMin: 19388 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 19389 } 19390 } 19391 19392 if (XLen == 64) { 19393 switch (BinOp) { 19394 default: 19395 llvm_unreachable("Unexpected AtomicRMW BinOp"); 19396 case AtomicRMWInst::Xchg: 19397 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 19398 case AtomicRMWInst::Add: 19399 return Intrinsic::riscv_masked_atomicrmw_add_i64; 19400 case AtomicRMWInst::Sub: 19401 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 19402 case AtomicRMWInst::Nand: 19403 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 19404 case AtomicRMWInst::Max: 19405 return Intrinsic::riscv_masked_atomicrmw_max_i64; 19406 case AtomicRMWInst::Min: 19407 return Intrinsic::riscv_masked_atomicrmw_min_i64; 19408 case AtomicRMWInst::UMax: 19409 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 19410 case AtomicRMWInst::UMin: 19411 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 19412 } 19413 } 19414 19415 llvm_unreachable("Unexpected XLen\n"); 19416 } 19417 19418 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 19419 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 19420 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 19421 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 19422 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 19423 // mask, as this produces better code than the LR/SC loop emitted by 19424 // int_riscv_masked_atomicrmw_xchg. 
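  // For example, an i8 "atomicrmw xchg ptr %p, i8 0" becomes an atomicrmw and
  // of the aligned word with ~Mask (clearing only that byte), and exchanging
  // -1 becomes an atomicrmw or with Mask.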
19425 if (AI->getOperation() == AtomicRMWInst::Xchg && 19426 isa<ConstantInt>(AI->getValOperand())) { 19427 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 19428 if (CVal->isZero()) 19429 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 19430 Builder.CreateNot(Mask, "Inv_Mask"), 19431 AI->getAlign(), Ord); 19432 if (CVal->isMinusOne()) 19433 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 19434 AI->getAlign(), Ord); 19435 } 19436 19437 unsigned XLen = Subtarget.getXLen(); 19438 Value *Ordering = 19439 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 19440 Type *Tys[] = {AlignedAddr->getType()}; 19441 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 19442 AI->getModule(), 19443 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 19444 19445 if (XLen == 64) { 19446 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 19447 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 19448 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 19449 } 19450 19451 Value *Result; 19452 19453 // Must pass the shift amount needed to sign extend the loaded value prior 19454 // to performing a signed comparison for min/max. ShiftAmt is the number of 19455 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 19456 // is the number of bits to left+right shift the value in order to 19457 // sign-extend. 19458 if (AI->getOperation() == AtomicRMWInst::Min || 19459 AI->getOperation() == AtomicRMWInst::Max) { 19460 const DataLayout &DL = AI->getModule()->getDataLayout(); 19461 unsigned ValWidth = 19462 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 19463 Value *SextShamt = 19464 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 19465 Result = Builder.CreateCall(LrwOpScwLoop, 19466 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 19467 } else { 19468 Result = 19469 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 19470 } 19471 19472 if (XLen == 64) 19473 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 19474 return Result; 19475 } 19476 19477 TargetLowering::AtomicExpansionKind 19478 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 19479 AtomicCmpXchgInst *CI) const { 19480 // Don't expand forced atomics, we want to have __sync libcalls instead. 
19481 if (Subtarget.hasForcedAtomics()) 19482 return AtomicExpansionKind::None; 19483 19484 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 19485 if (Size == 8 || Size == 16) 19486 return AtomicExpansionKind::MaskedIntrinsic; 19487 return AtomicExpansionKind::None; 19488 } 19489 19490 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 19491 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 19492 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 19493 unsigned XLen = Subtarget.getXLen(); 19494 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 19495 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 19496 if (XLen == 64) { 19497 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 19498 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 19499 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 19500 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 19501 } 19502 Type *Tys[] = {AlignedAddr->getType()}; 19503 Function *MaskedCmpXchg = 19504 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 19505 Value *Result = Builder.CreateCall( 19506 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 19507 if (XLen == 64) 19508 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 19509 return Result; 19510 } 19511 19512 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, 19513 EVT DataVT) const { 19514 // We have indexed loads for all legal index types. Indices are always 19515 // zero extended 19516 return Extend.getOpcode() == ISD::ZERO_EXTEND && 19517 isTypeLegal(Extend.getValueType()) && 19518 isTypeLegal(Extend.getOperand(0).getValueType()); 19519 } 19520 19521 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, 19522 EVT VT) const { 19523 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) 19524 return false; 19525 19526 switch (FPVT.getSimpleVT().SimpleTy) { 19527 case MVT::f16: 19528 return Subtarget.hasStdExtZfhmin(); 19529 case MVT::f32: 19530 return Subtarget.hasStdExtF(); 19531 case MVT::f64: 19532 return Subtarget.hasStdExtD(); 19533 default: 19534 return false; 19535 } 19536 } 19537 19538 unsigned RISCVTargetLowering::getJumpTableEncoding() const { 19539 // If we are using the small code model, we can reduce size of jump table 19540 // entry to 4 bytes. 19541 if (Subtarget.is64Bit() && !isPositionIndependent() && 19542 getTargetMachine().getCodeModel() == CodeModel::Small) { 19543 return MachineJumpTableInfo::EK_Custom32; 19544 } 19545 return TargetLowering::getJumpTableEncoding(); 19546 } 19547 19548 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( 19549 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, 19550 unsigned uid, MCContext &Ctx) const { 19551 assert(Subtarget.is64Bit() && !isPositionIndependent() && 19552 getTargetMachine().getCodeModel() == CodeModel::Small); 19553 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); 19554 } 19555 19556 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { 19557 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 19558 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 19559 // a power of two as well. 19560 // FIXME: This doesn't work for zve32, but that's already broken 19561 // elsewhere for the same reason. 
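  // For example, VLEN=128 gives vscale = 128/64 = 2 and VLEN=512 gives 8;
  // both are powers of two because VLEN itself is.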
19562 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); 19563 static_assert(RISCV::RVVBitsPerBlock == 64, 19564 "RVVBitsPerBlock changed, audit needed"); 19565 return true; 19566 } 19567 19568 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, 19569 SDValue &Offset, 19570 ISD::MemIndexedMode &AM, 19571 SelectionDAG &DAG) const { 19572 // Target does not support indexed loads. 19573 if (!Subtarget.hasVendorXTHeadMemIdx()) 19574 return false; 19575 19576 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) 19577 return false; 19578 19579 Base = Op->getOperand(0); 19580 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { 19581 int64_t RHSC = RHS->getSExtValue(); 19582 if (Op->getOpcode() == ISD::SUB) 19583 RHSC = -(uint64_t)RHSC; 19584 19585 // The constants that can be encoded in the THeadMemIdx instructions 19586 // are of the form (sign_extend(imm5) << imm2). 19587 bool isLegalIndexedOffset = false; 19588 for (unsigned i = 0; i < 4; i++) 19589 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { 19590 isLegalIndexedOffset = true; 19591 break; 19592 } 19593 19594 if (!isLegalIndexedOffset) 19595 return false; 19596 19597 Offset = Op->getOperand(1); 19598 return true; 19599 } 19600 19601 return false; 19602 } 19603 19604 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 19605 SDValue &Offset, 19606 ISD::MemIndexedMode &AM, 19607 SelectionDAG &DAG) const { 19608 EVT VT; 19609 SDValue Ptr; 19610 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 19611 VT = LD->getMemoryVT(); 19612 Ptr = LD->getBasePtr(); 19613 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 19614 VT = ST->getMemoryVT(); 19615 Ptr = ST->getBasePtr(); 19616 } else 19617 return false; 19618 19619 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG)) 19620 return false; 19621 19622 AM = ISD::PRE_INC; 19623 return true; 19624 } 19625 19626 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 19627 SDValue &Base, 19628 SDValue &Offset, 19629 ISD::MemIndexedMode &AM, 19630 SelectionDAG &DAG) const { 19631 EVT VT; 19632 SDValue Ptr; 19633 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 19634 VT = LD->getMemoryVT(); 19635 Ptr = LD->getBasePtr(); 19636 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 19637 VT = ST->getMemoryVT(); 19638 Ptr = ST->getBasePtr(); 19639 } else 19640 return false; 19641 19642 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) 19643 return false; 19644 // Post-indexing updates the base, so it's not a valid transform 19645 // if that's not the same as the load's pointer. 19646 if (Ptr != Base) 19647 return false; 19648 19649 AM = ISD::POST_INC; 19650 return true; 19651 } 19652 19653 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 19654 EVT VT) const { 19655 EVT SVT = VT.getScalarType(); 19656 19657 if (!SVT.isSimple()) 19658 return false; 19659 19660 switch (SVT.getSimpleVT().SimpleTy) { 19661 case MVT::f16: 19662 return VT.isVector() ? Subtarget.hasVInstructionsF16() 19663 : Subtarget.hasStdExtZfhOrZhinx(); 19664 case MVT::f32: 19665 return Subtarget.hasStdExtFOrZfinx(); 19666 case MVT::f64: 19667 return Subtarget.hasStdExtDOrZdinx(); 19668 default: 19669 break; 19670 } 19671 19672 return false; 19673 } 19674 19675 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { 19676 // Zacas will use amocas.w which does not require extension. 19677 return Subtarget.hasStdExtZacas() ? 
ISD::ANY_EXTEND : ISD::SIGN_EXTEND; 19678 } 19679 19680 Register RISCVTargetLowering::getExceptionPointerRegister( 19681 const Constant *PersonalityFn) const { 19682 return RISCV::X10; 19683 } 19684 19685 Register RISCVTargetLowering::getExceptionSelectorRegister( 19686 const Constant *PersonalityFn) const { 19687 return RISCV::X11; 19688 } 19689 19690 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 19691 // Return false to suppress the unnecessary extensions if the LibCall 19692 // arguments or return value is a float narrower than XLEN on a soft FP ABI. 19693 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && 19694 Type.getSizeInBits() < Subtarget.getXLen())) 19695 return false; 19696 19697 return true; 19698 } 19699 19700 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 19701 if (Subtarget.is64Bit() && Type == MVT::i32) 19702 return true; 19703 19704 return IsSigned; 19705 } 19706 19707 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 19708 SDValue C) const { 19709 // Check integral scalar types. 19710 const bool HasExtMOrZmmul = 19711 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); 19712 if (!VT.isScalarInteger()) 19713 return false; 19714 19715 // Omit the optimization if the sub target has the M extension and the data 19716 // size exceeds XLen. 19717 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) 19718 return false; 19719 19720 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 19721 // Break the MUL to a SLLI and an ADD/SUB. 19722 const APInt &Imm = ConstNode->getAPIntValue(); 19723 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 19724 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 19725 return true; 19726 19727 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 19728 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && 19729 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 19730 (Imm - 8).isPowerOf2())) 19731 return true; 19732 19733 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 19734 // a pair of LUI/ADDI. 19735 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 && 19736 ConstNode->hasOneUse()) { 19737 APInt ImmS = Imm.ashr(Imm.countr_zero()); 19738 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 19739 (1 - ImmS).isPowerOf2()) 19740 return true; 19741 } 19742 } 19743 19744 return false; 19745 } 19746 19747 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, 19748 SDValue ConstNode) const { 19749 // Let the DAGCombiner decide for vectors. 19750 EVT VT = AddNode.getValueType(); 19751 if (VT.isVector()) 19752 return true; 19753 19754 // Let the DAGCombiner decide for larger types. 19755 if (VT.getScalarSizeInBits() > Subtarget.getXLen()) 19756 return true; 19757 19758 // It is worse if c1 is simm12 while c1*c2 is not. 19759 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); 19760 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); 19761 const APInt &C1 = C1Node->getAPIntValue(); 19762 const APInt &C2 = C2Node->getAPIntValue(); 19763 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) 19764 return false; 19765 19766 // Default to true and let the DAGCombiner decide. 
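  // For instance, C1 = 100 and C2 = 50 give C1 * C2 = 5000, which no longer
  // fits in simm12, so the transform is rejected above; with C1 = 3 and
  // C2 = 5 the product 15 still fits and we fall through.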
19767 return true; 19768 } 19769 19770 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 19771 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 19772 unsigned *Fast) const { 19773 if (!VT.isVector()) { 19774 if (Fast) 19775 *Fast = Subtarget.hasFastUnalignedAccess(); 19776 return Subtarget.hasFastUnalignedAccess(); 19777 } 19778 19779 // All vector implementations must support element alignment 19780 EVT ElemVT = VT.getVectorElementType(); 19781 if (Alignment >= ElemVT.getStoreSize()) { 19782 if (Fast) 19783 *Fast = 1; 19784 return true; 19785 } 19786 19787 // Note: We lower an unmasked unaligned vector access to an equally sized 19788 // e8 element type access. Given this, we effectively support all unmasked 19789 // misaligned accesses. TODO: Work through the codegen implications of 19790 // allowing such accesses to be formed, and considered fast. 19791 if (Fast) 19792 *Fast = Subtarget.hasFastUnalignedAccess(); 19793 return Subtarget.hasFastUnalignedAccess(); 19794 } 19795 19796 19797 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, 19798 const AttributeList &FuncAttributes) const { 19799 if (!Subtarget.hasVInstructions()) 19800 return MVT::Other; 19801 19802 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) 19803 return MVT::Other; 19804 19805 // We use LMUL1 memory operations here for a non-obvious reason. Our caller 19806 // has an expansion threshold, and we want the number of hardware memory 19807 // operations to correspond roughly to that threshold. LMUL>1 operations 19808 // are typically expanded linearly internally, and thus correspond to more 19809 // than one actual memory operation. Note that store merging and load 19810 // combining will typically form larger LMUL operations from the LMUL1 19811 // operations emitted here, and that's okay because combining isn't 19812 // introducing new memory operations; it's just merging existing ones. 19813 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; 19814 if (Op.size() < MinVLenInBytes) 19815 // TODO: Figure out short memops. For the moment, do the default thing 19816 // which ends up using scalar sequences. 19817 return MVT::Other; 19818 19819 // Prefer i8 for non-zero memset as it allows us to avoid materializing 19820 // a large scalar constant and instead use vmv.v.x/i to do the 19821 // broadcast. For everything else, prefer ELenVT to minimize VL and thus 19822 // maximize the chance we can encode the size in the vsetvli. 19823 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen()); 19824 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; 19825 19826 // Do we have sufficient alignment for our preferred VT? If not, revert 19827 // to largest size allowed by our alignment criteria. 
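  // For example, with ELEN=64 but a memcpy destination that is only 4-byte
  // aligned (and no fast unaligned access), PreferredVT drops from i64 to i32;
  // with VLEN=128 the returned type is then v4i32 rather than v2i64.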
19828 if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { 19829 Align RequiredAlign(PreferredVT.getStoreSize()); 19830 if (Op.isFixedDstAlign()) 19831 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); 19832 if (Op.isMemcpy()) 19833 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign()); 19834 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8); 19835 } 19836 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize()); 19837 } 19838 19839 bool RISCVTargetLowering::splitValueIntoRegisterParts( 19840 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 19841 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 19842 bool IsABIRegCopy = CC.has_value(); 19843 EVT ValueVT = Val.getValueType(); 19844 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 19845 PartVT == MVT::f32) { 19846 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float 19847 // nan, and cast to f32. 19848 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); 19849 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); 19850 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, 19851 DAG.getConstant(0xFFFF0000, DL, MVT::i32)); 19852 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 19853 Parts[0] = Val; 19854 return true; 19855 } 19856 19857 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 19858 LLVMContext &Context = *DAG.getContext(); 19859 EVT ValueEltVT = ValueVT.getVectorElementType(); 19860 EVT PartEltVT = PartVT.getVectorElementType(); 19861 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); 19862 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); 19863 if (PartVTBitSize % ValueVTBitSize == 0) { 19864 assert(PartVTBitSize >= ValueVTBitSize); 19865 // If the element types are different, bitcast to the same element type of 19866 // PartVT first. 19867 // Give an example here, we want copy a <vscale x 1 x i8> value to 19868 // <vscale x 4 x i16>. 19869 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert 19870 // subvector, then we can bitcast to <vscale x 4 x i16>. 19871 if (ValueEltVT != PartEltVT) { 19872 if (PartVTBitSize > ValueVTBitSize) { 19873 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); 19874 assert(Count != 0 && "The number of element should not be zero."); 19875 EVT SameEltTypeVT = 19876 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); 19877 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT, 19878 DAG.getUNDEF(SameEltTypeVT), Val, 19879 DAG.getVectorIdxConstant(0, DL)); 19880 } 19881 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 19882 } else { 19883 Val = 19884 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), 19885 Val, DAG.getVectorIdxConstant(0, DL)); 19886 } 19887 Parts[0] = Val; 19888 return true; 19889 } 19890 } 19891 return false; 19892 } 19893 19894 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( 19895 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 19896 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 19897 bool IsABIRegCopy = CC.has_value(); 19898 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 19899 PartVT == MVT::f32) { 19900 SDValue Val = Parts[0]; 19901 19902 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. 
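    // For example, an f16 value NaN-boxed into an FPR as 0xFFFF3C00 truncates
    // back to 0x3C00 (1.0); the all-ones upper bits added on the split side
    // are simply dropped.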
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types differ, first bitcast the part to a vector that
      // has ValueVT's element type but PartVT's size.
      // For example, to copy a <vscale x 1 x i8> value out of a
      // <vscale x 4 x i16> part, we bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8> and then extract the <vscale x 1 x i8> subvector.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing zero_extend and sign_extend can prevent them from being
  // matched to widening instructions in some situations.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
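  // For instance, an illegal type such as <vscale x 16 x i64> is rejected here
  // and the interleaved-access pass keeps the original shuffle/intrinsic
  // sequence instead.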
19979 if (!isTypeLegal(VT)) 19980 return false; 19981 19982 if (!isLegalElementTypeForRVV(VT.getScalarType()) || 19983 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, 19984 Alignment)) 19985 return false; 19986 19987 MVT ContainerVT = VT.getSimpleVT(); 19988 19989 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 19990 if (!Subtarget.useRVVForFixedLengthVectors()) 19991 return false; 19992 // Sometimes the interleaved access pass picks up splats as interleaves of 19993 // one element. Don't lower these. 19994 if (FVTy->getNumElements() < 2) 19995 return false; 19996 19997 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT()); 19998 } 19999 20000 // Need to make sure that EMUL * NFIELDS ≤ 8 20001 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT)); 20002 if (Fractional) 20003 return true; 20004 return Factor * LMUL <= 8; 20005 } 20006 20007 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, 20008 Align Alignment) const { 20009 if (!Subtarget.hasVInstructions()) 20010 return false; 20011 20012 // Only support fixed vectors if we know the minimum vector size. 20013 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors()) 20014 return false; 20015 20016 EVT ScalarType = DataType.getScalarType(); 20017 if (!isLegalElementTypeForRVV(ScalarType)) 20018 return false; 20019 20020 if (!Subtarget.hasFastUnalignedAccess() && 20021 Alignment < ScalarType.getStoreSize()) 20022 return false; 20023 20024 return true; 20025 } 20026 20027 static const Intrinsic::ID FixedVlsegIntrIds[] = { 20028 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, 20029 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, 20030 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, 20031 Intrinsic::riscv_seg8_load}; 20032 20033 /// Lower an interleaved load into a vlsegN intrinsic. 20034 /// 20035 /// E.g. 
Lower an interleaved load (Factor = 2): 20036 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr 20037 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements 20038 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements 20039 /// 20040 /// Into: 20041 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( 20042 /// %ptr, i64 4) 20043 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 20044 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 20045 bool RISCVTargetLowering::lowerInterleavedLoad( 20046 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, 20047 ArrayRef<unsigned> Indices, unsigned Factor) const { 20048 IRBuilder<> Builder(LI); 20049 20050 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); 20051 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), 20052 LI->getPointerAddressSpace(), 20053 LI->getModule()->getDataLayout())) 20054 return false; 20055 20056 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 20057 20058 Function *VlsegNFunc = 20059 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2], 20060 {VTy, LI->getPointerOperandType(), XLenTy}); 20061 20062 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 20063 20064 CallInst *VlsegN = 20065 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); 20066 20067 for (unsigned i = 0; i < Shuffles.size(); i++) { 20068 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); 20069 Shuffles[i]->replaceAllUsesWith(SubVec); 20070 } 20071 20072 return true; 20073 } 20074 20075 static const Intrinsic::ID FixedVssegIntrIds[] = { 20076 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, 20077 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, 20078 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, 20079 Intrinsic::riscv_seg8_store}; 20080 20081 /// Lower an interleaved store into a vssegN intrinsic. 20082 /// 20083 /// E.g. Lower an interleaved store (Factor = 3): 20084 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 20085 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 20086 /// store <12 x i32> %i.vec, <12 x i32>* %ptr 20087 /// 20088 /// Into: 20089 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> 20090 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> 20091 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> 20092 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, 20093 /// %ptr, i32 4) 20094 /// 20095 /// Note that the new shufflevectors will be removed and we'll only generate one 20096 /// vsseg3 instruction in CodeGen. 
20097 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, 20098 ShuffleVectorInst *SVI, 20099 unsigned Factor) const { 20100 IRBuilder<> Builder(SI); 20101 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); 20102 // Given SVI : <n*factor x ty>, then VTy : <n x ty> 20103 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), 20104 ShuffleVTy->getNumElements() / Factor); 20105 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), 20106 SI->getPointerAddressSpace(), 20107 SI->getModule()->getDataLayout())) 20108 return false; 20109 20110 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 20111 20112 Function *VssegNFunc = 20113 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2], 20114 {VTy, SI->getPointerOperandType(), XLenTy}); 20115 20116 auto Mask = SVI->getShuffleMask(); 20117 SmallVector<Value *, 10> Ops; 20118 20119 for (unsigned i = 0; i < Factor; i++) { 20120 Value *Shuffle = Builder.CreateShuffleVector( 20121 SVI->getOperand(0), SVI->getOperand(1), 20122 createSequentialMask(Mask[i], VTy->getNumElements(), 0)); 20123 Ops.push_back(Shuffle); 20124 } 20125 // This VL should be OK (should be executable in one vsseg instruction, 20126 // potentially under larger LMULs) because we checked that the fixed vector 20127 // type fits in isLegalInterleavedAccessType 20128 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 20129 Ops.append({SI->getPointerOperand(), VL}); 20130 20131 Builder.CreateCall(VssegNFunc, Ops); 20132 20133 return true; 20134 } 20135 20136 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, 20137 LoadInst *LI) const { 20138 assert(LI->isSimple()); 20139 IRBuilder<> Builder(LI); 20140 20141 // Only deinterleave2 supported at present. 
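  // A sketch of the scalable pattern handled here:
  //   %l = load <vscale x 8 x i32>, ptr %p
  //   %d = call {<vscale x 4 x i32>, <vscale x 4 x i32>}
  //            @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %l)
  // which is rewritten below into a call to the riscv_vlseg2 intrinsic with an
  // all-ones (VLMAX) VL.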
20142 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2) 20143 return false; 20144 20145 unsigned Factor = 2; 20146 20147 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType()); 20148 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0)); 20149 20150 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), 20151 LI->getPointerAddressSpace(), 20152 LI->getModule()->getDataLayout())) 20153 return false; 20154 20155 Function *VlsegNFunc; 20156 Value *VL; 20157 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 20158 SmallVector<Value *, 10> Ops; 20159 20160 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 20161 VlsegNFunc = Intrinsic::getDeclaration( 20162 LI->getModule(), FixedVlsegIntrIds[Factor - 2], 20163 {ResVTy, LI->getPointerOperandType(), XLenTy}); 20164 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 20165 } else { 20166 static const Intrinsic::ID IntrIds[] = { 20167 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 20168 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 20169 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 20170 Intrinsic::riscv_vlseg8}; 20171 20172 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], 20173 {ResVTy, XLenTy}); 20174 VL = Constant::getAllOnesValue(XLenTy); 20175 Ops.append(Factor, PoisonValue::get(ResVTy)); 20176 } 20177 20178 Ops.append({LI->getPointerOperand(), VL}); 20179 20180 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); 20181 DI->replaceAllUsesWith(Vlseg); 20182 20183 return true; 20184 } 20185 20186 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, 20187 StoreInst *SI) const { 20188 assert(SI->isSimple()); 20189 IRBuilder<> Builder(SI); 20190 20191 // Only interleave2 supported at present. 
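  // Roughly the mirror of the deinterleave case above: a store of
  //   %w = call <vscale x 8 x i32>
  //            @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a,
  //                                                          <vscale x 4 x i32> %b)
  // is emitted as a riscv_vsseg2 call on the two operands with a VLMAX VL.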
20192 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2) 20193 return false; 20194 20195 unsigned Factor = 2; 20196 20197 VectorType *VTy = cast<VectorType>(II->getType()); 20198 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType()); 20199 20200 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), 20201 SI->getPointerAddressSpace(), 20202 SI->getModule()->getDataLayout())) 20203 return false; 20204 20205 Function *VssegNFunc; 20206 Value *VL; 20207 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 20208 20209 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 20210 VssegNFunc = Intrinsic::getDeclaration( 20211 SI->getModule(), FixedVssegIntrIds[Factor - 2], 20212 {InVTy, SI->getPointerOperandType(), XLenTy}); 20213 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 20214 } else { 20215 static const Intrinsic::ID IntrIds[] = { 20216 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 20217 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 20218 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 20219 Intrinsic::riscv_vsseg8}; 20220 20221 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], 20222 {InVTy, XLenTy}); 20223 VL = Constant::getAllOnesValue(XLenTy); 20224 } 20225 20226 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1), 20227 SI->getPointerOperand(), VL}); 20228 20229 return true; 20230 } 20231 20232 MachineInstr * 20233 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, 20234 MachineBasicBlock::instr_iterator &MBBI, 20235 const TargetInstrInfo *TII) const { 20236 assert(MBBI->isCall() && MBBI->getCFIType() && 20237 "Invalid call instruction for a KCFI check"); 20238 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, 20239 MBBI->getOpcode())); 20240 20241 MachineOperand &Target = MBBI->getOperand(0); 20242 Target.setIsRenamable(false); 20243 20244 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK)) 20245 .addReg(Target.getReg()) 20246 .addImm(MBBI->getCFIType()) 20247 .getInstr(); 20248 } 20249 20250 #define GET_REGISTER_MATCHER 20251 #include "RISCVGenAsmMatcher.inc" 20252 20253 Register 20254 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 20255 const MachineFunction &MF) const { 20256 Register Reg = MatchRegisterAltName(RegName); 20257 if (Reg == RISCV::NoRegister) 20258 Reg = MatchRegisterName(RegName); 20259 if (Reg == RISCV::NoRegister) 20260 report_fatal_error( 20261 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 20262 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 20263 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 20264 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 20265 StringRef(RegName) + "\".")); 20266 return Reg; 20267 } 20268 20269 MachineMemOperand::Flags 20270 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { 20271 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal); 20272 20273 if (NontemporalInfo == nullptr) 20274 return MachineMemOperand::MONone; 20275 20276 // 1 for default value work as __RISCV_NTLH_ALL 20277 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE 20278 // 3 -> __RISCV_NTLH_ALL_PRIVATE 20279 // 4 -> __RISCV_NTLH_INNERMOST_SHARED 20280 // 5 -> __RISCV_NTLH_ALL 20281 int NontemporalLevel = 5; 20282 const MDNode *RISCVNontemporalInfo = 20283 I.getMetadata("riscv-nontemporal-domain"); 20284 if (RISCVNontemporalInfo != nullptr) 20285 
NontemporalLevel = 20286 cast<ConstantInt>( 20287 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0)) 20288 ->getValue()) 20289 ->getZExtValue(); 20290 20291 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) && 20292 "RISC-V target doesn't support this non-temporal domain."); 20293 20294 NontemporalLevel -= 2; 20295 MachineMemOperand::Flags Flags = MachineMemOperand::MONone; 20296 if (NontemporalLevel & 0b1) 20297 Flags |= MONontemporalBit0; 20298 if (NontemporalLevel & 0b10) 20299 Flags |= MONontemporalBit1; 20300 20301 return Flags; 20302 } 20303 20304 MachineMemOperand::Flags 20305 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { 20306 20307 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); 20308 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; 20309 TargetFlags |= (NodeFlags & MONontemporalBit0); 20310 TargetFlags |= (NodeFlags & MONontemporalBit1); 20311 20312 return TargetFlags; 20313 } 20314 20315 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( 20316 const MemSDNode &NodeX, const MemSDNode &NodeY) const { 20317 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); 20318 } 20319 20320 bool RISCVTargetLowering::isCtpopFast(EVT VT) const { 20321 if (VT.isScalableVector()) 20322 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); 20323 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) 20324 return true; 20325 return Subtarget.hasStdExtZbb() && 20326 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); 20327 } 20328 20329 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, 20330 ISD::CondCode Cond) const { 20331 return isCtpopFast(VT) ? 0 : 1; 20332 } 20333 20334 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { 20335 20336 // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and 20337 // G_XOR. 20338 unsigned Op = Inst.getOpcode(); 20339 if (Op == Instruction::Add || Op == Instruction::Sub || 20340 Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) 20341 return false; 20342 20343 if (Inst.getType()->isScalableTy()) 20344 return true; 20345 20346 for (unsigned i = 0; i < Inst.getNumOperands(); ++i) 20347 if (Inst.getOperand(i)->getType()->isScalableTy() && 20348 !isa<ReturnInst>(&Inst)) 20349 return true; 20350 20351 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { 20352 if (AI->getAllocatedType()->isScalableTy()) 20353 return true; 20354 } 20355 20356 return false; 20357 } 20358 20359 SDValue 20360 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 20361 SelectionDAG &DAG, 20362 SmallVectorImpl<SDNode *> &Created) const { 20363 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 20364 if (isIntDivCheap(N->getValueType(0), Attr)) 20365 return SDValue(N, 0); // Lower SDIV as SDIV 20366 20367 // Only perform this transform if short forward branch opt is supported. 20368 if (!Subtarget.hasShortForwardBranchOpt()) 20369 return SDValue(); 20370 EVT VT = N->getValueType(0); 20371 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) 20372 return SDValue(); 20373 20374 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. 
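  // For example, a divisor of 2048 needs an addi of 2047 (the largest simm12),
  // whereas 4096 would need 4095 and is rejected below.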
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();
  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}

bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}

unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable