1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/MemoryLocation.h" 24 #include "llvm/Analysis/VectorUtils.h" 25 #include "llvm/CodeGen/Analysis.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineJumpTableInfo.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 32 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 33 #include "llvm/CodeGen/ValueTypes.h" 34 #include "llvm/IR/DiagnosticInfo.h" 35 #include "llvm/IR/DiagnosticPrinter.h" 36 #include "llvm/IR/IRBuilder.h" 37 #include "llvm/IR/Instructions.h" 38 #include "llvm/IR/IntrinsicsRISCV.h" 39 #include "llvm/IR/PatternMatch.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/InstructionCost.h" 44 #include "llvm/Support/KnownBits.h" 45 #include "llvm/Support/MathExtras.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include <optional> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "riscv-lower" 52 53 STATISTIC(NumTailCalls, "Number of tail calls"); 54 55 static cl::opt<unsigned> ExtensionMaxWebSize( 56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden, 57 cl::desc("Give the maximum size (in number of nodes) of the web of " 58 "instructions that we will consider for VW expansion"), 59 cl::init(18)); 60 61 static cl::opt<bool> 62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, 63 cl::desc("Allow the formation of VW_W operations (e.g., " 64 "VWADD_W) with splat constants"), 65 cl::init(false)); 66 67 static cl::opt<unsigned> NumRepeatedDivisors( 68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, 69 cl::desc("Set the minimum number of repetitions of a divisor to allow " 70 "transformation to multiplications by the reciprocal"), 71 cl::init(2)); 72 73 static cl::opt<int> 74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, 75 cl::desc("Give the maximum number of instructions that we will " 76 "use for creating a floating-point immediate value"), 77 cl::init(2)); 78 79 static cl::opt<bool> 80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, 81 cl::desc("Make i32 a legal type for SelectionDAG on RV64.")); 82 83 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 84 const RISCVSubtarget &STI) 85 : TargetLowering(TM), Subtarget(STI) { 86 87 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 89 90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 91 !Subtarget.hasStdExtF()) { 92 errs() 
<< "Hard-float 'f' ABI can't be used for a target that " 93 "doesn't support the F instruction set extension (ignoring " 94 "target-abi)\n"; 95 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 97 !Subtarget.hasStdExtD()) { 98 errs() << "Hard-float 'd' ABI can't be used for a target that " 99 "doesn't support the D instruction set extension (ignoring " 100 "target-abi)\n"; 101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 102 } 103 104 switch (ABI) { 105 default: 106 report_fatal_error("Don't know how to lower this ABI"); 107 case RISCVABI::ABI_ILP32: 108 case RISCVABI::ABI_ILP32E: 109 case RISCVABI::ABI_LP64E: 110 case RISCVABI::ABI_ILP32F: 111 case RISCVABI::ABI_ILP32D: 112 case RISCVABI::ABI_LP64: 113 case RISCVABI::ABI_LP64F: 114 case RISCVABI::ABI_LP64D: 115 break; 116 } 117 118 MVT XLenVT = Subtarget.getXLenVT(); 119 120 // Set up the register classes. 121 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 122 if (Subtarget.is64Bit() && RV64LegalI32) 123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass); 124 125 if (Subtarget.hasStdExtZfhmin()) 126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 127 if (Subtarget.hasStdExtZfbfmin()) 128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass); 129 if (Subtarget.hasStdExtF()) 130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 131 if (Subtarget.hasStdExtD()) 132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 133 if (Subtarget.hasStdExtZhinxmin()) 134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass); 135 if (Subtarget.hasStdExtZfinx()) 136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass); 137 if (Subtarget.hasStdExtZdinx()) { 138 if (Subtarget.is64Bit()) 139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass); 140 else 141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); 142 } 143 144 static const MVT::SimpleValueType BoolVecVTs[] = { 145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 147 static const MVT::SimpleValueType IntVecVTs[] = { 148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 152 MVT::nxv4i64, MVT::nxv8i64}; 153 static const MVT::SimpleValueType F16VecVTs[] = { 154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 156 static const MVT::SimpleValueType BF16VecVTs[] = { 157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16, 158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16}; 159 static const MVT::SimpleValueType F32VecVTs[] = { 160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 161 static const MVT::SimpleValueType F64VecVTs[] = { 162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 163 164 if (Subtarget.hasVInstructions()) { 165 auto addRegClassForRVV = [this](MVT VT) { 166 // Disable the smallest fractional LMUL types if ELEN is less than 167 // RVVBitsPerBlock. 
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen(); 169 if (VT.getVectorMinNumElements() < MinElts) 170 return; 171 172 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 173 const TargetRegisterClass *RC; 174 if (Size <= RISCV::RVVBitsPerBlock) 175 RC = &RISCV::VRRegClass; 176 else if (Size == 2 * RISCV::RVVBitsPerBlock) 177 RC = &RISCV::VRM2RegClass; 178 else if (Size == 4 * RISCV::RVVBitsPerBlock) 179 RC = &RISCV::VRM4RegClass; 180 else if (Size == 8 * RISCV::RVVBitsPerBlock) 181 RC = &RISCV::VRM8RegClass; 182 else 183 llvm_unreachable("Unexpected size"); 184 185 addRegisterClass(VT, RC); 186 }; 187 188 for (MVT VT : BoolVecVTs) 189 addRegClassForRVV(VT); 190 for (MVT VT : IntVecVTs) { 191 if (VT.getVectorElementType() == MVT::i64 && 192 !Subtarget.hasVInstructionsI64()) 193 continue; 194 addRegClassForRVV(VT); 195 } 196 197 if (Subtarget.hasVInstructionsF16Minimal()) 198 for (MVT VT : F16VecVTs) 199 addRegClassForRVV(VT); 200 201 if (Subtarget.hasVInstructionsBF16()) 202 for (MVT VT : BF16VecVTs) 203 addRegClassForRVV(VT); 204 205 if (Subtarget.hasVInstructionsF32()) 206 for (MVT VT : F32VecVTs) 207 addRegClassForRVV(VT); 208 209 if (Subtarget.hasVInstructionsF64()) 210 for (MVT VT : F64VecVTs) 211 addRegClassForRVV(VT); 212 213 if (Subtarget.useRVVForFixedLengthVectors()) { 214 auto addRegClassForFixedVectors = [this](MVT VT) { 215 MVT ContainerVT = getContainerForFixedLengthVector(VT); 216 unsigned RCID = getRegClassIDForVecVT(ContainerVT); 217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 218 addRegisterClass(VT, TRI.getRegClass(RCID)); 219 }; 220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 221 if (useRVVForFixedLengthVectorVT(VT)) 222 addRegClassForFixedVectors(VT); 223 224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 225 if (useRVVForFixedLengthVectorVT(VT)) 226 addRegClassForFixedVectors(VT); 227 } 228 } 229 230 // Compute derived properties from the register classes. 231 computeRegisterProperties(STI.getRegisterInfo()); 232 233 setStackPointerRegisterToSaveRestore(RISCV::X2); 234 235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT, 236 MVT::i1, Promote); 237 // DAGCombiner can call isLoadExtLegal for types that aren't legal. 238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, 239 MVT::i1, Promote); 240 241 // TODO: add all necessary setOperationAction calls. 
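  // Quick reminder (illustrative summary, not exhaustive) of the
  // TargetLowering action kinds requested throughout this constructor:
  //   Legal   - the node is selected directly by instruction patterns.
  //   Expand  - the generic legalizer rewrites it in terms of other nodes.
  //   LibCall - it is always lowered to a runtime library call.
  //   Promote - the operation is performed in a wider (or other) type.
  //   Custom  - RISCVTargetLowering::LowerOperation/ReplaceNodeResults
  //             provides a hand-written lowering.
  // For example, the DYNAMIC_STACKALLOC entry below means a dynamic alloca
  // is expanded by generic code rather than matched by a RISC-V pattern.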
242 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 243 244 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 245 setOperationAction(ISD::BR_CC, XLenVT, Expand); 246 if (RV64LegalI32 && Subtarget.is64Bit()) 247 setOperationAction(ISD::BR_CC, MVT::i32, Expand); 248 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 249 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 250 if (RV64LegalI32 && Subtarget.is64Bit()) 251 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 252 253 if (!Subtarget.hasVendorXCValu()) 254 setCondCodeAction(ISD::SETLE, XLenVT, Expand); 255 setCondCodeAction(ISD::SETGT, XLenVT, Custom); 256 setCondCodeAction(ISD::SETGE, XLenVT, Expand); 257 if (!Subtarget.hasVendorXCValu()) 258 setCondCodeAction(ISD::SETULE, XLenVT, Expand); 259 setCondCodeAction(ISD::SETUGT, XLenVT, Custom); 260 setCondCodeAction(ISD::SETUGE, XLenVT, Expand); 261 262 if (RV64LegalI32 && Subtarget.is64Bit()) 263 setOperationAction(ISD::SETCC, MVT::i32, Promote); 264 265 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); 266 267 setOperationAction(ISD::VASTART, MVT::Other, Custom); 268 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); 269 if (RV64LegalI32 && Subtarget.is64Bit()) 270 setOperationAction(ISD::VAARG, MVT::i32, Promote); 271 272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 273 274 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 275 276 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb()) 277 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); 278 279 if (Subtarget.is64Bit()) { 280 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 281 282 if (!RV64LegalI32) { 283 setOperationAction(ISD::LOAD, MVT::i32, Custom); 284 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, 285 MVT::i32, Custom); 286 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, 287 MVT::i32, Custom); 288 if (!Subtarget.hasStdExtZbb()) 289 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom); 290 } else { 291 setOperationAction(ISD::SSUBO, MVT::i32, Custom); 292 if (Subtarget.hasStdExtZbb()) { 293 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom); 294 setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom); 295 } 296 } 297 setOperationAction(ISD::SADDO, MVT::i32, Custom); 298 } 299 if (!Subtarget.hasStdExtZmmul()) { 300 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); 301 if (RV64LegalI32 && Subtarget.is64Bit()) 302 setOperationAction(ISD::MUL, MVT::i32, Promote); 303 } else if (Subtarget.is64Bit()) { 304 setOperationAction(ISD::MUL, MVT::i128, Custom); 305 if (!RV64LegalI32) 306 setOperationAction(ISD::MUL, MVT::i32, Custom); 307 else 308 setOperationAction(ISD::SMULO, MVT::i32, Custom); 309 } else { 310 setOperationAction(ISD::MUL, MVT::i64, Custom); 311 } 312 313 if (!Subtarget.hasStdExtM()) { 314 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, 315 XLenVT, Expand); 316 if (RV64LegalI32 && Subtarget.is64Bit()) 317 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32, 318 Promote); 319 } else if (Subtarget.is64Bit()) { 320 if (!RV64LegalI32) 321 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, 322 {MVT::i8, MVT::i16, MVT::i32}, Custom); 323 } 324 325 if (RV64LegalI32 && Subtarget.is64Bit()) { 326 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand); 327 setOperationAction( 328 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, 
ISD::UMUL_LOHI}, MVT::i32, 329 Expand); 330 } 331 332 setOperationAction( 333 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, 334 Expand); 335 336 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, 337 Custom); 338 339 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { 340 if (!RV64LegalI32 && Subtarget.is64Bit()) 341 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); 342 } else if (Subtarget.hasVendorXTHeadBb()) { 343 if (Subtarget.is64Bit()) 344 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); 345 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom); 346 } else if (Subtarget.hasVendorXCVbitmanip()) { 347 setOperationAction(ISD::ROTL, XLenVT, Expand); 348 } else { 349 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); 350 if (RV64LegalI32 && Subtarget.is64Bit()) 351 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); 352 } 353 354 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 355 // pattern match it directly in isel. 356 setOperationAction(ISD::BSWAP, XLenVT, 357 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 358 Subtarget.hasVendorXTHeadBb()) 359 ? Legal 360 : Expand); 361 if (RV64LegalI32 && Subtarget.is64Bit()) 362 setOperationAction(ISD::BSWAP, MVT::i32, 363 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 364 Subtarget.hasVendorXTHeadBb()) 365 ? Promote 366 : Expand); 367 368 369 if (Subtarget.hasVendorXCVbitmanip()) { 370 setOperationAction(ISD::BITREVERSE, XLenVT, Legal); 371 } else { 372 // Zbkb can use rev8+brev8 to implement bitreverse. 373 setOperationAction(ISD::BITREVERSE, XLenVT, 374 Subtarget.hasStdExtZbkb() ? Custom : Expand); 375 } 376 377 if (Subtarget.hasStdExtZbb()) { 378 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, 379 Legal); 380 if (RV64LegalI32 && Subtarget.is64Bit()) 381 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32, 382 Promote); 383 384 if (Subtarget.is64Bit()) { 385 if (RV64LegalI32) 386 setOperationAction(ISD::CTTZ, MVT::i32, Legal); 387 else 388 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); 389 } 390 } else if (!Subtarget.hasVendorXCVbitmanip()) { 391 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand); 392 if (RV64LegalI32 && Subtarget.is64Bit()) 393 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand); 394 } 395 396 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 397 Subtarget.hasVendorXCVbitmanip()) { 398 // We need the custom lowering to make sure that the resulting sequence 399 // for the 32bit case is efficient on 64bit targets. 400 if (Subtarget.is64Bit()) { 401 if (RV64LegalI32) { 402 setOperationAction(ISD::CTLZ, MVT::i32, 403 Subtarget.hasStdExtZbb() ? Legal : Promote); 404 if (!Subtarget.hasStdExtZbb()) 405 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 406 } else 407 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); 408 } 409 } else { 410 setOperationAction(ISD::CTLZ, XLenVT, Expand); 411 if (RV64LegalI32 && Subtarget.is64Bit()) 412 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 413 } 414 415 if (!RV64LegalI32 && Subtarget.is64Bit() && 416 !Subtarget.hasShortForwardBranchOpt()) 417 setOperationAction(ISD::ABS, MVT::i32, Custom); 418 419 // We can use PseudoCCSUB to implement ABS. 
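  // Roughly (illustrative sketch, not the exact emitted sequence):
  //   abs x  ==>  tmp = 0 - x;  result = (x < 0) ? tmp : x
  // where the conditional subtract is a single instruction that a short
  // forward branch skips when x is already non-negative.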
420 if (Subtarget.hasShortForwardBranchOpt()) 421 setOperationAction(ISD::ABS, XLenVT, Legal); 422 423 if (!Subtarget.hasVendorXTHeadCondMov()) { 424 setOperationAction(ISD::SELECT, XLenVT, Custom); 425 if (RV64LegalI32 && Subtarget.is64Bit()) 426 setOperationAction(ISD::SELECT, MVT::i32, Promote); 427 } 428 429 static const unsigned FPLegalNodeTypes[] = { 430 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, 431 ISD::LLRINT, ISD::LROUND, ISD::LLROUND, 432 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, 433 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, 434 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 435 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; 436 437 static const ISD::CondCode FPCCToExpand[] = { 438 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 439 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 440 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 441 442 static const unsigned FPOpToExpand[] = { 443 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, 444 ISD::FREM}; 445 446 static const unsigned FPRndMode[] = { 447 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 448 ISD::FROUNDEVEN}; 449 450 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 451 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 452 453 static const unsigned ZfhminZfbfminPromoteOps[] = { 454 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, 455 ISD::FSUB, ISD::FMUL, ISD::FMA, 456 ISD::FDIV, ISD::FSQRT, ISD::FABS, 457 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, 458 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 459 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, 460 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, 461 ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 462 ISD::FROUNDEVEN, ISD::SELECT}; 463 464 if (Subtarget.hasStdExtZfbfmin()) { 465 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 466 setOperationAction(ISD::BITCAST, MVT::bf16, Custom); 467 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom); 468 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); 469 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); 470 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); 471 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); 472 setOperationAction(ISD::BR_CC, MVT::bf16, Expand); 473 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); 474 setOperationAction(ISD::FREM, MVT::bf16, Promote); 475 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the 476 // DAGCombiner::visitFP_ROUND probably needs improvements first. 477 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); 478 } 479 480 if (Subtarget.hasStdExtZfhminOrZhinxmin()) { 481 if (Subtarget.hasStdExtZfhOrZhinx()) { 482 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); 483 setOperationAction(FPRndMode, MVT::f16, 484 Subtarget.hasStdExtZfa() ? Legal : Custom); 485 setOperationAction(ISD::SELECT, MVT::f16, Custom); 486 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); 487 } else { 488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); 489 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, 490 ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, 491 MVT::f16, Legal); 492 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the 493 // DAGCombiner::visitFP_ROUND probably needs improvements first. 
494 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); 495 } 496 497 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); 498 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); 499 setCondCodeAction(FPCCToExpand, MVT::f16, Expand); 500 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 501 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 502 503 setOperationAction(ISD::FNEARBYINT, MVT::f16, 504 Subtarget.hasStdExtZfa() ? Legal : Promote); 505 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, 506 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, 507 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, 508 ISD::FLOG10}, 509 MVT::f16, Promote); 510 511 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have 512 // complete support for all operations in LegalizeDAG. 513 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, 514 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, 515 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, 516 ISD::STRICT_FTRUNC}, 517 MVT::f16, Promote); 518 519 // We need to custom promote this. 520 if (Subtarget.is64Bit()) 521 setOperationAction(ISD::FPOWI, MVT::i32, Custom); 522 523 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, 524 Subtarget.hasStdExtZfa() ? Legal : Custom); 525 } 526 527 if (Subtarget.hasStdExtFOrZfinx()) { 528 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); 529 setOperationAction(FPRndMode, MVT::f32, 530 Subtarget.hasStdExtZfa() ? Legal : Custom); 531 setCondCodeAction(FPCCToExpand, MVT::f32, Expand); 532 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 533 setOperationAction(ISD::SELECT, MVT::f32, Custom); 534 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 535 setOperationAction(FPOpToExpand, MVT::f32, Expand); 536 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 537 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 538 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); 539 setTruncStoreAction(MVT::f32, MVT::bf16, Expand); 540 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); 541 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); 542 setOperationAction(ISD::FP_TO_BF16, MVT::f32, 543 Subtarget.isSoftFPABI() ? 
LibCall : Custom); 544 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); 545 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); 546 547 if (Subtarget.hasStdExtZfa()) { 548 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); 549 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); 550 } else { 551 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); 552 } 553 } 554 555 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit()) 556 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 557 558 if (Subtarget.hasStdExtDOrZdinx()) { 559 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); 560 561 if (!Subtarget.is64Bit()) 562 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 563 564 if (Subtarget.hasStdExtZfa()) { 565 setOperationAction(FPRndMode, MVT::f64, Legal); 566 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); 567 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal); 568 } else { 569 if (Subtarget.is64Bit()) 570 setOperationAction(FPRndMode, MVT::f64, Custom); 571 572 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom); 573 } 574 575 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); 576 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); 577 setCondCodeAction(FPCCToExpand, MVT::f64, Expand); 578 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 579 setOperationAction(ISD::SELECT, MVT::f64, Custom); 580 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 582 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 583 setOperationAction(FPOpToExpand, MVT::f64, Expand); 584 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 585 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 586 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 587 setTruncStoreAction(MVT::f64, MVT::bf16, Expand); 588 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); 589 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); 590 setOperationAction(ISD::FP_TO_BF16, MVT::f64, 591 Subtarget.isSoftFPABI() ? LibCall : Custom); 592 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); 593 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 594 } 595 596 if (Subtarget.is64Bit()) { 597 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, 598 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, 599 MVT::i32, Custom); 600 setOperationAction(ISD::LROUND, MVT::i32, Custom); 601 } 602 603 if (Subtarget.hasStdExtFOrZfinx()) { 604 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, 605 Custom); 606 607 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 608 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 609 XLenVT, Legal); 610 611 if (RV64LegalI32 && Subtarget.is64Bit()) 612 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 613 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 614 MVT::i32, Legal); 615 616 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); 617 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); 618 } 619 620 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, 621 ISD::JumpTable}, 622 XLenVT, Custom); 623 624 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 625 626 if (Subtarget.is64Bit()) 627 setOperationAction(ISD::Constant, MVT::i64, Custom); 628 629 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present. 630 // Unfortunately this can't be determined just from the ISA naming string. 
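  // On RV32 the i64 counter reads below are custom-lowered; the classic
  // sketch (illustrative only) is a hi/lo/hi read with a retry if the high
  // half ticks over mid-read:
  //   again:
  //     rdcycleh t0
  //     rdcycle  t1
  //     rdcycleh t2
  //     bne t0, t2, again
  // READSTEADYCOUNTER is handled the same way using the time CSR.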
631 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 632 Subtarget.is64Bit() ? Legal : Custom); 633 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, 634 Subtarget.is64Bit() ? Legal : Custom); 635 636 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); 637 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 638 if (Subtarget.is64Bit()) 639 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 640 641 if (Subtarget.hasStdExtZicbop()) { 642 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 643 } 644 645 if (Subtarget.hasStdExtA()) { 646 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 647 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) 648 setMinCmpXchgSizeInBits(8); 649 else 650 setMinCmpXchgSizeInBits(32); 651 } else if (Subtarget.hasForcedAtomics()) { 652 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 653 } else { 654 setMaxAtomicSizeInBitsSupported(0); 655 } 656 657 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 658 659 setBooleanContents(ZeroOrOneBooleanContent); 660 661 if (getTargetMachine().getTargetTriple().isOSLinux()) { 662 // Custom lowering of llvm.clear_cache. 663 setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom); 664 } 665 666 if (Subtarget.hasVInstructions()) { 667 setBooleanVectorContents(ZeroOrOneBooleanContent); 668 669 setOperationAction(ISD::VSCALE, XLenVT, Custom); 670 if (RV64LegalI32 && Subtarget.is64Bit()) 671 setOperationAction(ISD::VSCALE, MVT::i32, Custom); 672 673 // RVV intrinsics may have illegal operands. 674 // We also need to custom legalize vmv.x.s. 675 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, 676 ISD::INTRINSIC_VOID}, 677 {MVT::i8, MVT::i16}, Custom); 678 if (Subtarget.is64Bit()) 679 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 680 MVT::i32, Custom); 681 else 682 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, 683 MVT::i64, Custom); 684 685 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 686 MVT::Other, Custom); 687 688 static const unsigned IntegerVPOps[] = { 689 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, 690 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, 691 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, 692 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL, 693 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 694 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, 695 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, 696 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, 697 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, 698 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, 699 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, 700 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE, 701 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT, 702 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF, 703 ISD::EXPERIMENTAL_VP_SPLAT}; 704 705 static const unsigned FloatingPointVPOps[] = { 706 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 707 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 708 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 709 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, 710 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, 711 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, 712 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, 713 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, 714 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, 715 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, 716 
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT, 717 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE, 718 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM, 719 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT}; 720 721 static const unsigned IntegerVecReduceOps[] = { 722 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, 723 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, 724 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; 725 726 static const unsigned FloatingPointVecReduceOps[] = { 727 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, 728 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM}; 729 730 if (!Subtarget.is64Bit()) { 731 // We must custom-lower certain vXi64 operations on RV32 due to the vector 732 // element type being illegal. 733 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 734 MVT::i64, Custom); 735 736 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); 737 738 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 739 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, 740 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, 741 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, 742 MVT::i64, Custom); 743 } 744 745 for (MVT VT : BoolVecVTs) { 746 if (!isTypeLegal(VT)) 747 continue; 748 749 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 750 751 // Mask VTs are custom-expanded into a series of standard nodes 752 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, 753 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, 754 ISD::SCALAR_TO_VECTOR}, 755 VT, Custom); 756 757 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 758 Custom); 759 760 setOperationAction(ISD::SELECT, VT, Custom); 761 setOperationAction( 762 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, 763 Expand); 764 765 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT, 766 Custom); 767 768 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); 769 770 setOperationAction( 771 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 772 Custom); 773 774 setOperationAction( 775 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 776 Custom); 777 778 // RVV has native int->float & float->int conversions where the 779 // element type sizes are within one power-of-two of each other. Any 780 // wider distances between type sizes have to be lowered as sequences 781 // which progressively narrow the gap in stages. 782 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 783 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 784 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 785 ISD::STRICT_FP_TO_UINT}, 786 VT, Custom); 787 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 788 Custom); 789 790 // Expand all extending loads to types larger than this, and truncating 791 // stores from types larger than this. 
792 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 793 setTruncStoreAction(VT, OtherVT, Expand); 794 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 795 OtherVT, Expand); 796 } 797 798 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 799 ISD::VP_TRUNCATE, ISD::VP_SETCC}, 800 VT, Custom); 801 802 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 803 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 804 805 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 806 807 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); 808 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); 809 810 setOperationPromotedToType( 811 ISD::VECTOR_SPLICE, VT, 812 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); 813 } 814 815 for (MVT VT : IntVecVTs) { 816 if (!isTypeLegal(VT)) 817 continue; 818 819 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 820 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 821 822 // Vectors implement MULHS/MULHU. 823 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); 824 825 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. 826 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) 827 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); 828 829 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, 830 Legal); 831 832 setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom); 833 834 // Custom-lower extensions and truncations from/to mask types. 835 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, 836 VT, Custom); 837 838 // RVV has native int->float & float->int conversions where the 839 // element type sizes are within one power-of-two of each other. Any 840 // wider distances between type sizes have to be lowered as sequences 841 // which progressively narrow the gap in stages. 842 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 843 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 844 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 845 ISD::STRICT_FP_TO_UINT}, 846 VT, Custom); 847 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 848 Custom); 849 setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, 850 ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT, 851 ISD::SSUBSAT, ISD::USUBSAT}, 852 VT, Legal); 853 854 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" 855 // nodes which truncate by one power of two at a time. 856 setOperationAction(ISD::TRUNCATE, VT, Custom); 857 858 // Custom-lower insert/extract operations to simplify patterns. 859 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 860 Custom); 861 862 // Custom-lower reduction operations to set up the corresponding custom 863 // nodes' operands. 
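      // For example (illustrative): a vecreduce_add of an nxv4i32 value is
      // rewritten to a VL-predicated reduction node whose extra operands
      // carry the start value, mask and VL, and it is ultimately selected to
      // vredsum.vs, which takes the accumulator in element 0 of a scalar
      // source vector.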
864 setOperationAction(IntegerVecReduceOps, VT, Custom); 865 866 setOperationAction(IntegerVPOps, VT, Custom); 867 868 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 869 870 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 871 VT, Custom); 872 873 setOperationAction( 874 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 875 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 876 VT, Custom); 877 878 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 879 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 880 VT, Custom); 881 882 setOperationAction(ISD::SELECT, VT, Custom); 883 setOperationAction(ISD::SELECT_CC, VT, Expand); 884 885 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); 886 887 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 888 setTruncStoreAction(VT, OtherVT, Expand); 889 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 890 OtherVT, Expand); 891 } 892 893 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 894 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 895 896 // Splice 897 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); 898 899 if (Subtarget.hasStdExtZvkb()) { 900 setOperationAction(ISD::BSWAP, VT, Legal); 901 setOperationAction(ISD::VP_BSWAP, VT, Custom); 902 } else { 903 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand); 904 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); 905 } 906 907 if (Subtarget.hasStdExtZvbb()) { 908 setOperationAction(ISD::BITREVERSE, VT, Legal); 909 setOperationAction(ISD::VP_BITREVERSE, VT, Custom); 910 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 911 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 912 VT, Custom); 913 } else { 914 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand); 915 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand); 916 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 917 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 918 VT, Expand); 919 920 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 921 // range of f32. 922 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 923 if (isTypeLegal(FloatVT)) { 924 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 925 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ, 926 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF}, 927 VT, Custom); 928 } 929 } 930 } 931 932 // Expand various CCs to best match the RVV ISA, which natively supports UNE 933 // but no other unordered comparisons, and supports all ordered comparisons 934 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 935 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 936 // and we pattern-match those back to the "original", swapping operands once 937 // more. This way we catch both operations and both "vf" and "fv" forms with 938 // fewer patterns. 939 static const ISD::CondCode VFPCCToExpand[] = { 940 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 941 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 942 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 943 }; 944 945 // TODO: support more ops. 
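    // Illustrative summary: Zvfhmin provides f16<->f32 conversions but no
    // f16 arithmetic, so the ops listed below are promoted to f32 vectors,
    // e.g. an nxv2f16 fadd is widened to nxv2f32 (vfwcvt.f.f.v), added
    // there, and the result narrowed back (vfncvt.f.f.w).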
946 static const unsigned ZvfhminPromoteOps[] = { 947 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, 948 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, 949 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, 950 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, 951 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM, 952 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 953 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; 954 955 // TODO: support more vp ops. 956 static const unsigned ZvfhminPromoteVPOps[] = { 957 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 958 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 959 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 960 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, 961 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, 962 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, 963 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, 964 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, 965 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM}; 966 967 // Sets common operation actions on RVV floating-point vector types. 968 const auto SetCommonVFPActions = [&](MVT VT) { 969 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 970 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 971 // sizes are within one power-of-two of each other. Therefore conversions 972 // between vXf16 and vXf64 must be lowered as sequences which convert via 973 // vXf32. 974 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 975 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); 976 // Custom-lower insert/extract operations to simplify patterns. 977 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 978 Custom); 979 // Expand various condition codes (explained above). 980 setCondCodeAction(VFPCCToExpand, VT, Expand); 981 982 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); 983 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); 984 985 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 986 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, 987 ISD::IS_FPCLASS}, 988 VT, Custom); 989 990 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 991 992 // Expand FP operations that need libcalls. 
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                           VT, Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR},
                           VT, Custom);
        if (Subtarget.hasStdExtZfhmin())
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
        // load/store
        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
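        // (Roughly: nxv32f16 is already an LMUL=8 type, so promoting each
        // element to f32 would need nxv32f32, i.e. LMUL=16, which has no
        // register class; the custom lowering splits such ops in half first.)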
1082 if (VT == MVT::nxv32f16) { 1083 setOperationAction(ZvfhminPromoteOps, VT, Custom); 1084 setOperationAction(ZvfhminPromoteVPOps, VT, Custom); 1085 continue; 1086 } 1087 // Add more promote ops. 1088 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1089 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1090 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1091 } 1092 } 1093 1094 // TODO: Could we merge some code with zvfhmin? 1095 if (Subtarget.hasVInstructionsBF16()) { 1096 for (MVT VT : BF16VecVTs) { 1097 if (!isTypeLegal(VT)) 1098 continue; 1099 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1100 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1101 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, 1102 Custom); 1103 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 1104 ISD::EXTRACT_SUBVECTOR}, 1105 VT, Custom); 1106 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1107 if (Subtarget.hasStdExtZfbfmin()) 1108 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1109 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, 1110 Custom); 1111 setOperationAction(ISD::SELECT_CC, VT, Expand); 1112 // TODO: Promote to fp32. 1113 } 1114 } 1115 1116 if (Subtarget.hasVInstructionsF32()) { 1117 for (MVT VT : F32VecVTs) { 1118 if (!isTypeLegal(VT)) 1119 continue; 1120 SetCommonVFPActions(VT); 1121 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1122 } 1123 } 1124 1125 if (Subtarget.hasVInstructionsF64()) { 1126 for (MVT VT : F64VecVTs) { 1127 if (!isTypeLegal(VT)) 1128 continue; 1129 SetCommonVFPActions(VT); 1130 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1131 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); 1132 } 1133 } 1134 1135 if (Subtarget.useRVVForFixedLengthVectors()) { 1136 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 1137 if (!useRVVForFixedLengthVectorVT(VT)) 1138 continue; 1139 1140 // By default everything must be expanded. 1141 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1142 setOperationAction(Op, VT, Expand); 1143 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { 1144 setTruncStoreAction(VT, OtherVT, Expand); 1145 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 1146 OtherVT, Expand); 1147 } 1148 1149 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1150 // expansion to a build_vector of 0s. 1151 setOperationAction(ISD::UNDEF, VT, Custom); 1152 1153 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 
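        // For example (assuming the common Zvl128b minimum VLEN): a v4i32
        // value lives in the low 128 bits of an nxv2i32 container, and the
        // fixed-length ops below are rewritten to VL-predicated ops on that
        // container with VL=4.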
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {
                ISD::SINT_TO_FP,
                ISD::UINT_TO_FP,
                ISD::FP_TO_SINT,
                ISD::FP_TO_UINT,
                ISD::STRICT_SINT_TO_FP,
                ISD::STRICT_UINT_TO_FP,
                ISD::STRICT_FP_TO_SINT,
                ISD::STRICT_FP_TO_UINT,
            },
            VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);

          setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
          setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
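        // (Zve64* only guarantees vmul for 64-bit elements; the
        // vmulh/vmulhu/vmulhsu forms are not required for EEW=64 there, so
        // those nodes are left to generic expansion unless full V is
        // available.)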
1245 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) 1246 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); 1247 1248 setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, 1249 ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT, 1250 ISD::SSUBSAT, ISD::USUBSAT}, 1251 VT, Custom); 1252 1253 setOperationAction(ISD::VSELECT, VT, Custom); 1254 1255 setOperationAction( 1256 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); 1257 1258 // Custom-lower reduction operations to set up the corresponding custom 1259 // nodes' operands. 1260 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, 1261 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, 1262 ISD::VECREDUCE_UMIN}, 1263 VT, Custom); 1264 1265 setOperationAction(IntegerVPOps, VT, Custom); 1266 1267 if (Subtarget.hasStdExtZvkb()) 1268 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); 1269 1270 if (Subtarget.hasStdExtZvbb()) { 1271 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 1272 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, 1273 VT, Custom); 1274 } else { 1275 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 1276 // range of f32. 1277 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1278 if (isTypeLegal(FloatVT)) 1279 setOperationAction( 1280 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, 1281 Custom); 1282 } 1283 } 1284 1285 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 1286 // There are no extending loads or truncating stores. 1287 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { 1288 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 1289 setTruncStoreAction(VT, InnerVT, Expand); 1290 } 1291 1292 if (!useRVVForFixedLengthVectorVT(VT)) 1293 continue; 1294 1295 // By default everything must be expanded. 1296 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1297 setOperationAction(Op, VT, Expand); 1298 1299 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1300 // expansion to a build_vector of 0s. 1301 setOperationAction(ISD::UNDEF, VT, Custom); 1302 1303 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 1304 ISD::EXTRACT_SUBVECTOR}, 1305 VT, Custom); 1306 1307 // FIXME: mload, mstore, mgather, mscatter, vp_load/store, 1308 // vp_stride_load/store, vp_gather/scatter can be hoisted to here. 1309 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1310 1311 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1312 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, 1313 Custom); 1314 1315 if (VT.getVectorElementType() == MVT::f16 && 1316 !Subtarget.hasVInstructionsF16()) { 1317 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1318 setOperationAction( 1319 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, 1320 Custom); 1321 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, 1322 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, 1323 VT, Custom); 1324 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 1325 if (Subtarget.hasStdExtZfhmin()) { 1326 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. 1327 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1328 } else { 1329 // We need to custom legalize f16 build vectors if Zfhmin isn't 1330 // available. 
1331 setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); 1332 } 1333 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1334 // Don't promote f16 vector operations to f32 if f32 vector type is 1335 // not legal. 1336 // TODO: could split the f16 vector into two vectors and do promotion. 1337 if (!isTypeLegal(F32VecVT)) 1338 continue; 1339 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1340 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1341 continue; 1342 } 1343 1344 if (VT.getVectorElementType() == MVT::bf16) { 1345 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1346 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. 1347 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1348 setOperationAction( 1349 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, 1350 Custom); 1351 // TODO: Promote to fp32. 1352 continue; 1353 } 1354 1355 setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, 1356 ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 1357 VT, Custom); 1358 1359 setOperationAction( 1360 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom); 1361 1362 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 1363 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 1364 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 1365 ISD::VP_SCATTER}, 1366 VT, Custom); 1367 1368 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, 1369 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, 1370 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, 1371 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, 1372 VT, Custom); 1373 1374 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 1375 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, 1376 VT, Custom); 1377 1378 setCondCodeAction(VFPCCToExpand, VT, Expand); 1379 1380 setOperationAction(ISD::SETCC, VT, Custom); 1381 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); 1382 1383 setOperationAction(ISD::BITCAST, VT, Custom); 1384 1385 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 1386 1387 setOperationAction(FloatingPointVPOps, VT, Custom); 1388 1389 setOperationAction( 1390 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 1391 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, 1392 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, 1393 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, 1394 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, 1395 VT, Custom); 1396 } 1397 1398 // Custom-legalize bitcasts from fixed-length vectors to scalar types. 1399 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom); 1400 if (Subtarget.is64Bit()) 1401 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 1402 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 1403 setOperationAction(ISD::BITCAST, MVT::f16, Custom); 1404 if (Subtarget.hasStdExtFOrZfinx()) 1405 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 1406 if (Subtarget.hasStdExtDOrZdinx()) 1407 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 1408 } 1409 } 1410 1411 if (Subtarget.hasStdExtA()) { 1412 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); 1413 if (RV64LegalI32 && Subtarget.is64Bit()) 1414 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 1415 } 1416 1417 if (Subtarget.hasForcedAtomics()) { 1418 // Force __sync libcalls to be emitted for atomic rmw/cas operations. 
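    // For example (illustrative): with +forced-atomics an IR
    // "atomicrmw add ptr %p, i32 1 seq_cst" is emitted as a call to
    // __sync_fetch_and_add_4 instead of an LR/SC or AMO sequence.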
1419 setOperationAction( 1420 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, 1421 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, 1422 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, 1423 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, 1424 XLenVT, LibCall); 1425 } 1426 1427 if (Subtarget.hasVendorXTHeadMemIdx()) { 1428 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { 1429 setIndexedLoadAction(im, MVT::i8, Legal); 1430 setIndexedStoreAction(im, MVT::i8, Legal); 1431 setIndexedLoadAction(im, MVT::i16, Legal); 1432 setIndexedStoreAction(im, MVT::i16, Legal); 1433 setIndexedLoadAction(im, MVT::i32, Legal); 1434 setIndexedStoreAction(im, MVT::i32, Legal); 1435 1436 if (Subtarget.is64Bit()) { 1437 setIndexedLoadAction(im, MVT::i64, Legal); 1438 setIndexedStoreAction(im, MVT::i64, Legal); 1439 } 1440 } 1441 } 1442 1443 if (Subtarget.hasVendorXCVmem()) { 1444 setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); 1445 setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); 1446 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); 1447 1448 setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); 1449 setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); 1450 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); 1451 } 1452 1453 if (Subtarget.hasVendorXCValu()) { 1454 setOperationAction(ISD::ABS, XLenVT, Legal); 1455 setOperationAction(ISD::SMIN, XLenVT, Legal); 1456 setOperationAction(ISD::UMIN, XLenVT, Legal); 1457 setOperationAction(ISD::SMAX, XLenVT, Legal); 1458 setOperationAction(ISD::UMAX, XLenVT, Legal); 1459 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); 1460 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 1461 } 1462 1463 // Function alignments. 1464 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); 1465 setMinFunctionAlignment(FunctionAlignment); 1466 // Set preferred alignments. 
1467 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); 1468 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); 1469 1470 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, 1471 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, 1472 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); 1473 if (Subtarget.is64Bit()) 1474 setTargetDAGCombine(ISD::SRA); 1475 1476 if (Subtarget.hasStdExtFOrZfinx()) 1477 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); 1478 1479 if (Subtarget.hasStdExtZbb()) 1480 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); 1481 1482 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) || 1483 Subtarget.hasStdExtV()) 1484 setTargetDAGCombine(ISD::TRUNCATE); 1485 1486 if (Subtarget.hasStdExtZbkb()) 1487 setTargetDAGCombine(ISD::BITREVERSE); 1488 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 1489 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); 1490 if (Subtarget.hasStdExtFOrZfinx()) 1491 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, 1492 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); 1493 if (Subtarget.hasVInstructions()) 1494 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, 1495 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, 1496 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, 1497 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, 1498 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, 1499 ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, 1500 ISD::INSERT_VECTOR_ELT, ISD::ABS}); 1501 if (Subtarget.hasVendorXTHeadMemPair()) 1502 setTargetDAGCombine({ISD::LOAD, ISD::STORE}); 1503 if (Subtarget.useRVVForFixedLengthVectors()) 1504 setTargetDAGCombine(ISD::BITCAST); 1505 1506 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); 1507 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); 1508 1509 // Disable strict node mutation. 1510 IsStrictFPEnabled = true; 1511 1512 // Let the subtarget decide if a predictable select is more expensive than the 1513 // corresponding branch. This information is used in CGP/SelectOpt to decide 1514 // when to convert selects into branches. 1515 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive(); 1516 } 1517 1518 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 1519 LLVMContext &Context, 1520 EVT VT) const { 1521 if (!VT.isVector()) 1522 return getPointerTy(DL); 1523 if (Subtarget.hasVInstructions() && 1524 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) 1525 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 1526 return VT.changeVectorElementTypeToInteger(); 1527 } 1528 1529 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { 1530 return Subtarget.getXLenVT(); 1531 } 1532 1533 // Return false if we can lower get_vector_length to a vsetvli intrinsic. 1534 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, 1535 unsigned VF, 1536 bool IsScalable) const { 1537 if (!Subtarget.hasVInstructions()) 1538 return true; 1539 1540 if (!IsScalable) 1541 return true; 1542 1543 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT()) 1544 return true; 1545 1546 // Don't allow VF=1 if those types are't legal. 1547 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) 1548 return true; 1549 1550 // VLEN=32 support is incomplete. 1551 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 1552 return true; 1553 1554 // The maximum VF is for the smallest element width with LMUL=8. 1555 // VF must be a power of 2. 
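// For example, with RVVBitsPerBlock = 64 this evaluates to (64 / 8) * 8 = 64,
// which matches VLMAX for SEW=8 at LMUL=8 when VLEN equals the minimum block
// size.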
1556 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; 1557 return VF > MaxVF || !isPowerOf2_32(VF); 1558 } 1559 1560 bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { 1561 return !Subtarget.hasVInstructions() || 1562 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT); 1563 } 1564 1565 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1566 const CallInst &I, 1567 MachineFunction &MF, 1568 unsigned Intrinsic) const { 1569 auto &DL = I.getDataLayout(); 1570 1571 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, 1572 bool IsUnitStrided, bool UsePtrVal = false) { 1573 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; 1574 // We can't use ptrVal if the intrinsic can access memory before the 1575 // pointer. This means we can't use it for strided or indexed intrinsics. 1576 if (UsePtrVal) 1577 Info.ptrVal = I.getArgOperand(PtrOp); 1578 else 1579 Info.fallbackAddressSpace = 1580 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace(); 1581 Type *MemTy; 1582 if (IsStore) { 1583 // Store value is the first operand. 1584 MemTy = I.getArgOperand(0)->getType(); 1585 } else { 1586 // Use return type. If it's segment load, return type is a struct. 1587 MemTy = I.getType(); 1588 if (MemTy->isStructTy()) 1589 MemTy = MemTy->getStructElementType(0); 1590 } 1591 if (!IsUnitStrided) 1592 MemTy = MemTy->getScalarType(); 1593 1594 Info.memVT = getValueType(DL, MemTy); 1595 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8); 1596 Info.size = MemoryLocation::UnknownSize; 1597 Info.flags |= 1598 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; 1599 return true; 1600 }; 1601 1602 if (I.hasMetadata(LLVMContext::MD_nontemporal)) 1603 Info.flags |= MachineMemOperand::MONonTemporal; 1604 1605 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); 1606 switch (Intrinsic) { 1607 default: 1608 return false; 1609 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 1610 case Intrinsic::riscv_masked_atomicrmw_add_i32: 1611 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 1612 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 1613 case Intrinsic::riscv_masked_atomicrmw_max_i32: 1614 case Intrinsic::riscv_masked_atomicrmw_min_i32: 1615 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 1616 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 1617 case Intrinsic::riscv_masked_cmpxchg_i32: 1618 Info.opc = ISD::INTRINSIC_W_CHAIN; 1619 Info.memVT = MVT::i32; 1620 Info.ptrVal = I.getArgOperand(0); 1621 Info.offset = 0; 1622 Info.align = Align(4); 1623 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 1624 MachineMemOperand::MOVolatile; 1625 return true; 1626 case Intrinsic::riscv_masked_strided_load: 1627 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, 1628 /*IsUnitStrided*/ false); 1629 case Intrinsic::riscv_masked_strided_store: 1630 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, 1631 /*IsUnitStrided*/ false); 1632 case Intrinsic::riscv_seg2_load: 1633 case Intrinsic::riscv_seg3_load: 1634 case Intrinsic::riscv_seg4_load: 1635 case Intrinsic::riscv_seg5_load: 1636 case Intrinsic::riscv_seg6_load: 1637 case Intrinsic::riscv_seg7_load: 1638 case Intrinsic::riscv_seg8_load: 1639 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, 1640 /*IsUnitStrided*/ false, /*UsePtrVal*/ true); 1641 case Intrinsic::riscv_seg2_store: 1642 case Intrinsic::riscv_seg3_store: 1643 case Intrinsic::riscv_seg4_store: 1644 case Intrinsic::riscv_seg5_store: 1645 case Intrinsic::riscv_seg6_store: 1646 case 
Intrinsic::riscv_seg7_store: 1647 case Intrinsic::riscv_seg8_store: 1648 // Operands are (vec, ..., vec, ptr, vl) 1649 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1650 /*IsStore*/ true, 1651 /*IsUnitStrided*/ false, /*UsePtrVal*/ true); 1652 case Intrinsic::riscv_vle: 1653 case Intrinsic::riscv_vle_mask: 1654 case Intrinsic::riscv_vleff: 1655 case Intrinsic::riscv_vleff_mask: 1656 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1657 /*IsStore*/ false, 1658 /*IsUnitStrided*/ true, 1659 /*UsePtrVal*/ true); 1660 case Intrinsic::riscv_vse: 1661 case Intrinsic::riscv_vse_mask: 1662 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1663 /*IsStore*/ true, 1664 /*IsUnitStrided*/ true, 1665 /*UsePtrVal*/ true); 1666 case Intrinsic::riscv_vlse: 1667 case Intrinsic::riscv_vlse_mask: 1668 case Intrinsic::riscv_vloxei: 1669 case Intrinsic::riscv_vloxei_mask: 1670 case Intrinsic::riscv_vluxei: 1671 case Intrinsic::riscv_vluxei_mask: 1672 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1673 /*IsStore*/ false, 1674 /*IsUnitStrided*/ false); 1675 case Intrinsic::riscv_vsse: 1676 case Intrinsic::riscv_vsse_mask: 1677 case Intrinsic::riscv_vsoxei: 1678 case Intrinsic::riscv_vsoxei_mask: 1679 case Intrinsic::riscv_vsuxei: 1680 case Intrinsic::riscv_vsuxei_mask: 1681 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1682 /*IsStore*/ true, 1683 /*IsUnitStrided*/ false); 1684 case Intrinsic::riscv_vlseg2: 1685 case Intrinsic::riscv_vlseg3: 1686 case Intrinsic::riscv_vlseg4: 1687 case Intrinsic::riscv_vlseg5: 1688 case Intrinsic::riscv_vlseg6: 1689 case Intrinsic::riscv_vlseg7: 1690 case Intrinsic::riscv_vlseg8: 1691 case Intrinsic::riscv_vlseg2ff: 1692 case Intrinsic::riscv_vlseg3ff: 1693 case Intrinsic::riscv_vlseg4ff: 1694 case Intrinsic::riscv_vlseg5ff: 1695 case Intrinsic::riscv_vlseg6ff: 1696 case Intrinsic::riscv_vlseg7ff: 1697 case Intrinsic::riscv_vlseg8ff: 1698 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1699 /*IsStore*/ false, 1700 /*IsUnitStrided*/ false, /*UsePtrVal*/ true); 1701 case Intrinsic::riscv_vlseg2_mask: 1702 case Intrinsic::riscv_vlseg3_mask: 1703 case Intrinsic::riscv_vlseg4_mask: 1704 case Intrinsic::riscv_vlseg5_mask: 1705 case Intrinsic::riscv_vlseg6_mask: 1706 case Intrinsic::riscv_vlseg7_mask: 1707 case Intrinsic::riscv_vlseg8_mask: 1708 case Intrinsic::riscv_vlseg2ff_mask: 1709 case Intrinsic::riscv_vlseg3ff_mask: 1710 case Intrinsic::riscv_vlseg4ff_mask: 1711 case Intrinsic::riscv_vlseg5ff_mask: 1712 case Intrinsic::riscv_vlseg6ff_mask: 1713 case Intrinsic::riscv_vlseg7ff_mask: 1714 case Intrinsic::riscv_vlseg8ff_mask: 1715 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1716 /*IsStore*/ false, 1717 /*IsUnitStrided*/ false, /*UsePtrVal*/ true); 1718 case Intrinsic::riscv_vlsseg2: 1719 case Intrinsic::riscv_vlsseg3: 1720 case Intrinsic::riscv_vlsseg4: 1721 case Intrinsic::riscv_vlsseg5: 1722 case Intrinsic::riscv_vlsseg6: 1723 case Intrinsic::riscv_vlsseg7: 1724 case Intrinsic::riscv_vlsseg8: 1725 case Intrinsic::riscv_vloxseg2: 1726 case Intrinsic::riscv_vloxseg3: 1727 case Intrinsic::riscv_vloxseg4: 1728 case Intrinsic::riscv_vloxseg5: 1729 case Intrinsic::riscv_vloxseg6: 1730 case Intrinsic::riscv_vloxseg7: 1731 case Intrinsic::riscv_vloxseg8: 1732 case Intrinsic::riscv_vluxseg2: 1733 case Intrinsic::riscv_vluxseg3: 1734 case Intrinsic::riscv_vluxseg4: 1735 case Intrinsic::riscv_vluxseg5: 1736 case Intrinsic::riscv_vluxseg6: 1737 case Intrinsic::riscv_vluxseg7: 1738 case Intrinsic::riscv_vluxseg8: 1739 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1740 /*IsStore*/ false, 1741 
/*IsUnitStrided*/ false); 1742 case Intrinsic::riscv_vlsseg2_mask: 1743 case Intrinsic::riscv_vlsseg3_mask: 1744 case Intrinsic::riscv_vlsseg4_mask: 1745 case Intrinsic::riscv_vlsseg5_mask: 1746 case Intrinsic::riscv_vlsseg6_mask: 1747 case Intrinsic::riscv_vlsseg7_mask: 1748 case Intrinsic::riscv_vlsseg8_mask: 1749 case Intrinsic::riscv_vloxseg2_mask: 1750 case Intrinsic::riscv_vloxseg3_mask: 1751 case Intrinsic::riscv_vloxseg4_mask: 1752 case Intrinsic::riscv_vloxseg5_mask: 1753 case Intrinsic::riscv_vloxseg6_mask: 1754 case Intrinsic::riscv_vloxseg7_mask: 1755 case Intrinsic::riscv_vloxseg8_mask: 1756 case Intrinsic::riscv_vluxseg2_mask: 1757 case Intrinsic::riscv_vluxseg3_mask: 1758 case Intrinsic::riscv_vluxseg4_mask: 1759 case Intrinsic::riscv_vluxseg5_mask: 1760 case Intrinsic::riscv_vluxseg6_mask: 1761 case Intrinsic::riscv_vluxseg7_mask: 1762 case Intrinsic::riscv_vluxseg8_mask: 1763 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, 1764 /*IsStore*/ false, 1765 /*IsUnitStrided*/ false); 1766 case Intrinsic::riscv_vsseg2: 1767 case Intrinsic::riscv_vsseg3: 1768 case Intrinsic::riscv_vsseg4: 1769 case Intrinsic::riscv_vsseg5: 1770 case Intrinsic::riscv_vsseg6: 1771 case Intrinsic::riscv_vsseg7: 1772 case Intrinsic::riscv_vsseg8: 1773 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1774 /*IsStore*/ true, 1775 /*IsUnitStrided*/ false); 1776 case Intrinsic::riscv_vsseg2_mask: 1777 case Intrinsic::riscv_vsseg3_mask: 1778 case Intrinsic::riscv_vsseg4_mask: 1779 case Intrinsic::riscv_vsseg5_mask: 1780 case Intrinsic::riscv_vsseg6_mask: 1781 case Intrinsic::riscv_vsseg7_mask: 1782 case Intrinsic::riscv_vsseg8_mask: 1783 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1784 /*IsStore*/ true, 1785 /*IsUnitStrided*/ false); 1786 case Intrinsic::riscv_vssseg2: 1787 case Intrinsic::riscv_vssseg3: 1788 case Intrinsic::riscv_vssseg4: 1789 case Intrinsic::riscv_vssseg5: 1790 case Intrinsic::riscv_vssseg6: 1791 case Intrinsic::riscv_vssseg7: 1792 case Intrinsic::riscv_vssseg8: 1793 case Intrinsic::riscv_vsoxseg2: 1794 case Intrinsic::riscv_vsoxseg3: 1795 case Intrinsic::riscv_vsoxseg4: 1796 case Intrinsic::riscv_vsoxseg5: 1797 case Intrinsic::riscv_vsoxseg6: 1798 case Intrinsic::riscv_vsoxseg7: 1799 case Intrinsic::riscv_vsoxseg8: 1800 case Intrinsic::riscv_vsuxseg2: 1801 case Intrinsic::riscv_vsuxseg3: 1802 case Intrinsic::riscv_vsuxseg4: 1803 case Intrinsic::riscv_vsuxseg5: 1804 case Intrinsic::riscv_vsuxseg6: 1805 case Intrinsic::riscv_vsuxseg7: 1806 case Intrinsic::riscv_vsuxseg8: 1807 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1808 /*IsStore*/ true, 1809 /*IsUnitStrided*/ false); 1810 case Intrinsic::riscv_vssseg2_mask: 1811 case Intrinsic::riscv_vssseg3_mask: 1812 case Intrinsic::riscv_vssseg4_mask: 1813 case Intrinsic::riscv_vssseg5_mask: 1814 case Intrinsic::riscv_vssseg6_mask: 1815 case Intrinsic::riscv_vssseg7_mask: 1816 case Intrinsic::riscv_vssseg8_mask: 1817 case Intrinsic::riscv_vsoxseg2_mask: 1818 case Intrinsic::riscv_vsoxseg3_mask: 1819 case Intrinsic::riscv_vsoxseg4_mask: 1820 case Intrinsic::riscv_vsoxseg5_mask: 1821 case Intrinsic::riscv_vsoxseg6_mask: 1822 case Intrinsic::riscv_vsoxseg7_mask: 1823 case Intrinsic::riscv_vsoxseg8_mask: 1824 case Intrinsic::riscv_vsuxseg2_mask: 1825 case Intrinsic::riscv_vsuxseg3_mask: 1826 case Intrinsic::riscv_vsuxseg4_mask: 1827 case Intrinsic::riscv_vsuxseg5_mask: 1828 case Intrinsic::riscv_vsuxseg6_mask: 1829 case Intrinsic::riscv_vsuxseg7_mask: 1830 case Intrinsic::riscv_vsuxseg8_mask: 1831 return 
SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1832 /*IsStore*/ true, 1833 /*IsUnitStrided*/ false); 1834 } 1835 } 1836 1837 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1838 const AddrMode &AM, Type *Ty, 1839 unsigned AS, 1840 Instruction *I) const { 1841 // No global is ever allowed as a base. 1842 if (AM.BaseGV) 1843 return false; 1844 1845 // RVV instructions only support register addressing. 1846 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty)) 1847 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; 1848 1849 // Require a 12-bit signed offset. 1850 if (!isInt<12>(AM.BaseOffs)) 1851 return false; 1852 1853 switch (AM.Scale) { 1854 case 0: // "r+i" or just "i", depending on HasBaseReg. 1855 break; 1856 case 1: 1857 if (!AM.HasBaseReg) // allow "r+i". 1858 break; 1859 return false; // disallow "r+r" or "r+r+i". 1860 default: 1861 return false; 1862 } 1863 1864 return true; 1865 } 1866 1867 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1868 return isInt<12>(Imm); 1869 } 1870 1871 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1872 return isInt<12>(Imm); 1873 } 1874 1875 // On RV32, 64-bit integers are split into their high and low parts and held 1876 // in two different registers, so the trunc is free since the low register can 1877 // just be used. 1878 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of 1879 // isTruncateFree? 1880 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1881 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1882 return false; 1883 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1884 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1885 return (SrcBits == 64 && DestBits == 32); 1886 } 1887 1888 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1889 // We consider i64->i32 free on RV64 since we have good selection of W 1890 // instructions that make promoting operations back to i64 free in many cases. 1891 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || 1892 !DstVT.isInteger()) 1893 return false; 1894 unsigned SrcBits = SrcVT.getSizeInBits(); 1895 unsigned DestBits = DstVT.getSizeInBits(); 1896 return (SrcBits == 64 && DestBits == 32); 1897 } 1898 1899 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const { 1900 EVT SrcVT = Val.getValueType(); 1901 // free truncate from vnsrl and vnsra 1902 if (Subtarget.hasStdExtV() && 1903 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) && 1904 SrcVT.isVector() && VT2.isVector()) { 1905 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits(); 1906 unsigned DestBits = VT2.getVectorElementType().getSizeInBits(); 1907 if (SrcBits == DestBits * 2) { 1908 return true; 1909 } 1910 } 1911 return TargetLowering::isTruncateFree(Val, VT2); 1912 } 1913 1914 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1915 // Zexts are free if they can be combined with a load. 1916 // Don't advertise i32->i64 zextload as being free for RV64. It interacts 1917 // poorly with type legalization of compares preferring sext. 
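// For example, the zext of an i8 or i16 load folds into LBU/LHU, which
// already zero-extend the loaded value to XLEN, so the extension itself costs
// nothing.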
1918 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 1919 EVT MemVT = LD->getMemoryVT(); 1920 if ((MemVT == MVT::i8 || MemVT == MVT::i16) && 1921 (LD->getExtensionType() == ISD::NON_EXTLOAD || 1922 LD->getExtensionType() == ISD::ZEXTLOAD)) 1923 return true; 1924 } 1925 1926 return TargetLowering::isZExtFree(Val, VT2); 1927 } 1928 1929 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 1930 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 1931 } 1932 1933 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { 1934 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32); 1935 } 1936 1937 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { 1938 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); 1939 } 1940 1941 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { 1942 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 1943 Subtarget.hasVendorXCVbitmanip(); 1944 } 1945 1946 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( 1947 const Instruction &AndI) const { 1948 // We expect to be able to match a bit extraction instruction if the Zbs 1949 // extension is supported and the mask is a power of two. However, we 1950 // conservatively return false if the mask would fit in an ANDI instruction, 1951 // on the basis that it's possible the sinking+duplication of the AND in 1952 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction 1953 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). 1954 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) 1955 return false; 1956 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); 1957 if (!Mask) 1958 return false; 1959 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2(); 1960 } 1961 1962 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { 1963 EVT VT = Y.getValueType(); 1964 1965 // FIXME: Support vectors once we have tests. 1966 if (VT.isVector()) 1967 return false; 1968 1969 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 1970 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque()); 1971 } 1972 1973 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { 1974 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. 1975 if (Subtarget.hasStdExtZbs()) 1976 return X.getValueType().isScalarInteger(); 1977 auto *C = dyn_cast<ConstantSDNode>(Y); 1978 // XTheadBs provides th.tst (similar to bexti), if Y is a constant 1979 if (Subtarget.hasVendorXTHeadBs()) 1980 return C != nullptr; 1981 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position. 1982 return C && C->getAPIntValue().ule(10); 1983 } 1984 1985 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, 1986 EVT VT) const { 1987 // Only enable for rvv. 1988 if (!VT.isVector() || !Subtarget.hasVInstructions()) 1989 return false; 1990 1991 if (VT.isFixedLengthVector() && !isTypeLegal(VT)) 1992 return false; 1993 1994 return true; 1995 } 1996 1997 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 1998 Type *Ty) const { 1999 assert(Ty->isIntegerTy()); 2000 2001 unsigned BitSize = Ty->getIntegerBitWidth(); 2002 if (BitSize > Subtarget.getXLen()) 2003 return false; 2004 2005 // Fast path, assume 32-bit immediates are cheap. 
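// A sign-extended 32-bit immediate can be materialized with at most
// LUI+ADDI(W), which is normally cheaper than a constant-pool load.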
2006 int64_t Val = Imm.getSExtValue(); 2007 if (isInt<32>(Val)) 2008 return true; 2009 2010 // A constant pool entry may be more aligned than the load we're trying to 2011 // replace. If we don't support unaligned scalar mem, prefer the constant 2012 // pool. 2013 // TODO: Can the caller pass down the alignment? 2014 if (!Subtarget.enableUnalignedScalarMem()) 2015 return true; 2016 2017 // Prefer to keep the load if it would require many instructions. 2018 // This uses the same threshold we use for constant pools but doesn't 2019 // check useConstantPoolForLargeInts. 2020 // TODO: Should we keep the load only when we're definitely going to emit a 2021 // constant pool? 2022 2023 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); 2024 return Seq.size() <= Subtarget.getMaxBuildIntsCost(); 2025 } 2026 2027 bool RISCVTargetLowering:: 2028 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 2029 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 2030 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 2031 SelectionDAG &DAG) const { 2032 // One interesting pattern that we'd want to form is 'bit extract': 2033 // ((1 >> Y) & 1) ==/!= 0 2034 // But we also need to be careful not to try to reverse that fold. 2035 2036 // Is this '((1 >> Y) & 1)'? 2037 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) 2038 return false; // Keep the 'bit extract' pattern. 2039 2040 // Will this be '((1 >> Y) & 1)' after the transform? 2041 if (NewShiftOpcode == ISD::SRL && CC->isOne()) 2042 return true; // Do form the 'bit extract' pattern. 2043 2044 // If 'X' is a constant, and we transform, then we will immediately 2045 // try to undo the fold, thus causing an endless combine loop. 2046 // So only do the transform if X is not a constant. This matches the default 2047 // implementation of this function. 2048 return !XC; 2049 } 2050 2051 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { 2052 switch (Opcode) { 2053 case Instruction::Add: 2054 case Instruction::Sub: 2055 case Instruction::Mul: 2056 case Instruction::And: 2057 case Instruction::Or: 2058 case Instruction::Xor: 2059 case Instruction::FAdd: 2060 case Instruction::FSub: 2061 case Instruction::FMul: 2062 case Instruction::FDiv: 2063 case Instruction::ICmp: 2064 case Instruction::FCmp: 2065 return true; 2066 case Instruction::Shl: 2067 case Instruction::LShr: 2068 case Instruction::AShr: 2069 case Instruction::UDiv: 2070 case Instruction::SDiv: 2071 case Instruction::URem: 2072 case Instruction::SRem: 2073 case Instruction::Select: 2074 return Operand == 1; 2075 default: 2076 return false; 2077 } 2078 } 2079 2080 2081 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { 2082 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 2083 return false; 2084 2085 if (canSplatOperand(I->getOpcode(), Operand)) 2086 return true; 2087 2088 auto *II = dyn_cast<IntrinsicInst>(I); 2089 if (!II) 2090 return false; 2091 2092 switch (II->getIntrinsicID()) { 2093 case Intrinsic::fma: 2094 case Intrinsic::vp_fma: 2095 return Operand == 0 || Operand == 1; 2096 case Intrinsic::vp_shl: 2097 case Intrinsic::vp_lshr: 2098 case Intrinsic::vp_ashr: 2099 case Intrinsic::vp_udiv: 2100 case Intrinsic::vp_sdiv: 2101 case Intrinsic::vp_urem: 2102 case Intrinsic::vp_srem: 2103 case Intrinsic::ssub_sat: 2104 case Intrinsic::vp_ssub_sat: 2105 case Intrinsic::usub_sat: 2106 case Intrinsic::vp_usub_sat: 2107 return Operand == 1; 2108 // These intrinsics are commutative.
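// For example, a splatted scalar feeding either operand of a vp.add can be
// folded into vadd.vx, so both operand positions are splattable here.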
2109 case Intrinsic::vp_add: 2110 case Intrinsic::vp_mul: 2111 case Intrinsic::vp_and: 2112 case Intrinsic::vp_or: 2113 case Intrinsic::vp_xor: 2114 case Intrinsic::vp_fadd: 2115 case Intrinsic::vp_fmul: 2116 case Intrinsic::vp_icmp: 2117 case Intrinsic::vp_fcmp: 2118 case Intrinsic::smin: 2119 case Intrinsic::vp_smin: 2120 case Intrinsic::umin: 2121 case Intrinsic::vp_umin: 2122 case Intrinsic::smax: 2123 case Intrinsic::vp_smax: 2124 case Intrinsic::umax: 2125 case Intrinsic::vp_umax: 2126 case Intrinsic::sadd_sat: 2127 case Intrinsic::vp_sadd_sat: 2128 case Intrinsic::uadd_sat: 2129 case Intrinsic::vp_uadd_sat: 2130 // These intrinsics have 'vr' versions. 2131 case Intrinsic::vp_sub: 2132 case Intrinsic::vp_fsub: 2133 case Intrinsic::vp_fdiv: 2134 return Operand == 0 || Operand == 1; 2135 default: 2136 return false; 2137 } 2138 } 2139 2140 /// Check if sinking \p I's operands to I's basic block is profitable, because 2141 /// the operands can be folded into a target instruction, e.g. 2142 /// splats of scalars can fold into vector instructions. 2143 bool RISCVTargetLowering::shouldSinkOperands( 2144 Instruction *I, SmallVectorImpl<Use *> &Ops) const { 2145 using namespace llvm::PatternMatch; 2146 2147 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 2148 return false; 2149 2150 // Don't sink splat operands if the target prefers it. Some targets requires 2151 // S2V transfer buffers and we can run out of them copying the same value 2152 // repeatedly. 2153 // FIXME: It could still be worth doing if it would improve vector register 2154 // pressure and prevent a vector spill. 2155 if (!Subtarget.sinkSplatOperands()) 2156 return false; 2157 2158 for (auto OpIdx : enumerate(I->operands())) { 2159 if (!canSplatOperand(I, OpIdx.index())) 2160 continue; 2161 2162 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); 2163 // Make sure we are not already sinking this operand 2164 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) 2165 continue; 2166 2167 // We are looking for a splat that can be sunk. 2168 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 2169 m_Undef(), m_ZeroMask()))) 2170 continue; 2171 2172 // Don't sink i1 splats. 2173 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1)) 2174 continue; 2175 2176 // All uses of the shuffle should be sunk to avoid duplicating it across gpr 2177 // and vector registers 2178 for (Use &U : Op->uses()) { 2179 Instruction *Insn = cast<Instruction>(U.getUser()); 2180 if (!canSplatOperand(Insn, U.getOperandNo())) 2181 return false; 2182 } 2183 2184 Ops.push_back(&Op->getOperandUse(0)); 2185 Ops.push_back(&OpIdx.value()); 2186 } 2187 return true; 2188 } 2189 2190 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { 2191 unsigned Opc = VecOp.getOpcode(); 2192 2193 // Assume target opcodes can't be scalarized. 2194 // TODO - do we have any exceptions? 2195 if (Opc >= ISD::BUILTIN_OP_END) 2196 return false; 2197 2198 // If the vector op is not supported, try to convert to scalar. 2199 EVT VecVT = VecOp.getValueType(); 2200 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) 2201 return true; 2202 2203 // If the vector op is supported, but the scalar op is not, the transform may 2204 // not be worthwhile. 2205 // Permit a vector binary operation can be converted to scalar binary 2206 // operation which is custom lowered with illegal type. 
2207 EVT ScalarVT = VecVT.getScalarType(); 2208 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) || 2209 isOperationCustom(Opc, ScalarVT); 2210 } 2211 2212 bool RISCVTargetLowering::isOffsetFoldingLegal( 2213 const GlobalAddressSDNode *GA) const { 2214 // In order to maximise the opportunity for common subexpression elimination, 2215 // keep a separate ADD node for the global address offset instead of folding 2216 // it in the global address node. Later peephole optimisations may choose to 2217 // fold it back in when profitable. 2218 return false; 2219 } 2220 2221 // Return one of the following: 2222 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value. 2223 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its 2224 // positive counterpart, which will be materialized from the first returned 2225 // element. The second returned element indicates that an FNEG should 2226 // follow. 2227 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. 2228 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, 2229 EVT VT) const { 2230 if (!Subtarget.hasStdExtZfa()) 2231 return std::make_pair(-1, false); 2232 2233 bool IsSupportedVT = false; 2234 if (VT == MVT::f16) { 2235 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); 2236 } else if (VT == MVT::f32) { 2237 IsSupportedVT = true; 2238 } else if (VT == MVT::f64) { 2239 assert(Subtarget.hasStdExtD() && "Expect D extension"); 2240 IsSupportedVT = true; 2241 } 2242 2243 if (!IsSupportedVT) 2244 return std::make_pair(-1, false); 2245 2246 int Index = RISCVLoadFPImm::getLoadFPImm(Imm); 2247 if (Index < 0 && Imm.isNegative()) 2248 // Try the combination of its positive counterpart + FNEG. 2249 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true); 2250 else 2251 return std::make_pair(Index, false); 2252 } 2253 2254 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 2255 bool ForCodeSize) const { 2256 bool IsLegalVT = false; 2257 if (VT == MVT::f16) 2258 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); 2259 else if (VT == MVT::f32) 2260 IsLegalVT = Subtarget.hasStdExtFOrZfinx(); 2261 else if (VT == MVT::f64) 2262 IsLegalVT = Subtarget.hasStdExtDOrZdinx(); 2263 else if (VT == MVT::bf16) 2264 IsLegalVT = Subtarget.hasStdExtZfbfmin(); 2265 2266 if (!IsLegalVT) 2267 return false; 2268 2269 if (getLegalZfaFPImm(Imm, VT).first >= 0) 2270 return true; 2271 2272 // Cannot create a 64-bit floating-point immediate value for rv32. 2273 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { 2274 // td can handle +0.0 or -0.0 already. 2275 // -0.0 can be created by fmv + fneg. 2276 return Imm.isZero(); 2277 } 2278 2279 // Special case: fmv + fneg 2280 if (Imm.isNegZero()) 2281 return true; 2282 2283 // Building an integer and then converting requires a fmv at the end of 2284 // the integer sequence. 2285 const int Cost = 2286 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), 2287 Subtarget); 2288 return Cost <= FPImmCost; 2289 } 2290 2291 // TODO: This is very conservative. 2292 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 2293 unsigned Index) const { 2294 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) 2295 return false; 2296 2297 // Only support extracting a fixed from a fixed vector for now.
2298 if (ResVT.isScalableVector() || SrcVT.isScalableVector()) 2299 return false; 2300 2301 EVT EltVT = ResVT.getVectorElementType(); 2302 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node"); 2303 2304 // The smallest type we can slide is i8. 2305 // TODO: We can extract index 0 from a mask vector without a slide. 2306 if (EltVT == MVT::i1) 2307 return false; 2308 2309 unsigned ResElts = ResVT.getVectorNumElements(); 2310 unsigned SrcElts = SrcVT.getVectorNumElements(); 2311 2312 unsigned MinVLen = Subtarget.getRealMinVLen(); 2313 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits(); 2314 2315 // If we're extracting only data from the first VLEN bits of the source 2316 // then we can always do this with an m1 vslidedown.vx. Restricting the 2317 // Index ensures we can use a vslidedown.vi. 2318 // TODO: We can generalize this when the exact VLEN is known. 2319 if (Index + ResElts <= MinVLMAX && Index < 31) 2320 return true; 2321 2322 // Conservatively only handle extracting half of a vector. 2323 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be 2324 // a cheap extract. However, this case is important in practice for 2325 // shuffled extracts of longer vectors. How should we resolve this? 2326 if ((ResElts * 2) != SrcElts) 2327 return false; 2328 2329 // Slide can support arbitrary index, but we only treat vslidedown.vi as 2330 // cheap. 2331 if (Index >= 32) 2332 return false; 2333 2334 // TODO: We can do arbitrary slidedowns, but for now only support extracting 2335 // the upper half of a vector until we have more test coverage. 2336 return Index == 0 || Index == ResElts; 2337 } 2338 2339 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 2340 CallingConv::ID CC, 2341 EVT VT) const { 2342 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2343 // We might still end up using a GPR but that will be decided based on ABI. 2344 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2345 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2346 return MVT::f32; 2347 2348 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); 2349 2350 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) 2351 return MVT::i64; 2352 2353 return PartVT; 2354 } 2355 2356 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 2357 CallingConv::ID CC, 2358 EVT VT) const { 2359 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2360 // We might still end up using a GPR but that will be decided based on ABI.
2361 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2362 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2363 return 1; 2364 2365 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); 2366 } 2367 2368 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( 2369 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 2370 unsigned &NumIntermediates, MVT &RegisterVT) const { 2371 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( 2372 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); 2373 2374 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) 2375 IntermediateVT = MVT::i64; 2376 2377 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) 2378 RegisterVT = MVT::i64; 2379 2380 return NumRegs; 2381 } 2382 2383 // Changes the condition code and swaps operands if necessary, so the SetCC 2384 // operation matches one of the comparisons supported directly by branches 2385 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare 2386 // with 1/-1. 2387 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, 2388 ISD::CondCode &CC, SelectionDAG &DAG) { 2389 // If this is a single bit test that can't be handled by ANDI, shift the 2390 // bit to be tested to the MSB and perform a signed compare with 0. 2391 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && 2392 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && 2393 isa<ConstantSDNode>(LHS.getOperand(1))) { 2394 uint64_t Mask = LHS.getConstantOperandVal(1); 2395 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) { 2396 unsigned ShAmt = 0; 2397 if (isPowerOf2_64(Mask)) { 2398 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 2399 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); 2400 } else { 2401 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); 2402 } 2403 2404 LHS = LHS.getOperand(0); 2405 if (ShAmt != 0) 2406 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, 2407 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 2408 return; 2409 } 2410 } 2411 2412 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 2413 int64_t C = RHSC->getSExtValue(); 2414 switch (CC) { 2415 default: break; 2416 case ISD::SETGT: 2417 // Convert X > -1 to X >= 0. 2418 if (C == -1) { 2419 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 2420 CC = ISD::SETGE; 2421 return; 2422 } 2423 break; 2424 case ISD::SETLT: 2425 // Convert X < 1 to 0 >= X. 
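// As with the SETGT case above, this avoids materializing the constant:
// the resulting (setge 0, X) can compare against the zero register directly.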
2426 if (C == 1) { 2427 RHS = LHS; 2428 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 2429 CC = ISD::SETGE; 2430 return; 2431 } 2432 break; 2433 } 2434 } 2435 2436 switch (CC) { 2437 default: 2438 break; 2439 case ISD::SETGT: 2440 case ISD::SETLE: 2441 case ISD::SETUGT: 2442 case ISD::SETULE: 2443 CC = ISD::getSetCCSwappedOperands(CC); 2444 std::swap(LHS, RHS); 2445 break; 2446 } 2447 } 2448 2449 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 2450 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 2451 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 2452 if (VT.getVectorElementType() == MVT::i1) 2453 KnownSize *= 8; 2454 2455 switch (KnownSize) { 2456 default: 2457 llvm_unreachable("Invalid LMUL."); 2458 case 8: 2459 return RISCVII::VLMUL::LMUL_F8; 2460 case 16: 2461 return RISCVII::VLMUL::LMUL_F4; 2462 case 32: 2463 return RISCVII::VLMUL::LMUL_F2; 2464 case 64: 2465 return RISCVII::VLMUL::LMUL_1; 2466 case 128: 2467 return RISCVII::VLMUL::LMUL_2; 2468 case 256: 2469 return RISCVII::VLMUL::LMUL_4; 2470 case 512: 2471 return RISCVII::VLMUL::LMUL_8; 2472 } 2473 } 2474 2475 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 2476 switch (LMul) { 2477 default: 2478 llvm_unreachable("Invalid LMUL."); 2479 case RISCVII::VLMUL::LMUL_F8: 2480 case RISCVII::VLMUL::LMUL_F4: 2481 case RISCVII::VLMUL::LMUL_F2: 2482 case RISCVII::VLMUL::LMUL_1: 2483 return RISCV::VRRegClassID; 2484 case RISCVII::VLMUL::LMUL_2: 2485 return RISCV::VRM2RegClassID; 2486 case RISCVII::VLMUL::LMUL_4: 2487 return RISCV::VRM4RegClassID; 2488 case RISCVII::VLMUL::LMUL_8: 2489 return RISCV::VRM8RegClassID; 2490 } 2491 } 2492 2493 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 2494 RISCVII::VLMUL LMUL = getLMUL(VT); 2495 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 2496 LMUL == RISCVII::VLMUL::LMUL_F4 || 2497 LMUL == RISCVII::VLMUL::LMUL_F2 || 2498 LMUL == RISCVII::VLMUL::LMUL_1) { 2499 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 2500 "Unexpected subreg numbering"); 2501 return RISCV::sub_vrm1_0 + Index; 2502 } 2503 if (LMUL == RISCVII::VLMUL::LMUL_2) { 2504 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 2505 "Unexpected subreg numbering"); 2506 return RISCV::sub_vrm2_0 + Index; 2507 } 2508 if (LMUL == RISCVII::VLMUL::LMUL_4) { 2509 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 2510 "Unexpected subreg numbering"); 2511 return RISCV::sub_vrm4_0 + Index; 2512 } 2513 llvm_unreachable("Invalid vector type."); 2514 } 2515 2516 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 2517 if (VT.getVectorElementType() == MVT::i1) 2518 return RISCV::VRRegClassID; 2519 return getRegClassIDForLMUL(getLMUL(VT)); 2520 } 2521 2522 // Attempt to decompose a subvector insert/extract between VecVT and 2523 // SubVecVT via subregister indices. Returns the subregister index that 2524 // can perform the subvector insert/extract with the given element index, as 2525 // well as the index corresponding to any leftover subvectors that must be 2526 // further inserted/extracted within the register class for SubVecVT. 
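// For example (using the notation from the body below), extracting nxv2i32
// at index 6 from nxv8i32 walks LMUL 4 -> 2 -> 1 and yields
// sub_vrm2_1_then_sub_vrm1_1 with a leftover element index of 0.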
2527 std::pair<unsigned, unsigned> 2528 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2529 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 2530 const RISCVRegisterInfo *TRI) { 2531 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 2532 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 2533 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 2534 "Register classes not ordered"); 2535 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 2536 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 2537 // Try to compose a subregister index that takes us from the incoming 2538 // LMUL>1 register class down to the outgoing one. At each step we half 2539 // the LMUL: 2540 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 2541 // Note that this is not guaranteed to find a subregister index, such as 2542 // when we are extracting from one VR type to another. 2543 unsigned SubRegIdx = RISCV::NoSubRegister; 2544 for (const unsigned RCID : 2545 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 2546 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 2547 VecVT = VecVT.getHalfNumVectorElementsVT(); 2548 bool IsHi = 2549 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 2550 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 2551 getSubregIndexByMVT(VecVT, IsHi)); 2552 if (IsHi) 2553 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 2554 } 2555 return {SubRegIdx, InsertExtractIdx}; 2556 } 2557 2558 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 2559 // stores for those types. 2560 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 2561 return !Subtarget.useRVVForFixedLengthVectors() || 2562 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 2563 } 2564 2565 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { 2566 if (!ScalarTy.isSimple()) 2567 return false; 2568 switch (ScalarTy.getSimpleVT().SimpleTy) { 2569 case MVT::iPTR: 2570 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; 2571 case MVT::i8: 2572 case MVT::i16: 2573 case MVT::i32: 2574 return true; 2575 case MVT::i64: 2576 return Subtarget.hasVInstructionsI64(); 2577 case MVT::f16: 2578 return Subtarget.hasVInstructionsF16(); 2579 case MVT::f32: 2580 return Subtarget.hasVInstructionsF32(); 2581 case MVT::f64: 2582 return Subtarget.hasVInstructionsF64(); 2583 default: 2584 return false; 2585 } 2586 } 2587 2588 2589 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { 2590 return NumRepeatedDivisors; 2591 } 2592 2593 static SDValue getVLOperand(SDValue Op) { 2594 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2595 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2596 "Unexpected opcode"); 2597 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2598 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
1 : 0); 2599 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2600 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2601 if (!II) 2602 return SDValue(); 2603 return Op.getOperand(II->VLOperand + 1 + HasChain); 2604 } 2605 2606 static bool useRVVForFixedLengthVectorVT(MVT VT, 2607 const RISCVSubtarget &Subtarget) { 2608 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 2609 if (!Subtarget.useRVVForFixedLengthVectors()) 2610 return false; 2611 2612 // We only support a set of vector types with a consistent maximum fixed size 2613 // across all supported vector element types to avoid legalization issues. 2614 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 2615 // fixed-length vector type we support is 1024 bytes. 2616 if (VT.getFixedSizeInBits() > 1024 * 8) 2617 return false; 2618 2619 unsigned MinVLen = Subtarget.getRealMinVLen(); 2620 2621 MVT EltVT = VT.getVectorElementType(); 2622 2623 // Don't use RVV for vectors we cannot scalarize if required. 2624 switch (EltVT.SimpleTy) { 2625 // i1 is supported but has different rules. 2626 default: 2627 return false; 2628 case MVT::i1: 2629 // Masks can only use a single register. 2630 if (VT.getVectorNumElements() > MinVLen) 2631 return false; 2632 MinVLen /= 8; 2633 break; 2634 case MVT::i8: 2635 case MVT::i16: 2636 case MVT::i32: 2637 break; 2638 case MVT::i64: 2639 if (!Subtarget.hasVInstructionsI64()) 2640 return false; 2641 break; 2642 case MVT::f16: 2643 if (!Subtarget.hasVInstructionsF16Minimal()) 2644 return false; 2645 break; 2646 case MVT::bf16: 2647 if (!Subtarget.hasVInstructionsBF16()) 2648 return false; 2649 break; 2650 case MVT::f32: 2651 if (!Subtarget.hasVInstructionsF32()) 2652 return false; 2653 break; 2654 case MVT::f64: 2655 if (!Subtarget.hasVInstructionsF64()) 2656 return false; 2657 break; 2658 } 2659 2660 // Reject elements larger than ELEN. 2661 if (EltVT.getSizeInBits() > Subtarget.getELen()) 2662 return false; 2663 2664 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 2665 // Don't use RVV for types that don't fit. 2666 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 2667 return false; 2668 2669 // TODO: Perhaps an artificial restriction, but worth having whilst getting 2670 // the base fixed length RVV support in place. 2671 if (!VT.isPow2VectorType()) 2672 return false; 2673 2674 return true; 2675 } 2676 2677 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 2678 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 2679 } 2680 2681 // Return the largest legal scalable vector type that matches VT's element type. 2682 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 2683 const RISCVSubtarget &Subtarget) { 2684 // This may be called before legal types are setup. 2685 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 2686 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 2687 "Expected legal fixed length vector!"); 2688 2689 unsigned MinVLen = Subtarget.getRealMinVLen(); 2690 unsigned MaxELen = Subtarget.getELen(); 2691 2692 MVT EltVT = VT.getVectorElementType(); 2693 switch (EltVT.SimpleTy) { 2694 default: 2695 llvm_unreachable("unexpected element type for RVV container"); 2696 case MVT::i1: 2697 case MVT::i8: 2698 case MVT::i16: 2699 case MVT::i32: 2700 case MVT::i64: 2701 case MVT::bf16: 2702 case MVT::f16: 2703 case MVT::f32: 2704 case MVT::f64: { 2705 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 2706 // narrower types. 
The smallest fractional LMUL we support is 8/ELEN. Within 2707 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 2708 unsigned NumElts = 2709 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; 2710 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); 2711 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); 2712 return MVT::getScalableVectorVT(EltVT, NumElts); 2713 } 2714 } 2715 } 2716 2717 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 2718 const RISCVSubtarget &Subtarget) { 2719 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 2720 Subtarget); 2721 } 2722 2723 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 2724 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); 2725 } 2726 2727 // Grow V to consume an entire RVV register. 2728 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2729 const RISCVSubtarget &Subtarget) { 2730 assert(VT.isScalableVector() && 2731 "Expected to convert into a scalable vector!"); 2732 assert(V.getValueType().isFixedLengthVector() && 2733 "Expected a fixed length vector operand!"); 2734 SDLoc DL(V); 2735 SDValue Zero = DAG.getVectorIdxConstant(0, DL); 2736 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 2737 } 2738 2739 // Shrink V so it's just big enough to maintain a VT's worth of data. 2740 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2741 const RISCVSubtarget &Subtarget) { 2742 assert(VT.isFixedLengthVector() && 2743 "Expected to convert into a fixed length vector!"); 2744 assert(V.getValueType().isScalableVector() && 2745 "Expected a scalable vector operand!"); 2746 SDLoc DL(V); 2747 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2748 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 2749 } 2750 2751 /// Return the type of the mask type suitable for masking the provided 2752 /// vector type. This is simply an i1 element type vector of the same 2753 /// (possibly scalable) length. 2754 static MVT getMaskTypeFor(MVT VecVT) { 2755 assert(VecVT.isVector()); 2756 ElementCount EC = VecVT.getVectorElementCount(); 2757 return MVT::getVectorVT(MVT::i1, EC); 2758 } 2759 2760 /// Creates an all ones mask suitable for masking a vector of type VecTy with 2761 /// vector length VL. . 2762 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, 2763 SelectionDAG &DAG) { 2764 MVT MaskVT = getMaskTypeFor(VecVT); 2765 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2766 } 2767 2768 static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2769 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2770 // If we know the exact VLEN, and our VL is exactly equal to VLMAX, 2771 // canonicalize the representation. InsertVSETVLI will pick the immediate 2772 // encoding later if profitable. 
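// For example, with an exact VLEN of 128 a v4i32 fixed-length vector lives in
// an nxv2i32 (LMUL=1) container whose VLMAX is VLEN * LMUL / SEW = 4, so a
// requested VL of 4 is emitted as X0 (VLMAX) rather than the constant 4.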
2773 const auto [MinVLMAX, MaxVLMAX] = 2774 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 2775 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) 2776 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2777 2778 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); 2779 } 2780 2781 static std::pair<SDValue, SDValue> 2782 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, 2783 const RISCVSubtarget &Subtarget) { 2784 assert(VecVT.isScalableVector() && "Expecting a scalable vector"); 2785 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2786 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); 2787 return {Mask, VL}; 2788 } 2789 2790 static std::pair<SDValue, SDValue> 2791 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2792 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2793 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2794 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); 2795 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); 2796 return {Mask, VL}; 2797 } 2798 2799 // Gets the two common "VL" operands: an all-ones mask and the vector length. 2800 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 2801 // the vector type that the fixed-length vector is contained in. Otherwise if 2802 // VecVT is scalable, then ContainerVT should be the same as VecVT. 2803 static std::pair<SDValue, SDValue> 2804 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, 2805 const RISCVSubtarget &Subtarget) { 2806 if (VecVT.isFixedLengthVector()) 2807 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG, 2808 Subtarget); 2809 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2810 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget); 2811 } 2812 2813 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, 2814 SelectionDAG &DAG) const { 2815 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2816 return DAG.getElementCount(DL, Subtarget.getXLenVT(), 2817 VecVT.getVectorElementCount()); 2818 } 2819 2820 std::pair<unsigned, unsigned> 2821 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, 2822 const RISCVSubtarget &Subtarget) { 2823 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2824 2825 unsigned EltSize = VecVT.getScalarSizeInBits(); 2826 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 2827 2828 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 2829 unsigned MaxVLMAX = 2830 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 2831 2832 unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 2833 unsigned MinVLMAX = 2834 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); 2835 2836 return std::make_pair(MinVLMAX, MaxVLMAX); 2837 } 2838 2839 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few 2840 // of either is (currently) supported. This can get us into an infinite loop 2841 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR 2842 // as a ..., etc. 2843 // Until either (or both) of these can reliably lower any node, reporting that 2844 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks 2845 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, 2846 // which is not desirable. 
2847 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( 2848 EVT VT, unsigned DefinedValues) const { 2849 return false; 2850 } 2851 2852 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { 2853 // TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is 1; 2854 // this is implementation-defined. 2855 if (!VT.isVector()) 2856 return InstructionCost::getInvalid(); 2857 unsigned DLenFactor = Subtarget.getDLenFactor(); 2858 unsigned Cost; 2859 if (VT.isScalableVector()) { 2860 unsigned LMul; 2861 bool Fractional; 2862 std::tie(LMul, Fractional) = 2863 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); 2864 if (Fractional) 2865 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; 2866 else 2867 Cost = (LMul * DLenFactor); 2868 } else { 2869 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor); 2870 } 2871 return Cost; 2872 } 2873 2874 2875 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv 2876 /// is generally quadratic in the number of vregs implied by LMUL. Note that 2877 /// the other operands (index and possibly mask) are handled separately. 2878 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { 2879 return getLMULCost(VT) * getLMULCost(VT); 2880 } 2881 2882 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. 2883 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, 2884 /// or may track the vrgather.vv cost. It is implementation-dependent. 2885 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { 2886 return getLMULCost(VT); 2887 } 2888 2889 /// Return the cost of a vslidedown.vx or vslideup.vx instruction 2890 /// for the type VT. (This does not cover the vslide1up or vslide1down 2891 /// variants.) Slides may be linear in the number of vregs implied by LMUL, 2892 /// or may track the vrgather.vv cost. It is implementation-dependent. 2893 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { 2894 return getLMULCost(VT); 2895 } 2896 2897 /// Return the cost of a vslidedown.vi or vslideup.vi instruction 2898 /// for the type VT. (This does not cover the vslide1up or vslide1down 2899 /// variants.) Slides may be linear in the number of vregs implied by LMUL, 2900 /// or may track the vrgather.vv cost. It is implementation-dependent. 2901 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { 2902 return getLMULCost(VT); 2903 } 2904 2905 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, 2906 const RISCVSubtarget &Subtarget) { 2907 // RISC-V FP-to-int conversions saturate to the destination register size, but 2908 // don't produce 0 for nan. We can use a conversion instruction and fix the 2909 // nan case with a compare and a select. 2910 SDValue Src = Op.getOperand(0); 2911 2912 MVT DstVT = Op.getSimpleValueType(); 2913 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2914 2915 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; 2916 2917 if (!DstVT.isVector()) { 2918 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate 2919 // the result. 2920 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || 2921 Src.getValueType() == MVT::bf16) { 2922 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); 2923 } 2924 2925 unsigned Opc; 2926 if (SatVT == DstVT) 2927 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 2928 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 2929 Opc = IsSigned ?
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 2930 else 2931 return SDValue(); 2932 // FIXME: Support other SatVTs by clamping before or after the conversion. 2933 2934 SDLoc DL(Op); 2935 SDValue FpToInt = DAG.getNode( 2936 Opc, DL, DstVT, Src, 2937 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT())); 2938 2939 if (Opc == RISCVISD::FCVT_WU_RV64) 2940 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 2941 2942 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 2943 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, 2944 ISD::CondCode::SETUO); 2945 } 2946 2947 // Vectors. 2948 2949 MVT DstEltVT = DstVT.getVectorElementType(); 2950 MVT SrcVT = Src.getSimpleValueType(); 2951 MVT SrcEltVT = SrcVT.getVectorElementType(); 2952 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 2953 unsigned DstEltSize = DstEltVT.getSizeInBits(); 2954 2955 // Only handle saturating to the destination type. 2956 if (SatVT != DstEltVT) 2957 return SDValue(); 2958 2959 // FIXME: Don't support narrowing by more than 1 steps for now. 2960 if (SrcEltSize > (2 * DstEltSize)) 2961 return SDValue(); 2962 2963 MVT DstContainerVT = DstVT; 2964 MVT SrcContainerVT = SrcVT; 2965 if (DstVT.isFixedLengthVector()) { 2966 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget); 2967 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 2968 assert(DstContainerVT.getVectorElementCount() == 2969 SrcContainerVT.getVectorElementCount() && 2970 "Expected same element count"); 2971 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2972 } 2973 2974 SDLoc DL(Op); 2975 2976 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget); 2977 2978 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 2979 {Src, Src, DAG.getCondCode(ISD::SETNE), 2980 DAG.getUNDEF(Mask.getValueType()), Mask, VL}); 2981 2982 // Need to widen by more than 1 step, promote the FP type, then do a widening 2983 // convert. 2984 if (DstEltSize > (2 * SrcEltSize)) { 2985 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!"); 2986 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); 2987 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL); 2988 } 2989 2990 unsigned RVVOpc = 2991 IsSigned ? 
RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 2992 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL); 2993 2994 SDValue SplatZero = DAG.getNode( 2995 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), 2996 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 2997 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero, 2998 Res, DAG.getUNDEF(DstContainerVT), VL); 2999 3000 if (DstVT.isFixedLengthVector()) 3001 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); 3002 3003 return Res; 3004 } 3005 3006 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { 3007 switch (Opc) { 3008 case ISD::FROUNDEVEN: 3009 case ISD::STRICT_FROUNDEVEN: 3010 case ISD::VP_FROUNDEVEN: 3011 return RISCVFPRndMode::RNE; 3012 case ISD::FTRUNC: 3013 case ISD::STRICT_FTRUNC: 3014 case ISD::VP_FROUNDTOZERO: 3015 return RISCVFPRndMode::RTZ; 3016 case ISD::FFLOOR: 3017 case ISD::STRICT_FFLOOR: 3018 case ISD::VP_FFLOOR: 3019 return RISCVFPRndMode::RDN; 3020 case ISD::FCEIL: 3021 case ISD::STRICT_FCEIL: 3022 case ISD::VP_FCEIL: 3023 return RISCVFPRndMode::RUP; 3024 case ISD::FROUND: 3025 case ISD::STRICT_FROUND: 3026 case ISD::VP_FROUND: 3027 return RISCVFPRndMode::RMM; 3028 case ISD::FRINT: 3029 return RISCVFPRndMode::DYN; 3030 } 3031 3032 return RISCVFPRndMode::Invalid; 3033 } 3034 3035 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND 3036 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to 3037 // the integer domain and back. Taking care to avoid converting values that are 3038 // nan or already correct. 3039 static SDValue 3040 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 3041 const RISCVSubtarget &Subtarget) { 3042 MVT VT = Op.getSimpleValueType(); 3043 assert(VT.isVector() && "Unexpected type"); 3044 3045 SDLoc DL(Op); 3046 3047 SDValue Src = Op.getOperand(0); 3048 3049 MVT ContainerVT = VT; 3050 if (VT.isFixedLengthVector()) { 3051 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3052 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3053 } 3054 3055 SDValue Mask, VL; 3056 if (Op->isVPOpcode()) { 3057 Mask = Op.getOperand(1); 3058 if (VT.isFixedLengthVector()) 3059 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 3060 Subtarget); 3061 VL = Op.getOperand(2); 3062 } else { 3063 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3064 } 3065 3066 // Freeze the source since we are increasing the number of uses. 3067 Src = DAG.getFreeze(Src); 3068 3069 // We do the conversion on the absolute value and fix the sign at the end. 3070 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 3071 3072 // Determine the largest integer that can be represented exactly. This and 3073 // values larger than it don't have any fractional bits so don't need to 3074 // be converted. 
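  // As an illustration (assuming IEEE semantics): for f32 the precision is 24 bits, so this bound is 2^23 = 8388608.0; for f16 it is 2^10 = 1024.0. Any value whose magnitude is at least this bound already has no fractional bits.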
3075 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 3076 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3077 APFloat MaxVal = APFloat(FltSem); 3078 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3079 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3080 SDValue MaxValNode = 3081 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 3082 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 3083 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 3084 3085 // If abs(Src) was larger than MaxVal or nan, keep it. 3086 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3087 Mask = 3088 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, 3089 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), 3090 Mask, Mask, VL}); 3091 3092 // Truncate to integer and convert back to FP. 3093 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 3094 MVT XLenVT = Subtarget.getXLenVT(); 3095 SDValue Truncated; 3096 3097 switch (Op.getOpcode()) { 3098 default: 3099 llvm_unreachable("Unexpected opcode"); 3100 case ISD::FCEIL: 3101 case ISD::VP_FCEIL: 3102 case ISD::FFLOOR: 3103 case ISD::VP_FFLOOR: 3104 case ISD::FROUND: 3105 case ISD::FROUNDEVEN: 3106 case ISD::VP_FROUND: 3107 case ISD::VP_FROUNDEVEN: 3108 case ISD::VP_FROUNDTOZERO: { 3109 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3110 assert(FRM != RISCVFPRndMode::Invalid); 3111 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, 3112 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 3113 break; 3114 } 3115 case ISD::FTRUNC: 3116 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, 3117 Mask, VL); 3118 break; 3119 case ISD::FRINT: 3120 case ISD::VP_FRINT: 3121 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); 3122 break; 3123 case ISD::FNEARBYINT: 3124 case ISD::VP_FNEARBYINT: 3125 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, 3126 Mask, VL); 3127 break; 3128 } 3129 3130 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 3131 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) 3132 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, 3133 Mask, VL); 3134 3135 // Restore the original sign so that -0.0 is preserved. 3136 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 3137 Src, Src, Mask, VL); 3138 3139 if (!VT.isFixedLengthVector()) 3140 return Truncated; 3141 3142 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3143 } 3144 3145 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND 3146 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to 3147 // qNan and coverting the new source to integer and back to FP. 3148 static SDValue 3149 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 3150 const RISCVSubtarget &Subtarget) { 3151 SDLoc DL(Op); 3152 MVT VT = Op.getSimpleValueType(); 3153 SDValue Chain = Op.getOperand(0); 3154 SDValue Src = Op.getOperand(1); 3155 3156 MVT ContainerVT = VT; 3157 if (VT.isFixedLengthVector()) { 3158 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3159 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3160 } 3161 3162 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3163 3164 // Freeze the source since we are increasing the number of uses. 
3165 Src = DAG.getFreeze(Src); 3166 3167 // Convert sNan to qNan by executing x + x for each unordered element x in Src. 3168 MVT MaskVT = Mask.getSimpleValueType(); 3169 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL, 3170 DAG.getVTList(MaskVT, MVT::Other), 3171 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE), 3172 DAG.getUNDEF(MaskVT), Mask, VL}); 3173 Chain = Unorder.getValue(1); 3174 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, 3175 DAG.getVTList(ContainerVT, MVT::Other), 3176 {Chain, Src, Src, Src, Unorder, VL}); 3177 Chain = Src.getValue(1); 3178 3179 // We do the conversion on the absolute value and fix the sign at the end. 3180 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 3181 3182 // Determine the largest integer that can be represented exactly. This and 3183 // values larger than it don't have any fractional bits so don't need to 3184 // be converted. 3185 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 3186 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3187 APFloat MaxVal = APFloat(FltSem); 3188 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3189 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3190 SDValue MaxValNode = 3191 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 3192 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 3193 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 3194 3195 // If abs(Src) was larger than MaxVal or nan, keep it. 3196 Mask = DAG.getNode( 3197 RISCVISD::SETCC_VL, DL, MaskVT, 3198 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL}); 3199 3200 // Truncate to integer and convert back to FP. 3201 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 3202 MVT XLenVT = Subtarget.getXLenVT(); 3203 SDValue Truncated; 3204 3205 switch (Op.getOpcode()) { 3206 default: 3207 llvm_unreachable("Unexpected opcode"); 3208 case ISD::STRICT_FCEIL: 3209 case ISD::STRICT_FFLOOR: 3210 case ISD::STRICT_FROUND: 3211 case ISD::STRICT_FROUNDEVEN: { 3212 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3213 assert(FRM != RISCVFPRndMode::Invalid); 3214 Truncated = DAG.getNode( 3215 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), 3216 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL}); 3217 break; 3218 } 3219 case ISD::STRICT_FTRUNC: 3220 Truncated = 3221 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, 3222 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); 3223 break; 3224 case ISD::STRICT_FNEARBYINT: 3225 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, 3226 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, 3227 Mask, VL); 3228 break; 3229 } 3230 Chain = Truncated.getValue(1); 3231 3232 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 3233 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { 3234 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, 3235 DAG.getVTList(ContainerVT, MVT::Other), Chain, 3236 Truncated, Mask, VL); 3237 Chain = Truncated.getValue(1); 3238 } 3239 3240 // Restore the original sign so that -0.0 is preserved.
3241 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 3242 Src, Src, Mask, VL); 3243 3244 if (VT.isFixedLengthVector()) 3245 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3246 return DAG.getMergeValues({Truncated, Chain}, DL); 3247 } 3248 3249 static SDValue 3250 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 3251 const RISCVSubtarget &Subtarget) { 3252 MVT VT = Op.getSimpleValueType(); 3253 if (VT.isVector()) 3254 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 3255 3256 if (DAG.shouldOptForSize()) 3257 return SDValue(); 3258 3259 SDLoc DL(Op); 3260 SDValue Src = Op.getOperand(0); 3261 3262 // Create an integer the size of the mantissa with the MSB set. This and all 3263 // values larger than it don't have any fractional bits so don't need to be 3264 // converted. 3265 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 3266 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3267 APFloat MaxVal = APFloat(FltSem); 3268 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3269 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3270 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 3271 3272 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3273 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, 3274 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); 3275 } 3276 3277 // Expand vector LRINT and LLRINT by converting to the integer domain. 3278 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, 3279 const RISCVSubtarget &Subtarget) { 3280 MVT VT = Op.getSimpleValueType(); 3281 assert(VT.isVector() && "Unexpected type"); 3282 3283 SDLoc DL(Op); 3284 SDValue Src = Op.getOperand(0); 3285 MVT ContainerVT = VT; 3286 3287 if (VT.isFixedLengthVector()) { 3288 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3289 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3290 } 3291 3292 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3293 SDValue Truncated = 3294 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); 3295 3296 if (!VT.isFixedLengthVector()) 3297 return Truncated; 3298 3299 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3300 } 3301 3302 static SDValue 3303 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, 3304 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, 3305 SDValue Offset, SDValue Mask, SDValue VL, 3306 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3307 if (Merge.isUndef()) 3308 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3309 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3310 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3311 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); 3312 } 3313 3314 static SDValue 3315 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, 3316 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 3317 SDValue VL, 3318 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3319 if (Merge.isUndef()) 3320 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3321 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3322 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3323 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); 3324 } 3325 3326 static MVT getLMUL1VT(MVT VT) { 3327 assert(VT.getVectorElementType().getSizeInBits() 
<= 64 && 3328 "Unexpected vector MVT"); 3329 return MVT::getScalableVectorVT( 3330 VT.getVectorElementType(), 3331 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 3332 } 3333 3334 struct VIDSequence { 3335 int64_t StepNumerator; 3336 unsigned StepDenominator; 3337 int64_t Addend; 3338 }; 3339 3340 static std::optional<uint64_t> getExactInteger(const APFloat &APF, 3341 uint32_t BitWidth) { 3342 // We will use a SINT_TO_FP to materialize this constant so we should use a 3343 // signed APSInt here. 3344 APSInt ValInt(BitWidth, /*IsUnsigned*/ false); 3345 // We use an arbitrary rounding mode here. If a floating-point is an exact 3346 // integer (e.g., 1.0), the rounding mode does not affect the output value. If 3347 // the rounding mode changes the output value, then it is not an exact 3348 // integer. 3349 RoundingMode ArbitraryRM = RoundingMode::TowardZero; 3350 bool IsExact; 3351 // If it is out of signed integer range, it will return an invalid operation. 3352 // If it is not an exact integer, IsExact is false. 3353 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == 3354 APFloatBase::opInvalidOp) || 3355 !IsExact) 3356 return std::nullopt; 3357 return ValInt.extractBitsAsZExtValue(BitWidth, 0); 3358 } 3359 3360 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 3361 // to the (non-zero) step S and start value X. This can be then lowered as the 3362 // RVV sequence (VID * S) + X, for example. 3363 // The step S is represented as an integer numerator divided by a positive 3364 // denominator. Note that the implementation currently only identifies 3365 // sequences in which either the numerator is +/- 1 or the denominator is 1. It 3366 // cannot detect 2/3, for example. 3367 // Note that this method will also match potentially unappealing index 3368 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to 3369 // determine whether this is worth generating code for. 3370 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op, 3371 unsigned EltSizeInBits) { 3372 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); 3373 if (!cast<BuildVectorSDNode>(Op)->isConstant()) 3374 return std::nullopt; 3375 bool IsInteger = Op.getValueType().isInteger(); 3376 3377 std::optional<unsigned> SeqStepDenom; 3378 std::optional<int64_t> SeqStepNum, SeqAddend; 3379 std::optional<std::pair<uint64_t, unsigned>> PrevElt; 3380 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits()); 3381 3382 // First extract the ops into a list of constant integer values. This may not 3383 // be possible for floats if they're not all representable as integers. 3384 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands()); 3385 const unsigned OpSize = Op.getScalarValueSizeInBits(); 3386 for (auto [Idx, Elt] : enumerate(Op->op_values())) { 3387 if (Elt.isUndef()) { 3388 Elts[Idx] = std::nullopt; 3389 continue; 3390 } 3391 if (IsInteger) { 3392 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize); 3393 } else { 3394 auto ExactInteger = 3395 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize); 3396 if (!ExactInteger) 3397 return std::nullopt; 3398 Elts[Idx] = *ExactInteger; 3399 } 3400 } 3401 3402 for (auto [Idx, Elt] : enumerate(Elts)) { 3403 // Assume undef elements match the sequence; we just have to be careful 3404 // when interpolating across them. 
3405 if (!Elt) 3406 continue; 3407 3408 if (PrevElt) { 3409 // Calculate the step since the last non-undef element, and ensure 3410 // it's consistent across the entire sequence. 3411 unsigned IdxDiff = Idx - PrevElt->second; 3412 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits); 3413 3414 // A zero-value value difference means that we're somewhere in the middle 3415 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a 3416 // step change before evaluating the sequence. 3417 if (ValDiff == 0) 3418 continue; 3419 3420 int64_t Remainder = ValDiff % IdxDiff; 3421 // Normalize the step if it's greater than 1. 3422 if (Remainder != ValDiff) { 3423 // The difference must cleanly divide the element span. 3424 if (Remainder != 0) 3425 return std::nullopt; 3426 ValDiff /= IdxDiff; 3427 IdxDiff = 1; 3428 } 3429 3430 if (!SeqStepNum) 3431 SeqStepNum = ValDiff; 3432 else if (ValDiff != SeqStepNum) 3433 return std::nullopt; 3434 3435 if (!SeqStepDenom) 3436 SeqStepDenom = IdxDiff; 3437 else if (IdxDiff != *SeqStepDenom) 3438 return std::nullopt; 3439 } 3440 3441 // Record this non-undef element for later. 3442 if (!PrevElt || PrevElt->first != *Elt) 3443 PrevElt = std::make_pair(*Elt, Idx); 3444 } 3445 3446 // We need to have logged a step for this to count as a legal index sequence. 3447 if (!SeqStepNum || !SeqStepDenom) 3448 return std::nullopt; 3449 3450 // Loop back through the sequence and validate elements we might have skipped 3451 // while waiting for a valid step. While doing this, log any sequence addend. 3452 for (auto [Idx, Elt] : enumerate(Elts)) { 3453 if (!Elt) 3454 continue; 3455 uint64_t ExpectedVal = 3456 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 3457 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits); 3458 if (!SeqAddend) 3459 SeqAddend = Addend; 3460 else if (Addend != SeqAddend) 3461 return std::nullopt; 3462 } 3463 3464 assert(SeqAddend && "Must have an addend if we have a step"); 3465 3466 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 3467 } 3468 3469 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT 3470 // and lower it as a VRGATHER_VX_VL from the source vector. 3471 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, 3472 SelectionDAG &DAG, 3473 const RISCVSubtarget &Subtarget) { 3474 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 3475 return SDValue(); 3476 SDValue Vec = SplatVal.getOperand(0); 3477 // Only perform this optimization on vectors of the same size for simplicity. 3478 // Don't perform this optimization for i1 vectors. 3479 // FIXME: Support i1 vectors, maybe by promoting to i8? 3480 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) 3481 return SDValue(); 3482 SDValue Idx = SplatVal.getOperand(1); 3483 // The index must be a legal type. 
3484 if (Idx.getValueType() != Subtarget.getXLenVT()) 3485 return SDValue(); 3486 3487 MVT ContainerVT = VT; 3488 if (VT.isFixedLengthVector()) { 3489 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3490 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3491 } 3492 3493 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3494 3495 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, 3496 Idx, DAG.getUNDEF(ContainerVT), Mask, VL); 3497 3498 if (!VT.isFixedLengthVector()) 3499 return Gather; 3500 3501 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 3502 } 3503 3504 3505 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values 3506 /// which constitute a large proportion of the elements. In such cases we can 3507 /// splat a vector with the dominant element and make up the shortfall with 3508 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable. 3509 /// Note that this includes vectors of 2 elements by association. The 3510 /// upper-most element is the "dominant" one, allowing us to use a splat to 3511 /// "insert" the upper element, and an insert of the lower element at position 3512 /// 0, which improves codegen. 3513 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, 3514 const RISCVSubtarget &Subtarget) { 3515 MVT VT = Op.getSimpleValueType(); 3516 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3517 3518 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3519 3520 SDLoc DL(Op); 3521 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3522 3523 MVT XLenVT = Subtarget.getXLenVT(); 3524 unsigned NumElts = Op.getNumOperands(); 3525 3526 SDValue DominantValue; 3527 unsigned MostCommonCount = 0; 3528 DenseMap<SDValue, unsigned> ValueCounts; 3529 unsigned NumUndefElts = 3530 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); 3531 3532 // Track the number of scalar loads we know we'd be inserting, estimated as 3533 // any non-zero floating-point constant. Other kinds of elements are either 3534 // already in registers or are materialized on demand. The threshold at which 3535 // a vector load is more desirable than several scalar materialization and 3536 // vector-insertion instructions is not known. 3537 unsigned NumScalarLoads = 0; 3538 3539 for (SDValue V : Op->op_values()) { 3540 if (V.isUndef()) 3541 continue; 3542 3543 ValueCounts.insert(std::make_pair(V, 0)); 3544 unsigned &Count = ValueCounts[V]; 3545 if (0 == Count) 3546 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) 3547 NumScalarLoads += !CFP->isExactlyValue(+0.0); 3548 3549 // Is this value dominant? In case of a tie, prefer the highest element as 3550 // it's cheaper to insert near the beginning of a vector than it is at the 3551 // end. 3552 if (++Count >= MostCommonCount) { 3553 DominantValue = V; 3554 MostCommonCount = Count; 3555 } 3556 } 3557 3558 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); 3559 unsigned NumDefElts = NumElts - NumUndefElts; 3560 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; 3561 3562 // Don't perform this optimization when optimizing for size, since 3563 // materializing elements and inserting them tends to cause code bloat.
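  // As a worked example of the profitability check below: for <x, x, x, y> we have NumDefElts = 4 and a threshold of 2; x occurs 3 times and qualifies, so we splat x and then fill in y in the last lane.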
3564 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && 3565 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && 3566 ((MostCommonCount > DominantValueCountThreshold) || 3567 (ValueCounts.size() <= Log2_32(NumDefElts)))) { 3568 // Start by splatting the most common element. 3569 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); 3570 3571 DenseSet<SDValue> Processed{DominantValue}; 3572 3573 // We can handle an insert into the last element (of a splat) via 3574 // v(f)slide1down. This is slightly better than the vslideup insert 3575 // lowering as it avoids the need for a vector group temporary. It 3576 // is also better than using vmerge.vx as it avoids the need to 3577 // materialize the mask in a vector register. 3578 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1); 3579 !LastOp.isUndef() && ValueCounts[LastOp] == 1 && 3580 LastOp != DominantValue) { 3581 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3582 auto OpCode = 3583 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3584 if (!VT.isFloatingPoint()) 3585 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp); 3586 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3587 LastOp, Mask, VL); 3588 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget); 3589 Processed.insert(LastOp); 3590 } 3591 3592 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); 3593 for (const auto &OpIdx : enumerate(Op->ops())) { 3594 const SDValue &V = OpIdx.value(); 3595 if (V.isUndef() || !Processed.insert(V).second) 3596 continue; 3597 if (ValueCounts[V] == 1) { 3598 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, 3599 DAG.getVectorIdxConstant(OpIdx.index(), DL)); 3600 } else { 3601 // Blend in all instances of this value using a VSELECT, using a 3602 // mask where each bit signals whether that element is the one 3603 // we're after. 3604 SmallVector<SDValue> Ops; 3605 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { 3606 return DAG.getConstant(V == V1, DL, XLenVT); 3607 }); 3608 Vec = DAG.getNode(ISD::VSELECT, DL, VT, 3609 DAG.getBuildVector(SelMaskTy, DL, Ops), 3610 DAG.getSplatBuildVector(VT, DL, V), Vec); 3611 } 3612 } 3613 3614 return Vec; 3615 } 3616 3617 return SDValue(); 3618 } 3619 3620 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, 3621 const RISCVSubtarget &Subtarget) { 3622 MVT VT = Op.getSimpleValueType(); 3623 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3624 3625 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3626 3627 SDLoc DL(Op); 3628 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3629 3630 MVT XLenVT = Subtarget.getXLenVT(); 3631 unsigned NumElts = Op.getNumOperands(); 3632 3633 if (VT.getVectorElementType() == MVT::i1) { 3634 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 3635 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 3636 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 3637 } 3638 3639 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 3640 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 3641 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 3642 } 3643 3644 // Lower constant mask BUILD_VECTORs via an integer vector type, in 3645 // scalar integer chunks whose bit-width depends on the number of mask 3646 // bits and XLEN. 3647 // First, determine the most appropriate scalar integer type to use. 
This 3648 // is at most XLenVT, but may be shrunk to a smaller vector element type 3649 // according to the size of the final vector - use i8 chunks rather than 3650 // XLenVT if we're producing a v8i1. This results in more consistent 3651 // codegen across RV32 and RV64. 3652 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); 3653 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); 3654 // If we have to use more than one INSERT_VECTOR_ELT then this 3655 // optimization is likely to increase code size; avoid performing it in 3656 // such a case. We can use a load from a constant pool instead. 3657 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) 3658 return SDValue(); 3659 // Now we can create our integer vector type. Note that it may be larger 3660 // than the resulting mask type: v4i1 would use v1i8 as its integer type. 3661 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); 3662 MVT IntegerViaVecVT = 3663 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), 3664 IntegerViaVecElts); 3665 3666 uint64_t Bits = 0; 3667 unsigned BitPos = 0, IntegerEltIdx = 0; 3668 SmallVector<SDValue, 8> Elts(IntegerViaVecElts); 3669 3670 for (unsigned I = 0; I < NumElts;) { 3671 SDValue V = Op.getOperand(I); 3672 bool BitValue = !V.isUndef() && V->getAsZExtVal(); 3673 Bits |= ((uint64_t)BitValue << BitPos); 3674 ++BitPos; 3675 ++I; 3676 3677 // Once we accumulate enough bits to fill our scalar type or process the 3678 // last element, insert into our vector and clear our accumulated data. 3679 if (I % NumViaIntegerBits == 0 || I == NumElts) { 3680 if (NumViaIntegerBits <= 32) 3681 Bits = SignExtend64<32>(Bits); 3682 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); 3683 Elts[IntegerEltIdx] = Elt; 3684 Bits = 0; 3685 BitPos = 0; 3686 IntegerEltIdx++; 3687 } 3688 } 3689 3690 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); 3691 3692 if (NumElts < NumViaIntegerBits) { 3693 // If we're producing a smaller vector than our minimum legal integer 3694 // type, bitcast to the equivalent (known-legal) mask type, and extract 3695 // our final mask. 3696 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); 3697 Vec = DAG.getBitcast(MVT::v8i1, Vec); 3698 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, 3699 DAG.getConstant(0, DL, XLenVT)); 3700 } else { 3701 // Else we must have produced an integer type with the same size as the 3702 // mask type; bitcast for the final result. 3703 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); 3704 Vec = DAG.getBitcast(VT, Vec); 3705 } 3706 3707 return Vec; 3708 } 3709 3710 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3711 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 3712 : RISCVISD::VMV_V_X_VL; 3713 if (!VT.isFloatingPoint()) 3714 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 3715 Splat = 3716 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 3717 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3718 } 3719 3720 // Try and match index sequences, which we can lower to the vid instruction 3721 // with optional modifications. An all-undef vector is matched by 3722 // getSplatValue, above.
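  // For example, <i32 1, i32 3, i32 5, i32 7> is matched with StepNumerator = 2, StepDenominator = 1 and Addend = 1, and is emitted as roughly (vid.v << 1) + 1.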
3723 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) { 3724 int64_t StepNumerator = SimpleVID->StepNumerator; 3725 unsigned StepDenominator = SimpleVID->StepDenominator; 3726 int64_t Addend = SimpleVID->Addend; 3727 3728 assert(StepNumerator != 0 && "Invalid step"); 3729 bool Negate = false; 3730 int64_t SplatStepVal = StepNumerator; 3731 unsigned StepOpcode = ISD::MUL; 3732 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it 3733 // anyway as the shift of 63 won't fit in uimm5. 3734 if (StepNumerator != 1 && StepNumerator != INT64_MIN && 3735 isPowerOf2_64(std::abs(StepNumerator))) { 3736 Negate = StepNumerator < 0; 3737 StepOpcode = ISD::SHL; 3738 SplatStepVal = Log2_64(std::abs(StepNumerator)); 3739 } 3740 3741 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a 3742 // threshold since it's the immediate value many RVV instructions accept. 3743 // There is no vmul.vi instruction, so ensure the multiply constant can fit in 3744 // a single addi instruction. 3745 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || 3746 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && 3747 isPowerOf2_32(StepDenominator) && 3748 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { 3749 MVT VIDVT = 3750 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; 3751 MVT VIDContainerVT = 3752 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget); 3753 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL); 3754 // Convert right out of the scalable type so we can use standard ISD 3755 // nodes for the rest of the computation. If we used scalable types with 3756 // these, we'd lose the fixed-length vector info and generate worse 3757 // vsetvli code. 3758 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); 3759 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || 3760 (StepOpcode == ISD::SHL && SplatStepVal != 0)) { 3761 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT); 3762 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); 3763 } 3764 if (StepDenominator != 1) { 3765 SDValue SplatStep = 3766 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT); 3767 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); 3768 } 3769 if (Addend != 0 || Negate) { 3770 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT); 3771 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, 3772 VID); 3773 } 3774 if (VT.isFloatingPoint()) { 3775 // TODO: Use vfwcvt to reduce register pressure. 3776 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID); 3777 } 3778 return VID; 3779 } 3780 } 3781 3782 // For very small build_vectors, use a single scalar insert of a constant. 3783 // TODO: Base this on constant rematerialization cost, not size. 3784 const unsigned EltBitSize = VT.getScalarSizeInBits(); 3785 if (VT.getSizeInBits() <= 32 && 3786 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { 3787 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits()); 3788 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && 3789 "Unexpected sequence type"); 3790 // If we can use the original VL with the modified element type, this 3791 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3792 // be moved into InsertVSETVLI? 3793 unsigned ViaVecLen = 3794 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ?
NumElts : 1; 3795 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3796 3797 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3798 uint64_t SplatValue = 0; 3799 // Construct the amalgamated value at this larger vector type. 3800 for (const auto &OpIdx : enumerate(Op->op_values())) { 3801 const auto &SeqV = OpIdx.value(); 3802 if (!SeqV.isUndef()) 3803 SplatValue |= 3804 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); 3805 } 3806 3807 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3808 // achieve better constant materializion. 3809 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3810 SplatValue = SignExtend64<32>(SplatValue); 3811 3812 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT, 3813 DAG.getUNDEF(ViaVecVT), 3814 DAG.getConstant(SplatValue, DL, XLenVT), 3815 DAG.getVectorIdxConstant(0, DL)); 3816 if (ViaVecLen != 1) 3817 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3818 MVT::getVectorVT(ViaIntVT, 1), Vec, 3819 DAG.getConstant(0, DL, XLenVT)); 3820 return DAG.getBitcast(VT, Vec); 3821 } 3822 3823 3824 // Attempt to detect "hidden" splats, which only reveal themselves as splats 3825 // when re-interpreted as a vector with a larger element type. For example, 3826 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 3827 // could be instead splat as 3828 // v2i32 = build_vector i32 0x00010000, i32 0x00010000 3829 // TODO: This optimization could also work on non-constant splats, but it 3830 // would require bit-manipulation instructions to construct the splat value. 3831 SmallVector<SDValue> Sequence; 3832 const auto *BV = cast<BuildVectorSDNode>(Op); 3833 if (VT.isInteger() && EltBitSize < Subtarget.getELen() && 3834 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && 3835 BV->getRepeatedSequence(Sequence) && 3836 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { 3837 unsigned SeqLen = Sequence.size(); 3838 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); 3839 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || 3840 ViaIntVT == MVT::i64) && 3841 "Unexpected sequence type"); 3842 3843 // If we can use the original VL with the modified element type, this 3844 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3845 // be moved into InsertVSETVLI? 3846 const unsigned RequiredVL = NumElts / SeqLen; 3847 const unsigned ViaVecLen = 3848 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? 3849 NumElts : RequiredVL; 3850 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3851 3852 unsigned EltIdx = 0; 3853 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3854 uint64_t SplatValue = 0; 3855 // Construct the amalgamated value which can be splatted as this larger 3856 // vector type. 3857 for (const auto &SeqV : Sequence) { 3858 if (!SeqV.isUndef()) 3859 SplatValue |= 3860 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); 3861 EltIdx++; 3862 } 3863 3864 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3865 // achieve better constant materializion. 3866 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3867 SplatValue = SignExtend64<32>(SplatValue); 3868 3869 // Since we can't introduce illegal i64 types at this stage, we can only 3870 // perform an i64 splat on RV32 if it is its own sign-extended value. That 3871 // way we can use RVV instructions to splat. 
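    // For example, on RV32 an i64 SplatValue of -1 is its own 32-bit sign extension and can still be splatted via vmv.v.x, whereas a value such as 0x0000000100000002 is not and falls through to the other lowerings.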
3872 assert((ViaIntVT.bitsLE(XLenVT) || 3873 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && 3874 "Unexpected bitcast sequence"); 3875 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { 3876 SDValue ViaVL = 3877 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); 3878 MVT ViaContainerVT = 3879 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); 3880 SDValue Splat = 3881 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, 3882 DAG.getUNDEF(ViaContainerVT), 3883 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); 3884 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); 3885 if (ViaVecLen != RequiredVL) 3886 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3887 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 3888 DAG.getConstant(0, DL, XLenVT)); 3889 return DAG.getBitcast(VT, Splat); 3890 } 3891 } 3892 3893 // If the number of signbits allows, see if we can lower as a <N x i8>. 3894 // Our main goal here is to reduce LMUL (and thus work) required to 3895 // build the constant, but we will also narrow if the resulting 3896 // narrow vector is known to materialize cheaply. 3897 // TODO: We really should be costing the smaller vector. There are 3898 // profitable cases this misses. 3899 if (EltBitSize > 8 && VT.isInteger() && 3900 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { 3901 unsigned SignBits = DAG.ComputeNumSignBits(Op); 3902 if (EltBitSize - SignBits < 8) { 3903 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), 3904 DL, Op->ops()); 3905 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), 3906 Source, DAG, Subtarget); 3907 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); 3908 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3909 } 3910 } 3911 3912 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 3913 return Res; 3914 3915 // For constant vectors, use generic constant pool lowering. Otherwise, 3916 // we'd have to materialize constants in GPRs just to move them into the 3917 // vector. 3918 return SDValue(); 3919 } 3920 3921 static unsigned getPACKOpcode(unsigned DestBW, 3922 const RISCVSubtarget &Subtarget) { 3923 switch (DestBW) { 3924 default: 3925 llvm_unreachable("Unsupported pack size"); 3926 case 16: 3927 return RISCV::PACKH; 3928 case 32: 3929 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK; 3930 case 64: 3931 assert(Subtarget.is64Bit()); 3932 return RISCV::PACK; 3933 } 3934 } 3935 3936 /// Double the element size of the build vector to reduce the number 3937 /// of vslide1down in the build vector chain. In the worst case, this 3938 /// trades three scalar operations for 1 vector operation. Scalar 3939 /// operations are generally lower latency, and for out-of-order cores 3940 /// we also benefit from additional parallelism. 3941 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, 3942 const RISCVSubtarget &Subtarget) { 3943 SDLoc DL(Op); 3944 MVT VT = Op.getSimpleValueType(); 3945 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3946 MVT ElemVT = VT.getVectorElementType(); 3947 if (!ElemVT.isInteger()) 3948 return SDValue(); 3949 3950 // TODO: Relax these architectural restrictions, possibly with costing 3951 // of the actual instructions required. 
3952 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba()) 3953 return SDValue(); 3954 3955 unsigned NumElts = VT.getVectorNumElements(); 3956 unsigned ElemSizeInBits = ElemVT.getSizeInBits(); 3957 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) || 3958 NumElts % 2 != 0) 3959 return SDValue(); 3960 3961 // Produce [B,A] packed into a type twice as wide. Note that all 3962 // scalars are XLenVT, possibly masked (see below). 3963 MVT XLenVT = Subtarget.getXLenVT(); 3964 SDValue Mask = DAG.getConstant( 3965 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT); 3966 auto pack = [&](SDValue A, SDValue B) { 3967 // Bias the scheduling of the inserted operations to near the 3968 // definition of the element - this tends to reduce register 3969 // pressure overall. 3970 SDLoc ElemDL(B); 3971 if (Subtarget.hasStdExtZbkb()) 3972 // Note that we're relying on the high bits of the result being 3973 // don't care. For PACKW, the result is *sign* extended. 3974 return SDValue( 3975 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget), 3976 ElemDL, XLenVT, A, B), 3977 0); 3978 3979 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask); 3980 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask); 3981 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT); 3982 SDNodeFlags Flags; 3983 Flags.setDisjoint(true); 3984 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A, 3985 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags); 3986 }; 3987 3988 SmallVector<SDValue> NewOperands; 3989 NewOperands.reserve(NumElts / 2); 3990 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2) 3991 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1))); 3992 assert(NumElts == NewOperands.size() * 2); 3993 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2); 3994 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2); 3995 return DAG.getNode(ISD::BITCAST, DL, VT, 3996 DAG.getBuildVector(WideVecVT, DL, NewOperands)); 3997 } 3998 3999 // Convert to an vXf16 build_vector to vXi16 with bitcasts. 4000 static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) { 4001 MVT VT = Op.getSimpleValueType(); 4002 MVT IVT = VT.changeVectorElementType(MVT::i16); 4003 SmallVector<SDValue, 16> NewOps(Op.getNumOperands()); 4004 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) 4005 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I)); 4006 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps); 4007 return DAG.getBitcast(VT, Res); 4008 } 4009 4010 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4011 const RISCVSubtarget &Subtarget) { 4012 MVT VT = Op.getSimpleValueType(); 4013 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 4014 4015 // If we don't have scalar f16, we need to bitcast to an i16 vector. 4016 if (VT.getVectorElementType() == MVT::f16 && 4017 !Subtarget.hasStdExtZfhmin()) 4018 return lowerBUILD_VECTORvXf16(Op, DAG); 4019 4020 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 4021 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 4022 return lowerBuildVectorOfConstants(Op, DAG, Subtarget); 4023 4024 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4025 4026 SDLoc DL(Op); 4027 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4028 4029 MVT XLenVT = Subtarget.getXLenVT(); 4030 4031 if (VT.getVectorElementType() == MVT::i1) { 4032 // A BUILD_VECTOR can be lowered as a SETCC. 
For each fixed-length mask 4033 // vector type, we have a legal equivalently-sized i8 type, so we can use 4034 // that. 4035 MVT WideVecVT = VT.changeVectorElementType(MVT::i8); 4036 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); 4037 4038 SDValue WideVec; 4039 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 4040 // For a splat, perform a scalar truncate before creating the wider 4041 // vector. 4042 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat, 4043 DAG.getConstant(1, DL, Splat.getValueType())); 4044 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); 4045 } else { 4046 SmallVector<SDValue, 8> Ops(Op->op_values()); 4047 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); 4048 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); 4049 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); 4050 } 4051 4052 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); 4053 } 4054 4055 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 4056 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) 4057 return Gather; 4058 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 4059 : RISCVISD::VMV_V_X_VL; 4060 if (!VT.isFloatingPoint()) 4061 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 4062 Splat = 4063 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 4064 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 4065 } 4066 4067 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 4068 return Res; 4069 4070 // If we're compiling for an exact VLEN value, we can split our work per 4071 // register in the register group. 4072 if (const auto VLen = Subtarget.getRealVLen(); 4073 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) { 4074 MVT ElemVT = VT.getVectorElementType(); 4075 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); 4076 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4077 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 4078 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 4079 assert(M1VT == getLMUL1VT(M1VT)); 4080 4081 // The following semantically builds up a fixed length concat_vector 4082 // of the component build_vectors. We eagerly lower to scalable and 4083 // insert_subvector here to avoid DAG combining it back to a large 4084 // build_vector. 4085 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); 4086 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 4087 SDValue Vec = DAG.getUNDEF(ContainerVT); 4088 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { 4089 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg); 4090 SDValue SubBV = 4091 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps); 4092 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget); 4093 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; 4094 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV, 4095 DAG.getVectorIdxConstant(InsertIdx, DL)); 4096 } 4097 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4098 } 4099 4100 // If we're about to resort to vslide1down (or stack usage), pack our 4101 // elements into the widest scalar type we can. This will force a VL/VTYPE 4102 // toggle, but reduces the critical path, the number of vslide1down ops 4103 // required, and possibly enables scalar folds of the values. 
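  // For example, a v8i8 build_vector can be packed into a v4i16 build_vector of paired elements, roughly halving the length of the vslide1down chain.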
4104 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget)) 4105 return Res; 4106 4107 // For m1 vectors, if we have non-undef values in both halves of our vector, 4108 // split the vector into low and high halves, build them separately, then 4109 // use a vselect to combine them. For long vectors, this cuts the critical 4110 // path of the vslide1down sequence in half, and gives us an opportunity 4111 // to special case each half independently. Note that we don't change the 4112 // length of the sub-vectors here, so if both fallback to the generic 4113 // vslide1down path, we should be able to fold the vselect into the final 4114 // vslidedown (for the undef tail) for the first half w/ masking. 4115 unsigned NumElts = VT.getVectorNumElements(); 4116 unsigned NumUndefElts = 4117 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); 4118 unsigned NumDefElts = NumElts - NumUndefElts; 4119 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 && 4120 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) { 4121 SmallVector<SDValue> SubVecAOps, SubVecBOps; 4122 SmallVector<SDValue> MaskVals; 4123 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0)); 4124 SubVecAOps.reserve(NumElts); 4125 SubVecBOps.reserve(NumElts); 4126 for (unsigned i = 0; i < NumElts; i++) { 4127 SDValue Elem = Op->getOperand(i); 4128 if (i < NumElts / 2) { 4129 SubVecAOps.push_back(Elem); 4130 SubVecBOps.push_back(UndefElem); 4131 } else { 4132 SubVecAOps.push_back(UndefElem); 4133 SubVecBOps.push_back(Elem); 4134 } 4135 bool SelectMaskVal = (i < NumElts / 2); 4136 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 4137 } 4138 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts && 4139 MaskVals.size() == NumElts); 4140 4141 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps); 4142 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps); 4143 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 4144 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 4145 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB); 4146 } 4147 4148 // Cap the cost at a value linear to the number of elements in the vector. 4149 // The default lowering is to use the stack. The vector store + scalar loads 4150 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up 4151 // being (at least) linear in LMUL. As a result, using the vslidedown 4152 // lowering for every element ends up being VL*LMUL.. 4153 // TODO: Should we be directly costing the stack alternative? Doing so might 4154 // give us a more accurate upper bound. 4155 InstructionCost LinearBudget = VT.getVectorNumElements() * 2; 4156 4157 // TODO: unify with TTI getSlideCost. 4158 InstructionCost PerSlideCost = 1; 4159 switch (RISCVTargetLowering::getLMUL(ContainerVT)) { 4160 default: break; 4161 case RISCVII::VLMUL::LMUL_2: 4162 PerSlideCost = 2; 4163 break; 4164 case RISCVII::VLMUL::LMUL_4: 4165 PerSlideCost = 4; 4166 break; 4167 case RISCVII::VLMUL::LMUL_8: 4168 PerSlideCost = 8; 4169 break; 4170 } 4171 4172 // TODO: Should we be using the build instseq then cost + evaluate scheme 4173 // we use for integer constants here? 
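  // For example, at LMUL_1 a v4i32 <a, undef, undef, b> has a budget of 8 and is charged 1 for 'a', 1 for sliding over the undef run, and 1 for 'b', so it passes the budget check below and the vslide1down lowering proceeds.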
4174 unsigned UndefCount = 0; 4175 for (const SDValue &V : Op->ops()) { 4176 if (V.isUndef()) { 4177 UndefCount++; 4178 continue; 4179 } 4180 if (UndefCount) { 4181 LinearBudget -= PerSlideCost; 4182 UndefCount = 0; 4183 } 4184 LinearBudget -= PerSlideCost; 4185 } 4186 if (UndefCount) { 4187 LinearBudget -= PerSlideCost; 4188 } 4189 4190 if (LinearBudget < 0) 4191 return SDValue(); 4192 4193 assert((!VT.isFloatingPoint() || 4194 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && 4195 "Illegal type which will result in reserved encoding"); 4196 4197 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 4198 4199 SDValue Vec; 4200 UndefCount = 0; 4201 for (SDValue V : Op->ops()) { 4202 if (V.isUndef()) { 4203 UndefCount++; 4204 continue; 4205 } 4206 4207 // Start our sequence with a TA splat in the hopes that hardware is able to 4208 // recognize there's no dependency on the prior value of our temporary 4209 // register. 4210 if (!Vec) { 4211 Vec = DAG.getSplatVector(VT, DL, V); 4212 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4213 UndefCount = 0; 4214 continue; 4215 } 4216 4217 if (UndefCount) { 4218 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 4219 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 4220 Vec, Offset, Mask, VL, Policy); 4221 UndefCount = 0; 4222 } 4223 auto OpCode = 4224 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 4225 if (!VT.isFloatingPoint()) 4226 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); 4227 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 4228 V, Mask, VL); 4229 } 4230 if (UndefCount) { 4231 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 4232 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 4233 Vec, Offset, Mask, VL, Policy); 4234 } 4235 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4236 } 4237 4238 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 4239 SDValue Lo, SDValue Hi, SDValue VL, 4240 SelectionDAG &DAG) { 4241 if (!Passthru) 4242 Passthru = DAG.getUNDEF(VT); 4243 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 4244 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 4245 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 4246 // If Hi constant is all the same sign bit as Lo, lower this as a custom 4247 // node in order to try and match RVV vector/scalar instructions. 4248 if ((LoC >> 31) == HiC) 4249 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 4250 4251 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, 4252 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use 4253 // vlmax vsetvli or vsetivli to change the VL. 4254 // FIXME: Support larger constants? 4255 // FIXME: Support non-constant VLs by saturating? 
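    // For example, splatting Lo == Hi == 1 into nxv1i64 is the same bit pattern as splatting i32 1 into nxv2i32 with twice the VL, which a single vmv.v.x with EEW=32 can produce.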
4256 if (LoC == HiC) { 4257 SDValue NewVL; 4258 if (isAllOnesConstant(VL) || 4259 (isa<RegisterSDNode>(VL) && 4260 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) 4261 NewVL = DAG.getRegister(RISCV::X0, MVT::i32); 4262 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal())) 4263 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); 4264 4265 if (NewVL) { 4266 MVT InterVT = 4267 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 4268 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, 4269 DAG.getUNDEF(InterVT), Lo, NewVL); 4270 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); 4271 } 4272 } 4273 } 4274 4275 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 4276 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 4277 isa<ConstantSDNode>(Hi.getOperand(1)) && 4278 Hi.getConstantOperandVal(1) == 31) 4279 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 4280 4281 // If the hi bits of the splat are undefined, then it's fine to just splat Lo 4282 // even if it might be sign extended. 4283 if (Hi.isUndef()) 4284 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 4285 4286 // Fall back to a stack store and stride x0 vector load. 4287 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, 4288 Hi, VL); 4289 } 4290 4291 // Called by type legalization to handle splat of i64 on RV32. 4292 // FIXME: We can optimize this when the type has sign or zero bits in one 4293 // of the halves. 4294 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 4295 SDValue Scalar, SDValue VL, 4296 SelectionDAG &DAG) { 4297 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); 4298 SDValue Lo, Hi; 4299 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); 4300 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); 4301 } 4302 4303 // This function lowers a splat of a scalar operand Splat with the vector 4304 // length VL. It ensures the final sequence is type legal, which is useful when 4305 // lowering a splat after type legalization. 4306 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, 4307 MVT VT, const SDLoc &DL, SelectionDAG &DAG, 4308 const RISCVSubtarget &Subtarget) { 4309 bool HasPassthru = Passthru && !Passthru.isUndef(); 4310 if (!HasPassthru && !Passthru) 4311 Passthru = DAG.getUNDEF(VT); 4312 if (VT.isFloatingPoint()) 4313 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); 4314 4315 MVT XLenVT = Subtarget.getXLenVT(); 4316 4317 // Simplest case is that the operand needs to be promoted to XLenVT. 4318 if (Scalar.getValueType().bitsLE(XLenVT)) { 4319 // If the operand is a constant, sign extend to increase our chances 4320 // of being able to use a .vi instruction. ANY_EXTEND would become a 4321 // a zero extend and the simm5 check in isel would fail. 4322 // FIXME: Should we ignore the upper bits in isel instead? 4323 unsigned ExtOpc = 4324 isa<ConstantSDNode>(Scalar) ? 
ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4325 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4326 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 4327 } 4328 4329 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && 4330 "Unexpected scalar for splat lowering!"); 4331 4332 if (isOneConstant(VL) && isNullConstant(Scalar)) 4333 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, 4334 DAG.getConstant(0, DL, XLenVT), VL); 4335 4336 // Otherwise use the more complicated splatting algorithm. 4337 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); 4338 } 4339 4340 // This function lowers an insert of a scalar operand Scalar into lane 4341 // 0 of the vector regardless of the value of VL. The contents of the 4342 // remaining lanes of the result vector are unspecified. VL is assumed 4343 // to be non-zero. 4344 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, 4345 const SDLoc &DL, SelectionDAG &DAG, 4346 const RISCVSubtarget &Subtarget) { 4347 assert(VT.isScalableVector() && "Expect VT is scalable vector type."); 4348 4349 const MVT XLenVT = Subtarget.getXLenVT(); 4350 SDValue Passthru = DAG.getUNDEF(VT); 4351 4352 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4353 isNullConstant(Scalar.getOperand(1))) { 4354 SDValue ExtractedVal = Scalar.getOperand(0); 4355 // The element types must be the same. 4356 if (ExtractedVal.getValueType().getVectorElementType() == 4357 VT.getVectorElementType()) { 4358 MVT ExtractedVT = ExtractedVal.getSimpleValueType(); 4359 MVT ExtractedContainerVT = ExtractedVT; 4360 if (ExtractedContainerVT.isFixedLengthVector()) { 4361 ExtractedContainerVT = getContainerForFixedLengthVector( 4362 DAG, ExtractedContainerVT, Subtarget); 4363 ExtractedVal = convertToScalableVector(ExtractedContainerVT, 4364 ExtractedVal, DAG, Subtarget); 4365 } 4366 if (ExtractedContainerVT.bitsLE(VT)) 4367 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, 4368 ExtractedVal, DAG.getVectorIdxConstant(0, DL)); 4369 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, 4370 DAG.getVectorIdxConstant(0, DL)); 4371 } 4372 } 4373 4374 4375 if (VT.isFloatingPoint()) 4376 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, 4377 DAG.getUNDEF(VT), Scalar, VL); 4378 4379 // Avoid the tricky legalization cases by falling back to using the 4380 // splat code which already handles it gracefully. 4381 if (!Scalar.getValueType().bitsLE(XLenVT)) 4382 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar, 4383 DAG.getConstant(1, DL, XLenVT), 4384 VT, DL, DAG, Subtarget); 4385 4386 // If the operand is a constant, sign extend to increase our chances 4387 // of being able to use a .vi instruction. ANY_EXTEND would become 4388 // a zero extend and the simm5 check in isel would fail. 4389 // FIXME: Should we ignore the upper bits in isel instead? 4390 unsigned ExtOpc = 4391 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4392 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4393 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, 4394 DAG.getUNDEF(VT), Scalar, VL); 4395 } 4396 4397 // Is this a shuffle that extracts either the even or odd elements of a vector? 4398 // That is, specifically, either (a) or (b) below.
//   t34: v8i8 = extract_subvector t11, Constant:i64<0>
//   t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success.
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both inputs must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}

/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                int &OddSrc, const RISCVSubtarget &Subtarget) {
  // We need to be able to widen elements to the next larger integer type.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  int Size = Mask.size();
  int NumElts = VT.getVectorNumElements();
  assert(Size == (int)NumElts && "Unexpected mask size");

  SmallVector<unsigned, 2> StartIndexes;
  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
    return false;

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  // One source should be low half of first vector.
  if (EvenSrc != 0 && OddSrc != 0)
    return false;

  // Subvectors will be extracted from either the start of the two input
  // vectors, or the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts will be extracted.
  // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
  // we'll create an illegal extract_subvector.
  // FIXME: We could support other values using a slidedown first.
  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}

/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
4483 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The 4484 /// returned rotation amount is for a rotate right, where elements move from 4485 /// higher elements to lower elements. \p LoSrc indicates the first source 4486 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 4487 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 4488 /// 0 or 1 if a rotation is found. 4489 /// 4490 /// NOTE: We talk about rotate to the right which matches how bit shift and 4491 /// rotate instructions are described where LSBs are on the right, but LLVM IR 4492 /// and the table below write vectors with the lowest elements on the left. 4493 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { 4494 int Size = Mask.size(); 4495 4496 // We need to detect various ways of spelling a rotation: 4497 // [11, 12, 13, 14, 15, 0, 1, 2] 4498 // [-1, 12, 13, 14, -1, -1, 1, -1] 4499 // [-1, -1, -1, -1, -1, -1, 1, 2] 4500 // [ 3, 4, 5, 6, 7, 8, 9, 10] 4501 // [-1, 4, 5, 6, -1, -1, 9, -1] 4502 // [-1, 4, 5, 6, -1, -1, -1, -1] 4503 int Rotation = 0; 4504 LoSrc = -1; 4505 HiSrc = -1; 4506 for (int i = 0; i != Size; ++i) { 4507 int M = Mask[i]; 4508 if (M < 0) 4509 continue; 4510 4511 // Determine where a rotate vector would have started. 4512 int StartIdx = i - (M % Size); 4513 // The identity rotation isn't interesting, stop. 4514 if (StartIdx == 0) 4515 return -1; 4516 4517 // If we found the tail of a vector the rotation must be the missing 4518 // front. If we found the head of a vector, it must be how much of the 4519 // head. 4520 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; 4521 4522 if (Rotation == 0) 4523 Rotation = CandidateRotation; 4524 else if (Rotation != CandidateRotation) 4525 // The rotations don't match, so we can't match this mask. 4526 return -1; 4527 4528 // Compute which value this mask is pointing at. 4529 int MaskSrc = M < Size ? 0 : 1; 4530 4531 // Compute which of the two target values this index should be assigned to. 4532 // This reflects whether the high elements are remaining or the low elemnts 4533 // are remaining. 4534 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; 4535 4536 // Either set up this value if we've not encountered it before, or check 4537 // that it remains consistent. 4538 if (TargetSrc < 0) 4539 TargetSrc = MaskSrc; 4540 else if (TargetSrc != MaskSrc) 4541 // This may be a rotation, but it pulls from the inputs in some 4542 // unsupported interleaving. 4543 return -1; 4544 } 4545 4546 // Check that we successfully analyzed the mask, and normalize the results. 4547 assert(Rotation != 0 && "Failed to locate a viable rotation!"); 4548 assert((LoSrc >= 0 || HiSrc >= 0) && 4549 "Failed to find a rotated input vector!"); 4550 4551 return Rotation; 4552 } 4553 4554 // Lower a deinterleave shuffle to vnsrl. 
4555 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) 4556 // -> [p, q, r, s] (EvenElts == false) 4557 // VT is the type of the vector to return, <[vscale x ]n x ty> 4558 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> 4559 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, 4560 bool EvenElts, 4561 const RISCVSubtarget &Subtarget, 4562 SelectionDAG &DAG) { 4563 // The result is a vector of type <m x n x ty> 4564 MVT ContainerVT = VT; 4565 // Convert fixed vectors to scalable if needed 4566 if (ContainerVT.isFixedLengthVector()) { 4567 assert(Src.getSimpleValueType().isFixedLengthVector()); 4568 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 4569 4570 // The source is a vector of type <m x n*2 x ty> 4571 MVT SrcContainerVT = 4572 MVT::getVectorVT(ContainerVT.getVectorElementType(), 4573 ContainerVT.getVectorElementCount() * 2); 4574 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 4575 } 4576 4577 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4578 4579 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> 4580 // This also converts FP to int. 4581 unsigned EltBits = ContainerVT.getScalarSizeInBits(); 4582 MVT WideSrcContainerVT = MVT::getVectorVT( 4583 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount()); 4584 Src = DAG.getBitcast(WideSrcContainerVT, Src); 4585 4586 // The integer version of the container type. 4587 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); 4588 4589 // If we want even elements, then the shift amount is 0. Otherwise, shift by 4590 // the original element size. 4591 unsigned Shift = EvenElts ? 0 : EltBits; 4592 SDValue SplatShift = DAG.getNode( 4593 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), 4594 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); 4595 SDValue Res = 4596 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, 4597 DAG.getUNDEF(IntContainerVT), TrueMask, VL); 4598 // Cast back to FP if needed. 4599 Res = DAG.getBitcast(ContainerVT, Res); 4600 4601 if (VT.isFixedLengthVector()) 4602 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 4603 return Res; 4604 } 4605 4606 // Lower the following shuffle to vslidedown. 4607 // a) 4608 // t49: v8i8 = extract_subvector t13, Constant:i64<0> 4609 // t109: v8i8 = extract_subvector t13, Constant:i64<8> 4610 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 4611 // b) 4612 // t69: v16i16 = extract_subvector t68, Constant:i64<0> 4613 // t23: v8i16 = extract_subvector t69, Constant:i64<0> 4614 // t29: v4i16 = extract_subvector t23, Constant:i64<4> 4615 // t26: v8i16 = extract_subvector t69, Constant:i64<8> 4616 // t30: v4i16 = extract_subvector t26, Constant:i64<0> 4617 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 4618 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, 4619 SDValue V1, SDValue V2, 4620 ArrayRef<int> Mask, 4621 const RISCVSubtarget &Subtarget, 4622 SelectionDAG &DAG) { 4623 auto findNonEXTRACT_SUBVECTORParent = 4624 [](SDValue Parent) -> std::pair<SDValue, uint64_t> { 4625 uint64_t Offset = 0; 4626 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && 4627 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 4628 // a scalable vector. But we don't want to match the case. 
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask must also be contiguous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}

// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e.
inserting right 4717 // up to the very end of it, then we don't actually care about the tail. 4718 if (NumSubElts + Index >= (int)NumElts) 4719 Policy |= RISCVII::TAIL_AGNOSTIC; 4720 4721 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget); 4722 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget); 4723 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT); 4724 4725 SDValue Res; 4726 // If we're inserting into the lowest elements, use a tail undisturbed 4727 // vmv.v.v. 4728 if (Index == 0) 4729 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert, 4730 VL); 4731 else 4732 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert, 4733 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy); 4734 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4735 } 4736 4737 /// Match v(f)slide1up/down idioms. These operations involve sliding 4738 /// N-1 elements to make room for an inserted scalar at one end. 4739 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, 4740 SDValue V1, SDValue V2, 4741 ArrayRef<int> Mask, 4742 const RISCVSubtarget &Subtarget, 4743 SelectionDAG &DAG) { 4744 bool OpsSwapped = false; 4745 if (!isa<BuildVectorSDNode>(V1)) { 4746 if (!isa<BuildVectorSDNode>(V2)) 4747 return SDValue(); 4748 std::swap(V1, V2); 4749 OpsSwapped = true; 4750 } 4751 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue(); 4752 if (!Splat) 4753 return SDValue(); 4754 4755 // Return true if the mask could describe a slide of Mask.size() - 1 4756 // elements from concat_vector(V1, V2)[Base:] to [Offset:]. 4757 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { 4758 const unsigned S = (Offset > 0) ? 0 : -Offset; 4759 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); 4760 for (unsigned i = S; i != E; ++i) 4761 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) 4762 return false; 4763 return true; 4764 }; 4765 4766 const unsigned NumElts = VT.getVectorNumElements(); 4767 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); 4768 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) 4769 return SDValue(); 4770 4771 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; 4772 // Inserted lane must come from splat, undef scalar is legal but not profitable. 4773 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) 4774 return SDValue(); 4775 4776 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4777 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4778 auto OpCode = IsVSlidedown ? 4779 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : 4780 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); 4781 if (!VT.isFloatingPoint()) 4782 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat); 4783 auto Vec = DAG.getNode(OpCode, DL, ContainerVT, 4784 DAG.getUNDEF(ContainerVT), 4785 convertToScalableVector(ContainerVT, V2, DAG, Subtarget), 4786 Splat, TrueMask, VL); 4787 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4788 } 4789 4790 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 4791 // to create an interleaved vector of <[vscale x] n*2 x ty>. 4792 // This requires that the size of ty is less than the subtarget's maximum ELEN. 
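// For example, with i8 elements: vwaddu.vv computes zext(EvenV) + zext(OddV)
// as i16 elements, and vwmaccu.vx then adds OddV * 0xff, so each i16 element
// holds EvenV[i] + OddV[i] * 0x100. Reinterpreting that as twice as many i8
// elements places EvenV[i] in element 2*i and OddV[i] in element 2*i+1, which
// is the desired interleave.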
4793 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, 4794 const SDLoc &DL, SelectionDAG &DAG, 4795 const RISCVSubtarget &Subtarget) { 4796 MVT VecVT = EvenV.getSimpleValueType(); 4797 MVT VecContainerVT = VecVT; // <vscale x n x ty> 4798 // Convert fixed vectors to scalable if needed 4799 if (VecContainerVT.isFixedLengthVector()) { 4800 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget); 4801 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget); 4802 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget); 4803 } 4804 4805 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); 4806 4807 // We're working with a vector of the same size as the resulting 4808 // interleaved vector, but with half the number of elements and 4809 // twice the SEW (Hence the restriction on not using the maximum 4810 // ELEN) 4811 MVT WideVT = 4812 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2), 4813 VecVT.getVectorElementCount()); 4814 MVT WideContainerVT = WideVT; // <vscale x n x ty*2> 4815 if (WideContainerVT.isFixedLengthVector()) 4816 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget); 4817 4818 // Bitcast the input vectors to integers in case they are FP 4819 VecContainerVT = VecContainerVT.changeTypeToInteger(); 4820 EvenV = DAG.getBitcast(VecContainerVT, EvenV); 4821 OddV = DAG.getBitcast(VecContainerVT, OddV); 4822 4823 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget); 4824 SDValue Passthru = DAG.getUNDEF(WideContainerVT); 4825 4826 SDValue Interleaved; 4827 if (OddV.isUndef()) { 4828 // If OddV is undef, this is a zero extend. 4829 // FIXME: Not only does this optimize the code, it fixes some correctness 4830 // issues because MIR does not have freeze. 4831 Interleaved = 4832 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL); 4833 } else if (Subtarget.hasStdExtZvbb()) { 4834 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. 4835 SDValue OffsetVec = 4836 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT); 4837 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV, 4838 OffsetVec, Passthru, Mask, VL); 4839 if (!EvenV.isUndef()) 4840 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, 4841 Interleaved, EvenV, Passthru, Mask, VL); 4842 } else if (EvenV.isUndef()) { 4843 Interleaved = 4844 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL); 4845 4846 SDValue OffsetVec = 4847 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT); 4848 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT, 4849 Interleaved, OffsetVec, Passthru, Mask, VL); 4850 } else { 4851 // FIXME: We should freeze the odd vector here. We already handled the case 4852 // of provably undef/poison above. 

    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then multiply OddV by 2^VecVT.getScalarSizeInBits() - 1, i.e. an
    // all-ones value.
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  MVT ResultVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}

// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
                                        SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have Zvbb or the larger element type is wider than ELEN, the
  // operation will be illegal.
  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
                                                               ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we
  // need to insert it into the larger vector and then shift up the reversed
  // bits afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the
  // larger element type.
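  // For example, reversing a v4i1 through an i8: the four bits start in bits
  // [3:0], the bitreverse leaves them in bits [7:4], and shifting right by
  // ViaEltSize - NumElts == 4 moves them back to bits [3:0] in reversed order.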
4933 if (ViaEltSize > NumElts) 4934 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res, 4935 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT)); 4936 4937 Res = DAG.getBitcast(ViaBitVT, Res); 4938 4939 if (ViaEltSize > NumElts) 4940 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, 4941 DAG.getVectorIdxConstant(0, DL)); 4942 return Res; 4943 } 4944 4945 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, 4946 SelectionDAG &DAG, 4947 const RISCVSubtarget &Subtarget, 4948 MVT &RotateVT, unsigned &RotateAmt) { 4949 SDLoc DL(SVN); 4950 4951 EVT VT = SVN->getValueType(0); 4952 unsigned NumElts = VT.getVectorNumElements(); 4953 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 4954 unsigned NumSubElts; 4955 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, 4956 NumElts, NumSubElts, RotateAmt)) 4957 return false; 4958 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts), 4959 NumElts / NumSubElts); 4960 4961 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. 4962 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT); 4963 } 4964 4965 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can 4966 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this 4967 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. 4968 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, 4969 SelectionDAG &DAG, 4970 const RISCVSubtarget &Subtarget) { 4971 SDLoc DL(SVN); 4972 4973 EVT VT = SVN->getValueType(0); 4974 unsigned RotateAmt; 4975 MVT RotateVT; 4976 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) 4977 return SDValue(); 4978 4979 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0)); 4980 4981 SDValue Rotate; 4982 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, 4983 // so canonicalize to vrev8. 4984 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) 4985 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op); 4986 else 4987 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op, 4988 DAG.getConstant(RotateAmt, DL, RotateVT)); 4989 4990 return DAG.getBitcast(VT, Rotate); 4991 } 4992 4993 // If compiling with an exactly known VLEN, see if we can split a 4994 // shuffle on m2 or larger into a small number of m1 sized shuffles 4995 // which write each destination registers exactly once. 4996 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, 4997 SelectionDAG &DAG, 4998 const RISCVSubtarget &Subtarget) { 4999 SDLoc DL(SVN); 5000 MVT VT = SVN->getSimpleValueType(0); 5001 SDValue V1 = SVN->getOperand(0); 5002 SDValue V2 = SVN->getOperand(1); 5003 ArrayRef<int> Mask = SVN->getMask(); 5004 unsigned NumElts = VT.getVectorNumElements(); 5005 5006 // If we don't know exact data layout, not much we can do. If this 5007 // is already m1 or smaller, no point in splitting further. 5008 const auto VLen = Subtarget.getRealVLen(); 5009 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen) 5010 return SDValue(); 5011 5012 // Avoid picking up bitrotate patterns which we have a linear-in-lmul 5013 // expansion for. 
5014 unsigned RotateAmt; 5015 MVT RotateVT; 5016 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) 5017 return SDValue(); 5018 5019 MVT ElemVT = VT.getVectorElementType(); 5020 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); 5021 unsigned VRegsPerSrc = NumElts / ElemsPerVReg; 5022 5023 SmallVector<std::pair<int, SmallVector<int>>> 5024 OutMasks(VRegsPerSrc, {-1, {}}); 5025 5026 // Check if our mask can be done as a 1-to-1 mapping from source 5027 // to destination registers in the group without needing to 5028 // write each destination more than once. 5029 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { 5030 int DstVecIdx = DstIdx / ElemsPerVReg; 5031 int DstSubIdx = DstIdx % ElemsPerVReg; 5032 int SrcIdx = Mask[DstIdx]; 5033 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) 5034 continue; 5035 int SrcVecIdx = SrcIdx / ElemsPerVReg; 5036 int SrcSubIdx = SrcIdx % ElemsPerVReg; 5037 if (OutMasks[DstVecIdx].first == -1) 5038 OutMasks[DstVecIdx].first = SrcVecIdx; 5039 if (OutMasks[DstVecIdx].first != SrcVecIdx) 5040 // Note: This case could easily be handled by keeping track of a chain 5041 // of source values and generating two element shuffles below. This is 5042 // less an implementation question, and more a profitability one. 5043 return SDValue(); 5044 5045 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1); 5046 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; 5047 } 5048 5049 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 5050 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 5051 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 5052 assert(M1VT == getLMUL1VT(M1VT)); 5053 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 5054 SDValue Vec = DAG.getUNDEF(ContainerVT); 5055 // The following semantically builds up a fixed length concat_vector 5056 // of the component shuffle_vectors. We eagerly lower to scalable here 5057 // to avoid DAG combining it back to a large shuffle_vector again. 5058 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 5059 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 5060 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { 5061 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; 5062 if (SrcVecIdx == -1) 5063 continue; 5064 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; 5065 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? 
V2 : V1; 5066 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec, 5067 DAG.getVectorIdxConstant(ExtractIdx, DL)); 5068 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget); 5069 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask); 5070 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget); 5071 unsigned InsertIdx = DstVecIdx * NumOpElts; 5072 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec, 5073 DAG.getVectorIdxConstant(InsertIdx, DL)); 5074 } 5075 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 5076 } 5077 5078 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 5079 const RISCVSubtarget &Subtarget) { 5080 SDValue V1 = Op.getOperand(0); 5081 SDValue V2 = Op.getOperand(1); 5082 SDLoc DL(Op); 5083 MVT XLenVT = Subtarget.getXLenVT(); 5084 MVT VT = Op.getSimpleValueType(); 5085 unsigned NumElts = VT.getVectorNumElements(); 5086 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 5087 5088 if (VT.getVectorElementType() == MVT::i1) { 5089 // Lower to a vror.vi of a larger element type if possible before we promote 5090 // i1s to i8s. 5091 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 5092 return V; 5093 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) 5094 return V; 5095 5096 // Promote i1 shuffle to i8 shuffle. 5097 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); 5098 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); 5099 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) 5100 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2); 5101 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask()); 5102 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT), 5103 ISD::SETNE); 5104 } 5105 5106 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 5107 5108 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5109 5110 if (SVN->isSplat()) { 5111 const int Lane = SVN->getSplatIndex(); 5112 if (Lane >= 0) { 5113 MVT SVT = VT.getVectorElementType(); 5114 5115 // Turn splatted vector load into a strided load with an X0 stride. 5116 SDValue V = V1; 5117 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 5118 // with undef. 5119 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 5120 int Offset = Lane; 5121 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 5122 int OpElements = 5123 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 5124 V = V.getOperand(Offset / OpElements); 5125 Offset %= OpElements; 5126 } 5127 5128 // We need to ensure the load isn't atomic or volatile. 5129 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 5130 auto *Ld = cast<LoadSDNode>(V); 5131 Offset *= SVT.getStoreSize(); 5132 SDValue NewAddr = DAG.getMemBasePlusOffset( 5133 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL); 5134 5135 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 
5136 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 5137 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 5138 SDValue IntID = 5139 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 5140 SDValue Ops[] = {Ld->getChain(), 5141 IntID, 5142 DAG.getUNDEF(ContainerVT), 5143 NewAddr, 5144 DAG.getRegister(RISCV::X0, XLenVT), 5145 VL}; 5146 SDValue NewLoad = DAG.getMemIntrinsicNode( 5147 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 5148 DAG.getMachineFunction().getMachineMemOperand( 5149 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 5150 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 5151 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 5152 } 5153 5154 MVT SplatVT = ContainerVT; 5155 5156 // If we don't have Zfh, we need to use an integer scalar load. 5157 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) { 5158 SVT = MVT::i16; 5159 SplatVT = ContainerVT.changeVectorElementType(SVT); 5160 } 5161 5162 // Otherwise use a scalar load and splat. This will give the best 5163 // opportunity to fold a splat into the operation. ISel can turn it into 5164 // the x0 strided load if we aren't able to fold away the select. 5165 if (SVT.isFloatingPoint()) 5166 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 5167 Ld->getPointerInfo().getWithOffset(Offset), 5168 Ld->getOriginalAlign(), 5169 Ld->getMemOperand()->getFlags()); 5170 else 5171 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 5172 Ld->getPointerInfo().getWithOffset(Offset), SVT, 5173 Ld->getOriginalAlign(), 5174 Ld->getMemOperand()->getFlags()); 5175 DAG.makeEquivalentMemoryOrdering(Ld, V); 5176 5177 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 5178 : RISCVISD::VMV_V_X_VL; 5179 SDValue Splat = 5180 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL); 5181 Splat = DAG.getBitcast(ContainerVT, Splat); 5182 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 5183 } 5184 5185 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 5186 assert(Lane < (int)NumElts && "Unexpected lane!"); 5187 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, 5188 V1, DAG.getConstant(Lane, DL, XLenVT), 5189 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5190 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 5191 } 5192 } 5193 5194 // For exact VLEN m2 or greater, try to split to m1 operations if we 5195 // can split cleanly. 5196 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) 5197 return V; 5198 5199 ArrayRef<int> Mask = SVN->getMask(); 5200 5201 if (SDValue V = 5202 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) 5203 return V; 5204 5205 if (SDValue V = 5206 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) 5207 return V; 5208 5209 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if 5210 // available. 5211 if (Subtarget.hasStdExtZvkb()) 5212 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 5213 return V; 5214 5215 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 5216 // be undef which can be handled with a single SLIDEDOWN/UP. 5217 int LoSrc, HiSrc; 5218 int Rotation = isElementRotate(LoSrc, HiSrc, Mask); 5219 if (Rotation > 0) { 5220 SDValue LoV, HiV; 5221 if (LoSrc >= 0) { 5222 LoV = LoSrc == 0 ? V1 : V2; 5223 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); 5224 } 5225 if (HiSrc >= 0) { 5226 HiV = HiSrc == 0 ? 
V1 : V2; 5227 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); 5228 } 5229 5230 // We found a rotation. We need to slide HiV down by Rotation. Then we need 5231 // to slide LoV up by (NumElts - Rotation). 5232 unsigned InvRotate = NumElts - Rotation; 5233 5234 SDValue Res = DAG.getUNDEF(ContainerVT); 5235 if (HiV) { 5236 // Even though we could use a smaller VL, don't to avoid a vsetivli 5237 // toggle. 5238 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, 5239 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL); 5240 } 5241 if (LoV) 5242 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, 5243 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, 5244 RISCVII::TAIL_AGNOSTIC); 5245 5246 return convertFromScalableVector(VT, Res, DAG, Subtarget); 5247 } 5248 5249 // If this is a deinterleave and we can widen the vector, then we can use 5250 // vnsrl to deinterleave. 5251 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { 5252 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, 5253 Subtarget, DAG); 5254 } 5255 5256 if (SDValue V = 5257 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) 5258 return V; 5259 5260 // Detect an interleave shuffle and lower to 5261 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 5262 int EvenSrc, OddSrc; 5263 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { 5264 // Extract the halves of the vectors. 5265 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 5266 5267 int Size = Mask.size(); 5268 SDValue EvenV, OddV; 5269 assert(EvenSrc >= 0 && "Undef source?"); 5270 EvenV = (EvenSrc / Size) == 0 ? V1 : V2; 5271 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV, 5272 DAG.getVectorIdxConstant(EvenSrc % Size, DL)); 5273 5274 assert(OddSrc >= 0 && "Undef source?"); 5275 OddV = (OddSrc / Size) == 0 ? V1 : V2; 5276 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV, 5277 DAG.getVectorIdxConstant(OddSrc % Size, DL)); 5278 5279 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); 5280 } 5281 5282 5283 // Handle any remaining single source shuffles 5284 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 5285 if (V2.isUndef()) { 5286 // We might be able to express the shuffle as a bitrotate. But even if we 5287 // don't have Zvkb and have to expand, the expanded sequence of approx. 2 5288 // shifts and a vor will have a higher throughput than a vrgather. 5289 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 5290 return V; 5291 5292 if (VT.getScalarSizeInBits() == 8 && 5293 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) { 5294 // On such a vector we're unable to use i8 as the index type. 5295 // FIXME: We could promote the index to i16 and use vrgatherei16, but that 5296 // may involve vector splitting if we're already at LMUL=8, or our 5297 // user-supplied maximum fixed-length LMUL. 5298 return SDValue(); 5299 } 5300 5301 // Base case for the two operand recursion below - handle the worst case 5302 // single source shuffle. 5303 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; 5304 MVT IndexVT = VT.changeTypeToInteger(); 5305 // Since we can't introduce illegal index types at this stage, use i16 and 5306 // vrgatherei16 if the corresponding index type for plain vrgather is greater 5307 // than XLenVT. 
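  // For example, a shuffle of i64/f64 elements on RV32 would otherwise need
  // i64 gather indices, which are wider than XLEN, so vrgatherei16 with i16
  // indices is used instead.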
5308 if (IndexVT.getScalarType().bitsGT(XLenVT)) { 5309 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 5310 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 5311 } 5312 5313 // If the mask allows, we can do all the index computation in 16 bits. This 5314 // requires less work and less register pressure at high LMUL, and creates 5315 // smaller constants which may be cheaper to materialize. 5316 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) && 5317 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { 5318 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 5319 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 5320 } 5321 5322 MVT IndexContainerVT = 5323 ContainerVT.changeVectorElementType(IndexVT.getScalarType()); 5324 5325 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 5326 SmallVector<SDValue> GatherIndicesLHS; 5327 for (int MaskIndex : Mask) { 5328 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0; 5329 GatherIndicesLHS.push_back(IsLHSIndex 5330 ? DAG.getConstant(MaskIndex, DL, XLenVT) 5331 : DAG.getUNDEF(XLenVT)); 5332 } 5333 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 5334 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG, 5335 Subtarget); 5336 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 5337 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5338 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 5339 } 5340 5341 // As a backup, shuffles can be lowered via a vrgather instruction, possibly 5342 // merged with a second vrgather. 5343 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS; 5344 5345 // Now construct the mask that will be used by the blended vrgather operation. 5346 // Construct the appropriate indices into each vector. 5347 for (int MaskIndex : Mask) { 5348 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; 5349 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 5350 ? MaskIndex : -1); 5351 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts)); 5352 } 5353 5354 // Try to pick a profitable operand order. 5355 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 5356 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts); 5357 5358 // Recursively invoke lowering for each operand if we had two 5359 // independent single source shuffles, and then combine the result via a 5360 // vselect. Note that the vselect will likely be folded back into the 5361 // second permute (vrgather, or other) by the post-isel combine. 5362 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS); 5363 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS); 5364 5365 SmallVector<SDValue> MaskVals; 5366 for (int MaskIndex : Mask) { 5367 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; 5368 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 5369 } 5370 5371 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 5372 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 5373 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 5374 5375 if (SwapOps) 5376 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 5377 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1); 5378 } 5379 5380 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 5381 // Support splats for any type. These should type legalize well. 
5382 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 5383 return true; 5384 5385 // Only support legal VTs for other shuffles for now. 5386 if (!isTypeLegal(VT)) 5387 return false; 5388 5389 MVT SVT = VT.getSimpleVT(); 5390 5391 // Not for i1 vectors. 5392 if (SVT.getScalarType() == MVT::i1) 5393 return false; 5394 5395 int Dummy1, Dummy2; 5396 return (isElementRotate(Dummy1, Dummy2, M) > 0) || 5397 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); 5398 } 5399 5400 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 5401 // the exponent. 5402 SDValue 5403 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, 5404 SelectionDAG &DAG) const { 5405 MVT VT = Op.getSimpleValueType(); 5406 unsigned EltSize = VT.getScalarSizeInBits(); 5407 SDValue Src = Op.getOperand(0); 5408 SDLoc DL(Op); 5409 MVT ContainerVT = VT; 5410 5411 SDValue Mask, VL; 5412 if (Op->isVPOpcode()) { 5413 Mask = Op.getOperand(1); 5414 if (VT.isFixedLengthVector()) 5415 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5416 Subtarget); 5417 VL = Op.getOperand(2); 5418 } 5419 5420 // We choose FP type that can represent the value if possible. Otherwise, we 5421 // use rounding to zero conversion for correct exponent of the result. 5422 // TODO: Use f16 for i8 when possible? 5423 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; 5424 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) 5425 FloatEltVT = MVT::f32; 5426 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 5427 5428 // Legal types should have been checked in the RISCVTargetLowering 5429 // constructor. 5430 // TODO: Splitting may make sense in some cases. 5431 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 5432 "Expected legal float type!"); 5433 5434 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 5435 // The trailing zero count is equal to log2 of this single bit value. 5436 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 5437 SDValue Neg = DAG.getNegative(Src, DL, VT); 5438 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 5439 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { 5440 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), 5441 Src, Mask, VL); 5442 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL); 5443 } 5444 5445 // We have a legal FP type, convert to it. 5446 SDValue FloatVal; 5447 if (FloatVT.bitsGT(VT)) { 5448 if (Op->isVPOpcode()) 5449 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL); 5450 else 5451 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 5452 } else { 5453 // Use RTZ to avoid rounding influencing exponent of FloatVal. 5454 if (VT.isFixedLengthVector()) { 5455 ContainerVT = getContainerForFixedLengthVector(VT); 5456 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 5457 } 5458 if (!Op->isVPOpcode()) 5459 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5460 SDValue RTZRM = 5461 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); 5462 MVT ContainerFloatVT = 5463 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); 5464 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, 5465 Src, Mask, RTZRM, VL); 5466 if (VT.isFixedLengthVector()) 5467 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); 5468 } 5469 // Bitcast to integer and shift the exponent to the LSB. 
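  // For example, using the f32 path for an i32 element with value 16: the
  // biased exponent field holds 4 + 127 = 131. Shifting right by 23 extracts
  // 131; 131 - 127 = 4 recovers cttz(16) (after the X & -X step above), and
  // (127 + 31) - 131 = 27 recovers ctlz(16).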
5470 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 5471 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 5472 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 5473 5474 SDValue Exp; 5475 // Restore back to original type. Truncation after SRL is to generate vnsrl. 5476 if (Op->isVPOpcode()) { 5477 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast, 5478 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL); 5479 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL); 5480 } else { 5481 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 5482 DAG.getConstant(ShiftAmt, DL, IntVT)); 5483 if (IntVT.bitsLT(VT)) 5484 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); 5485 else if (IntVT.bitsGT(VT)) 5486 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); 5487 } 5488 5489 // The exponent contains log2 of the value in biased form. 5490 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 5491 // For trailing zeros, we just need to subtract the bias. 5492 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 5493 return DAG.getNode(ISD::SUB, DL, VT, Exp, 5494 DAG.getConstant(ExponentBias, DL, VT)); 5495 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) 5496 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp, 5497 DAG.getConstant(ExponentBias, DL, VT), Mask, VL); 5498 5499 // For leading zeros, we need to remove the bias and convert from log2 to 5500 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 5501 unsigned Adjust = ExponentBias + (EltSize - 1); 5502 SDValue Res; 5503 if (Op->isVPOpcode()) 5504 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp, 5505 Mask, VL); 5506 else 5507 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); 5508 5509 // The above result with zero input equals to Adjust which is greater than 5510 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 5511 if (Op.getOpcode() == ISD::CTLZ) 5512 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); 5513 else if (Op.getOpcode() == ISD::VP_CTLZ) 5514 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, 5515 DAG.getConstant(EltSize, DL, VT), Mask, VL); 5516 return Res; 5517 } 5518 5519 SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op, 5520 SelectionDAG &DAG) const { 5521 SDLoc DL(Op); 5522 MVT XLenVT = Subtarget.getXLenVT(); 5523 SDValue Source = Op->getOperand(0); 5524 MVT SrcVT = Source.getSimpleValueType(); 5525 SDValue Mask = Op->getOperand(1); 5526 SDValue EVL = Op->getOperand(2); 5527 5528 if (SrcVT.isFixedLengthVector()) { 5529 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT); 5530 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget); 5531 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5532 Subtarget); 5533 SrcVT = ContainerVT; 5534 } 5535 5536 // Convert to boolean vector. 5537 if (SrcVT.getScalarType() != MVT::i1) { 5538 SDValue AllZero = DAG.getConstant(0, DL, SrcVT); 5539 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount()); 5540 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT, 5541 {Source, AllZero, DAG.getCondCode(ISD::SETNE), 5542 DAG.getUNDEF(SrcVT), Mask, EVL}); 5543 } 5544 5545 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL); 5546 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF) 5547 // In this case, we can interpret poison as -1, so nothing to do further. 5548 return Res; 5549 5550 // Convert -1 to VL. 
5551 SDValue SetCC = 5552 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT); 5553 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res); 5554 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); 5555 } 5556 5557 // While RVV has alignment restrictions, we should always be able to load as a 5558 // legal equivalently-sized byte-typed vector instead. This method is 5559 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 5560 // the load is already correctly-aligned, it returns SDValue(). 5561 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 5562 SelectionDAG &DAG) const { 5563 auto *Load = cast<LoadSDNode>(Op); 5564 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 5565 5566 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5567 Load->getMemoryVT(), 5568 *Load->getMemOperand())) 5569 return SDValue(); 5570 5571 SDLoc DL(Op); 5572 MVT VT = Op.getSimpleValueType(); 5573 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5574 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5575 "Unexpected unaligned RVV load type"); 5576 MVT NewVT = 5577 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5578 assert(NewVT.isValid() && 5579 "Expecting equally-sized RVV vector types to be legal"); 5580 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 5581 Load->getPointerInfo(), Load->getOriginalAlign(), 5582 Load->getMemOperand()->getFlags()); 5583 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 5584 } 5585 5586 // While RVV has alignment restrictions, we should always be able to store as a 5587 // legal equivalently-sized byte-typed vector instead. This method is 5588 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 5589 // returns SDValue() if the store is already correctly aligned. 5590 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 5591 SelectionDAG &DAG) const { 5592 auto *Store = cast<StoreSDNode>(Op); 5593 assert(Store && Store->getValue().getValueType().isVector() && 5594 "Expected vector store"); 5595 5596 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5597 Store->getMemoryVT(), 5598 *Store->getMemOperand())) 5599 return SDValue(); 5600 5601 SDLoc DL(Op); 5602 SDValue StoredVal = Store->getValue(); 5603 MVT VT = StoredVal.getSimpleValueType(); 5604 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5605 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5606 "Unexpected unaligned RVV store type"); 5607 MVT NewVT = 5608 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5609 assert(NewVT.isValid() && 5610 "Expecting equally-sized RVV vector types to be legal"); 5611 StoredVal = DAG.getBitcast(NewVT, StoredVal); 5612 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 5613 Store->getPointerInfo(), Store->getOriginalAlign(), 5614 Store->getMemOperand()->getFlags()); 5615 } 5616 5617 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, 5618 const RISCVSubtarget &Subtarget) { 5619 assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); 5620 5621 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); 5622 5623 // All simm32 constants should be handled by isel. 
5624 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 5625 // this check redundant, but small immediates are common so this check 5626 // should have better compile time. 5627 if (isInt<32>(Imm)) 5628 return Op; 5629 5630 // We only need to cost the immediate, if constant pool lowering is enabled. 5631 if (!Subtarget.useConstantPoolForLargeInts()) 5632 return Op; 5633 5634 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 5635 if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) 5636 return Op; 5637 5638 // Optimizations below are disabled for opt size. If we're optimizing for 5639 // size, use a constant pool. 5640 if (DAG.shouldOptForSize()) 5641 return SDValue(); 5642 5643 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do 5644 // that if it will avoid a constant pool. 5645 // It will require an extra temporary register though. 5646 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 5647 // low and high 32 bits are the same and bit 31 and 63 are set. 5648 unsigned ShiftAmt, AddOpc; 5649 RISCVMatInt::InstSeq SeqLo = 5650 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 5651 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) 5652 return Op; 5653 5654 return SDValue(); 5655 } 5656 5657 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 5658 const RISCVSubtarget &Subtarget) { 5659 SDLoc dl(Op); 5660 AtomicOrdering FenceOrdering = 5661 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 5662 SyncScope::ID FenceSSID = 5663 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 5664 5665 if (Subtarget.hasStdExtZtso()) { 5666 // The only fence that needs an instruction is a sequentially-consistent 5667 // cross-thread fence. 5668 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 5669 FenceSSID == SyncScope::System) 5670 return Op; 5671 5672 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 5673 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5674 } 5675 5676 // singlethread fences only synchronize with signal handlers on the same 5677 // thread and thus only need to preserve instruction order, not actually 5678 // enforce memory ordering. 5679 if (FenceSSID == SyncScope::SingleThread) 5680 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 5681 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5682 5683 return Op; 5684 } 5685 5686 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) { 5687 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && 5688 "Unexpected custom legalisation"); 5689 5690 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN. 5691 bool IsAdd = Op.getOpcode() == ISD::SADDSAT; 5692 SDLoc DL(Op); 5693 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); 5694 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); 5695 SDValue Result = 5696 DAG.getNode(IsAdd ? 
ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 5697 5698 APInt MinVal = APInt::getSignedMinValue(32).sext(64); 5699 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64); 5700 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64); 5701 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64); 5702 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax); 5703 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin); 5704 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); 5705 } 5706 5707 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { 5708 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && 5709 "Unexpected custom legalisation"); 5710 5711 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 5712 // sign extend allows overflow of the lower 32 bits to be detected on 5713 // the promoted size. 5714 SDLoc DL(Op); 5715 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); 5716 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); 5717 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS); 5718 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp); 5719 } 5720 5721 // Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw. 5722 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) { 5723 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && 5724 "Unexpected custom legalisation"); 5725 if (isa<ConstantSDNode>(Op.getOperand(1))) 5726 return SDValue(); 5727 5728 bool IsAdd = Op.getOpcode() == ISD::SADDO; 5729 SDLoc DL(Op); 5730 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); 5731 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); 5732 SDValue WideOp = 5733 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 5734 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp); 5735 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp, 5736 DAG.getValueType(MVT::i32)); 5737 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt, 5738 ISD::SETNE); 5739 return DAG.getMergeValues({Res, Ovf}, DL); 5740 } 5741 5742 // Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw. 
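// For example, sext(65536) * sext(65536) = 0x100000000; sign-extending its
// low 32 bits back to i64 gives 0, which differs from the full product, so
// the SETNE below reports overflow.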
5743 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) { 5744 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && 5745 "Unexpected custom legalisation"); 5746 SDLoc DL(Op); 5747 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); 5748 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); 5749 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); 5750 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 5751 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul, 5752 DAG.getValueType(MVT::i32)); 5753 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt, 5754 ISD::SETNE); 5755 return DAG.getMergeValues({Res, Ovf}, DL); 5756 } 5757 5758 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, 5759 SelectionDAG &DAG) const { 5760 SDLoc DL(Op); 5761 MVT VT = Op.getSimpleValueType(); 5762 MVT XLenVT = Subtarget.getXLenVT(); 5763 unsigned Check = Op.getConstantOperandVal(1); 5764 unsigned TDCMask = 0; 5765 if (Check & fcSNan) 5766 TDCMask |= RISCV::FPMASK_Signaling_NaN; 5767 if (Check & fcQNan) 5768 TDCMask |= RISCV::FPMASK_Quiet_NaN; 5769 if (Check & fcPosInf) 5770 TDCMask |= RISCV::FPMASK_Positive_Infinity; 5771 if (Check & fcNegInf) 5772 TDCMask |= RISCV::FPMASK_Negative_Infinity; 5773 if (Check & fcPosNormal) 5774 TDCMask |= RISCV::FPMASK_Positive_Normal; 5775 if (Check & fcNegNormal) 5776 TDCMask |= RISCV::FPMASK_Negative_Normal; 5777 if (Check & fcPosSubnormal) 5778 TDCMask |= RISCV::FPMASK_Positive_Subnormal; 5779 if (Check & fcNegSubnormal) 5780 TDCMask |= RISCV::FPMASK_Negative_Subnormal; 5781 if (Check & fcPosZero) 5782 TDCMask |= RISCV::FPMASK_Positive_Zero; 5783 if (Check & fcNegZero) 5784 TDCMask |= RISCV::FPMASK_Negative_Zero; 5785 5786 bool IsOneBitMask = isPowerOf2_32(TDCMask); 5787 5788 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT); 5789 5790 if (VT.isVector()) { 5791 SDValue Op0 = Op.getOperand(0); 5792 MVT VT0 = Op.getOperand(0).getSimpleValueType(); 5793 5794 if (VT.isScalableVector()) { 5795 MVT DstVT = VT0.changeVectorElementTypeToInteger(); 5796 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); 5797 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5798 Mask = Op.getOperand(2); 5799 VL = Op.getOperand(3); 5800 } 5801 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, 5802 VL, Op->getFlags()); 5803 if (IsOneBitMask) 5804 return DAG.getSetCC(DL, VT, FPCLASS, 5805 DAG.getConstant(TDCMask, DL, DstVT), 5806 ISD::CondCode::SETEQ); 5807 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS, 5808 DAG.getConstant(TDCMask, DL, DstVT)); 5809 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT), 5810 ISD::SETNE); 5811 } 5812 5813 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0); 5814 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5815 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); 5816 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); 5817 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5818 Mask = Op.getOperand(2); 5819 MVT MaskContainerVT = 5820 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 5821 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 5822 VL = Op.getOperand(3); 5823 } 5824 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); 5825 5826 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, 5827 Mask, VL, Op->getFlags()); 5828 5829 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, 
ContainerDstVT, 5830 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL); 5831 if (IsOneBitMask) { 5832 SDValue VMSEQ = 5833 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5834 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ), 5835 DAG.getUNDEF(ContainerVT), Mask, VL}); 5836 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget); 5837 } 5838 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, 5839 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); 5840 5841 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 5842 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 5843 DAG.getUNDEF(ContainerDstVT), SplatZero, VL); 5844 5845 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5846 {AND, SplatZero, DAG.getCondCode(ISD::SETNE), 5847 DAG.getUNDEF(ContainerVT), Mask, VL}); 5848 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); 5849 } 5850 5851 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0)); 5852 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV); 5853 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT), 5854 ISD::CondCode::SETNE); 5855 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 5856 } 5857 5858 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 5859 // operations propagate nans. 5860 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, 5861 const RISCVSubtarget &Subtarget) { 5862 SDLoc DL(Op); 5863 MVT VT = Op.getSimpleValueType(); 5864 5865 SDValue X = Op.getOperand(0); 5866 SDValue Y = Op.getOperand(1); 5867 5868 if (!VT.isVector()) { 5869 MVT XLenVT = Subtarget.getXLenVT(); 5870 5871 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 5872 // ensures that when one input is a nan, the other will also be a nan 5873 // allowing the nan to propagate. If both inputs are nan, this will swap the 5874 // inputs which is harmless. 5875 5876 SDValue NewY = Y; 5877 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { 5878 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); 5879 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); 5880 } 5881 5882 SDValue NewX = X; 5883 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { 5884 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); 5885 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); 5886 } 5887 5888 unsigned Opc = 5889 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; 5890 return DAG.getNode(Opc, DL, VT, NewX, NewY); 5891 } 5892 5893 // Check no NaNs before converting to fixed vector scalable. 
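  // Illustrative example (added comment, not from the original source): for
  // fmaximum(NaN, 3.0) the selects in the scalar path above or the VMERGE_VLs
  // in the vector path below replace the non-NaN operand with the NaN one, so
  // fmax/fmin sees two NaNs and returns NaN, preserving NaN propagation.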
5894 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); 5895 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); 5896 5897 MVT ContainerVT = VT; 5898 if (VT.isFixedLengthVector()) { 5899 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 5900 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 5901 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); 5902 } 5903 5904 SDValue Mask, VL; 5905 if (Op->isVPOpcode()) { 5906 Mask = Op.getOperand(2); 5907 if (VT.isFixedLengthVector()) 5908 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5909 Subtarget); 5910 VL = Op.getOperand(3); 5911 } else { 5912 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5913 } 5914 5915 SDValue NewY = Y; 5916 if (!XIsNeverNan) { 5917 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5918 {X, X, DAG.getCondCode(ISD::SETOEQ), 5919 DAG.getUNDEF(ContainerVT), Mask, VL}); 5920 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X, 5921 DAG.getUNDEF(ContainerVT), VL); 5922 } 5923 5924 SDValue NewX = X; 5925 if (!YIsNeverNan) { 5926 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5927 {Y, Y, DAG.getCondCode(ISD::SETOEQ), 5928 DAG.getUNDEF(ContainerVT), Mask, VL}); 5929 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y, 5930 DAG.getUNDEF(ContainerVT), VL); 5931 } 5932 5933 unsigned Opc = 5934 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM 5935 ? RISCVISD::VFMAX_VL 5936 : RISCVISD::VFMIN_VL; 5937 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, 5938 DAG.getUNDEF(ContainerVT), Mask, VL); 5939 if (VT.isFixedLengthVector()) 5940 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 5941 return Res; 5942 } 5943 5944 /// Get a RISC-V target specified VL op for a given SDNode. 
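/// For example, both ISD::ADD and ISD::VP_ADD map to RISCVISD::ADD_VL via the
/// OP_CASE/VP_CASE macros below.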
5945 static unsigned getRISCVVLOp(SDValue Op) { 5946 #define OP_CASE(NODE) \ 5947 case ISD::NODE: \ 5948 return RISCVISD::NODE##_VL; 5949 #define VP_CASE(NODE) \ 5950 case ISD::VP_##NODE: \ 5951 return RISCVISD::NODE##_VL; 5952 // clang-format off 5953 switch (Op.getOpcode()) { 5954 default: 5955 llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); 5956 OP_CASE(ADD) 5957 OP_CASE(SUB) 5958 OP_CASE(MUL) 5959 OP_CASE(MULHS) 5960 OP_CASE(MULHU) 5961 OP_CASE(SDIV) 5962 OP_CASE(SREM) 5963 OP_CASE(UDIV) 5964 OP_CASE(UREM) 5965 OP_CASE(SHL) 5966 OP_CASE(SRA) 5967 OP_CASE(SRL) 5968 OP_CASE(ROTL) 5969 OP_CASE(ROTR) 5970 OP_CASE(BSWAP) 5971 OP_CASE(CTTZ) 5972 OP_CASE(CTLZ) 5973 OP_CASE(CTPOP) 5974 OP_CASE(BITREVERSE) 5975 OP_CASE(SADDSAT) 5976 OP_CASE(UADDSAT) 5977 OP_CASE(SSUBSAT) 5978 OP_CASE(USUBSAT) 5979 OP_CASE(AVGFLOORS) 5980 OP_CASE(AVGFLOORU) 5981 OP_CASE(AVGCEILS) 5982 OP_CASE(AVGCEILU) 5983 OP_CASE(FADD) 5984 OP_CASE(FSUB) 5985 OP_CASE(FMUL) 5986 OP_CASE(FDIV) 5987 OP_CASE(FNEG) 5988 OP_CASE(FABS) 5989 OP_CASE(FSQRT) 5990 OP_CASE(SMIN) 5991 OP_CASE(SMAX) 5992 OP_CASE(UMIN) 5993 OP_CASE(UMAX) 5994 OP_CASE(STRICT_FADD) 5995 OP_CASE(STRICT_FSUB) 5996 OP_CASE(STRICT_FMUL) 5997 OP_CASE(STRICT_FDIV) 5998 OP_CASE(STRICT_FSQRT) 5999 VP_CASE(ADD) // VP_ADD 6000 VP_CASE(SUB) // VP_SUB 6001 VP_CASE(MUL) // VP_MUL 6002 VP_CASE(SDIV) // VP_SDIV 6003 VP_CASE(SREM) // VP_SREM 6004 VP_CASE(UDIV) // VP_UDIV 6005 VP_CASE(UREM) // VP_UREM 6006 VP_CASE(SHL) // VP_SHL 6007 VP_CASE(FADD) // VP_FADD 6008 VP_CASE(FSUB) // VP_FSUB 6009 VP_CASE(FMUL) // VP_FMUL 6010 VP_CASE(FDIV) // VP_FDIV 6011 VP_CASE(FNEG) // VP_FNEG 6012 VP_CASE(FABS) // VP_FABS 6013 VP_CASE(SMIN) // VP_SMIN 6014 VP_CASE(SMAX) // VP_SMAX 6015 VP_CASE(UMIN) // VP_UMIN 6016 VP_CASE(UMAX) // VP_UMAX 6017 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN 6018 VP_CASE(SETCC) // VP_SETCC 6019 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP 6020 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP 6021 VP_CASE(BITREVERSE) // VP_BITREVERSE 6022 VP_CASE(SADDSAT) // VP_SADDSAT 6023 VP_CASE(UADDSAT) // VP_UADDSAT 6024 VP_CASE(SSUBSAT) // VP_SSUBSAT 6025 VP_CASE(USUBSAT) // VP_USUBSAT 6026 VP_CASE(BSWAP) // VP_BSWAP 6027 VP_CASE(CTLZ) // VP_CTLZ 6028 VP_CASE(CTTZ) // VP_CTTZ 6029 VP_CASE(CTPOP) // VP_CTPOP 6030 case ISD::CTLZ_ZERO_UNDEF: 6031 case ISD::VP_CTLZ_ZERO_UNDEF: 6032 return RISCVISD::CTLZ_VL; 6033 case ISD::CTTZ_ZERO_UNDEF: 6034 case ISD::VP_CTTZ_ZERO_UNDEF: 6035 return RISCVISD::CTTZ_VL; 6036 case ISD::FMA: 6037 case ISD::VP_FMA: 6038 return RISCVISD::VFMADD_VL; 6039 case ISD::STRICT_FMA: 6040 return RISCVISD::STRICT_VFMADD_VL; 6041 case ISD::AND: 6042 case ISD::VP_AND: 6043 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 6044 return RISCVISD::VMAND_VL; 6045 return RISCVISD::AND_VL; 6046 case ISD::OR: 6047 case ISD::VP_OR: 6048 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 6049 return RISCVISD::VMOR_VL; 6050 return RISCVISD::OR_VL; 6051 case ISD::XOR: 6052 case ISD::VP_XOR: 6053 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 6054 return RISCVISD::VMXOR_VL; 6055 return RISCVISD::XOR_VL; 6056 case ISD::VP_SELECT: 6057 case ISD::VP_MERGE: 6058 return RISCVISD::VMERGE_VL; 6059 case ISD::VP_SRA: 6060 return RISCVISD::SRA_VL; 6061 case ISD::VP_SRL: 6062 return RISCVISD::SRL_VL; 6063 case ISD::VP_SQRT: 6064 return RISCVISD::FSQRT_VL; 6065 case ISD::VP_SIGN_EXTEND: 6066 return RISCVISD::VSEXT_VL; 6067 case ISD::VP_ZERO_EXTEND: 6068 return RISCVISD::VZEXT_VL; 6069 case ISD::VP_FP_TO_SINT: 6070 return RISCVISD::VFCVT_RTZ_X_F_VL; 
6071 case ISD::VP_FP_TO_UINT: 6072 return RISCVISD::VFCVT_RTZ_XU_F_VL; 6073 case ISD::FMINNUM: 6074 case ISD::VP_FMINNUM: 6075 return RISCVISD::VFMIN_VL; 6076 case ISD::FMAXNUM: 6077 case ISD::VP_FMAXNUM: 6078 return RISCVISD::VFMAX_VL; 6079 case ISD::LRINT: 6080 case ISD::VP_LRINT: 6081 case ISD::LLRINT: 6082 case ISD::VP_LLRINT: 6083 return RISCVISD::VFCVT_X_F_VL; 6084 } 6085 // clang-format on 6086 #undef OP_CASE 6087 #undef VP_CASE 6088 } 6089 6090 /// Return true if a RISC-V target specified op has a merge operand. 6091 static bool hasMergeOp(unsigned Opcode) { 6092 assert(Opcode > RISCVISD::FIRST_NUMBER && 6093 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 6094 "not a RISC-V target specific op"); 6095 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 6096 130 && 6097 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 6098 ISD::FIRST_TARGET_STRICTFP_OPCODE == 6099 21 && 6100 "adding target specific op should update this function"); 6101 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) 6102 return true; 6103 if (Opcode == RISCVISD::FCOPYSIGN_VL) 6104 return true; 6105 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) 6106 return true; 6107 if (Opcode == RISCVISD::SETCC_VL) 6108 return true; 6109 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) 6110 return true; 6111 if (Opcode == RISCVISD::VMERGE_VL) 6112 return true; 6113 return false; 6114 } 6115 6116 /// Return true if a RISC-V target specified op has a mask operand. 6117 static bool hasMaskOp(unsigned Opcode) { 6118 assert(Opcode > RISCVISD::FIRST_NUMBER && 6119 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 6120 "not a RISC-V target specific op"); 6121 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 6122 130 && 6123 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 6124 ISD::FIRST_TARGET_STRICTFP_OPCODE == 6125 21 && 6126 "adding target specific op should update this function"); 6127 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) 6128 return true; 6129 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) 6130 return true; 6131 if (Opcode >= RISCVISD::STRICT_FADD_VL && 6132 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) 6133 return true; 6134 return false; 6135 } 6136 6137 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { 6138 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 6139 SDLoc DL(Op); 6140 6141 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 6142 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 6143 6144 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 6145 if (!Op.getOperand(j).getValueType().isVector()) { 6146 LoOperands[j] = Op.getOperand(j); 6147 HiOperands[j] = Op.getOperand(j); 6148 continue; 6149 } 6150 std::tie(LoOperands[j], HiOperands[j]) = 6151 DAG.SplitVector(Op.getOperand(j), DL); 6152 } 6153 6154 SDValue LoRes = 6155 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 6156 SDValue HiRes = 6157 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 6158 6159 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 6160 } 6161 6162 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { 6163 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op"); 6164 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 6165 SDLoc DL(Op); 6166 6167 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 6168 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 6169 6170 
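  // Split each operand: the EVL operand is split with SplitEVL, non-vector
  // operands are reused unchanged by both halves, and vector operands are
  // split into their low and high halves.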
for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 6171 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) { 6172 std::tie(LoOperands[j], HiOperands[j]) = 6173 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL); 6174 continue; 6175 } 6176 if (!Op.getOperand(j).getValueType().isVector()) { 6177 LoOperands[j] = Op.getOperand(j); 6178 HiOperands[j] = Op.getOperand(j); 6179 continue; 6180 } 6181 std::tie(LoOperands[j], HiOperands[j]) = 6182 DAG.SplitVector(Op.getOperand(j), DL); 6183 } 6184 6185 SDValue LoRes = 6186 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 6187 SDValue HiRes = 6188 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 6189 6190 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 6191 } 6192 6193 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { 6194 SDLoc DL(Op); 6195 6196 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL); 6197 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL); 6198 auto [EVLLo, EVLHi] = 6199 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL); 6200 6201 SDValue ResLo = 6202 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 6203 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags()); 6204 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 6205 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags()); 6206 } 6207 6208 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { 6209 6210 assert(Op->isStrictFPOpcode()); 6211 6212 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0)); 6213 6214 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1)); 6215 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1)); 6216 6217 SDLoc DL(Op); 6218 6219 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 6220 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 6221 6222 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 6223 if (!Op.getOperand(j).getValueType().isVector()) { 6224 LoOperands[j] = Op.getOperand(j); 6225 HiOperands[j] = Op.getOperand(j); 6226 continue; 6227 } 6228 std::tie(LoOperands[j], HiOperands[j]) = 6229 DAG.SplitVector(Op.getOperand(j), DL); 6230 } 6231 6232 SDValue LoRes = 6233 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags()); 6234 HiOperands[0] = LoRes.getValue(1); 6235 SDValue HiRes = 6236 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags()); 6237 6238 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0), 6239 LoRes.getValue(0), HiRes.getValue(0)); 6240 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); 6241 } 6242 6243 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 6244 SelectionDAG &DAG) const { 6245 switch (Op.getOpcode()) { 6246 default: 6247 report_fatal_error("unimplemented operand"); 6248 case ISD::ATOMIC_FENCE: 6249 return LowerATOMIC_FENCE(Op, DAG, Subtarget); 6250 case ISD::GlobalAddress: 6251 return lowerGlobalAddress(Op, DAG); 6252 case ISD::BlockAddress: 6253 return lowerBlockAddress(Op, DAG); 6254 case ISD::ConstantPool: 6255 return lowerConstantPool(Op, DAG); 6256 case ISD::JumpTable: 6257 return lowerJumpTable(Op, DAG); 6258 case ISD::GlobalTLSAddress: 6259 return lowerGlobalTLSAddress(Op, DAG); 6260 case ISD::Constant: 6261 return lowerConstant(Op, DAG, Subtarget); 6262 case ISD::SELECT: 6263 return lowerSELECT(Op, DAG); 6264 case ISD::BRCOND: 6265 return lowerBRCOND(Op, DAG); 6266 case ISD::VASTART: 6267 return lowerVASTART(Op, DAG); 6268 case ISD::FRAMEADDR: 6269 return lowerFRAMEADDR(Op, DAG); 6270 case 
ISD::RETURNADDR: 6271 return lowerRETURNADDR(Op, DAG); 6272 case ISD::SADDO: 6273 case ISD::SSUBO: 6274 return lowerSADDO_SSUBO(Op, DAG); 6275 case ISD::SMULO: 6276 return lowerSMULO(Op, DAG); 6277 case ISD::SHL_PARTS: 6278 return lowerShiftLeftParts(Op, DAG); 6279 case ISD::SRA_PARTS: 6280 return lowerShiftRightParts(Op, DAG, true); 6281 case ISD::SRL_PARTS: 6282 return lowerShiftRightParts(Op, DAG, false); 6283 case ISD::ROTL: 6284 case ISD::ROTR: 6285 if (Op.getValueType().isFixedLengthVector()) { 6286 assert(Subtarget.hasStdExtZvkb()); 6287 return lowerToScalableOp(Op, DAG); 6288 } 6289 assert(Subtarget.hasVendorXTHeadBb() && 6290 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 6291 "Unexpected custom legalization"); 6292 // XTHeadBb only supports rotate by constant. 6293 if (!isa<ConstantSDNode>(Op.getOperand(1))) 6294 return SDValue(); 6295 return Op; 6296 case ISD::BITCAST: { 6297 SDLoc DL(Op); 6298 EVT VT = Op.getValueType(); 6299 SDValue Op0 = Op.getOperand(0); 6300 EVT Op0VT = Op0.getValueType(); 6301 MVT XLenVT = Subtarget.getXLenVT(); 6302 if (VT == MVT::f16 && Op0VT == MVT::i16 && 6303 Subtarget.hasStdExtZfhminOrZhinxmin()) { 6304 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 6305 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 6306 return FPConv; 6307 } 6308 if (VT == MVT::bf16 && Op0VT == MVT::i16 && 6309 Subtarget.hasStdExtZfbfmin()) { 6310 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 6311 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); 6312 return FPConv; 6313 } 6314 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 6315 Subtarget.hasStdExtFOrZfinx()) { 6316 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 6317 SDValue FPConv = 6318 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 6319 return FPConv; 6320 } 6321 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() && 6322 Subtarget.hasStdExtDOrZdinx()) { 6323 SDValue Lo, Hi; 6324 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); 6325 SDValue RetReg = 6326 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 6327 return RetReg; 6328 } 6329 6330 // Consider other scalar<->scalar casts as legal if the types are legal. 6331 // Otherwise expand them. 6332 if (!VT.isVector() && !Op0VT.isVector()) { 6333 if (isTypeLegal(VT) && isTypeLegal(Op0VT)) 6334 return Op; 6335 return SDValue(); 6336 } 6337 6338 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && 6339 "Unexpected types"); 6340 6341 if (VT.isFixedLengthVector()) { 6342 // We can handle fixed length vector bitcasts with a simple replacement 6343 // in isel. 6344 if (Op0VT.isFixedLengthVector()) 6345 return Op; 6346 // When bitcasting from scalar to fixed-length vector, insert the scalar 6347 // into a one-element vector of the result type, and perform a vector 6348 // bitcast. 6349 if (!Op0VT.isVector()) { 6350 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 6351 if (!isTypeLegal(BVT)) 6352 return SDValue(); 6353 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 6354 DAG.getUNDEF(BVT), Op0, 6355 DAG.getVectorIdxConstant(0, DL))); 6356 } 6357 return SDValue(); 6358 } 6359 // Custom-legalize bitcasts from fixed-length vector types to scalar types 6360 // thus: bitcast the vector to a one-element vector type whose element type 6361 // is the same as the result type, and extract the first element. 
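    // Illustrative example (added comment, not from the original source):
    // a bitcast of v4i8 to i32 becomes a bitcast of v4i8 to v1i32 followed
    // by an extract of element 0.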
6362 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 6363 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 6364 if (!isTypeLegal(BVT)) 6365 return SDValue(); 6366 SDValue BVec = DAG.getBitcast(BVT, Op0); 6367 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 6368 DAG.getVectorIdxConstant(0, DL)); 6369 } 6370 return SDValue(); 6371 } 6372 case ISD::INTRINSIC_WO_CHAIN: 6373 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 6374 case ISD::INTRINSIC_W_CHAIN: 6375 return LowerINTRINSIC_W_CHAIN(Op, DAG); 6376 case ISD::INTRINSIC_VOID: 6377 return LowerINTRINSIC_VOID(Op, DAG); 6378 case ISD::IS_FPCLASS: 6379 return LowerIS_FPCLASS(Op, DAG); 6380 case ISD::BITREVERSE: { 6381 MVT VT = Op.getSimpleValueType(); 6382 if (VT.isFixedLengthVector()) { 6383 assert(Subtarget.hasStdExtZvbb()); 6384 return lowerToScalableOp(Op, DAG); 6385 } 6386 SDLoc DL(Op); 6387 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); 6388 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); 6389 // Expand bitreverse to a bswap(rev8) followed by brev8. 6390 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); 6391 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); 6392 } 6393 case ISD::TRUNCATE: 6394 // Only custom-lower vector truncates 6395 if (!Op.getSimpleValueType().isVector()) 6396 return Op; 6397 return lowerVectorTruncLike(Op, DAG); 6398 case ISD::ANY_EXTEND: 6399 case ISD::ZERO_EXTEND: 6400 if (Op.getOperand(0).getValueType().isVector() && 6401 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 6402 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 6403 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 6404 case ISD::SIGN_EXTEND: 6405 if (Op.getOperand(0).getValueType().isVector() && 6406 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 6407 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 6408 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 6409 case ISD::SPLAT_VECTOR_PARTS: 6410 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 6411 case ISD::INSERT_VECTOR_ELT: 6412 return lowerINSERT_VECTOR_ELT(Op, DAG); 6413 case ISD::EXTRACT_VECTOR_ELT: 6414 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6415 case ISD::SCALAR_TO_VECTOR: { 6416 MVT VT = Op.getSimpleValueType(); 6417 SDLoc DL(Op); 6418 SDValue Scalar = Op.getOperand(0); 6419 if (VT.getVectorElementType() == MVT::i1) { 6420 MVT WideVT = VT.changeVectorElementType(MVT::i8); 6421 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); 6422 return DAG.getNode(ISD::TRUNCATE, DL, VT, V); 6423 } 6424 MVT ContainerVT = VT; 6425 if (VT.isFixedLengthVector()) 6426 ContainerVT = getContainerForFixedLengthVector(VT); 6427 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 6428 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar); 6429 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, 6430 DAG.getUNDEF(ContainerVT), Scalar, VL); 6431 if (VT.isFixedLengthVector()) 6432 V = convertFromScalableVector(VT, V, DAG, Subtarget); 6433 return V; 6434 } 6435 case ISD::VSCALE: { 6436 MVT XLenVT = Subtarget.getXLenVT(); 6437 MVT VT = Op.getSimpleValueType(); 6438 SDLoc DL(Op); 6439 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); 6440 // We define our scalable vector types for lmul=1 to use a 64 bit known 6441 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 6442 // vscale as VLENB / 8. 
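    // Illustrative examples (added comment, not from the original source):
    // (vscale * 4) becomes VLENB >> 1, (vscale * 16) becomes VLENB << 1, and
    // (vscale * 24) becomes VLENB * 3, avoiding a separate shift of VLENB.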
6443 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); 6444 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 6445 report_fatal_error("Support for VLEN==32 is incomplete."); 6446 // We assume VLENB is a multiple of 8. We manually choose the best shift 6447 // here because SimplifyDemandedBits isn't always able to simplify it. 6448 uint64_t Val = Op.getConstantOperandVal(0); 6449 if (isPowerOf2_64(Val)) { 6450 uint64_t Log2 = Log2_64(Val); 6451 if (Log2 < 3) 6452 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6453 DAG.getConstant(3 - Log2, DL, VT)); 6454 else if (Log2 > 3) 6455 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res, 6456 DAG.getConstant(Log2 - 3, DL, XLenVT)); 6457 } else if ((Val % 8) == 0) { 6458 // If the multiplier is a multiple of 8, scale it down to avoid needing 6459 // to shift the VLENB value. 6460 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res, 6461 DAG.getConstant(Val / 8, DL, XLenVT)); 6462 } else { 6463 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6464 DAG.getConstant(3, DL, XLenVT)); 6465 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale, 6466 DAG.getConstant(Val, DL, XLenVT)); 6467 } 6468 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 6469 } 6470 case ISD::FPOWI: { 6471 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 6472 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 6473 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 6474 Op.getOperand(1).getValueType() == MVT::i32) { 6475 SDLoc DL(Op); 6476 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6477 SDValue Powi = 6478 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 6479 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 6480 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6481 } 6482 return SDValue(); 6483 } 6484 case ISD::FMAXIMUM: 6485 case ISD::FMINIMUM: 6486 if (Op.getValueType() == MVT::nxv32f16 && 6487 (Subtarget.hasVInstructionsF16Minimal() && 6488 !Subtarget.hasVInstructionsF16())) 6489 return SplitVectorOp(Op, DAG); 6490 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 6491 case ISD::FP_EXTEND: { 6492 SDLoc DL(Op); 6493 EVT VT = Op.getValueType(); 6494 SDValue Op0 = Op.getOperand(0); 6495 EVT Op0VT = Op0.getValueType(); 6496 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) 6497 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6498 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { 6499 SDValue FloatVal = 6500 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6501 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); 6502 } 6503 6504 if (!Op.getValueType().isVector()) 6505 return Op; 6506 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6507 } 6508 case ISD::FP_ROUND: { 6509 SDLoc DL(Op); 6510 EVT VT = Op.getValueType(); 6511 SDValue Op0 = Op.getOperand(0); 6512 EVT Op0VT = Op0.getValueType(); 6513 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) 6514 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); 6515 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && 6516 Subtarget.hasStdExtDOrZdinx()) { 6517 SDValue FloatVal = 6518 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, 6519 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6520 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); 6521 } 6522 6523 if (!Op.getValueType().isVector()) 6524 return Op; 6525 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6526 } 6527 case 
ISD::STRICT_FP_ROUND: 6528 case ISD::STRICT_FP_EXTEND: 6529 return lowerStrictFPExtendOrRoundLike(Op, DAG); 6530 case ISD::SINT_TO_FP: 6531 case ISD::UINT_TO_FP: 6532 if (Op.getValueType().isVector() && 6533 Op.getValueType().getScalarType() == MVT::f16 && 6534 (Subtarget.hasVInstructionsF16Minimal() && 6535 !Subtarget.hasVInstructionsF16())) { 6536 if (Op.getValueType() == MVT::nxv32f16) 6537 return SplitVectorOp(Op, DAG); 6538 // int -> f32 6539 SDLoc DL(Op); 6540 MVT NVT = 6541 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 6542 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 6543 // f32 -> f16 6544 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 6545 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6546 } 6547 [[fallthrough]]; 6548 case ISD::FP_TO_SINT: 6549 case ISD::FP_TO_UINT: 6550 if (SDValue Op1 = Op.getOperand(0); 6551 Op1.getValueType().isVector() && 6552 Op1.getValueType().getScalarType() == MVT::f16 && 6553 (Subtarget.hasVInstructionsF16Minimal() && 6554 !Subtarget.hasVInstructionsF16())) { 6555 if (Op1.getValueType() == MVT::nxv32f16) 6556 return SplitVectorOp(Op, DAG); 6557 // f16 -> f32 6558 SDLoc DL(Op); 6559 MVT NVT = MVT::getVectorVT(MVT::f32, 6560 Op1.getValueType().getVectorElementCount()); 6561 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 6562 // f32 -> int 6563 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec); 6564 } 6565 [[fallthrough]]; 6566 case ISD::STRICT_FP_TO_SINT: 6567 case ISD::STRICT_FP_TO_UINT: 6568 case ISD::STRICT_SINT_TO_FP: 6569 case ISD::STRICT_UINT_TO_FP: { 6570 // RVV can only do fp<->int conversions to types half/double the size as 6571 // the source. We custom-lower any conversions that do two hops into 6572 // sequences. 6573 MVT VT = Op.getSimpleValueType(); 6574 if (!VT.isVector()) 6575 return Op; 6576 SDLoc DL(Op); 6577 bool IsStrict = Op->isStrictFPOpcode(); 6578 SDValue Src = Op.getOperand(0 + IsStrict); 6579 MVT EltVT = VT.getVectorElementType(); 6580 MVT SrcVT = Src.getSimpleValueType(); 6581 MVT SrcEltVT = SrcVT.getVectorElementType(); 6582 unsigned EltSize = EltVT.getSizeInBits(); 6583 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 6584 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 6585 "Unexpected vector element types"); 6586 6587 bool IsInt2FP = SrcEltVT.isInteger(); 6588 // Widening conversions 6589 if (EltSize > (2 * SrcEltSize)) { 6590 if (IsInt2FP) { 6591 // Do a regular integer sign/zero extension then convert to float. 6592 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), 6593 VT.getVectorElementCount()); 6594 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || 6595 Op.getOpcode() == ISD::STRICT_UINT_TO_FP) 6596 ? ISD::ZERO_EXTEND 6597 : ISD::SIGN_EXTEND; 6598 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 6599 if (IsStrict) 6600 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), 6601 Op.getOperand(0), Ext); 6602 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 6603 } 6604 // FP2Int 6605 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 6606 // Do one doubling fp_extend then complete the operation by converting 6607 // to int. 
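      // Illustrative example (added comment, not from the original source):
      // fp_to_sint from nxv2f16 to nxv2i64 first extends to nxv2f32, then
      // does a single f32->i64 conversion, which only doubles the element
      // size per step.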
6608 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6609 if (IsStrict) { 6610 auto [FExt, Chain] = 6611 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT); 6612 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt); 6613 } 6614 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 6615 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 6616 } 6617 6618 // Narrowing conversions 6619 if (SrcEltSize > (2 * EltSize)) { 6620 if (IsInt2FP) { 6621 // One narrowing int_to_fp, then an fp_round. 6622 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 6623 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6624 if (IsStrict) { 6625 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, 6626 DAG.getVTList(InterimFVT, MVT::Other), 6627 Op.getOperand(0), Src); 6628 SDValue Chain = Int2FP.getValue(1); 6629 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first; 6630 } 6631 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 6632 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 6633 } 6634 // FP2Int 6635 // One narrowing fp_to_int, then truncate the integer. If the float isn't 6636 // representable by the integer, the result is poison. 6637 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 6638 VT.getVectorElementCount()); 6639 if (IsStrict) { 6640 SDValue FP2Int = 6641 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), 6642 Op.getOperand(0), Src); 6643 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6644 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL); 6645 } 6646 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 6647 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6648 } 6649 6650 // Scalable vectors can exit here. Patterns will handle equally-sized 6651 // conversions halving/doubling ones. 6652 if (!VT.isFixedLengthVector()) 6653 return Op; 6654 6655 // For fixed-length vectors we lower to a custom "VL" node. 
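    // Illustrative example (added comment, not from the original source):
    // fp_to_sint from v4f32 to v4i32 is converted to its scalable container
    // type, lowered to VFCVT_RTZ_X_F_VL with the default mask and VL, and
    // then converted back to the fixed-length result type.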
6656 unsigned RVVOpc = 0; 6657 switch (Op.getOpcode()) { 6658 default: 6659 llvm_unreachable("Impossible opcode"); 6660 case ISD::FP_TO_SINT: 6661 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; 6662 break; 6663 case ISD::FP_TO_UINT: 6664 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; 6665 break; 6666 case ISD::SINT_TO_FP: 6667 RVVOpc = RISCVISD::SINT_TO_FP_VL; 6668 break; 6669 case ISD::UINT_TO_FP: 6670 RVVOpc = RISCVISD::UINT_TO_FP_VL; 6671 break; 6672 case ISD::STRICT_FP_TO_SINT: 6673 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; 6674 break; 6675 case ISD::STRICT_FP_TO_UINT: 6676 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; 6677 break; 6678 case ISD::STRICT_SINT_TO_FP: 6679 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; 6680 break; 6681 case ISD::STRICT_UINT_TO_FP: 6682 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; 6683 break; 6684 } 6685 6686 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6687 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 6688 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && 6689 "Expected same element count"); 6690 6691 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6692 6693 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 6694 if (IsStrict) { 6695 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 6696 Op.getOperand(0), Src, Mask, VL); 6697 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget); 6698 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL); 6699 } 6700 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 6701 return convertFromScalableVector(VT, Src, DAG, Subtarget); 6702 } 6703 case ISD::FP_TO_SINT_SAT: 6704 case ISD::FP_TO_UINT_SAT: 6705 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); 6706 case ISD::FP_TO_BF16: { 6707 // Custom lower to ensure the libcall return is passed in an FPR on hard 6708 // float ABIs. 6709 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); 6710 SDLoc DL(Op); 6711 MakeLibCallOptions CallOptions; 6712 RTLIB::Libcall LC = 6713 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); 6714 SDValue Res = 6715 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6716 if (Subtarget.is64Bit() && !RV64LegalI32) 6717 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6718 return DAG.getBitcast(MVT::i32, Res); 6719 } 6720 case ISD::BF16_TO_FP: { 6721 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); 6722 MVT VT = Op.getSimpleValueType(); 6723 SDLoc DL(Op); 6724 Op = DAG.getNode( 6725 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), 6726 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); 6727 SDValue Res = Subtarget.is64Bit() 6728 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) 6729 : DAG.getBitcast(MVT::f32, Op); 6730 // fp_extend if the target VT is bigger than f32. 6731 if (VT != MVT::f32) 6732 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); 6733 return Res; 6734 } 6735 case ISD::FP_TO_FP16: { 6736 // Custom lower to ensure the libcall return is passed in an FPR on hard 6737 // float ABIs. 
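    // Illustrative note (added comment, not from the original source): the
    // libcall result comes back as an f32 value in an FPR; the code below
    // then moves its bits to an integer register with FMV_X_ANYEXTW_RV64 on
    // RV64 (without RV64LegalI32) or a plain bitcast to i32 otherwise.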
6738 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6739 SDLoc DL(Op); 6740 MakeLibCallOptions CallOptions; 6741 RTLIB::Libcall LC = 6742 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); 6743 SDValue Res = 6744 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6745 if (Subtarget.is64Bit() && !RV64LegalI32) 6746 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6747 return DAG.getBitcast(MVT::i32, Res); 6748 } 6749 case ISD::FP16_TO_FP: { 6750 // Custom lower to ensure the libcall argument is passed in an FPR on hard 6751 // float ABIs. 6752 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6753 SDLoc DL(Op); 6754 MakeLibCallOptions CallOptions; 6755 SDValue Arg = Subtarget.is64Bit() 6756 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, 6757 Op.getOperand(0)) 6758 : DAG.getBitcast(MVT::f32, Op.getOperand(0)); 6759 SDValue Res = 6760 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) 6761 .first; 6762 return Res; 6763 } 6764 case ISD::FTRUNC: 6765 case ISD::FCEIL: 6766 case ISD::FFLOOR: 6767 case ISD::FNEARBYINT: 6768 case ISD::FRINT: 6769 case ISD::FROUND: 6770 case ISD::FROUNDEVEN: 6771 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6772 case ISD::LRINT: 6773 case ISD::LLRINT: 6774 return lowerVectorXRINT(Op, DAG, Subtarget); 6775 case ISD::VECREDUCE_ADD: 6776 case ISD::VECREDUCE_UMAX: 6777 case ISD::VECREDUCE_SMAX: 6778 case ISD::VECREDUCE_UMIN: 6779 case ISD::VECREDUCE_SMIN: 6780 return lowerVECREDUCE(Op, DAG); 6781 case ISD::VECREDUCE_AND: 6782 case ISD::VECREDUCE_OR: 6783 case ISD::VECREDUCE_XOR: 6784 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 6785 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 6786 return lowerVECREDUCE(Op, DAG); 6787 case ISD::VECREDUCE_FADD: 6788 case ISD::VECREDUCE_SEQ_FADD: 6789 case ISD::VECREDUCE_FMIN: 6790 case ISD::VECREDUCE_FMAX: 6791 case ISD::VECREDUCE_FMAXIMUM: 6792 case ISD::VECREDUCE_FMINIMUM: 6793 return lowerFPVECREDUCE(Op, DAG); 6794 case ISD::VP_REDUCE_ADD: 6795 case ISD::VP_REDUCE_UMAX: 6796 case ISD::VP_REDUCE_SMAX: 6797 case ISD::VP_REDUCE_UMIN: 6798 case ISD::VP_REDUCE_SMIN: 6799 case ISD::VP_REDUCE_FADD: 6800 case ISD::VP_REDUCE_SEQ_FADD: 6801 case ISD::VP_REDUCE_FMIN: 6802 case ISD::VP_REDUCE_FMAX: 6803 case ISD::VP_REDUCE_FMINIMUM: 6804 case ISD::VP_REDUCE_FMAXIMUM: 6805 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && 6806 (Subtarget.hasVInstructionsF16Minimal() && 6807 !Subtarget.hasVInstructionsF16())) 6808 return SplitVectorReductionOp(Op, DAG); 6809 return lowerVPREDUCE(Op, DAG); 6810 case ISD::VP_REDUCE_AND: 6811 case ISD::VP_REDUCE_OR: 6812 case ISD::VP_REDUCE_XOR: 6813 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 6814 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 6815 return lowerVPREDUCE(Op, DAG); 6816 case ISD::VP_CTTZ_ELTS: 6817 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: 6818 return lowerVPCttzElements(Op, DAG); 6819 case ISD::UNDEF: { 6820 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType()); 6821 return convertFromScalableVector(Op.getSimpleValueType(), 6822 DAG.getUNDEF(ContainerVT), DAG, Subtarget); 6823 } 6824 case ISD::INSERT_SUBVECTOR: 6825 return lowerINSERT_SUBVECTOR(Op, DAG); 6826 case ISD::EXTRACT_SUBVECTOR: 6827 return lowerEXTRACT_SUBVECTOR(Op, DAG); 6828 case ISD::VECTOR_DEINTERLEAVE: 6829 return lowerVECTOR_DEINTERLEAVE(Op, DAG); 6830 case ISD::VECTOR_INTERLEAVE: 6831 return 
lowerVECTOR_INTERLEAVE(Op, DAG); 6832 case ISD::STEP_VECTOR: 6833 return lowerSTEP_VECTOR(Op, DAG); 6834 case ISD::VECTOR_REVERSE: 6835 return lowerVECTOR_REVERSE(Op, DAG); 6836 case ISD::VECTOR_SPLICE: 6837 return lowerVECTOR_SPLICE(Op, DAG); 6838 case ISD::BUILD_VECTOR: 6839 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 6840 case ISD::SPLAT_VECTOR: 6841 if ((Op.getValueType().getScalarType() == MVT::f16 && 6842 (Subtarget.hasVInstructionsF16Minimal() && 6843 Subtarget.hasStdExtZfhminOrZhinxmin() && 6844 !Subtarget.hasVInstructionsF16())) || 6845 (Op.getValueType().getScalarType() == MVT::bf16 && 6846 (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) { 6847 if (Op.getValueType() == MVT::nxv32f16 || 6848 Op.getValueType() == MVT::nxv32bf16) 6849 return SplitVectorOp(Op, DAG); 6850 SDLoc DL(Op); 6851 SDValue NewScalar = 6852 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6853 SDValue NewSplat = DAG.getNode( 6854 ISD::SPLAT_VECTOR, DL, 6855 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), 6856 NewScalar); 6857 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat, 6858 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6859 } 6860 if (Op.getValueType().getVectorElementType() == MVT::i1) 6861 return lowerVectorMaskSplat(Op, DAG); 6862 return SDValue(); 6863 case ISD::VECTOR_SHUFFLE: 6864 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 6865 case ISD::CONCAT_VECTORS: { 6866 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 6867 // better than going through the stack, as the default expansion does. 6868 SDLoc DL(Op); 6869 MVT VT = Op.getSimpleValueType(); 6870 MVT ContainerVT = VT; 6871 if (VT.isFixedLengthVector()) 6872 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); 6873 6874 // Recursively split concat_vectors with more than 2 operands: 6875 // 6876 // concat_vector op1, op2, op3, op4 6877 // -> 6878 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4) 6879 // 6880 // This reduces the length of the chain of vslideups and allows us to 6881 // perform the vslideups at a smaller LMUL, limited to MF2. 6882 if (Op.getNumOperands() > 2 && 6883 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) { 6884 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 6885 assert(isPowerOf2_32(Op.getNumOperands())); 6886 size_t HalfNumOps = Op.getNumOperands() / 2; 6887 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, 6888 Op->ops().take_front(HalfNumOps)); 6889 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, 6890 Op->ops().drop_front(HalfNumOps)); 6891 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 6892 } 6893 6894 unsigned NumOpElts = 6895 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 6896 SDValue Vec = DAG.getUNDEF(VT); 6897 for (const auto &OpIdx : enumerate(Op->ops())) { 6898 SDValue SubVec = OpIdx.value(); 6899 // Don't insert undef subvectors. 
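      // Illustrative example (added comment, not from the original source):
      // concatenating four v2i32 operands inserts them at element indices
      // 0, 2, 4 and 6 of the result; undef operands can be skipped because
      // the accumulator starts out as UNDEF.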
6900 if (SubVec.isUndef()) 6901 continue; 6902 Vec = 6903 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, 6904 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL)); 6905 } 6906 return Vec; 6907 } 6908 case ISD::LOAD: 6909 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 6910 return V; 6911 if (Op.getValueType().isFixedLengthVector()) 6912 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 6913 return Op; 6914 case ISD::STORE: 6915 if (auto V = expandUnalignedRVVStore(Op, DAG)) 6916 return V; 6917 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 6918 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 6919 return Op; 6920 case ISD::MLOAD: 6921 case ISD::VP_LOAD: 6922 return lowerMaskedLoad(Op, DAG); 6923 case ISD::MSTORE: 6924 case ISD::VP_STORE: 6925 return lowerMaskedStore(Op, DAG); 6926 case ISD::SELECT_CC: { 6927 // This occurs because we custom legalize SETGT and SETUGT for setcc. That 6928 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand 6929 // into separate SETCC+SELECT just like LegalizeDAG. 6930 SDValue Tmp1 = Op.getOperand(0); 6931 SDValue Tmp2 = Op.getOperand(1); 6932 SDValue True = Op.getOperand(2); 6933 SDValue False = Op.getOperand(3); 6934 EVT VT = Op.getValueType(); 6935 SDValue CC = Op.getOperand(4); 6936 EVT CmpVT = Tmp1.getValueType(); 6937 EVT CCVT = 6938 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); 6939 SDLoc DL(Op); 6940 SDValue Cond = 6941 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); 6942 return DAG.getSelect(DL, VT, Cond, True, False); 6943 } 6944 case ISD::SETCC: { 6945 MVT OpVT = Op.getOperand(0).getSimpleValueType(); 6946 if (OpVT.isScalarInteger()) { 6947 MVT VT = Op.getSimpleValueType(); 6948 SDValue LHS = Op.getOperand(0); 6949 SDValue RHS = Op.getOperand(1); 6950 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 6951 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && 6952 "Unexpected CondCode"); 6953 6954 SDLoc DL(Op); 6955 6956 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can 6957 // convert this to the equivalent of (set(u)ge X, C+1) by using 6958 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant 6959 // in a register. 6960 if (isa<ConstantSDNode>(RHS)) { 6961 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); 6962 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { 6963 // If this is an unsigned compare and the constant is -1, incrementing 6964 // the constant would change behavior. The result should be false. 6965 if (CCVal == ISD::SETUGT && Imm == -1) 6966 return DAG.getConstant(0, DL, VT); 6967 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. 6968 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6969 SDValue SetCC = DAG.getSetCC( 6970 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); 6971 return DAG.getLogicalNOT(DL, SetCC, VT); 6972 } 6973 } 6974 6975 // Not a constant we could handle, swap the operands and condition code to 6976 // SETLT/SETULT. 
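      // Illustrative example (added comment, not from the original source):
      // (setugt X, Y) becomes (setult Y, X), which maps directly onto the
      // sltu instruction.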
6977 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6978 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); 6979 } 6980 6981 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 6982 (Subtarget.hasVInstructionsF16Minimal() && 6983 !Subtarget.hasVInstructionsF16())) 6984 return SplitVectorOp(Op, DAG); 6985 6986 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 6987 } 6988 case ISD::ADD: 6989 case ISD::SUB: 6990 case ISD::MUL: 6991 case ISD::MULHS: 6992 case ISD::MULHU: 6993 case ISD::AND: 6994 case ISD::OR: 6995 case ISD::XOR: 6996 case ISD::SDIV: 6997 case ISD::SREM: 6998 case ISD::UDIV: 6999 case ISD::UREM: 7000 case ISD::BSWAP: 7001 case ISD::CTPOP: 7002 return lowerToScalableOp(Op, DAG); 7003 case ISD::SHL: 7004 case ISD::SRA: 7005 case ISD::SRL: 7006 if (Op.getSimpleValueType().isFixedLengthVector()) 7007 return lowerToScalableOp(Op, DAG); 7008 // This can be called for an i32 shift amount that needs to be promoted. 7009 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 7010 "Unexpected custom legalisation"); 7011 return SDValue(); 7012 case ISD::FADD: 7013 case ISD::FSUB: 7014 case ISD::FMUL: 7015 case ISD::FDIV: 7016 case ISD::FNEG: 7017 case ISD::FABS: 7018 case ISD::FSQRT: 7019 case ISD::FMA: 7020 case ISD::FMINNUM: 7021 case ISD::FMAXNUM: 7022 if (Op.getValueType() == MVT::nxv32f16 && 7023 (Subtarget.hasVInstructionsF16Minimal() && 7024 !Subtarget.hasVInstructionsF16())) 7025 return SplitVectorOp(Op, DAG); 7026 [[fallthrough]]; 7027 case ISD::AVGFLOORS: 7028 case ISD::AVGFLOORU: 7029 case ISD::AVGCEILS: 7030 case ISD::AVGCEILU: 7031 case ISD::SMIN: 7032 case ISD::SMAX: 7033 case ISD::UMIN: 7034 case ISD::UMAX: 7035 return lowerToScalableOp(Op, DAG); 7036 case ISD::UADDSAT: 7037 case ISD::USUBSAT: 7038 if (!Op.getValueType().isVector()) 7039 return lowerUADDSAT_USUBSAT(Op, DAG); 7040 return lowerToScalableOp(Op, DAG); 7041 case ISD::SADDSAT: 7042 case ISD::SSUBSAT: 7043 if (!Op.getValueType().isVector()) 7044 return lowerSADDSAT_SSUBSAT(Op, DAG); 7045 return lowerToScalableOp(Op, DAG); 7046 case ISD::ABDS: 7047 case ISD::ABDU: { 7048 SDLoc dl(Op); 7049 EVT VT = Op->getValueType(0); 7050 SDValue LHS = DAG.getFreeze(Op->getOperand(0)); 7051 SDValue RHS = DAG.getFreeze(Op->getOperand(1)); 7052 bool IsSigned = Op->getOpcode() == ISD::ABDS; 7053 7054 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) 7055 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs)) 7056 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX; 7057 unsigned MinOpc = IsSigned ? 
ISD::SMIN : ISD::UMIN; 7058 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS); 7059 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS); 7060 return DAG.getNode(ISD::SUB, dl, VT, Max, Min); 7061 } 7062 case ISD::ABS: 7063 case ISD::VP_ABS: 7064 return lowerABS(Op, DAG); 7065 case ISD::CTLZ: 7066 case ISD::CTLZ_ZERO_UNDEF: 7067 case ISD::CTTZ: 7068 case ISD::CTTZ_ZERO_UNDEF: 7069 if (Subtarget.hasStdExtZvbb()) 7070 return lowerToScalableOp(Op, DAG); 7071 assert(Op.getOpcode() != ISD::CTTZ); 7072 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 7073 case ISD::VSELECT: 7074 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 7075 case ISD::FCOPYSIGN: 7076 if (Op.getValueType() == MVT::nxv32f16 && 7077 (Subtarget.hasVInstructionsF16Minimal() && 7078 !Subtarget.hasVInstructionsF16())) 7079 return SplitVectorOp(Op, DAG); 7080 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 7081 case ISD::STRICT_FADD: 7082 case ISD::STRICT_FSUB: 7083 case ISD::STRICT_FMUL: 7084 case ISD::STRICT_FDIV: 7085 case ISD::STRICT_FSQRT: 7086 case ISD::STRICT_FMA: 7087 if (Op.getValueType() == MVT::nxv32f16 && 7088 (Subtarget.hasVInstructionsF16Minimal() && 7089 !Subtarget.hasVInstructionsF16())) 7090 return SplitStrictFPVectorOp(Op, DAG); 7091 return lowerToScalableOp(Op, DAG); 7092 case ISD::STRICT_FSETCC: 7093 case ISD::STRICT_FSETCCS: 7094 return lowerVectorStrictFSetcc(Op, DAG); 7095 case ISD::STRICT_FCEIL: 7096 case ISD::STRICT_FRINT: 7097 case ISD::STRICT_FFLOOR: 7098 case ISD::STRICT_FTRUNC: 7099 case ISD::STRICT_FNEARBYINT: 7100 case ISD::STRICT_FROUND: 7101 case ISD::STRICT_FROUNDEVEN: 7102 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 7103 case ISD::MGATHER: 7104 case ISD::VP_GATHER: 7105 return lowerMaskedGather(Op, DAG); 7106 case ISD::MSCATTER: 7107 case ISD::VP_SCATTER: 7108 return lowerMaskedScatter(Op, DAG); 7109 case ISD::GET_ROUNDING: 7110 return lowerGET_ROUNDING(Op, DAG); 7111 case ISD::SET_ROUNDING: 7112 return lowerSET_ROUNDING(Op, DAG); 7113 case ISD::EH_DWARF_CFA: 7114 return lowerEH_DWARF_CFA(Op, DAG); 7115 case ISD::VP_SELECT: 7116 case ISD::VP_MERGE: 7117 case ISD::VP_ADD: 7118 case ISD::VP_SUB: 7119 case ISD::VP_MUL: 7120 case ISD::VP_SDIV: 7121 case ISD::VP_UDIV: 7122 case ISD::VP_SREM: 7123 case ISD::VP_UREM: 7124 case ISD::VP_UADDSAT: 7125 case ISD::VP_USUBSAT: 7126 case ISD::VP_SADDSAT: 7127 case ISD::VP_SSUBSAT: 7128 case ISD::VP_LRINT: 7129 case ISD::VP_LLRINT: 7130 return lowerVPOp(Op, DAG); 7131 case ISD::VP_AND: 7132 case ISD::VP_OR: 7133 case ISD::VP_XOR: 7134 return lowerLogicVPOp(Op, DAG); 7135 case ISD::VP_FADD: 7136 case ISD::VP_FSUB: 7137 case ISD::VP_FMUL: 7138 case ISD::VP_FDIV: 7139 case ISD::VP_FNEG: 7140 case ISD::VP_FABS: 7141 case ISD::VP_SQRT: 7142 case ISD::VP_FMA: 7143 case ISD::VP_FMINNUM: 7144 case ISD::VP_FMAXNUM: 7145 case ISD::VP_FCOPYSIGN: 7146 if (Op.getValueType() == MVT::nxv32f16 && 7147 (Subtarget.hasVInstructionsF16Minimal() && 7148 !Subtarget.hasVInstructionsF16())) 7149 return SplitVPOp(Op, DAG); 7150 [[fallthrough]]; 7151 case ISD::VP_SRA: 7152 case ISD::VP_SRL: 7153 case ISD::VP_SHL: 7154 return lowerVPOp(Op, DAG); 7155 case ISD::VP_IS_FPCLASS: 7156 return LowerIS_FPCLASS(Op, DAG); 7157 case ISD::VP_SIGN_EXTEND: 7158 case ISD::VP_ZERO_EXTEND: 7159 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 7160 return lowerVPExtMaskOp(Op, DAG); 7161 return lowerVPOp(Op, DAG); 7162 case ISD::VP_TRUNCATE: 7163 return lowerVectorTruncLike(Op, DAG); 7164 case ISD::VP_FP_EXTEND: 7165 case ISD::VP_FP_ROUND: 7166 
return lowerVectorFPExtendOrRoundLike(Op, DAG); 7167 case ISD::VP_SINT_TO_FP: 7168 case ISD::VP_UINT_TO_FP: 7169 if (Op.getValueType().isVector() && 7170 Op.getValueType().getScalarType() == MVT::f16 && 7171 (Subtarget.hasVInstructionsF16Minimal() && 7172 !Subtarget.hasVInstructionsF16())) { 7173 if (Op.getValueType() == MVT::nxv32f16) 7174 return SplitVPOp(Op, DAG); 7175 // int -> f32 7176 SDLoc DL(Op); 7177 MVT NVT = 7178 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 7179 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 7180 // f32 -> f16 7181 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 7182 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 7183 } 7184 [[fallthrough]]; 7185 case ISD::VP_FP_TO_SINT: 7186 case ISD::VP_FP_TO_UINT: 7187 if (SDValue Op1 = Op.getOperand(0); 7188 Op1.getValueType().isVector() && 7189 Op1.getValueType().getScalarType() == MVT::f16 && 7190 (Subtarget.hasVInstructionsF16Minimal() && 7191 !Subtarget.hasVInstructionsF16())) { 7192 if (Op1.getValueType() == MVT::nxv32f16) 7193 return SplitVPOp(Op, DAG); 7194 // f16 -> f32 7195 SDLoc DL(Op); 7196 MVT NVT = MVT::getVectorVT(MVT::f32, 7197 Op1.getValueType().getVectorElementCount()); 7198 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 7199 // f32 -> int 7200 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 7201 {WidenVec, Op.getOperand(1), Op.getOperand(2)}); 7202 } 7203 return lowerVPFPIntConvOp(Op, DAG); 7204 case ISD::VP_SETCC: 7205 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 7206 (Subtarget.hasVInstructionsF16Minimal() && 7207 !Subtarget.hasVInstructionsF16())) 7208 return SplitVPOp(Op, DAG); 7209 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 7210 return lowerVPSetCCMaskOp(Op, DAG); 7211 [[fallthrough]]; 7212 case ISD::VP_SMIN: 7213 case ISD::VP_SMAX: 7214 case ISD::VP_UMIN: 7215 case ISD::VP_UMAX: 7216 case ISD::VP_BITREVERSE: 7217 case ISD::VP_BSWAP: 7218 return lowerVPOp(Op, DAG); 7219 case ISD::VP_CTLZ: 7220 case ISD::VP_CTLZ_ZERO_UNDEF: 7221 if (Subtarget.hasStdExtZvbb()) 7222 return lowerVPOp(Op, DAG); 7223 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 7224 case ISD::VP_CTTZ: 7225 case ISD::VP_CTTZ_ZERO_UNDEF: 7226 if (Subtarget.hasStdExtZvbb()) 7227 return lowerVPOp(Op, DAG); 7228 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 7229 case ISD::VP_CTPOP: 7230 return lowerVPOp(Op, DAG); 7231 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 7232 return lowerVPStridedLoad(Op, DAG); 7233 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 7234 return lowerVPStridedStore(Op, DAG); 7235 case ISD::VP_FCEIL: 7236 case ISD::VP_FFLOOR: 7237 case ISD::VP_FRINT: 7238 case ISD::VP_FNEARBYINT: 7239 case ISD::VP_FROUND: 7240 case ISD::VP_FROUNDEVEN: 7241 case ISD::VP_FROUNDTOZERO: 7242 if (Op.getValueType() == MVT::nxv32f16 && 7243 (Subtarget.hasVInstructionsF16Minimal() && 7244 !Subtarget.hasVInstructionsF16())) 7245 return SplitVPOp(Op, DAG); 7246 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 7247 case ISD::VP_FMAXIMUM: 7248 case ISD::VP_FMINIMUM: 7249 if (Op.getValueType() == MVT::nxv32f16 && 7250 (Subtarget.hasVInstructionsF16Minimal() && 7251 !Subtarget.hasVInstructionsF16())) 7252 return SplitVPOp(Op, DAG); 7253 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 7254 case ISD::EXPERIMENTAL_VP_SPLICE: 7255 return lowerVPSpliceExperimental(Op, DAG); 7256 case ISD::EXPERIMENTAL_VP_REVERSE: 7257 return lowerVPReverseExperimental(Op, DAG); 7258 case ISD::EXPERIMENTAL_VP_SPLAT: 7259 return 
lowerVPSplatExperimental(Op, DAG); 7260 case ISD::CLEAR_CACHE: { 7261 assert(getTargetMachine().getTargetTriple().isOSLinux() && 7262 "llvm.clear_cache only needs custom lower on Linux targets"); 7263 SDLoc DL(Op); 7264 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 7265 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1), 7266 Op.getOperand(2), Flags, DL); 7267 } 7268 } 7269 } 7270 7271 SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain, 7272 SDValue Start, SDValue End, 7273 SDValue Flags, SDLoc DL) const { 7274 MakeLibCallOptions CallOptions; 7275 std::pair<SDValue, SDValue> CallResult = 7276 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid, 7277 {Start, End, Flags}, CallOptions, DL, InChain); 7278 7279 // This function returns void so only the out chain matters. 7280 return CallResult.second; 7281 } 7282 7283 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, 7284 SelectionDAG &DAG, unsigned Flags) { 7285 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 7286 } 7287 7288 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, 7289 SelectionDAG &DAG, unsigned Flags) { 7290 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 7291 Flags); 7292 } 7293 7294 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, 7295 SelectionDAG &DAG, unsigned Flags) { 7296 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 7297 N->getOffset(), Flags); 7298 } 7299 7300 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, 7301 SelectionDAG &DAG, unsigned Flags) { 7302 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 7303 } 7304 7305 template <class NodeTy> 7306 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 7307 bool IsLocal, bool IsExternWeak) const { 7308 SDLoc DL(N); 7309 EVT Ty = getPointerTy(DAG.getDataLayout()); 7310 7311 // When HWASAN is used and tagging of global variables is enabled 7312 // they should be accessed via the GOT, since the tagged address of a global 7313 // is incompatible with existing code models. This also applies to non-pic 7314 // mode. 7315 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { 7316 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 7317 if (IsLocal && !Subtarget.allowTaggedGlobals()) 7318 // Use PC-relative addressing to access the symbol. This generates the 7319 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 7320 // %pcrel_lo(auipc)). 7321 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 7322 7323 // Use PC-relative addressing to access the GOT for this symbol, then load 7324 // the address from the GOT. This generates the pattern (PseudoLGA sym), 7325 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 
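    // Illustrative assembly (added comment, not from the original source),
    // assuming RV64:
    //   .Lpcrel_hi0: auipc a0, %got_pcrel_hi(sym)
    //                ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)
    // On RV32 the load is an lw instead.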
7326 SDValue Load = 7327 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 7328 MachineFunction &MF = DAG.getMachineFunction(); 7329 MachineMemOperand *MemOp = MF.getMachineMemOperand( 7330 MachinePointerInfo::getGOT(MF), 7331 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 7332 MachineMemOperand::MOInvariant, 7333 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 7334 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 7335 return Load; 7336 } 7337 7338 switch (getTargetMachine().getCodeModel()) { 7339 default: 7340 report_fatal_error("Unsupported code model for lowering"); 7341 case CodeModel::Small: { 7342 // Generate a sequence for accessing addresses within the first 2 GiB of 7343 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 7344 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 7345 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 7346 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 7347 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); 7348 } 7349 case CodeModel::Medium: { 7350 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 7351 if (IsExternWeak) { 7352 // An extern weak symbol may be undefined, i.e. have value 0, which may 7353 // not be within 2GiB of PC, so use GOT-indirect addressing to access the 7354 // symbol. This generates the pattern (PseudoLGA sym), which expands to 7355 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 7356 SDValue Load = 7357 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 7358 MachineFunction &MF = DAG.getMachineFunction(); 7359 MachineMemOperand *MemOp = MF.getMachineMemOperand( 7360 MachinePointerInfo::getGOT(MF), 7361 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 7362 MachineMemOperand::MOInvariant, 7363 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 7364 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 7365 return Load; 7366 } 7367 7368 // Generate a sequence for accessing addresses within any 2GiB range within 7369 // the address space. This generates the pattern (PseudoLLA sym), which 7370 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 
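  // For example, this is expected to assemble to roughly (labels and
  // registers illustrative only):
  //   .Lpcrel_hi1:
  //     auipc a0, %pcrel_hi(sym)
  //     addi  a0, a0, %pcrel_lo(.Lpcrel_hi1)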
7371 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 7372 } 7373 } 7374 } 7375 7376 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 7377 SelectionDAG &DAG) const { 7378 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 7379 assert(N->getOffset() == 0 && "unexpected offset in global node"); 7380 const GlobalValue *GV = N->getGlobal(); 7381 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage()); 7382 } 7383 7384 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 7385 SelectionDAG &DAG) const { 7386 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 7387 7388 return getAddr(N, DAG); 7389 } 7390 7391 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 7392 SelectionDAG &DAG) const { 7393 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 7394 7395 return getAddr(N, DAG); 7396 } 7397 7398 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 7399 SelectionDAG &DAG) const { 7400 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 7401 7402 return getAddr(N, DAG); 7403 } 7404 7405 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 7406 SelectionDAG &DAG, 7407 bool UseGOT) const { 7408 SDLoc DL(N); 7409 EVT Ty = getPointerTy(DAG.getDataLayout()); 7410 const GlobalValue *GV = N->getGlobal(); 7411 MVT XLenVT = Subtarget.getXLenVT(); 7412 7413 if (UseGOT) { 7414 // Use PC-relative addressing to access the GOT for this TLS symbol, then 7415 // load the address from the GOT and add the thread pointer. This generates 7416 // the pattern (PseudoLA_TLS_IE sym), which expands to 7417 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 7418 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 7419 SDValue Load = 7420 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 7421 MachineFunction &MF = DAG.getMachineFunction(); 7422 MachineMemOperand *MemOp = MF.getMachineMemOperand( 7423 MachinePointerInfo::getGOT(MF), 7424 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 7425 MachineMemOperand::MOInvariant, 7426 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 7427 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 7428 7429 // Add the thread pointer. 7430 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 7431 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 7432 } 7433 7434 // Generate a sequence for accessing the address relative to the thread 7435 // pointer, with the appropriate adjustment for the thread pointer offset. 
7436 // This generates the pattern 7437 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 7438 SDValue AddrHi = 7439 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 7440 SDValue AddrAdd = 7441 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 7442 SDValue AddrLo = 7443 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 7444 7445 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 7446 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 7447 SDValue MNAdd = 7448 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); 7449 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); 7450 } 7451 7452 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 7453 SelectionDAG &DAG) const { 7454 SDLoc DL(N); 7455 EVT Ty = getPointerTy(DAG.getDataLayout()); 7456 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 7457 const GlobalValue *GV = N->getGlobal(); 7458 7459 // Use a PC-relative addressing mode to access the global dynamic GOT address. 7460 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 7461 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 7462 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 7463 SDValue Load = 7464 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 7465 7466 // Prepare argument list to generate call. 7467 ArgListTy Args; 7468 ArgListEntry Entry; 7469 Entry.Node = Load; 7470 Entry.Ty = CallTy; 7471 Args.push_back(Entry); 7472 7473 // Setup call to __tls_get_addr. 7474 TargetLowering::CallLoweringInfo CLI(DAG); 7475 CLI.setDebugLoc(DL) 7476 .setChain(DAG.getEntryNode()) 7477 .setLibCallee(CallingConv::C, CallTy, 7478 DAG.getExternalSymbol("__tls_get_addr", Ty), 7479 std::move(Args)); 7480 7481 return LowerCallTo(CLI).first; 7482 } 7483 7484 SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, 7485 SelectionDAG &DAG) const { 7486 SDLoc DL(N); 7487 EVT Ty = getPointerTy(DAG.getDataLayout()); 7488 const GlobalValue *GV = N->getGlobal(); 7489 7490 // Use a PC-relative addressing mode to access the global dynamic GOT address. 
7491 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to 7492 // 7493 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) 7494 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label) 7495 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label) 7496 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) 7497 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 7498 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); 7499 } 7500 7501 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 7502 SelectionDAG &DAG) const { 7503 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 7504 assert(N->getOffset() == 0 && "unexpected offset in global node"); 7505 7506 if (DAG.getTarget().useEmulatedTLS()) 7507 return LowerToTLSEmulatedModel(N, DAG); 7508 7509 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 7510 7511 if (DAG.getMachineFunction().getFunction().getCallingConv() == 7512 CallingConv::GHC) 7513 report_fatal_error("In GHC calling convention TLS is not supported"); 7514 7515 SDValue Addr; 7516 switch (Model) { 7517 case TLSModel::LocalExec: 7518 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 7519 break; 7520 case TLSModel::InitialExec: 7521 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 7522 break; 7523 case TLSModel::LocalDynamic: 7524 case TLSModel::GeneralDynamic: 7525 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) 7526 : getDynamicTLSAddr(N, DAG); 7527 break; 7528 } 7529 7530 return Addr; 7531 } 7532 7533 // Return true if Val is equal to (setcc LHS, RHS, CC). 7534 // Return false if Val is the inverse of (setcc LHS, RHS, CC). 7535 // Otherwise, return std::nullopt. 7536 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, 7537 ISD::CondCode CC, SDValue Val) { 7538 assert(Val->getOpcode() == ISD::SETCC); 7539 SDValue LHS2 = Val.getOperand(0); 7540 SDValue RHS2 = Val.getOperand(1); 7541 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get(); 7542 7543 if (LHS == LHS2 && RHS == RHS2) { 7544 if (CC == CC2) 7545 return true; 7546 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7547 return false; 7548 } else if (LHS == RHS2 && RHS == LHS2) { 7549 CC2 = ISD::getSetCCSwappedOperands(CC2); 7550 if (CC == CC2) 7551 return true; 7552 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7553 return false; 7554 } 7555 7556 return std::nullopt; 7557 } 7558 7559 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, 7560 const RISCVSubtarget &Subtarget) { 7561 SDValue CondV = N->getOperand(0); 7562 SDValue TrueV = N->getOperand(1); 7563 SDValue FalseV = N->getOperand(2); 7564 MVT VT = N->getSimpleValueType(0); 7565 SDLoc DL(N); 7566 7567 if (!Subtarget.hasConditionalMoveFusion()) { 7568 // (select c, -1, y) -> -c | y 7569 if (isAllOnesConstant(TrueV)) { 7570 SDValue Neg = DAG.getNegative(CondV, DL, VT); 7571 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV)); 7572 } 7573 // (select c, y, -1) -> (c-1) | y 7574 if (isAllOnesConstant(FalseV)) { 7575 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7576 DAG.getAllOnesConstant(DL, VT)); 7577 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV)); 7578 } 7579 7580 // (select c, 0, y) -> (c-1) & y 7581 if (isNullConstant(TrueV)) { 7582 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7583 DAG.getAllOnesConstant(DL, VT)); 7584 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV)); 7585 } 7586 // (select c, y, 0) -> -c & y 7587 if 
(isNullConstant(FalseV)) { 7588 SDValue Neg = DAG.getNegative(CondV, DL, VT); 7589 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV)); 7590 } 7591 } 7592 7593 // select c, ~x, x --> xor -c, x 7594 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) { 7595 const APInt &TrueVal = TrueV->getAsAPIntVal(); 7596 const APInt &FalseVal = FalseV->getAsAPIntVal(); 7597 if (~TrueVal == FalseVal) { 7598 SDValue Neg = DAG.getNegative(CondV, DL, VT); 7599 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV); 7600 } 7601 } 7602 7603 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops 7604 // when both truev and falsev are also setcc. 7605 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && 7606 FalseV.getOpcode() == ISD::SETCC) { 7607 SDValue LHS = CondV.getOperand(0); 7608 SDValue RHS = CondV.getOperand(1); 7609 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7610 7611 // (select x, x, y) -> x | y 7612 // (select !x, x, y) -> x & y 7613 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { 7614 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, 7615 DAG.getFreeze(FalseV)); 7616 } 7617 // (select x, y, x) -> x & y 7618 // (select !x, y, x) -> x | y 7619 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { 7620 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, 7621 DAG.getFreeze(TrueV), FalseV); 7622 } 7623 } 7624 7625 return SDValue(); 7626 } 7627 7628 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants 7629 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. 7630 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up 7631 // being `0` or `-1`. In such cases we can replace `select` with `and`. 7632 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize 7633 // than `c0`? 
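// For example (illustrative), `(and (select cond, x, 0xff00), 0xff)` becomes
// `(select cond, (and x, 0xff), 0)`; since the false arm is now zero, the
// remaining select can be lowered branchlessly (e.g. via czero.eqz or an AND
// with the condition mask, see combineSelectToBinOp).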
7634 static SDValue 7635 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, 7636 const RISCVSubtarget &Subtarget) { 7637 if (Subtarget.hasShortForwardBranchOpt()) 7638 return SDValue(); 7639 7640 unsigned SelOpNo = 0; 7641 SDValue Sel = BO->getOperand(0); 7642 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { 7643 SelOpNo = 1; 7644 Sel = BO->getOperand(1); 7645 } 7646 7647 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) 7648 return SDValue(); 7649 7650 unsigned ConstSelOpNo = 1; 7651 unsigned OtherSelOpNo = 2; 7652 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) { 7653 ConstSelOpNo = 2; 7654 OtherSelOpNo = 1; 7655 } 7656 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo); 7657 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp); 7658 if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) 7659 return SDValue(); 7660 7661 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1); 7662 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp); 7663 if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) 7664 return SDValue(); 7665 7666 SDLoc DL(Sel); 7667 EVT VT = BO->getValueType(0); 7668 7669 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; 7670 if (SelOpNo == 1) 7671 std::swap(NewConstOps[0], NewConstOps[1]); 7672 7673 SDValue NewConstOp = 7674 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps); 7675 if (!NewConstOp) 7676 return SDValue(); 7677 7678 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); 7679 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) 7680 return SDValue(); 7681 7682 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo); 7683 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; 7684 if (SelOpNo == 1) 7685 std::swap(NewNonConstOps[0], NewNonConstOps[1]); 7686 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps); 7687 7688 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; 7689 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp; 7690 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF); 7691 } 7692 7693 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 7694 SDValue CondV = Op.getOperand(0); 7695 SDValue TrueV = Op.getOperand(1); 7696 SDValue FalseV = Op.getOperand(2); 7697 SDLoc DL(Op); 7698 MVT VT = Op.getSimpleValueType(); 7699 MVT XLenVT = Subtarget.getXLenVT(); 7700 7701 // Lower vector SELECTs to VSELECTs by splatting the condition. 7702 if (VT.isVector()) { 7703 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 7704 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); 7705 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 7706 } 7707 7708 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ 7709 // nodes to implement the SELECT. Performing the lowering here allows for 7710 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless 7711 // sequence or RISCVISD::SELECT_CC node (branch-based select). 
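  // For instance, with Zicond a scalar (select c, t, 0) is expected to lower
  // to a single czero.eqz rd, t, c (rd = c ? t : 0), with no branch at all.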
7712 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && 7713 VT.isScalarInteger()) { 7714 // (select c, t, 0) -> (czero_eqz t, c) 7715 if (isNullConstant(FalseV)) 7716 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); 7717 // (select c, 0, f) -> (czero_nez f, c) 7718 if (isNullConstant(TrueV)) 7719 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV); 7720 7721 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) 7722 if (TrueV.getOpcode() == ISD::AND && 7723 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) 7724 return DAG.getNode( 7725 ISD::OR, DL, VT, TrueV, 7726 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7727 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) 7728 if (FalseV.getOpcode() == ISD::AND && 7729 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) 7730 return DAG.getNode( 7731 ISD::OR, DL, VT, FalseV, 7732 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); 7733 7734 // Try some other optimizations before falling back to generic lowering. 7735 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7736 return V; 7737 7738 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1) 7739 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2) 7740 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) { 7741 const APInt &TrueVal = TrueV->getAsAPIntVal(); 7742 const APInt &FalseVal = FalseV->getAsAPIntVal(); 7743 const int TrueValCost = RISCVMatInt::getIntMatCost( 7744 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); 7745 const int FalseValCost = RISCVMatInt::getIntMatCost( 7746 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); 7747 bool IsCZERO_NEZ = TrueValCost <= FalseValCost; 7748 SDValue LHSVal = DAG.getConstant( 7749 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); 7750 SDValue RHSVal = 7751 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); 7752 SDValue CMOV = 7753 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, 7754 DL, VT, LHSVal, CondV); 7755 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal); 7756 } 7757 7758 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) 7759 // Unless we have the short forward branch optimization. 7760 if (!Subtarget.hasConditionalMoveFusion()) 7761 return DAG.getNode( 7762 ISD::OR, DL, VT, 7763 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), 7764 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7765 } 7766 7767 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7768 return V; 7769 7770 if (Op.hasOneUse()) { 7771 unsigned UseOpc = Op->use_begin()->getOpcode(); 7772 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { 7773 SDNode *BinOp = *Op->use_begin(); 7774 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(), 7775 DAG, Subtarget)) { 7776 DAG.ReplaceAllUsesWith(BinOp, &NewSel); 7777 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable 7778 // may return a constant node and cause crash in lowerSELECT. 
7779 if (NewSel.getOpcode() == ISD::SELECT)
7780 return lowerSELECT(NewSel, DAG);
7781 return NewSel;
7782 }
7783 }
7784 }
7785
7786 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7787 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7788 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7789 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7790 if (FPTV && FPFV) {
7791 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7792 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7793 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7794 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7795 DAG.getConstant(1, DL, XLenVT));
7796 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7797 }
7798 }
7799
7800 // If the condition is not an integer SETCC which operates on XLenVT, we need
7801 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7802 // (select condv, truev, falsev)
7803 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7804 if (CondV.getOpcode() != ISD::SETCC ||
7805 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7806 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7807 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7808
7809 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7810
7811 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7812 }
7813
7814 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7815 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7816 // advantage of the integer compare+branch instructions. i.e.:
7817 // (select (setcc lhs, rhs, cc), truev, falsev)
7818 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7819 SDValue LHS = CondV.getOperand(0);
7820 SDValue RHS = CondV.getOperand(1);
7821 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7822
7823 // Special case for a select of 2 constants that have a difference of 1.
7824 // Normally this is done by DAGCombine, but if the select is introduced by
7825 // type legalization or op legalization, we miss it. Restricting to SETLT
7826 // case for now because that is what signed saturating add/sub need.
7827 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7828 // but we would probably want to swap the true/false values if the condition
7829 // is SETGE/SETLE to avoid an XORI.
7830 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7831 CCVal == ISD::SETLT) {
7832 const APInt &TrueVal = TrueV->getAsAPIntVal();
7833 const APInt &FalseVal = FalseV->getAsAPIntVal();
7834 if (TrueVal - 1 == FalseVal)
7835 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7836 if (TrueVal + 1 == FalseVal)
7837 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7838 }
7839
7840 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7841 // 1 < x ? x : 1 -> 0 < x ? x : 1
7842 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7843 RHS == TrueV && LHS == FalseV) {
7844 LHS = DAG.getConstant(0, DL, VT);
7845 // 0 <u x is the same as x != 0.
7846 if (CCVal == ISD::SETULT) {
7847 std::swap(LHS, RHS);
7848 CCVal = ISD::SETNE;
7849 }
7850 }
7851
7852 // x <s -1 ? x : -1 -> x <s 0 ?
x : -1 7853 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && 7854 RHS == FalseV) { 7855 RHS = DAG.getConstant(0, DL, VT); 7856 } 7857 7858 SDValue TargetCC = DAG.getCondCode(CCVal); 7859 7860 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { 7861 // (select (setcc lhs, rhs, CC), constant, falsev) 7862 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) 7863 std::swap(TrueV, FalseV); 7864 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); 7865 } 7866 7867 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 7868 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 7869 } 7870 7871 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 7872 SDValue CondV = Op.getOperand(1); 7873 SDLoc DL(Op); 7874 MVT XLenVT = Subtarget.getXLenVT(); 7875 7876 if (CondV.getOpcode() == ISD::SETCC && 7877 CondV.getOperand(0).getValueType() == XLenVT) { 7878 SDValue LHS = CondV.getOperand(0); 7879 SDValue RHS = CondV.getOperand(1); 7880 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7881 7882 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7883 7884 SDValue TargetCC = DAG.getCondCode(CCVal); 7885 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7886 LHS, RHS, TargetCC, Op.getOperand(2)); 7887 } 7888 7889 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7890 CondV, DAG.getConstant(0, DL, XLenVT), 7891 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 7892 } 7893 7894 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 7895 MachineFunction &MF = DAG.getMachineFunction(); 7896 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 7897 7898 SDLoc DL(Op); 7899 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 7900 getPointerTy(MF.getDataLayout())); 7901 7902 // vastart just stores the address of the VarArgsFrameIndex slot into the 7903 // memory location argument. 
7904 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 7905 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 7906 MachinePointerInfo(SV)); 7907 } 7908 7909 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 7910 SelectionDAG &DAG) const { 7911 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7912 MachineFunction &MF = DAG.getMachineFunction(); 7913 MachineFrameInfo &MFI = MF.getFrameInfo(); 7914 MFI.setFrameAddressIsTaken(true); 7915 Register FrameReg = RI.getFrameRegister(MF); 7916 int XLenInBytes = Subtarget.getXLen() / 8; 7917 7918 EVT VT = Op.getValueType(); 7919 SDLoc DL(Op); 7920 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 7921 unsigned Depth = Op.getConstantOperandVal(0); 7922 while (Depth--) { 7923 int Offset = -(XLenInBytes * 2); 7924 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 7925 DAG.getIntPtrConstant(Offset, DL)); 7926 FrameAddr = 7927 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 7928 } 7929 return FrameAddr; 7930 } 7931 7932 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 7933 SelectionDAG &DAG) const { 7934 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7935 MachineFunction &MF = DAG.getMachineFunction(); 7936 MachineFrameInfo &MFI = MF.getFrameInfo(); 7937 MFI.setReturnAddressIsTaken(true); 7938 MVT XLenVT = Subtarget.getXLenVT(); 7939 int XLenInBytes = Subtarget.getXLen() / 8; 7940 7941 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 7942 return SDValue(); 7943 7944 EVT VT = Op.getValueType(); 7945 SDLoc DL(Op); 7946 unsigned Depth = Op.getConstantOperandVal(0); 7947 if (Depth) { 7948 int Off = -XLenInBytes; 7949 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 7950 SDValue Offset = DAG.getConstant(Off, DL, VT); 7951 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 7952 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 7953 MachinePointerInfo()); 7954 } 7955 7956 // Return the value of the return address register, marking it an implicit 7957 // live-in. 
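  // For example, __builtin_return_address(0) reduces to a plain copy from ra
  // (x1), while non-zero depths are loaded off the frame chain walked above.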
7958 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 7959 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 7960 } 7961 7962 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 7963 SelectionDAG &DAG) const { 7964 SDLoc DL(Op); 7965 SDValue Lo = Op.getOperand(0); 7966 SDValue Hi = Op.getOperand(1); 7967 SDValue Shamt = Op.getOperand(2); 7968 EVT VT = Lo.getValueType(); 7969 7970 // if Shamt-XLEN < 0: // Shamt < XLEN 7971 // Lo = Lo << Shamt 7972 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 7973 // else: 7974 // Lo = 0 7975 // Hi = Lo << (Shamt-XLEN) 7976 7977 SDValue Zero = DAG.getConstant(0, DL, VT); 7978 SDValue One = DAG.getConstant(1, DL, VT); 7979 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 7980 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 7981 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 7982 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 7983 7984 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 7985 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 7986 SDValue ShiftRightLo = 7987 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 7988 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 7989 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 7990 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 7991 7992 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 7993 7994 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 7995 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 7996 7997 SDValue Parts[2] = {Lo, Hi}; 7998 return DAG.getMergeValues(Parts, DL); 7999 } 8000 8001 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 8002 bool IsSRA) const { 8003 SDLoc DL(Op); 8004 SDValue Lo = Op.getOperand(0); 8005 SDValue Hi = Op.getOperand(1); 8006 SDValue Shamt = Op.getOperand(2); 8007 EVT VT = Lo.getValueType(); 8008 8009 // SRA expansion: 8010 // if Shamt-XLEN < 0: // Shamt < XLEN 8011 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 8012 // Hi = Hi >>s Shamt 8013 // else: 8014 // Lo = Hi >>s (Shamt-XLEN); 8015 // Hi = Hi >>s (XLEN-1) 8016 // 8017 // SRL expansion: 8018 // if Shamt-XLEN < 0: // Shamt < XLEN 8019 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 8020 // Hi = Hi >>u Shamt 8021 // else: 8022 // Lo = Hi >>u (Shamt-XLEN); 8023 // Hi = 0; 8024 8025 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 8026 8027 SDValue Zero = DAG.getConstant(0, DL, VT); 8028 SDValue One = DAG.getConstant(1, DL, VT); 8029 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 8030 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 8031 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 8032 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 8033 8034 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 8035 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 8036 SDValue ShiftLeftHi = 8037 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 8038 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 8039 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 8040 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 8041 SDValue HiFalse = 8042 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 8043 8044 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 8045 8046 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 8047 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 8048 8049 SDValue Parts[2] = {Lo, Hi}; 8050 return DAG.getMergeValues(Parts, DL); 8051 } 8052 8053 // Lower splats of i1 types to SETCC. For each mask vector type, we have a 8054 // legal equivalently-sized i8 type, so we can use that as a go-between. 8055 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 8056 SelectionDAG &DAG) const { 8057 SDLoc DL(Op); 8058 MVT VT = Op.getSimpleValueType(); 8059 SDValue SplatVal = Op.getOperand(0); 8060 // All-zeros or all-ones splats are handled specially. 8061 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 8062 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 8063 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 8064 } 8065 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 8066 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 8067 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 8068 } 8069 MVT InterVT = VT.changeVectorElementType(MVT::i8); 8070 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal, 8071 DAG.getConstant(1, DL, SplatVal.getValueType())); 8072 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 8073 SDValue Zero = DAG.getConstant(0, DL, InterVT); 8074 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 8075 } 8076 8077 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 8078 // illegal (currently only vXi64 RV32). 8079 // FIXME: We could also catch non-constant sign-extended i32 values and lower 8080 // them to VMV_V_X_VL. 8081 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 8082 SelectionDAG &DAG) const { 8083 SDLoc DL(Op); 8084 MVT VecVT = Op.getSimpleValueType(); 8085 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 8086 "Unexpected SPLAT_VECTOR_PARTS lowering"); 8087 8088 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 8089 SDValue Lo = Op.getOperand(0); 8090 SDValue Hi = Op.getOperand(1); 8091 8092 MVT ContainerVT = VecVT; 8093 if (VecVT.isFixedLengthVector()) 8094 ContainerVT = getContainerForFixedLengthVector(VecVT); 8095 8096 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 8097 8098 SDValue Res = 8099 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); 8100 8101 if (VecVT.isFixedLengthVector()) 8102 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget); 8103 8104 return Res; 8105 } 8106 8107 // Custom-lower extensions from mask vectors by using a vselect either with 1 8108 // for zero/any-extension or -1 for sign-extension: 8109 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 8110 // Note that any-extension is lowered identically to zero-extension. 
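// For example (illustrative), (nxv4i32 (sext nxv4i1 %m)) becomes
// (vselect %m, splat(-1), splat(0)), which is expected to select to a
// vmv.v.i of zero followed by a vmerge.vim of -1 under the mask.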
8111 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 8112 int64_t ExtTrueVal) const { 8113 SDLoc DL(Op); 8114 MVT VecVT = Op.getSimpleValueType(); 8115 SDValue Src = Op.getOperand(0); 8116 // Only custom-lower extensions from mask types 8117 assert(Src.getValueType().isVector() && 8118 Src.getValueType().getVectorElementType() == MVT::i1); 8119 8120 if (VecVT.isScalableVector()) { 8121 SDValue SplatZero = DAG.getConstant(0, DL, VecVT); 8122 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); 8123 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 8124 } 8125 8126 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 8127 MVT I1ContainerVT = 8128 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 8129 8130 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 8131 8132 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 8133 8134 MVT XLenVT = Subtarget.getXLenVT(); 8135 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 8136 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 8137 8138 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8139 DAG.getUNDEF(ContainerVT), SplatZero, VL); 8140 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8141 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); 8142 SDValue Select = 8143 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal, 8144 SplatZero, DAG.getUNDEF(ContainerVT), VL); 8145 8146 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 8147 } 8148 8149 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 8150 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 8151 MVT ExtVT = Op.getSimpleValueType(); 8152 // Only custom-lower extensions from fixed-length vector types. 8153 if (!ExtVT.isFixedLengthVector()) 8154 return Op; 8155 MVT VT = Op.getOperand(0).getSimpleValueType(); 8156 // Grab the canonical container type for the extended type. Infer the smaller 8157 // type from that to ensure the same number of vector elements, as we know 8158 // the LMUL will be sufficient to hold the smaller type. 8159 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 8160 // Get the extended container type manually to ensure the same number of 8161 // vector elements between source and dest. 
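  // For example (assuming Zvl128b), extending v4i8 to v4i32 uses nxv2i32 as
  // the extended container, so the source is placed in nxv2i8 (a fractional
  // LMUL type) to keep the element counts identical.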
8162 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 8163 ContainerExtVT.getVectorElementCount()); 8164 8165 SDValue Op1 = 8166 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 8167 8168 SDLoc DL(Op); 8169 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 8170 8171 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 8172 8173 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 8174 } 8175 8176 // Custom-lower truncations from vectors to mask vectors by using a mask and a 8177 // setcc operation: 8178 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 8179 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, 8180 SelectionDAG &DAG) const { 8181 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 8182 SDLoc DL(Op); 8183 EVT MaskVT = Op.getValueType(); 8184 // Only expect to custom-lower truncations to mask types 8185 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 8186 "Unexpected type for vector mask lowering"); 8187 SDValue Src = Op.getOperand(0); 8188 MVT VecVT = Src.getSimpleValueType(); 8189 SDValue Mask, VL; 8190 if (IsVPTrunc) { 8191 Mask = Op.getOperand(1); 8192 VL = Op.getOperand(2); 8193 } 8194 // If this is a fixed vector, we need to convert it to a scalable vector. 8195 MVT ContainerVT = VecVT; 8196 8197 if (VecVT.isFixedLengthVector()) { 8198 ContainerVT = getContainerForFixedLengthVector(VecVT); 8199 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 8200 if (IsVPTrunc) { 8201 MVT MaskContainerVT = 8202 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 8203 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 8204 } 8205 } 8206 8207 if (!IsVPTrunc) { 8208 std::tie(Mask, VL) = 8209 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 8210 } 8211 8212 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 8213 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 8214 8215 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8216 DAG.getUNDEF(ContainerVT), SplatOne, VL); 8217 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8218 DAG.getUNDEF(ContainerVT), SplatZero, VL); 8219 8220 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 8221 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, 8222 DAG.getUNDEF(ContainerVT), Mask, VL); 8223 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, 8224 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), 8225 DAG.getUNDEF(MaskContainerVT), Mask, VL}); 8226 if (MaskVT.isFixedLengthVector()) 8227 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 8228 return Trunc; 8229 } 8230 8231 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, 8232 SelectionDAG &DAG) const { 8233 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 8234 SDLoc DL(Op); 8235 8236 MVT VT = Op.getSimpleValueType(); 8237 // Only custom-lower vector truncates 8238 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 8239 8240 // Truncates to mask types are handled differently 8241 if (VT.getVectorElementType() == MVT::i1) 8242 return lowerVectorMaskTruncLike(Op, DAG); 8243 8244 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 8245 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 8246 // truncate by one power of two at a time. 
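  // For example (illustrative), truncating nxv2i64 to nxv2i8 emits three
  // TRUNCATE_VECTOR_VL steps (i64->i32->i16->i8), each of which is expected
  // to select to a vnsrl.wi with a zero shift amount.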
8247 MVT DstEltVT = VT.getVectorElementType(); 8248 8249 SDValue Src = Op.getOperand(0); 8250 MVT SrcVT = Src.getSimpleValueType(); 8251 MVT SrcEltVT = SrcVT.getVectorElementType(); 8252 8253 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && 8254 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 8255 "Unexpected vector truncate lowering"); 8256 8257 MVT ContainerVT = SrcVT; 8258 SDValue Mask, VL; 8259 if (IsVPTrunc) { 8260 Mask = Op.getOperand(1); 8261 VL = Op.getOperand(2); 8262 } 8263 if (SrcVT.isFixedLengthVector()) { 8264 ContainerVT = getContainerForFixedLengthVector(SrcVT); 8265 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 8266 if (IsVPTrunc) { 8267 MVT MaskVT = getMaskTypeFor(ContainerVT); 8268 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8269 } 8270 } 8271 8272 SDValue Result = Src; 8273 if (!IsVPTrunc) { 8274 std::tie(Mask, VL) = 8275 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 8276 } 8277 8278 LLVMContext &Context = *DAG.getContext(); 8279 const ElementCount Count = ContainerVT.getVectorElementCount(); 8280 do { 8281 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 8282 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 8283 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 8284 Mask, VL); 8285 } while (SrcEltVT != DstEltVT); 8286 8287 if (SrcVT.isFixedLengthVector()) 8288 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 8289 8290 return Result; 8291 } 8292 8293 SDValue 8294 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, 8295 SelectionDAG &DAG) const { 8296 SDLoc DL(Op); 8297 SDValue Chain = Op.getOperand(0); 8298 SDValue Src = Op.getOperand(1); 8299 MVT VT = Op.getSimpleValueType(); 8300 MVT SrcVT = Src.getSimpleValueType(); 8301 MVT ContainerVT = VT; 8302 if (VT.isFixedLengthVector()) { 8303 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 8304 ContainerVT = 8305 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 8306 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 8307 } 8308 8309 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 8310 8311 // RVV can only widen/truncate fp to types double/half the size as the source. 8312 if ((VT.getVectorElementType() == MVT::f64 && 8313 (SrcVT.getVectorElementType() == MVT::f16 || 8314 SrcVT.getVectorElementType() == MVT::bf16)) || 8315 ((VT.getVectorElementType() == MVT::f16 || 8316 VT.getVectorElementType() == MVT::bf16) && 8317 SrcVT.getVectorElementType() == MVT::f64)) { 8318 // For double rounding, the intermediate rounding should be round-to-odd. 8319 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 8320 ? RISCVISD::STRICT_FP_EXTEND_VL 8321 : RISCVISD::STRICT_VFNCVT_ROD_VL; 8322 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 8323 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), 8324 Chain, Src, Mask, VL); 8325 Chain = Src.getValue(1); 8326 } 8327 8328 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 8329 ? RISCVISD::STRICT_FP_EXTEND_VL 8330 : RISCVISD::STRICT_FP_ROUND_VL; 8331 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 8332 Chain, Src, Mask, VL); 8333 if (VT.isFixedLengthVector()) { 8334 // StrictFP operations have two result values. Their lowered result should 8335 // have same result count. 
8336 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 8337 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 8338 } 8339 return Res; 8340 } 8341 8342 SDValue 8343 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, 8344 SelectionDAG &DAG) const { 8345 bool IsVP = 8346 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; 8347 bool IsExtend = 8348 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; 8349 // RVV can only do truncate fp to types half the size as the source. We 8350 // custom-lower f64->f16 rounds via RVV's round-to-odd float 8351 // conversion instruction. 8352 SDLoc DL(Op); 8353 MVT VT = Op.getSimpleValueType(); 8354 8355 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 8356 8357 SDValue Src = Op.getOperand(0); 8358 MVT SrcVT = Src.getSimpleValueType(); 8359 8360 bool IsDirectExtend = 8361 IsExtend && (VT.getVectorElementType() != MVT::f64 || 8362 (SrcVT.getVectorElementType() != MVT::f16 && 8363 SrcVT.getVectorElementType() != MVT::bf16)); 8364 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 && 8365 VT.getVectorElementType() != MVT::bf16) || 8366 SrcVT.getVectorElementType() != MVT::f64); 8367 8368 bool IsDirectConv = IsDirectExtend || IsDirectTrunc; 8369 8370 // Prepare any fixed-length vector operands. 8371 MVT ContainerVT = VT; 8372 SDValue Mask, VL; 8373 if (IsVP) { 8374 Mask = Op.getOperand(1); 8375 VL = Op.getOperand(2); 8376 } 8377 if (VT.isFixedLengthVector()) { 8378 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 8379 ContainerVT = 8380 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 8381 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 8382 if (IsVP) { 8383 MVT MaskVT = getMaskTypeFor(ContainerVT); 8384 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8385 } 8386 } 8387 8388 if (!IsVP) 8389 std::tie(Mask, VL) = 8390 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 8391 8392 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; 8393 8394 if (IsDirectConv) { 8395 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); 8396 if (VT.isFixedLengthVector()) 8397 Src = convertFromScalableVector(VT, Src, DAG, Subtarget); 8398 return Src; 8399 } 8400 8401 unsigned InterConvOpc = 8402 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; 8403 8404 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 8405 SDValue IntermediateConv = 8406 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); 8407 SDValue Result = 8408 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); 8409 if (VT.isFixedLengthVector()) 8410 return convertFromScalableVector(VT, Result, DAG, Subtarget); 8411 return Result; 8412 } 8413 8414 // Given a scalable vector type and an index into it, returns the type for the 8415 // smallest subvector that the index fits in. This can be used to reduce LMUL 8416 // for operations like vslidedown. 8417 // 8418 // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. 
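// Likewise (illustrative), with Zvl128b an index of 5 into an nxv8i32 is
// known to lie within the first nxv4i32, since MinVLMAX is 4 and 5 < 2 * 4.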
8419 static std::optional<MVT> 8420 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, 8421 const RISCVSubtarget &Subtarget) { 8422 assert(VecVT.isScalableVector()); 8423 const unsigned EltSize = VecVT.getScalarSizeInBits(); 8424 const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 8425 const unsigned MinVLMAX = VectorBitsMin / EltSize; 8426 MVT SmallerVT; 8427 if (MaxIdx < MinVLMAX) 8428 SmallerVT = getLMUL1VT(VecVT); 8429 else if (MaxIdx < MinVLMAX * 2) 8430 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT(); 8431 else if (MaxIdx < MinVLMAX * 4) 8432 SmallerVT = getLMUL1VT(VecVT) 8433 .getDoubleNumVectorElementsVT() 8434 .getDoubleNumVectorElementsVT(); 8435 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT)) 8436 return std::nullopt; 8437 return SmallerVT; 8438 } 8439 8440 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 8441 // first position of a vector, and that vector is slid up to the insert index. 8442 // By limiting the active vector length to index+1 and merging with the 8443 // original vector (with an undisturbed tail policy for elements >= VL), we 8444 // achieve the desired result of leaving all elements untouched except the one 8445 // at VL-1, which is replaced with the desired value. 8446 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 8447 SelectionDAG &DAG) const { 8448 SDLoc DL(Op); 8449 MVT VecVT = Op.getSimpleValueType(); 8450 SDValue Vec = Op.getOperand(0); 8451 SDValue Val = Op.getOperand(1); 8452 SDValue Idx = Op.getOperand(2); 8453 8454 if (VecVT.getVectorElementType() == MVT::i1) { 8455 // FIXME: For now we just promote to an i8 vector and insert into that, 8456 // but this is probably not optimal. 8457 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 8458 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 8459 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 8460 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 8461 } 8462 8463 MVT ContainerVT = VecVT; 8464 // If the operand is a fixed-length vector, convert to a scalable one. 8465 if (VecVT.isFixedLengthVector()) { 8466 ContainerVT = getContainerForFixedLengthVector(VecVT); 8467 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8468 } 8469 8470 // If we know the index we're going to insert at, we can shrink Vec so that 8471 // we're performing the scalar inserts and slideup on a smaller LMUL. 8472 MVT OrigContainerVT = ContainerVT; 8473 SDValue OrigVec = Vec; 8474 SDValue AlignedIdx; 8475 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) { 8476 const unsigned OrigIdx = IdxC->getZExtValue(); 8477 // Do we know an upper bound on LMUL? 8478 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx, 8479 DL, DAG, Subtarget)) { 8480 ContainerVT = *ShrunkVT; 8481 AlignedIdx = DAG.getVectorIdxConstant(0, DL); 8482 } 8483 8484 // If we're compiling for an exact VLEN value, we can always perform 8485 // the insert in m1 as we can determine the register corresponding to 8486 // the index in the register group. 
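    // For example (illustrative), with an exact VLEN of 128, inserting into
    // element 5 of a v8i32 (an m2 register group) only needs the second m1
    // register; the index becomes 5 % 4 == 1 within that register.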
8487 const MVT M1VT = getLMUL1VT(ContainerVT); 8488 if (auto VLEN = Subtarget.getRealVLen(); 8489 VLEN && ContainerVT.bitsGT(M1VT)) { 8490 EVT ElemVT = VecVT.getVectorElementType(); 8491 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits(); 8492 unsigned RemIdx = OrigIdx % ElemsPerVReg; 8493 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 8494 unsigned ExtractIdx = 8495 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 8496 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL); 8497 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 8498 ContainerVT = M1VT; 8499 } 8500 8501 if (AlignedIdx) 8502 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 8503 AlignedIdx); 8504 } 8505 8506 MVT XLenVT = Subtarget.getXLenVT(); 8507 8508 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 8509 // Even i64-element vectors on RV32 can be lowered without scalar 8510 // legalization if the most-significant 32 bits of the value are not affected 8511 // by the sign-extension of the lower 32 bits. 8512 // TODO: We could also catch sign extensions of a 32-bit value. 8513 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 8514 const auto *CVal = cast<ConstantSDNode>(Val); 8515 if (isInt<32>(CVal->getSExtValue())) { 8516 IsLegalInsert = true; 8517 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 8518 } 8519 } 8520 8521 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 8522 8523 SDValue ValInVec; 8524 8525 if (IsLegalInsert) { 8526 unsigned Opc = 8527 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 8528 if (isNullConstant(Idx)) { 8529 if (!VecVT.isFloatingPoint()) 8530 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val); 8531 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 8532 8533 if (AlignedIdx) 8534 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8535 Vec, AlignedIdx); 8536 if (!VecVT.isFixedLengthVector()) 8537 return Vec; 8538 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 8539 } 8540 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); 8541 } else { 8542 // On RV32, i64-element vectors must be specially handled to place the 8543 // value at element 0, by using two vslide1down instructions in sequence on 8544 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 8545 // this. 8546 SDValue ValLo, ValHi; 8547 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); 8548 MVT I32ContainerVT = 8549 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 8550 SDValue I32Mask = 8551 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 8552 // Limit the active VL to two. 8553 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 8554 // If the Idx is 0 we can insert directly into the vector. 8555 if (isNullConstant(Idx)) { 8556 // First slide in the lo value, then the hi in above it. We use slide1down 8557 // to avoid the register group overlap constraint of vslide1up. 8558 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8559 Vec, Vec, ValLo, I32Mask, InsertI64VL); 8560 // If the source vector is undef don't pass along the tail elements from 8561 // the previous slide1down. 8562 SDValue Tail = Vec.isUndef() ? Vec : ValInVec; 8563 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8564 Tail, ValInVec, ValHi, I32Mask, InsertI64VL); 8565 // Bitcast back to the right container type. 
8566 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8567 8568 if (AlignedIdx) 8569 ValInVec = 8570 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8571 ValInVec, AlignedIdx); 8572 if (!VecVT.isFixedLengthVector()) 8573 return ValInVec; 8574 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); 8575 } 8576 8577 // First slide in the lo value, then the hi in above it. We use slide1down 8578 // to avoid the register group overlap constraint of vslide1up. 8579 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8580 DAG.getUNDEF(I32ContainerVT), 8581 DAG.getUNDEF(I32ContainerVT), ValLo, 8582 I32Mask, InsertI64VL); 8583 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8584 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, 8585 I32Mask, InsertI64VL); 8586 // Bitcast back to the right container type. 8587 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8588 } 8589 8590 // Now that the value is in a vector, slide it into position. 8591 SDValue InsertVL = 8592 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 8593 8594 // Use tail agnostic policy if Idx is the last index of Vec. 8595 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 8596 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && 8597 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) 8598 Policy = RISCVII::TAIL_AGNOSTIC; 8599 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, 8600 Idx, Mask, InsertVL, Policy); 8601 8602 if (AlignedIdx) 8603 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8604 Slideup, AlignedIdx); 8605 if (!VecVT.isFixedLengthVector()) 8606 return Slideup; 8607 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 8608 } 8609 8610 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 8611 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 8612 // types this is done using VMV_X_S to allow us to glean information about the 8613 // sign bits of the result. 8614 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 8615 SelectionDAG &DAG) const { 8616 SDLoc DL(Op); 8617 SDValue Idx = Op.getOperand(1); 8618 SDValue Vec = Op.getOperand(0); 8619 EVT EltVT = Op.getValueType(); 8620 MVT VecVT = Vec.getSimpleValueType(); 8621 MVT XLenVT = Subtarget.getXLenVT(); 8622 8623 if (VecVT.getVectorElementType() == MVT::i1) { 8624 // Use vfirst.m to extract the first bit. 
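    // vfirst.m returns the index of the first set mask element, or -1 if the
    // mask is all zeros, so comparing its result against 0 recovers exactly
    // the value of element 0.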
8625 if (isNullConstant(Idx)) { 8626 MVT ContainerVT = VecVT; 8627 if (VecVT.isFixedLengthVector()) { 8628 ContainerVT = getContainerForFixedLengthVector(VecVT); 8629 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8630 } 8631 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 8632 SDValue Vfirst = 8633 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); 8634 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst, 8635 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); 8636 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8637 } 8638 if (VecVT.isFixedLengthVector()) { 8639 unsigned NumElts = VecVT.getVectorNumElements(); 8640 if (NumElts >= 8) { 8641 MVT WideEltVT; 8642 unsigned WidenVecLen; 8643 SDValue ExtractElementIdx; 8644 SDValue ExtractBitIdx; 8645 unsigned MaxEEW = Subtarget.getELen(); 8646 MVT LargestEltVT = MVT::getIntegerVT( 8647 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); 8648 if (NumElts <= LargestEltVT.getSizeInBits()) { 8649 assert(isPowerOf2_32(NumElts) && 8650 "the number of elements should be power of 2"); 8651 WideEltVT = MVT::getIntegerVT(NumElts); 8652 WidenVecLen = 1; 8653 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); 8654 ExtractBitIdx = Idx; 8655 } else { 8656 WideEltVT = LargestEltVT; 8657 WidenVecLen = NumElts / WideEltVT.getSizeInBits(); 8658 // extract element index = index / element width 8659 ExtractElementIdx = DAG.getNode( 8660 ISD::SRL, DL, XLenVT, Idx, 8661 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); 8662 // mask bit index = index % element width 8663 ExtractBitIdx = DAG.getNode( 8664 ISD::AND, DL, XLenVT, Idx, 8665 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); 8666 } 8667 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); 8668 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); 8669 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, 8670 Vec, ExtractElementIdx); 8671 // Extract the bit from GPR. 8672 SDValue ShiftRight = 8673 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); 8674 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, 8675 DAG.getConstant(1, DL, XLenVT)); 8676 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8677 } 8678 } 8679 // Otherwise, promote to an i8 vector and extract from that. 8680 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 8681 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 8682 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 8683 } 8684 8685 // If this is a fixed vector, we need to convert it to a scalable vector. 8686 MVT ContainerVT = VecVT; 8687 if (VecVT.isFixedLengthVector()) { 8688 ContainerVT = getContainerForFixedLengthVector(VecVT); 8689 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8690 } 8691 8692 // If we're compiling for an exact VLEN value and we have a known 8693 // constant index, we can always perform the extract in m1 (or 8694 // smaller) as we can determine the register corresponding to 8695 // the index in the register group. 
8696 const auto VLen = Subtarget.getRealVLen(); 8697 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx); 8698 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) { 8699 MVT M1VT = getLMUL1VT(ContainerVT); 8700 unsigned OrigIdx = IdxC->getZExtValue(); 8701 EVT ElemVT = VecVT.getVectorElementType(); 8702 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); 8703 unsigned RemIdx = OrigIdx % ElemsPerVReg; 8704 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 8705 unsigned ExtractIdx = 8706 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 8707 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 8708 DAG.getVectorIdxConstant(ExtractIdx, DL)); 8709 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 8710 ContainerVT = M1VT; 8711 } 8712 8713 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which 8714 // contains our index. 8715 std::optional<uint64_t> MaxIdx; 8716 if (VecVT.isFixedLengthVector()) 8717 MaxIdx = VecVT.getVectorNumElements() - 1; 8718 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) 8719 MaxIdx = IdxC->getZExtValue(); 8720 if (MaxIdx) { 8721 if (auto SmallerVT = 8722 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) { 8723 ContainerVT = *SmallerVT; 8724 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 8725 DAG.getConstant(0, DL, XLenVT)); 8726 } 8727 } 8728 8729 // If after narrowing, the required slide is still greater than LMUL2, 8730 // fallback to generic expansion and go through the stack. This is done 8731 // for a subtle reason: extracting *all* elements out of a vector is 8732 // widely expected to be linear in vector size, but because vslidedown 8733 // is linear in LMUL, performing N extracts using vslidedown becomes 8734 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack 8735 // seems to have the same problem (the store is linear in LMUL), but the 8736 // generic expansion *memoizes* the store, and thus for many extracts of 8737 // the same vector we end up with one store and a bunch of loads. 8738 // TODO: We don't have the same code for insert_vector_elt because we 8739 // have BUILD_VECTOR and handle the degenerate case there. Should we 8740 // consider adding an inverse BUILD_VECTOR node? 8741 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT(); 8742 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector()) 8743 return SDValue(); 8744 8745 // If the index is 0, the vector is already in the right position. 8746 if (!isNullConstant(Idx)) { 8747 // Use a VL of 1 to avoid processing more elements than we need. 8748 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 8749 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 8750 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 8751 } 8752 8753 if (!EltVT.isInteger()) { 8754 // Floating-point extracts are handled in TableGen. 8755 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 8756 DAG.getVectorIdxConstant(0, DL)); 8757 } 8758 8759 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 8760 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 8761 } 8762 8763 // Some RVV intrinsics may claim that they want an integer operand to be 8764 // promoted or expanded. 
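// For example (illustrative), on RV32 the i64 scalar operand of an SEW=64
// llvm.riscv.vadd intrinsic is not a legal scalar type: it is truncated to
// i32 when it is known to be sign-extended, and otherwise needs the special
// handling below.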
8765 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, 8766 const RISCVSubtarget &Subtarget) { 8767 assert((Op.getOpcode() == ISD::INTRINSIC_VOID || 8768 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 8769 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 8770 "Unexpected opcode"); 8771 8772 if (!Subtarget.hasVInstructions()) 8773 return SDValue(); 8774 8775 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || 8776 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 8777 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 8778 8779 SDLoc DL(Op); 8780 8781 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 8782 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 8783 if (!II || !II->hasScalarOperand()) 8784 return SDValue(); 8785 8786 unsigned SplatOp = II->ScalarOperand + 1 + HasChain; 8787 assert(SplatOp < Op.getNumOperands()); 8788 8789 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 8790 SDValue &ScalarOp = Operands[SplatOp]; 8791 MVT OpVT = ScalarOp.getSimpleValueType(); 8792 MVT XLenVT = Subtarget.getXLenVT(); 8793 8794 // If this isn't a scalar, or its type is XLenVT we're done. 8795 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 8796 return SDValue(); 8797 8798 // Simplest case is that the operand needs to be promoted to XLenVT. 8799 if (OpVT.bitsLT(XLenVT)) { 8800 // If the operand is a constant, sign extend to increase our chances 8801 // of being able to use a .vi instruction. ANY_EXTEND would become a 8802 // a zero extend and the simm5 check in isel would fail. 8803 // FIXME: Should we ignore the upper bits in isel instead? 8804 unsigned ExtOpc = 8805 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 8806 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 8807 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 8808 } 8809 8810 // Use the previous operand to get the vXi64 VT. The result might be a mask 8811 // VT for compares. Using the previous operand assumes that the previous 8812 // operand will never have a smaller element size than a scalar operand and 8813 // that a widening operation never uses SEW=64. 8814 // NOTE: If this fails the below assert, we can probably just find the 8815 // element count from any operand or result and use it to construct the VT. 8816 assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); 8817 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); 8818 8819 // The more complex case is when the scalar is larger than XLenVT. 8820 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 8821 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 8822 8823 // If this is a sign-extended 32-bit value, we can truncate it and rely on the 8824 // instruction to sign-extend since SEW>XLEN. 8825 if (DAG.ComputeNumSignBits(ScalarOp) > 32) { 8826 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); 8827 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 8828 } 8829 8830 switch (IntNo) { 8831 case Intrinsic::riscv_vslide1up: 8832 case Intrinsic::riscv_vslide1down: 8833 case Intrinsic::riscv_vslide1up_mask: 8834 case Intrinsic::riscv_vslide1down_mask: { 8835 // We need to special case these when the scalar is larger than XLen. 8836 unsigned NumOps = Op.getNumOperands(); 8837 bool IsMasked = NumOps == 7; 8838 8839 // Convert the vector source to the equivalent nxvXi32 vector. 
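    // For example (a sketch of the intended result, not literal output): an
    // i64 vslide1down.vx on nxv2i64 under RV32 is rewritten on nxv4i32 as
    //   vslide1down.vx vTmp, vSrc, lo   ; SEW=32, VL doubled
    //   vslide1down.vx vDst, vTmp, hi
    // so the two 32-bit halves of the scalar end up forming one SEW=64
    // element of the original vector.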
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for a constant AVL.
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);

      uint64_t AVLInt = AVL->getAsZExtVal();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set VL to VLMAX in this situation.
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For an AVL in (MinVLMAX, 2 * MaxVLMAX) the actual working VL
        // depends on the hardware implementation, so let the non-constant
        // path below query it with vsetvli.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Use a vsetvli instruction to query the length actually granted by the
      // hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply the mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume the policy operand is the last operand.
    uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                         DAG.getUNDEF(VT), AVL);
    // TUMA or TUMU: currently we always emit the tumu policy regardless of
    // tuma. This is fine because vmerge does not care about the mask policy.
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       MaskedOff, AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  [[maybe_unused]] unsigned MinVF =
      RISCV::RVVBitsPerBlock / Subtarget.getELen();

  [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ?
LMul1VF / VF : VF / LMul1VF; 8972 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); 8973 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); 8974 8975 SDLoc DL(N); 8976 8977 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); 8978 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); 8979 8980 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); 8981 8982 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); 8983 SDValue Res = 8984 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); 8985 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); 8986 } 8987 8988 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, 8989 const RISCVSubtarget &Subtarget) { 8990 SDValue Op0 = N->getOperand(1); 8991 MVT OpVT = Op0.getSimpleValueType(); 8992 MVT ContainerVT = OpVT; 8993 if (OpVT.isFixedLengthVector()) { 8994 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget); 8995 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); 8996 } 8997 MVT XLenVT = Subtarget.getXLenVT(); 8998 SDLoc DL(N); 8999 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget); 9000 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL); 9001 if (isOneConstant(N->getOperand(2))) 9002 return Res; 9003 9004 // Convert -1 to VL. 9005 SDValue Setcc = 9006 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT); 9007 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount()); 9008 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res); 9009 } 9010 9011 static inline void promoteVCIXScalar(const SDValue &Op, 9012 SmallVectorImpl<SDValue> &Operands, 9013 SelectionDAG &DAG) { 9014 const RISCVSubtarget &Subtarget = 9015 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); 9016 9017 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || 9018 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 9019 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 9020 SDLoc DL(Op); 9021 9022 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 9023 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 9024 if (!II || !II->hasScalarOperand()) 9025 return; 9026 9027 unsigned SplatOp = II->ScalarOperand + 1; 9028 assert(SplatOp < Op.getNumOperands()); 9029 9030 SDValue &ScalarOp = Operands[SplatOp]; 9031 MVT OpVT = ScalarOp.getSimpleValueType(); 9032 MVT XLenVT = Subtarget.getXLenVT(); 9033 9034 // The code below is partially copied from lowerVectorIntrinsicScalars. 9035 // If this isn't a scalar, or its type is XLenVT we're done. 9036 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 9037 return; 9038 9039 // Manually emit promote operation for scalar operation. 9040 if (OpVT.bitsLT(XLenVT)) { 9041 unsigned ExtOpc = 9042 isa<ConstantSDNode>(ScalarOp) ? 
ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 9043 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 9044 } 9045 9046 return; 9047 } 9048 9049 static void processVCIXOperands(SDValue &OrigOp, 9050 SmallVectorImpl<SDValue> &Operands, 9051 SelectionDAG &DAG) { 9052 promoteVCIXScalar(OrigOp, Operands, DAG); 9053 const RISCVSubtarget &Subtarget = 9054 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); 9055 for (SDValue &V : Operands) { 9056 EVT ValType = V.getValueType(); 9057 if (ValType.isVector() && ValType.isFloatingPoint()) { 9058 MVT InterimIVT = 9059 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()), 9060 ValType.getVectorElementCount()); 9061 V = DAG.getBitcast(InterimIVT, V); 9062 } 9063 if (ValType.isFixedLengthVector()) { 9064 MVT OpContainerVT = getContainerForFixedLengthVector( 9065 DAG, V.getSimpleValueType(), Subtarget); 9066 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget); 9067 } 9068 } 9069 } 9070 9071 // LMUL * VLEN should be greater than or equal to EGS * SEW 9072 static inline bool isValidEGW(int EGS, EVT VT, 9073 const RISCVSubtarget &Subtarget) { 9074 return (Subtarget.getRealMinVLen() * 9075 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= 9076 EGS * VT.getScalarSizeInBits(); 9077 } 9078 9079 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 9080 SelectionDAG &DAG) const { 9081 unsigned IntNo = Op.getConstantOperandVal(0); 9082 SDLoc DL(Op); 9083 MVT XLenVT = Subtarget.getXLenVT(); 9084 9085 switch (IntNo) { 9086 default: 9087 break; // Don't custom lower most intrinsics. 9088 case Intrinsic::thread_pointer: { 9089 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 9090 return DAG.getRegister(RISCV::X4, PtrVT); 9091 } 9092 case Intrinsic::riscv_orc_b: 9093 case Intrinsic::riscv_brev8: 9094 case Intrinsic::riscv_sha256sig0: 9095 case Intrinsic::riscv_sha256sig1: 9096 case Intrinsic::riscv_sha256sum0: 9097 case Intrinsic::riscv_sha256sum1: 9098 case Intrinsic::riscv_sm3p0: 9099 case Intrinsic::riscv_sm3p1: { 9100 unsigned Opc; 9101 switch (IntNo) { 9102 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 9103 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 9104 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 9105 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 9106 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 9107 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 9108 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 9109 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 9110 } 9111 9112 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9113 SDValue NewOp = 9114 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9115 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 9116 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9117 } 9118 9119 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 9120 } 9121 case Intrinsic::riscv_sm4ks: 9122 case Intrinsic::riscv_sm4ed: { 9123 unsigned Opc = 9124 IntNo == Intrinsic::riscv_sm4ks ? 
RISCVISD::SM4KS : RISCVISD::SM4ED; 9125 9126 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9127 SDValue NewOp0 = 9128 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9129 SDValue NewOp1 = 9130 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 9131 SDValue Res = 9132 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); 9133 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9134 } 9135 9136 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), 9137 Op.getOperand(3)); 9138 } 9139 case Intrinsic::riscv_zip: 9140 case Intrinsic::riscv_unzip: { 9141 unsigned Opc = 9142 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; 9143 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 9144 } 9145 case Intrinsic::riscv_mopr: { 9146 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9147 SDValue NewOp = 9148 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9149 SDValue Res = DAG.getNode( 9150 RISCVISD::MOPR, DL, MVT::i64, NewOp, 9151 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64)); 9152 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9153 } 9154 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1), 9155 Op.getOperand(2)); 9156 } 9157 9158 case Intrinsic::riscv_moprr: { 9159 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9160 SDValue NewOp0 = 9161 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9162 SDValue NewOp1 = 9163 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 9164 SDValue Res = DAG.getNode( 9165 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, 9166 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64)); 9167 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9168 } 9169 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1), 9170 Op.getOperand(2), Op.getOperand(3)); 9171 } 9172 case Intrinsic::riscv_clmul: 9173 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9174 SDValue NewOp0 = 9175 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9176 SDValue NewOp1 = 9177 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 9178 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 9179 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9180 } 9181 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), 9182 Op.getOperand(2)); 9183 case Intrinsic::riscv_clmulh: 9184 case Intrinsic::riscv_clmulr: { 9185 unsigned Opc = 9186 IntNo == Intrinsic::riscv_clmulh ? 
RISCVISD::CLMULH : RISCVISD::CLMULR; 9187 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 9188 SDValue NewOp0 = 9189 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 9190 SDValue NewOp1 = 9191 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 9192 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 9193 DAG.getConstant(32, DL, MVT::i64)); 9194 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 9195 DAG.getConstant(32, DL, MVT::i64)); 9196 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 9197 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 9198 DAG.getConstant(32, DL, MVT::i64)); 9199 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 9200 } 9201 9202 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 9203 } 9204 case Intrinsic::experimental_get_vector_length: 9205 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); 9206 case Intrinsic::experimental_cttz_elts: 9207 return lowerCttzElts(Op.getNode(), DAG, Subtarget); 9208 case Intrinsic::riscv_vmv_x_s: { 9209 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); 9210 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); 9211 } 9212 case Intrinsic::riscv_vfmv_f_s: 9213 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 9214 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL)); 9215 case Intrinsic::riscv_vmv_v_x: 9216 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 9217 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, 9218 Subtarget); 9219 case Intrinsic::riscv_vfmv_v_f: 9220 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 9221 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 9222 case Intrinsic::riscv_vmv_s_x: { 9223 SDValue Scalar = Op.getOperand(2); 9224 9225 if (Scalar.getValueType().bitsLE(XLenVT)) { 9226 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 9227 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 9228 Op.getOperand(1), Scalar, Op.getOperand(3)); 9229 } 9230 9231 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 9232 9233 // This is an i64 value that lives in two scalar registers. We have to 9234 // insert this in a convoluted way. First we build vXi64 splat containing 9235 // the two values that we assemble using some bit math. Next we'll use 9236 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 9237 // to merge element 0 from our splat into the source vector. 9238 // FIXME: This is probably not the best way to do this, but it is 9239 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 9240 // point. 
9241 // sw lo, (a0) 9242 // sw hi, 4(a0) 9243 // vlse vX, (a0) 9244 // 9245 // vid.v vVid 9246 // vmseq.vx mMask, vVid, 0 9247 // vmerge.vvm vDest, vSrc, vVal, mMask 9248 MVT VT = Op.getSimpleValueType(); 9249 SDValue Vec = Op.getOperand(1); 9250 SDValue VL = getVLOperand(Op); 9251 9252 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); 9253 if (Op.getOperand(1).isUndef()) 9254 return SplattedVal; 9255 SDValue SplattedIdx = 9256 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 9257 DAG.getConstant(0, DL, MVT::i32), VL); 9258 9259 MVT MaskVT = getMaskTypeFor(VT); 9260 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); 9261 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 9262 SDValue SelectCond = 9263 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 9264 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), 9265 DAG.getUNDEF(MaskVT), Mask, VL}); 9266 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, 9267 Vec, DAG.getUNDEF(VT), VL); 9268 } 9269 case Intrinsic::riscv_vfmv_s_f: 9270 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), 9271 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 9272 // EGS * EEW >= 128 bits 9273 case Intrinsic::riscv_vaesdf_vv: 9274 case Intrinsic::riscv_vaesdf_vs: 9275 case Intrinsic::riscv_vaesdm_vv: 9276 case Intrinsic::riscv_vaesdm_vs: 9277 case Intrinsic::riscv_vaesef_vv: 9278 case Intrinsic::riscv_vaesef_vs: 9279 case Intrinsic::riscv_vaesem_vv: 9280 case Intrinsic::riscv_vaesem_vs: 9281 case Intrinsic::riscv_vaeskf1: 9282 case Intrinsic::riscv_vaeskf2: 9283 case Intrinsic::riscv_vaesz_vs: 9284 case Intrinsic::riscv_vsm4k: 9285 case Intrinsic::riscv_vsm4r_vv: 9286 case Intrinsic::riscv_vsm4r_vs: { 9287 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 9288 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 9289 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 9290 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 9291 return Op; 9292 } 9293 // EGS * EEW >= 256 bits 9294 case Intrinsic::riscv_vsm3c: 9295 case Intrinsic::riscv_vsm3me: { 9296 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || 9297 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) 9298 report_fatal_error("EGW should be greater than or equal to 8 * SEW."); 9299 return Op; 9300 } 9301 // zvknha(SEW=32)/zvknhb(SEW=[32|64]) 9302 case Intrinsic::riscv_vsha2ch: 9303 case Intrinsic::riscv_vsha2cl: 9304 case Intrinsic::riscv_vsha2ms: { 9305 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && 9306 !Subtarget.hasStdExtZvknhb()) 9307 report_fatal_error("SEW=64 needs Zvknhb to be enabled."); 9308 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 9309 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 9310 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 9311 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 9312 return Op; 9313 } 9314 case Intrinsic::riscv_sf_vc_v_x: 9315 case Intrinsic::riscv_sf_vc_v_i: 9316 case Intrinsic::riscv_sf_vc_v_xv: 9317 case Intrinsic::riscv_sf_vc_v_iv: 9318 case Intrinsic::riscv_sf_vc_v_vv: 9319 case Intrinsic::riscv_sf_vc_v_fv: 9320 case Intrinsic::riscv_sf_vc_v_xvv: 9321 case Intrinsic::riscv_sf_vc_v_ivv: 9322 case Intrinsic::riscv_sf_vc_v_vvv: 9323 case Intrinsic::riscv_sf_vc_v_fvv: 9324 case Intrinsic::riscv_sf_vc_v_xvw: 9325 case Intrinsic::riscv_sf_vc_v_ivw: 9326 case Intrinsic::riscv_sf_vc_v_vvw: 9327 case 
Intrinsic::riscv_sf_vc_v_fvw: { 9328 MVT VT = Op.getSimpleValueType(); 9329 9330 SmallVector<SDValue> Operands{Op->op_values()}; 9331 processVCIXOperands(Op, Operands, DAG); 9332 9333 MVT RetVT = VT; 9334 if (VT.isFixedLengthVector()) 9335 RetVT = getContainerForFixedLengthVector(VT); 9336 else if (VT.isFloatingPoint()) 9337 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), 9338 VT.getVectorElementCount()); 9339 9340 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands); 9341 9342 if (VT.isFixedLengthVector()) 9343 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget); 9344 else if (VT.isFloatingPoint()) 9345 NewNode = DAG.getBitcast(VT, NewNode); 9346 9347 if (Op == NewNode) 9348 break; 9349 9350 return NewNode; 9351 } 9352 } 9353 9354 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 9355 } 9356 9357 static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, 9358 unsigned Type) { 9359 SDLoc DL(Op); 9360 SmallVector<SDValue> Operands{Op->op_values()}; 9361 Operands.erase(Operands.begin() + 1); 9362 9363 const RISCVSubtarget &Subtarget = 9364 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); 9365 MVT VT = Op.getSimpleValueType(); 9366 MVT RetVT = VT; 9367 MVT FloatVT = VT; 9368 9369 if (VT.isFloatingPoint()) { 9370 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), 9371 VT.getVectorElementCount()); 9372 FloatVT = RetVT; 9373 } 9374 if (VT.isFixedLengthVector()) 9375 RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT, 9376 Subtarget); 9377 9378 processVCIXOperands(Op, Operands, DAG); 9379 9380 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); 9381 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands); 9382 SDValue Chain = NewNode.getValue(1); 9383 9384 if (VT.isFixedLengthVector()) 9385 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget); 9386 if (VT.isFloatingPoint()) 9387 NewNode = DAG.getBitcast(VT, NewNode); 9388 9389 NewNode = DAG.getMergeValues({NewNode, Chain}, DL); 9390 9391 return NewNode; 9392 } 9393 9394 static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, 9395 unsigned Type) { 9396 SmallVector<SDValue> Operands{Op->op_values()}; 9397 Operands.erase(Operands.begin() + 1); 9398 processVCIXOperands(Op, Operands, DAG); 9399 9400 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands); 9401 } 9402 9403 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 9404 SelectionDAG &DAG) const { 9405 unsigned IntNo = Op.getConstantOperandVal(1); 9406 switch (IntNo) { 9407 default: 9408 break; 9409 case Intrinsic::riscv_masked_strided_load: { 9410 SDLoc DL(Op); 9411 MVT XLenVT = Subtarget.getXLenVT(); 9412 9413 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 9414 // the selection of the masked intrinsics doesn't do this for us. 
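    // For example (illustrative): a strided load whose mask operand is a
    // constant all-ones splat is emitted through the plain riscv_vlse
    // intrinsic, and if its stride is additionally a constant zero it
    // degenerates into a single scalar load that is then splatted.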
9415 SDValue Mask = Op.getOperand(5); 9416 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9417 9418 MVT VT = Op->getSimpleValueType(0); 9419 MVT ContainerVT = VT; 9420 if (VT.isFixedLengthVector()) 9421 ContainerVT = getContainerForFixedLengthVector(VT); 9422 9423 SDValue PassThru = Op.getOperand(2); 9424 if (!IsUnmasked) { 9425 MVT MaskVT = getMaskTypeFor(ContainerVT); 9426 if (VT.isFixedLengthVector()) { 9427 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9428 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 9429 } 9430 } 9431 9432 auto *Load = cast<MemIntrinsicSDNode>(Op); 9433 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 9434 SDValue Ptr = Op.getOperand(3); 9435 SDValue Stride = Op.getOperand(4); 9436 SDValue Result, Chain; 9437 9438 // TODO: We restrict this to unmasked loads currently in consideration of 9439 // the complexity of handling all falses masks. 9440 MVT ScalarVT = ContainerVT.getVectorElementType(); 9441 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) { 9442 SDValue ScalarLoad = 9443 DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr, 9444 ScalarVT, Load->getMemOperand()); 9445 Chain = ScalarLoad.getValue(1); 9446 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, 9447 Subtarget); 9448 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) { 9449 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr, 9450 Load->getMemOperand()); 9451 Chain = ScalarLoad.getValue(1); 9452 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad); 9453 } else { 9454 SDValue IntID = DAG.getTargetConstant( 9455 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 9456 XLenVT); 9457 9458 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 9459 if (IsUnmasked) 9460 Ops.push_back(DAG.getUNDEF(ContainerVT)); 9461 else 9462 Ops.push_back(PassThru); 9463 Ops.push_back(Ptr); 9464 Ops.push_back(Stride); 9465 if (!IsUnmasked) 9466 Ops.push_back(Mask); 9467 Ops.push_back(VL); 9468 if (!IsUnmasked) { 9469 SDValue Policy = 9470 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 9471 Ops.push_back(Policy); 9472 } 9473 9474 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 9475 Result = 9476 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 9477 Load->getMemoryVT(), Load->getMemOperand()); 9478 Chain = Result.getValue(1); 9479 } 9480 if (VT.isFixedLengthVector()) 9481 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 9482 return DAG.getMergeValues({Result, Chain}, DL); 9483 } 9484 case Intrinsic::riscv_seg2_load: 9485 case Intrinsic::riscv_seg3_load: 9486 case Intrinsic::riscv_seg4_load: 9487 case Intrinsic::riscv_seg5_load: 9488 case Intrinsic::riscv_seg6_load: 9489 case Intrinsic::riscv_seg7_load: 9490 case Intrinsic::riscv_seg8_load: { 9491 SDLoc DL(Op); 9492 static const Intrinsic::ID VlsegInts[7] = { 9493 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 9494 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 9495 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 9496 Intrinsic::riscv_vlseg8}; 9497 unsigned NF = Op->getNumValues() - 1; 9498 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 9499 MVT XLenVT = Subtarget.getXLenVT(); 9500 MVT VT = Op->getSimpleValueType(0); 9501 MVT ContainerVT = getContainerForFixedLengthVector(VT); 9502 9503 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 9504 Subtarget); 9505 SDValue IntID = 
DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); 9506 auto *Load = cast<MemIntrinsicSDNode>(Op); 9507 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); 9508 ContainerVTs.push_back(MVT::Other); 9509 SDVTList VTs = DAG.getVTList(ContainerVTs); 9510 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; 9511 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); 9512 Ops.push_back(Op.getOperand(2)); 9513 Ops.push_back(VL); 9514 SDValue Result = 9515 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 9516 Load->getMemoryVT(), Load->getMemOperand()); 9517 SmallVector<SDValue, 9> Results; 9518 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) 9519 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), 9520 DAG, Subtarget)); 9521 Results.push_back(Result.getValue(NF)); 9522 return DAG.getMergeValues(Results, DL); 9523 } 9524 case Intrinsic::riscv_sf_vc_v_x_se: 9525 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE); 9526 case Intrinsic::riscv_sf_vc_v_i_se: 9527 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE); 9528 case Intrinsic::riscv_sf_vc_v_xv_se: 9529 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE); 9530 case Intrinsic::riscv_sf_vc_v_iv_se: 9531 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE); 9532 case Intrinsic::riscv_sf_vc_v_vv_se: 9533 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE); 9534 case Intrinsic::riscv_sf_vc_v_fv_se: 9535 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE); 9536 case Intrinsic::riscv_sf_vc_v_xvv_se: 9537 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE); 9538 case Intrinsic::riscv_sf_vc_v_ivv_se: 9539 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE); 9540 case Intrinsic::riscv_sf_vc_v_vvv_se: 9541 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE); 9542 case Intrinsic::riscv_sf_vc_v_fvv_se: 9543 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE); 9544 case Intrinsic::riscv_sf_vc_v_xvw_se: 9545 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE); 9546 case Intrinsic::riscv_sf_vc_v_ivw_se: 9547 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE); 9548 case Intrinsic::riscv_sf_vc_v_vvw_se: 9549 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE); 9550 case Intrinsic::riscv_sf_vc_v_fvw_se: 9551 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE); 9552 } 9553 9554 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 9555 } 9556 9557 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 9558 SelectionDAG &DAG) const { 9559 unsigned IntNo = Op.getConstantOperandVal(1); 9560 switch (IntNo) { 9561 default: 9562 break; 9563 case Intrinsic::riscv_masked_strided_store: { 9564 SDLoc DL(Op); 9565 MVT XLenVT = Subtarget.getXLenVT(); 9566 9567 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 9568 // the selection of the masked intrinsics doesn't do this for us. 
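    // For example (illustrative): with an all-ones mask this becomes the
    // unmasked riscv_vsse intrinsic with operands (chain, id, value, ptr,
    // stride, vl); otherwise riscv_vsse_mask is used and the mask operand is
    // inserted just before the VL.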
9569 SDValue Mask = Op.getOperand(5); 9570 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 9571 9572 SDValue Val = Op.getOperand(2); 9573 MVT VT = Val.getSimpleValueType(); 9574 MVT ContainerVT = VT; 9575 if (VT.isFixedLengthVector()) { 9576 ContainerVT = getContainerForFixedLengthVector(VT); 9577 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 9578 } 9579 if (!IsUnmasked) { 9580 MVT MaskVT = getMaskTypeFor(ContainerVT); 9581 if (VT.isFixedLengthVector()) 9582 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 9583 } 9584 9585 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 9586 9587 SDValue IntID = DAG.getTargetConstant( 9588 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 9589 XLenVT); 9590 9591 auto *Store = cast<MemIntrinsicSDNode>(Op); 9592 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 9593 Ops.push_back(Val); 9594 Ops.push_back(Op.getOperand(3)); // Ptr 9595 Ops.push_back(Op.getOperand(4)); // Stride 9596 if (!IsUnmasked) 9597 Ops.push_back(Mask); 9598 Ops.push_back(VL); 9599 9600 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 9601 Ops, Store->getMemoryVT(), 9602 Store->getMemOperand()); 9603 } 9604 case Intrinsic::riscv_seg2_store: 9605 case Intrinsic::riscv_seg3_store: 9606 case Intrinsic::riscv_seg4_store: 9607 case Intrinsic::riscv_seg5_store: 9608 case Intrinsic::riscv_seg6_store: 9609 case Intrinsic::riscv_seg7_store: 9610 case Intrinsic::riscv_seg8_store: { 9611 SDLoc DL(Op); 9612 static const Intrinsic::ID VssegInts[] = { 9613 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 9614 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 9615 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 9616 Intrinsic::riscv_vsseg8}; 9617 // Operands are (chain, int_id, vec*, ptr, vl) 9618 unsigned NF = Op->getNumOperands() - 4; 9619 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 9620 MVT XLenVT = Subtarget.getXLenVT(); 9621 MVT VT = Op->getOperand(2).getSimpleValueType(); 9622 MVT ContainerVT = getContainerForFixedLengthVector(VT); 9623 9624 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 9625 Subtarget); 9626 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); 9627 SDValue Ptr = Op->getOperand(NF + 2); 9628 9629 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); 9630 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; 9631 for (unsigned i = 0; i < NF; i++) 9632 Ops.push_back(convertToScalableVector( 9633 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget)); 9634 Ops.append({Ptr, VL}); 9635 9636 return DAG.getMemIntrinsicNode( 9637 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, 9638 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); 9639 } 9640 case Intrinsic::riscv_sf_vc_xv_se: 9641 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE); 9642 case Intrinsic::riscv_sf_vc_iv_se: 9643 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE); 9644 case Intrinsic::riscv_sf_vc_vv_se: 9645 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE); 9646 case Intrinsic::riscv_sf_vc_fv_se: 9647 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE); 9648 case Intrinsic::riscv_sf_vc_xvv_se: 9649 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE); 9650 case Intrinsic::riscv_sf_vc_ivv_se: 9651 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE); 9652 case Intrinsic::riscv_sf_vc_vvv_se: 9653 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE); 
9654 case Intrinsic::riscv_sf_vc_fvv_se: 9655 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE); 9656 case Intrinsic::riscv_sf_vc_xvw_se: 9657 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE); 9658 case Intrinsic::riscv_sf_vc_ivw_se: 9659 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE); 9660 case Intrinsic::riscv_sf_vc_vvw_se: 9661 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE); 9662 case Intrinsic::riscv_sf_vc_fvw_se: 9663 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE); 9664 } 9665 9666 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 9667 } 9668 9669 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 9670 switch (ISDOpcode) { 9671 default: 9672 llvm_unreachable("Unhandled reduction"); 9673 case ISD::VP_REDUCE_ADD: 9674 case ISD::VECREDUCE_ADD: 9675 return RISCVISD::VECREDUCE_ADD_VL; 9676 case ISD::VP_REDUCE_UMAX: 9677 case ISD::VECREDUCE_UMAX: 9678 return RISCVISD::VECREDUCE_UMAX_VL; 9679 case ISD::VP_REDUCE_SMAX: 9680 case ISD::VECREDUCE_SMAX: 9681 return RISCVISD::VECREDUCE_SMAX_VL; 9682 case ISD::VP_REDUCE_UMIN: 9683 case ISD::VECREDUCE_UMIN: 9684 return RISCVISD::VECREDUCE_UMIN_VL; 9685 case ISD::VP_REDUCE_SMIN: 9686 case ISD::VECREDUCE_SMIN: 9687 return RISCVISD::VECREDUCE_SMIN_VL; 9688 case ISD::VP_REDUCE_AND: 9689 case ISD::VECREDUCE_AND: 9690 return RISCVISD::VECREDUCE_AND_VL; 9691 case ISD::VP_REDUCE_OR: 9692 case ISD::VECREDUCE_OR: 9693 return RISCVISD::VECREDUCE_OR_VL; 9694 case ISD::VP_REDUCE_XOR: 9695 case ISD::VECREDUCE_XOR: 9696 return RISCVISD::VECREDUCE_XOR_VL; 9697 case ISD::VP_REDUCE_FADD: 9698 return RISCVISD::VECREDUCE_FADD_VL; 9699 case ISD::VP_REDUCE_SEQ_FADD: 9700 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 9701 case ISD::VP_REDUCE_FMAX: 9702 case ISD::VP_REDUCE_FMAXIMUM: 9703 return RISCVISD::VECREDUCE_FMAX_VL; 9704 case ISD::VP_REDUCE_FMIN: 9705 case ISD::VP_REDUCE_FMINIMUM: 9706 return RISCVISD::VECREDUCE_FMIN_VL; 9707 } 9708 9709 } 9710 9711 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 9712 SelectionDAG &DAG, 9713 bool IsVP) const { 9714 SDLoc DL(Op); 9715 SDValue Vec = Op.getOperand(IsVP ? 
1 : 0); 9716 MVT VecVT = Vec.getSimpleValueType(); 9717 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 9718 Op.getOpcode() == ISD::VECREDUCE_OR || 9719 Op.getOpcode() == ISD::VECREDUCE_XOR || 9720 Op.getOpcode() == ISD::VP_REDUCE_AND || 9721 Op.getOpcode() == ISD::VP_REDUCE_OR || 9722 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 9723 "Unexpected reduction lowering"); 9724 9725 MVT XLenVT = Subtarget.getXLenVT(); 9726 9727 MVT ContainerVT = VecVT; 9728 if (VecVT.isFixedLengthVector()) { 9729 ContainerVT = getContainerForFixedLengthVector(VecVT); 9730 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9731 } 9732 9733 SDValue Mask, VL; 9734 if (IsVP) { 9735 Mask = Op.getOperand(2); 9736 VL = Op.getOperand(3); 9737 } else { 9738 std::tie(Mask, VL) = 9739 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9740 } 9741 9742 unsigned BaseOpc; 9743 ISD::CondCode CC; 9744 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 9745 9746 switch (Op.getOpcode()) { 9747 default: 9748 llvm_unreachable("Unhandled reduction"); 9749 case ISD::VECREDUCE_AND: 9750 case ISD::VP_REDUCE_AND: { 9751 // vcpop ~x == 0 9752 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 9753 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 9754 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9755 CC = ISD::SETEQ; 9756 BaseOpc = ISD::AND; 9757 break; 9758 } 9759 case ISD::VECREDUCE_OR: 9760 case ISD::VP_REDUCE_OR: 9761 // vcpop x != 0 9762 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9763 CC = ISD::SETNE; 9764 BaseOpc = ISD::OR; 9765 break; 9766 case ISD::VECREDUCE_XOR: 9767 case ISD::VP_REDUCE_XOR: { 9768 // ((vcpop x) & 1) != 0 9769 SDValue One = DAG.getConstant(1, DL, XLenVT); 9770 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9771 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 9772 CC = ISD::SETNE; 9773 BaseOpc = ISD::XOR; 9774 break; 9775 } 9776 } 9777 9778 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 9779 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC); 9780 9781 if (!IsVP) 9782 return SetCC; 9783 9784 // Now include the start value in the operation. 9785 // Note that we must return the start value when no elements are operated 9786 // upon. The vcpop instructions we've emitted in each case above will return 9787 // 0 for an inactive vector, and so we've already received the neutral value: 9788 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 9789 // can simply include the start value. 
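  // For example: a vp.reduce.and with EVL=0 produces vcpop(~x) == 0, so the
  // setcc above evaluates to 1 (the AND identity) and the final
  // AND(1, start) returns the start value; similarly vp.reduce.or yields
  // OR(0, start) == start.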
9790 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0)); 9791 } 9792 9793 static bool isNonZeroAVL(SDValue AVL) { 9794 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); 9795 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); 9796 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || 9797 (ImmAVL && ImmAVL->getZExtValue() >= 1); 9798 } 9799 9800 /// Helper to lower a reduction sequence of the form: 9801 /// scalar = reduce_op vec, scalar_start 9802 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, 9803 SDValue StartValue, SDValue Vec, SDValue Mask, 9804 SDValue VL, const SDLoc &DL, SelectionDAG &DAG, 9805 const RISCVSubtarget &Subtarget) { 9806 const MVT VecVT = Vec.getSimpleValueType(); 9807 const MVT M1VT = getLMUL1VT(VecVT); 9808 const MVT XLenVT = Subtarget.getXLenVT(); 9809 const bool NonZeroAVL = isNonZeroAVL(VL); 9810 9811 // The reduction needs an LMUL1 input; do the splat at either LMUL1 9812 // or the original VT if fractional. 9813 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; 9814 // We reuse the VL of the reduction to reduce vsetvli toggles if we can 9815 // prove it is non-zero. For the AVL=0 case, we need the scalar to 9816 // be the result of the reduction operation. 9817 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); 9818 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, 9819 DAG, Subtarget); 9820 if (M1VT != InnerVT) 9821 InitialValue = 9822 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT), 9823 InitialValue, DAG.getVectorIdxConstant(0, DL)); 9824 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; 9825 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 9826 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; 9827 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops); 9828 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 9829 DAG.getVectorIdxConstant(0, DL)); 9830 } 9831 9832 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 9833 SelectionDAG &DAG) const { 9834 SDLoc DL(Op); 9835 SDValue Vec = Op.getOperand(0); 9836 EVT VecEVT = Vec.getValueType(); 9837 9838 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 9839 9840 // Due to ordering in legalize types we may have a vector type that needs to 9841 // be split. Do that manually so we can get down to a legal type. 9842 while (getTypeAction(*DAG.getContext(), VecEVT) == 9843 TargetLowering::TypeSplitVector) { 9844 auto [Lo, Hi] = DAG.SplitVector(Vec, DL); 9845 VecEVT = Lo.getValueType(); 9846 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 9847 } 9848 9849 // TODO: The type may need to be widened rather than split. Or widened before 9850 // it can be split. 
9851 if (!isTypeLegal(VecEVT)) 9852 return SDValue(); 9853 9854 MVT VecVT = VecEVT.getSimpleVT(); 9855 MVT VecEltVT = VecVT.getVectorElementType(); 9856 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 9857 9858 MVT ContainerVT = VecVT; 9859 if (VecVT.isFixedLengthVector()) { 9860 ContainerVT = getContainerForFixedLengthVector(VecVT); 9861 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9862 } 9863 9864 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9865 9866 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 9867 switch (BaseOpc) { 9868 case ISD::AND: 9869 case ISD::OR: 9870 case ISD::UMAX: 9871 case ISD::UMIN: 9872 case ISD::SMAX: 9873 case ISD::SMIN: 9874 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, 9875 DAG.getVectorIdxConstant(0, DL)); 9876 } 9877 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec, 9878 Mask, VL, DL, DAG, Subtarget); 9879 } 9880 9881 // Given a reduction op, this function returns the matching reduction opcode, 9882 // the vector SDValue and the scalar SDValue required to lower this to a 9883 // RISCVISD node. 9884 static std::tuple<unsigned, SDValue, SDValue> 9885 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, 9886 const RISCVSubtarget &Subtarget) { 9887 SDLoc DL(Op); 9888 auto Flags = Op->getFlags(); 9889 unsigned Opcode = Op.getOpcode(); 9890 switch (Opcode) { 9891 default: 9892 llvm_unreachable("Unhandled reduction"); 9893 case ISD::VECREDUCE_FADD: { 9894 // Use positive zero if we can. It is cheaper to materialize. 9895 SDValue Zero = 9896 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); 9897 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); 9898 } 9899 case ISD::VECREDUCE_SEQ_FADD: 9900 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 9901 Op.getOperand(0)); 9902 case ISD::VECREDUCE_FMINIMUM: 9903 case ISD::VECREDUCE_FMAXIMUM: 9904 case ISD::VECREDUCE_FMIN: 9905 case ISD::VECREDUCE_FMAX: { 9906 SDValue Front = 9907 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0), 9908 DAG.getVectorIdxConstant(0, DL)); 9909 unsigned RVVOpc = 9910 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM) 9911 ? RISCVISD::VECREDUCE_FMIN_VL 9912 : RISCVISD::VECREDUCE_FMAX_VL; 9913 return std::make_tuple(RVVOpc, Op.getOperand(0), Front); 9914 } 9915 } 9916 } 9917 9918 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 9919 SelectionDAG &DAG) const { 9920 SDLoc DL(Op); 9921 MVT VecEltVT = Op.getSimpleValueType(); 9922 9923 unsigned RVVOpcode; 9924 SDValue VectorVal, ScalarVal; 9925 std::tie(RVVOpcode, VectorVal, ScalarVal) = 9926 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget); 9927 MVT VecVT = VectorVal.getSimpleValueType(); 9928 9929 MVT ContainerVT = VecVT; 9930 if (VecVT.isFixedLengthVector()) { 9931 ContainerVT = getContainerForFixedLengthVector(VecVT); 9932 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 9933 } 9934 9935 MVT ResVT = Op.getSimpleValueType(); 9936 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9937 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask, 9938 VL, DL, DAG, Subtarget); 9939 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM && 9940 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM) 9941 return Res; 9942 9943 if (Op->getFlags().hasNoNaNs()) 9944 return Res; 9945 9946 // Force output to NaN if any element is Nan. 
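  // The vfredmin/vfredmax reductions used above return the non-NaN operand
  // when an input is a quiet NaN, so NaNs would otherwise be silently
  // dropped. An element is NaN iff it compares unordered with itself, so
  // roughly
  //   vmfne.vv v0, vSrc, vSrc
  //   vcpop.m  a0, v0
  // and a nonzero count forces the final result to a NaN constant instead of
  // the reduction result (illustrative sequence; actual codegen differs).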
9947 SDValue IsNan = 9948 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 9949 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE), 9950 DAG.getUNDEF(Mask.getValueType()), Mask, VL}); 9951 MVT XLenVT = Subtarget.getXLenVT(); 9952 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL); 9953 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop, 9954 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); 9955 return DAG.getSelect( 9956 DL, ResVT, NoNaNs, Res, 9957 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL, 9958 ResVT)); 9959 } 9960 9961 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 9962 SelectionDAG &DAG) const { 9963 SDLoc DL(Op); 9964 unsigned Opc = Op.getOpcode(); 9965 SDValue Start = Op.getOperand(0); 9966 SDValue Vec = Op.getOperand(1); 9967 EVT VecEVT = Vec.getValueType(); 9968 MVT XLenVT = Subtarget.getXLenVT(); 9969 9970 // TODO: The type may need to be widened rather than split. Or widened before 9971 // it can be split. 9972 if (!isTypeLegal(VecEVT)) 9973 return SDValue(); 9974 9975 MVT VecVT = VecEVT.getSimpleVT(); 9976 unsigned RVVOpcode = getRVVReductionOp(Opc); 9977 9978 if (VecVT.isFixedLengthVector()) { 9979 auto ContainerVT = getContainerForFixedLengthVector(VecVT); 9980 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9981 } 9982 9983 SDValue VL = Op.getOperand(3); 9984 SDValue Mask = Op.getOperand(2); 9985 SDValue Res = 9986 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0), 9987 Vec, Mask, VL, DL, DAG, Subtarget); 9988 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) || 9989 Op->getFlags().hasNoNaNs()) 9990 return Res; 9991 9992 // Propagate NaNs. 9993 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType()); 9994 // Check if any of the elements in Vec is NaN. 9995 SDValue IsNaN = DAG.getNode( 9996 RISCVISD::SETCC_VL, DL, PredVT, 9997 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL}); 9998 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL); 9999 // Check if the start value is NaN. 10000 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO); 10001 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN); 10002 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop, 10003 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); 10004 MVT ResVT = Res.getSimpleValueType(); 10005 return DAG.getSelect( 10006 DL, ResVT, NoNaNs, Res, 10007 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL, 10008 ResVT)); 10009 } 10010 10011 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 10012 SelectionDAG &DAG) const { 10013 SDValue Vec = Op.getOperand(0); 10014 SDValue SubVec = Op.getOperand(1); 10015 MVT VecVT = Vec.getSimpleValueType(); 10016 MVT SubVecVT = SubVec.getSimpleValueType(); 10017 10018 SDLoc DL(Op); 10019 MVT XLenVT = Subtarget.getXLenVT(); 10020 unsigned OrigIdx = Op.getConstantOperandVal(2); 10021 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 10022 10023 // We don't have the ability to slide mask vectors up indexed by their i1 10024 // elements; the smallest we can do is i8. Often we are able to bitcast to 10025 // equivalent i8 vectors. Note that when inserting a fixed-length vector 10026 // into a scalable one, we might not necessarily have enough scalable 10027 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 
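  // For example (illustrative): inserting nxv8i1 into nxv64i1 at index 32 is
  // re-expressed below as inserting nxv1i8 into nxv8i8 at index 4, while
  // something like inserting v4i1 into nxv1i1 (too few guaranteed elements
  // to divide by 8) has to take the extend-to-i8-and-truncate-back path.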
10028 if (SubVecVT.getVectorElementType() == MVT::i1 && 10029 (OrigIdx != 0 || !Vec.isUndef())) { 10030 if (VecVT.getVectorMinNumElements() >= 8 && 10031 SubVecVT.getVectorMinNumElements() >= 8) { 10032 assert(OrigIdx % 8 == 0 && "Invalid index"); 10033 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 10034 SubVecVT.getVectorMinNumElements() % 8 == 0 && 10035 "Unexpected mask vector lowering"); 10036 OrigIdx /= 8; 10037 SubVecVT = 10038 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 10039 SubVecVT.isScalableVector()); 10040 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 10041 VecVT.isScalableVector()); 10042 Vec = DAG.getBitcast(VecVT, Vec); 10043 SubVec = DAG.getBitcast(SubVecVT, SubVec); 10044 } else { 10045 // We can't slide this mask vector up indexed by its i1 elements. 10046 // This poses a problem when we wish to insert a scalable vector which 10047 // can't be re-expressed as a larger type. Just choose the slow path and 10048 // extend to a larger type, then truncate back down. 10049 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 10050 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 10051 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 10052 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 10053 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 10054 Op.getOperand(2)); 10055 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 10056 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 10057 } 10058 } 10059 10060 // If the subvector vector is a fixed-length type and we don't know VLEN 10061 // exactly, we cannot use subregister manipulation to simplify the codegen; we 10062 // don't know which register of a LMUL group contains the specific subvector 10063 // as we only know the minimum register size. Therefore we must slide the 10064 // vector group up the full amount. 10065 const auto VLen = Subtarget.getRealVLen(); 10066 if (SubVecVT.isFixedLengthVector() && !VLen) { 10067 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) 10068 return Op; 10069 MVT ContainerVT = VecVT; 10070 if (VecVT.isFixedLengthVector()) { 10071 ContainerVT = getContainerForFixedLengthVector(VecVT); 10072 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 10073 } 10074 10075 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { 10076 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 10077 DAG.getUNDEF(ContainerVT), SubVec, 10078 DAG.getVectorIdxConstant(0, DL)); 10079 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 10080 return DAG.getBitcast(Op.getValueType(), SubVec); 10081 } 10082 10083 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 10084 DAG.getUNDEF(ContainerVT), SubVec, 10085 DAG.getVectorIdxConstant(0, DL)); 10086 SDValue Mask = 10087 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 10088 // Set the vector length to only the number of elements we care about. Note 10089 // that for slideup this includes the offset. 10090 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); 10091 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget); 10092 10093 // Use tail agnostic policy if we're inserting over Vec's tail. 
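    // For example: inserting v4i32 into v8i32 at index 4 writes elements
    // 4..7, i.e. exactly Vec's tail, so tail-agnostic is safe; inserting at
    // index 0 would leave elements 4..7 live and must stay tail-undisturbed.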
10094 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 10095 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) 10096 Policy = RISCVII::TAIL_AGNOSTIC; 10097 10098 // If we're inserting into the lowest elements, use a tail undisturbed 10099 // vmv.v.v. 10100 if (OrigIdx == 0) { 10101 SubVec = 10102 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL); 10103 } else { 10104 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 10105 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, 10106 SlideupAmt, Mask, VL, Policy); 10107 } 10108 10109 if (VecVT.isFixedLengthVector()) 10110 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 10111 return DAG.getBitcast(Op.getValueType(), SubVec); 10112 } 10113 10114 MVT ContainerVecVT = VecVT; 10115 if (VecVT.isFixedLengthVector()) { 10116 ContainerVecVT = getContainerForFixedLengthVector(VecVT); 10117 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget); 10118 } 10119 10120 MVT ContainerSubVecVT = SubVecVT; 10121 if (SubVecVT.isFixedLengthVector()) { 10122 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT); 10123 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget); 10124 } 10125 10126 unsigned SubRegIdx; 10127 ElementCount RemIdx; 10128 // insert_subvector scales the index by vscale if the subvector is scalable, 10129 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if 10130 // we have a fixed length subvector, we need to adjust the index by 1/vscale. 10131 if (SubVecVT.isFixedLengthVector()) { 10132 assert(VLen); 10133 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; 10134 auto Decompose = 10135 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 10136 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI); 10137 SubRegIdx = Decompose.first; 10138 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) + 10139 (OrigIdx % Vscale)); 10140 } else { 10141 auto Decompose = 10142 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 10143 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI); 10144 SubRegIdx = Decompose.first; 10145 RemIdx = ElementCount::getScalable(Decompose.second); 10146 } 10147 10148 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock); 10149 assert(isPowerOf2_64( 10150 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue())); 10151 bool ExactlyVecRegSized = 10152 Subtarget.expandVScale(SubVecVT.getSizeInBits()) 10153 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize)); 10154 10155 // 1. If the Idx has been completely eliminated and this subvector's size is 10156 // a vector register or a multiple thereof, or the surrounding elements are 10157 // undef, then this is a subvector insert which naturally aligns to a vector 10158 // register. These can easily be handled using subregister manipulation. 10159 // 2. If the subvector isn't an exact multiple of a valid register group size, 10160 // then the insertion must preserve the undisturbed elements of the register. 10161 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 10162 // vector type (which resolves to a subregister copy), performing a VSLIDEUP 10163 // to place the subvector within the vector register, and an INSERT_SUBVECTOR 10164 // of that LMUL=1 type back into the larger vector (resolving to another 10165 // subregister operation). See below for how our VSLIDEUP works. 
We go via a 10166 // LMUL=1 type to avoid allocating a large register group to hold our 10167 // subvector. 10168 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) { 10169 if (SubVecVT.isFixedLengthVector()) { 10170 // We may get NoSubRegister if inserting at index 0 and the subvec 10171 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0 10172 if (SubRegIdx == RISCV::NoSubRegister) { 10173 assert(OrigIdx == 0); 10174 return Op; 10175 } 10176 10177 SDValue Insert = 10178 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec); 10179 if (VecVT.isFixedLengthVector()) 10180 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget); 10181 return Insert; 10182 } 10183 return Op; 10184 } 10185 10186 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 10187 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 10188 // (in our case undisturbed). This means we can set up a subvector insertion 10189 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 10190 // size of the subvector. 10191 MVT InterSubVT = ContainerVecVT; 10192 SDValue AlignedExtract = Vec; 10193 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue(); 10194 if (SubVecVT.isFixedLengthVector()) 10195 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock; 10196 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) { 10197 InterSubVT = getLMUL1VT(ContainerVecVT); 10198 // Extract a subvector equal to the nearest full vector register type. This 10199 // should resolve to a EXTRACT_SUBREG instruction. 10200 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 10201 DAG.getVectorIdxConstant(AlignedIdx, DL)); 10202 } 10203 10204 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 10205 DAG.getUNDEF(InterSubVT), SubVec, 10206 DAG.getVectorIdxConstant(0, DL)); 10207 10208 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget); 10209 10210 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount(); 10211 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount()); 10212 10213 // Use tail agnostic policy if we're inserting over InterSubVT's tail. 10214 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 10215 if (Subtarget.expandVScale(EndIndex) == 10216 Subtarget.expandVScale(InterSubVT.getVectorElementCount())) 10217 Policy = RISCVII::TAIL_AGNOSTIC; 10218 10219 // If we're inserting into the lowest elements, use a tail undisturbed 10220 // vmv.v.v. 10221 if (RemIdx.isZero()) { 10222 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract, 10223 SubVec, VL); 10224 } else { 10225 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx); 10226 10227 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 10228 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 10229 10230 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec, 10231 SlideupAmt, Mask, VL, Policy); 10232 } 10233 10234 // If required, insert this subvector back into the correct vector register. 10235 // This should resolve to an INSERT_SUBREG instruction. 
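// (This is only needed when an LMUL=1 slice was extracted above; otherwise
// InterSubVT already equals ContainerVecVT and SubVec is the complete result.)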
10236 if (ContainerVecVT.bitsGT(InterSubVT)) 10237 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec, 10238 DAG.getVectorIdxConstant(AlignedIdx, DL)); 10239 10240 if (VecVT.isFixedLengthVector()) 10241 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 10242 10243 // We might have bitcast from a mask type: cast back to the original type if 10244 // required. 10245 return DAG.getBitcast(Op.getSimpleValueType(), SubVec); 10246 } 10247 10248 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 10249 SelectionDAG &DAG) const { 10250 SDValue Vec = Op.getOperand(0); 10251 MVT SubVecVT = Op.getSimpleValueType(); 10252 MVT VecVT = Vec.getSimpleValueType(); 10253 10254 SDLoc DL(Op); 10255 MVT XLenVT = Subtarget.getXLenVT(); 10256 unsigned OrigIdx = Op.getConstantOperandVal(1); 10257 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 10258 10259 // We don't have the ability to slide mask vectors down indexed by their i1 10260 // elements; the smallest we can do is i8. Often we are able to bitcast to 10261 // equivalent i8 vectors. Note that when extracting a fixed-length vector 10262 // from a scalable one, we might not necessarily have enough scalable 10263 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 10264 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 10265 if (VecVT.getVectorMinNumElements() >= 8 && 10266 SubVecVT.getVectorMinNumElements() >= 8) { 10267 assert(OrigIdx % 8 == 0 && "Invalid index"); 10268 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 10269 SubVecVT.getVectorMinNumElements() % 8 == 0 && 10270 "Unexpected mask vector lowering"); 10271 OrigIdx /= 8; 10272 SubVecVT = 10273 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 10274 SubVecVT.isScalableVector()); 10275 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 10276 VecVT.isScalableVector()); 10277 Vec = DAG.getBitcast(VecVT, Vec); 10278 } else { 10279 // We can't slide this mask vector down, indexed by its i1 elements. 10280 // This poses a problem when we wish to extract a scalable vector which 10281 // can't be re-expressed as a larger type. Just choose the slow path and 10282 // extend to a larger type, then truncate back down. 10283 // TODO: We could probably improve this when extracting certain fixed 10284 // from fixed, where we can extract as i8 and shift the correct element 10285 // right to reach the desired subvector? 10286 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 10287 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 10288 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 10289 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 10290 Op.getOperand(1)); 10291 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 10292 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 10293 } 10294 } 10295 10296 // With an index of 0 this is a cast-like subvector, which can be performed 10297 // with subregister operations. 10298 if (OrigIdx == 0) 10299 return Op; 10300 10301 const auto VLen = Subtarget.getRealVLen(); 10302 10303 // If the subvector vector is a fixed-length type and we don't know VLEN 10304 // exactly, we cannot use subregister manipulation to simplify the codegen; we 10305 // don't know which register of a LMUL group contains the specific subvector 10306 // as we only know the minimum register size. Therefore we must slide the 10307 // vector group down the full amount. 
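// (For example, extracting a fixed v4i32 starting at element 4 from an LMUL=2
// register group: with VLEN=128 element 4 begins the second register of the
// group, but with VLEN=256 it still lies in the first, so no single
// subregister index is correct for all VLENs.)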
10308 if (SubVecVT.isFixedLengthVector() && !VLen) { 10309 MVT ContainerVT = VecVT; 10310 if (VecVT.isFixedLengthVector()) { 10311 ContainerVT = getContainerForFixedLengthVector(VecVT); 10312 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 10313 } 10314 10315 // Shrink down Vec so we're performing the slidedown on a smaller LMUL. 10316 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; 10317 if (auto ShrunkVT = 10318 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) { 10319 ContainerVT = *ShrunkVT; 10320 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 10321 DAG.getVectorIdxConstant(0, DL)); 10322 } 10323 10324 SDValue Mask = 10325 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 10326 // Set the vector length to only the number of elements we care about. This 10327 // avoids sliding down elements we're going to discard straight away. 10328 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, 10329 Subtarget); 10330 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 10331 SDValue Slidedown = 10332 getVSlidedown(DAG, Subtarget, DL, ContainerVT, 10333 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 10334 // Now we can use a cast-like subvector extract to get the result. 10335 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 10336 DAG.getVectorIdxConstant(0, DL)); 10337 return DAG.getBitcast(Op.getValueType(), Slidedown); 10338 } 10339 10340 if (VecVT.isFixedLengthVector()) { 10341 VecVT = getContainerForFixedLengthVector(VecVT); 10342 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget); 10343 } 10344 10345 MVT ContainerSubVecVT = SubVecVT; 10346 if (SubVecVT.isFixedLengthVector()) 10347 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT); 10348 10349 unsigned SubRegIdx; 10350 ElementCount RemIdx; 10351 // extract_subvector scales the index by vscale if the subvector is scalable, 10352 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if 10353 // we have a fixed length subvector, we need to adjust the index by 1/vscale. 10354 if (SubVecVT.isFixedLengthVector()) { 10355 assert(VLen); 10356 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; 10357 auto Decompose = 10358 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 10359 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI); 10360 SubRegIdx = Decompose.first; 10361 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) + 10362 (OrigIdx % Vscale)); 10363 } else { 10364 auto Decompose = 10365 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 10366 VecVT, ContainerSubVecVT, OrigIdx, TRI); 10367 SubRegIdx = Decompose.first; 10368 RemIdx = ElementCount::getScalable(Decompose.second); 10369 } 10370 10371 // If the Idx has been completely eliminated then this is a subvector extract 10372 // which naturally aligns to a vector register. These can easily be handled 10373 // using subregister manipulation. 10374 if (RemIdx.isZero()) { 10375 if (SubVecVT.isFixedLengthVector()) { 10376 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec); 10377 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget); 10378 } 10379 return Op; 10380 } 10381 10382 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT 10383 // was > M1 then the index would need to be a multiple of VLMAX, and so would 10384 // divide exactly. 
10385 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second || 10386 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1); 10387 10388 // If the vector type is an LMUL-group type, extract a subvector equal to the 10389 // nearest full vector register type. 10390 MVT InterSubVT = VecVT; 10391 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 10392 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and 10393 // we should have successfully decomposed the extract into a subregister. 10394 assert(SubRegIdx != RISCV::NoSubRegister); 10395 InterSubVT = getLMUL1VT(VecVT); 10396 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); 10397 } 10398 10399 // Slide this vector register down by the desired number of elements in order 10400 // to place the desired subvector starting at element 0. 10401 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx); 10402 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 10403 if (SubVecVT.isFixedLengthVector()) 10404 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG, 10405 Subtarget); 10406 SDValue Slidedown = 10407 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), 10408 Vec, SlidedownAmt, Mask, VL); 10409 10410 // Now the vector is in the right position, extract our final subvector. This 10411 // should resolve to a COPY. 10412 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 10413 DAG.getVectorIdxConstant(0, DL)); 10414 10415 // We might have bitcast from a mask type: cast back to the original type if 10416 // required. 10417 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 10418 } 10419 10420 // Widen a vector's operands to i8, then truncate its results back to the 10421 // original type, typically i1. All operand and result types must be the same. 10422 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, 10423 SelectionDAG &DAG) { 10424 MVT VT = N.getSimpleValueType(); 10425 MVT WideVT = VT.changeVectorElementType(MVT::i8); 10426 SmallVector<SDValue, 4> WideOps; 10427 for (SDValue Op : N->ops()) { 10428 assert(Op.getSimpleValueType() == VT && 10429 "Operands and result must be same type"); 10430 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op)); 10431 } 10432 10433 unsigned NumVals = N->getNumValues(); 10434 10435 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( 10436 NumVals, N.getValueType().changeVectorElementType(MVT::i8))); 10437 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps); 10438 SmallVector<SDValue, 4> TruncVals; 10439 for (unsigned I = 0; I < NumVals; I++) { 10440 TruncVals.push_back( 10441 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I), 10442 DAG.getConstant(0, DL, WideVT), ISD::SETNE)); 10443 } 10444 10445 if (TruncVals.size() > 1) 10446 return DAG.getMergeValues(TruncVals, DL); 10447 return TruncVals.front(); 10448 } 10449 10450 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, 10451 SelectionDAG &DAG) const { 10452 SDLoc DL(Op); 10453 MVT VecVT = Op.getSimpleValueType(); 10454 10455 assert(VecVT.isScalableVector() && 10456 "vector_interleave on non-scalable vector!"); 10457 10458 // 1 bit element vectors need to be widened to e8 10459 if (VecVT.getVectorElementType() == MVT::i1) 10460 return widenVectorOpsToi8(Op, DL, DAG); 10461 10462 // If the VT is LMUL=8, we need to split and reassemble. 
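// (The generic path below concatenates the two operands into a vector of twice
// the element count, which for LMUL=8 inputs would require a non-existent
// LMUL=16 type; splitting each operand in half keeps the intermediates legal.)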
10463 if (VecVT.getSizeInBits().getKnownMinValue() == 10464 (8 * RISCV::RVVBitsPerBlock)) { 10465 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 10466 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 10467 EVT SplitVT = Op0Lo.getValueType(); 10468 10469 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 10470 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi); 10471 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 10472 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi); 10473 10474 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 10475 ResLo.getValue(0), ResHi.getValue(0)); 10476 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1), 10477 ResHi.getValue(1)); 10478 return DAG.getMergeValues({Even, Odd}, DL); 10479 } 10480 10481 // Concatenate the two vectors as one vector to deinterleave 10482 MVT ConcatVT = 10483 MVT::getVectorVT(VecVT.getVectorElementType(), 10484 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 10485 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 10486 Op.getOperand(0), Op.getOperand(1)); 10487 10488 // We want to operate on all lanes, so get the mask and VL and mask for it 10489 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); 10490 SDValue Passthru = DAG.getUNDEF(ConcatVT); 10491 10492 // We can deinterleave through vnsrl.wi if the element type is smaller than 10493 // ELEN 10494 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { 10495 SDValue Even = 10496 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); 10497 SDValue Odd = 10498 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG); 10499 return DAG.getMergeValues({Even, Odd}, DL); 10500 } 10501 10502 // For the indices, use the same SEW to avoid an extra vsetvli 10503 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); 10504 // Create a vector of even indices {0, 2, 4, ...} 10505 SDValue EvenIdx = 10506 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); 10507 // Create a vector of odd indices {1, 3, 5, ... } 10508 SDValue OddIdx = 10509 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); 10510 10511 // Gather the even and odd elements into two separate vectors 10512 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 10513 Concat, EvenIdx, Passthru, Mask, VL); 10514 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 10515 Concat, OddIdx, Passthru, Mask, VL); 10516 10517 // Extract the result half of the gather for even and odd 10518 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, 10519 DAG.getVectorIdxConstant(0, DL)); 10520 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide, 10521 DAG.getVectorIdxConstant(0, DL)); 10522 10523 return DAG.getMergeValues({Even, Odd}, DL); 10524 } 10525 10526 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, 10527 SelectionDAG &DAG) const { 10528 SDLoc DL(Op); 10529 MVT VecVT = Op.getSimpleValueType(); 10530 10531 assert(VecVT.isScalableVector() && 10532 "vector_interleave on non-scalable vector!"); 10533 10534 // i1 vectors need to be widened to i8 10535 if (VecVT.getVectorElementType() == MVT::i1) 10536 return widenVectorOpsToi8(Op, DL, DAG); 10537 10538 MVT XLenVT = Subtarget.getXLenVT(); 10539 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); 10540 10541 // If the VT is LMUL=8, we need to split and reassemble. 
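// (As with the deinterleave above, the widened intermediate would need
// LMUL=16. Note that the halves recombine differently here: the low result is
// the interleave of the two operands' low halves, and the high result that of
// their high halves.)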
10542 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10543 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10544 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10545 EVT SplitVT = Op0Lo.getValueType();
10546 
10547 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10548 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10549 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10550 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10551 
10552 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10553 ResLo.getValue(0), ResLo.getValue(1));
10554 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10555 ResHi.getValue(0), ResHi.getValue(1));
10556 return DAG.getMergeValues({Lo, Hi}, DL);
10557 }
10558 
10559 SDValue Interleaved;
10560 
10561 // If the element type is smaller than ELEN, then we can interleave with
10562 // vwaddu.vv and vwmaccu.vx
10563 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10564 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10565 DAG, Subtarget);
10566 } else {
10567 // Otherwise, fall back to using vrgatherei16.vv
10568 MVT ConcatVT =
10569 MVT::getVectorVT(VecVT.getVectorElementType(),
10570 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10571 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10572 Op.getOperand(0), Op.getOperand(1));
10573 
10574 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10575 
10576 // 0 1 2 3 4 5 6 7 ...
10577 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10578 
10579 // 1 1 1 1 1 1 1 1 ...
10580 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10581 
10582 // 1 0 1 0 1 0 1 0 ...
10583 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10584 OddMask = DAG.getSetCC(
10585 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10586 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10587 ISD::CondCode::SETNE);
10588 
10589 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10590 
10591 // Build up the index vector for interleaving the concatenated vector
10592 // 0 0 1 1 2 2 3 3 ...
10593 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10594 // 0 n 1 n+1 2 n+2 3 n+3 ...
10595 Idx =
10596 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10597 
10598 // Then perform the interleave
10599 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10600 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10601 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10602 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10603 }
10604 
10605 // Extract the two halves from the interleaved result
10606 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10607 DAG.getVectorIdxConstant(0, DL));
10608 SDValue Hi = DAG.getNode(
10609 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10610 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10611 
10612 return DAG.getMergeValues({Lo, Hi}, DL);
10613 }
10614 
10615 // Lower step_vector to the vid instruction. Any non-identity step value must
10616 // be accounted for by manual expansion.
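// For example, a step of 4 lowers to vid.v followed by a shift left by 2,
// while a step of 3 lowers to vid.v followed by a multiply by a splat of 3.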
10617 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10618 SelectionDAG &DAG) const {
10619 SDLoc DL(Op);
10620 MVT VT = Op.getSimpleValueType();
10621 assert(VT.isScalableVector() && "Expected scalable vector");
10622 MVT XLenVT = Subtarget.getXLenVT();
10623 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10624 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10625 uint64_t StepValImm = Op.getConstantOperandVal(0);
10626 if (StepValImm != 1) {
10627 if (isPowerOf2_64(StepValImm)) {
10628 SDValue StepVal =
10629 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10630 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10631 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10632 } else {
10633 SDValue StepVal = lowerScalarSplat(
10634 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10635 VL, VT, DL, DAG, Subtarget);
10636 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10637 }
10638 }
10639 return StepVec;
10640 }
10641 
10642 // Implement vector_reverse using vrgather.vv with indices determined by
10643 // subtracting the id of each element from (VLMAX-1). This will convert
10644 // the indices like so:
10645 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10646 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10647 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10648 SelectionDAG &DAG) const {
10649 SDLoc DL(Op);
10650 MVT VecVT = Op.getSimpleValueType();
10651 if (VecVT.getVectorElementType() == MVT::i1) {
10652 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10653 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10654 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10655 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10656 }
10657 unsigned EltSize = VecVT.getScalarSizeInBits();
10658 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10659 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10660 unsigned MaxVLMAX =
10661 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10662 
10663 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10664 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10665 
10666 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10667 // to use vrgatherei16.vv.
10668 // TODO: It's also possible to use vrgatherei16.vv for other types to
10669 // decrease register width for the index calculation.
10670 if (MaxVLMAX > 256 && EltSize == 8) {
10671 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10672 // Reverse each half, then reassemble them in reverse order.
10673 // NOTE: It's also possible that after splitting, VLMAX no longer
10674 // requires vrgatherei16.vv.
10675 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10676 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10677 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10678 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10679 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10680 // Reassemble the low and high pieces reversed.
10681 // FIXME: This is a CONCAT_VECTORS.
10682 SDValue Res = 10683 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 10684 DAG.getVectorIdxConstant(0, DL)); 10685 return DAG.getNode( 10686 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 10687 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); 10688 } 10689 10690 // Just promote the int type to i16 which will double the LMUL. 10691 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 10692 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 10693 } 10694 10695 MVT XLenVT = Subtarget.getXLenVT(); 10696 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 10697 10698 // Calculate VLMAX-1 for the desired SEW. 10699 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT, 10700 computeVLMax(VecVT, DL, DAG), 10701 DAG.getConstant(1, DL, XLenVT)); 10702 10703 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 10704 bool IsRV32E64 = 10705 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 10706 SDValue SplatVL; 10707 if (!IsRV32E64) 10708 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 10709 else 10710 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), 10711 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); 10712 10713 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 10714 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, 10715 DAG.getUNDEF(IntVT), Mask, VL); 10716 10717 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, 10718 DAG.getUNDEF(VecVT), Mask, VL); 10719 } 10720 10721 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, 10722 SelectionDAG &DAG) const { 10723 SDLoc DL(Op); 10724 SDValue V1 = Op.getOperand(0); 10725 SDValue V2 = Op.getOperand(1); 10726 MVT XLenVT = Subtarget.getXLenVT(); 10727 MVT VecVT = Op.getSimpleValueType(); 10728 10729 SDValue VLMax = computeVLMax(VecVT, DL, DAG); 10730 10731 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 10732 SDValue DownOffset, UpOffset; 10733 if (ImmValue >= 0) { 10734 // The operand is a TargetConstant, we need to rebuild it as a regular 10735 // constant. 10736 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 10737 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); 10738 } else { 10739 // The operand is a TargetConstant, we need to rebuild it as a regular 10740 // constant rather than negating the original operand. 
10741 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10742 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10743 }
10744 
10745 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10746 
10747 SDValue SlideDown =
10748 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10749 DownOffset, TrueMask, UpOffset);
10750 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10751 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10752 RISCVII::TAIL_AGNOSTIC);
10753 }
10754 
10755 SDValue
10756 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10757 SelectionDAG &DAG) const {
10758 SDLoc DL(Op);
10759 auto *Load = cast<LoadSDNode>(Op);
10760 
10761 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10762 Load->getMemoryVT(),
10763 *Load->getMemOperand()) &&
10764 "Expecting a correctly-aligned load");
10765 
10766 MVT VT = Op.getSimpleValueType();
10767 MVT XLenVT = Subtarget.getXLenVT();
10768 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10769 
10770 // If we know the exact VLEN and our fixed length vector completely fills
10771 // the container, use a whole register load instead.
10772 const auto [MinVLMAX, MaxVLMAX] =
10773 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10774 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10775 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10776 MachineMemOperand *MMO = Load->getMemOperand();
10777 SDValue NewLoad =
10778 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10779 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10780 MMO->getAAInfo(), MMO->getRanges());
10781 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10782 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10783 }
10784 
10785 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10786 
10787 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10788 SDValue IntID = DAG.getTargetConstant(
10789 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10790 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10791 if (!IsMaskOp)
10792 Ops.push_back(DAG.getUNDEF(ContainerVT));
10793 Ops.push_back(Load->getBasePtr());
10794 Ops.push_back(VL);
10795 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10796 SDValue NewLoad =
10797 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10798 Load->getMemoryVT(), Load->getMemOperand());
10799 
10800 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10801 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10802 }
10803 
10804 SDValue
10805 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10806 SelectionDAG &DAG) const {
10807 SDLoc DL(Op);
10808 auto *Store = cast<StoreSDNode>(Op);
10809 
10810 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10811 Store->getMemoryVT(),
10812 *Store->getMemOperand()) &&
10813 "Expecting a correctly-aligned store");
10814 
10815 SDValue StoreVal = Store->getValue();
10816 MVT VT = StoreVal.getSimpleValueType();
10817 MVT XLenVT = Subtarget.getXLenVT();
10818 
10819 // If the size is less than a byte, we need to pad with zeros to make a byte.
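// (For example, a v4i1 store value is widened to v8i1 by inserting it at
// index 0 of an all-zeros v8i1 vector before lowering the store itself.)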
10820 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { 10821 VT = MVT::v8i1; 10822 StoreVal = 10823 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT), 10824 StoreVal, DAG.getVectorIdxConstant(0, DL)); 10825 } 10826 10827 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10828 10829 SDValue NewValue = 10830 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 10831 10832 10833 // If we know the exact VLEN and our fixed length vector completely fills 10834 // the container, use a whole register store instead. 10835 const auto [MinVLMAX, MaxVLMAX] = 10836 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 10837 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && 10838 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { 10839 MachineMemOperand *MMO = Store->getMemOperand(); 10840 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), 10841 MMO->getPointerInfo(), MMO->getBaseAlign(), 10842 MMO->getFlags(), MMO->getAAInfo()); 10843 } 10844 10845 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 10846 Subtarget); 10847 10848 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 10849 SDValue IntID = DAG.getTargetConstant( 10850 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); 10851 return DAG.getMemIntrinsicNode( 10852 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 10853 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, 10854 Store->getMemoryVT(), Store->getMemOperand()); 10855 } 10856 10857 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, 10858 SelectionDAG &DAG) const { 10859 SDLoc DL(Op); 10860 MVT VT = Op.getSimpleValueType(); 10861 10862 const auto *MemSD = cast<MemSDNode>(Op); 10863 EVT MemVT = MemSD->getMemoryVT(); 10864 MachineMemOperand *MMO = MemSD->getMemOperand(); 10865 SDValue Chain = MemSD->getChain(); 10866 SDValue BasePtr = MemSD->getBasePtr(); 10867 10868 SDValue Mask, PassThru, VL; 10869 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { 10870 Mask = VPLoad->getMask(); 10871 PassThru = DAG.getUNDEF(VT); 10872 VL = VPLoad->getVectorLength(); 10873 } else { 10874 const auto *MLoad = cast<MaskedLoadSDNode>(Op); 10875 Mask = MLoad->getMask(); 10876 PassThru = MLoad->getPassThru(); 10877 } 10878 10879 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10880 10881 MVT XLenVT = Subtarget.getXLenVT(); 10882 10883 MVT ContainerVT = VT; 10884 if (VT.isFixedLengthVector()) { 10885 ContainerVT = getContainerForFixedLengthVector(VT); 10886 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 10887 if (!IsUnmasked) { 10888 MVT MaskVT = getMaskTypeFor(ContainerVT); 10889 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10890 } 10891 } 10892 10893 if (!VL) 10894 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10895 10896 unsigned IntID = 10897 IsUnmasked ? 
Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 10898 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 10899 if (IsUnmasked) 10900 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10901 else 10902 Ops.push_back(PassThru); 10903 Ops.push_back(BasePtr); 10904 if (!IsUnmasked) 10905 Ops.push_back(Mask); 10906 Ops.push_back(VL); 10907 if (!IsUnmasked) 10908 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 10909 10910 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10911 10912 SDValue Result = 10913 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 10914 Chain = Result.getValue(1); 10915 10916 if (VT.isFixedLengthVector()) 10917 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 10918 10919 return DAG.getMergeValues({Result, Chain}, DL); 10920 } 10921 10922 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 10923 SelectionDAG &DAG) const { 10924 SDLoc DL(Op); 10925 10926 const auto *MemSD = cast<MemSDNode>(Op); 10927 EVT MemVT = MemSD->getMemoryVT(); 10928 MachineMemOperand *MMO = MemSD->getMemOperand(); 10929 SDValue Chain = MemSD->getChain(); 10930 SDValue BasePtr = MemSD->getBasePtr(); 10931 SDValue Val, Mask, VL; 10932 10933 bool IsCompressingStore = false; 10934 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 10935 Val = VPStore->getValue(); 10936 Mask = VPStore->getMask(); 10937 VL = VPStore->getVectorLength(); 10938 } else { 10939 const auto *MStore = cast<MaskedStoreSDNode>(Op); 10940 Val = MStore->getValue(); 10941 Mask = MStore->getMask(); 10942 IsCompressingStore = MStore->isCompressingStore(); 10943 } 10944 10945 bool IsUnmasked = 10946 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore; 10947 10948 MVT VT = Val.getSimpleValueType(); 10949 MVT XLenVT = Subtarget.getXLenVT(); 10950 10951 MVT ContainerVT = VT; 10952 if (VT.isFixedLengthVector()) { 10953 ContainerVT = getContainerForFixedLengthVector(VT); 10954 10955 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 10956 if (!IsUnmasked || IsCompressingStore) { 10957 MVT MaskVT = getMaskTypeFor(ContainerVT); 10958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10959 } 10960 } 10961 10962 if (!VL) 10963 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10964 10965 if (IsCompressingStore) { 10966 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT, 10967 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT), 10968 DAG.getUNDEF(ContainerVT), Val, Mask, VL); 10969 VL = 10970 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask, 10971 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL); 10972 } 10973 10974 unsigned IntID = 10975 IsUnmasked ? 
Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; 10976 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 10977 Ops.push_back(Val); 10978 Ops.push_back(BasePtr); 10979 if (!IsUnmasked) 10980 Ops.push_back(Mask); 10981 Ops.push_back(VL); 10982 10983 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 10984 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 10985 } 10986 10987 SDValue 10988 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 10989 SelectionDAG &DAG) const { 10990 MVT InVT = Op.getOperand(0).getSimpleValueType(); 10991 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 10992 10993 MVT VT = Op.getSimpleValueType(); 10994 10995 SDValue Op1 = 10996 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 10997 SDValue Op2 = 10998 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 10999 11000 SDLoc DL(Op); 11001 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, 11002 DAG, Subtarget); 11003 MVT MaskVT = getMaskTypeFor(ContainerVT); 11004 11005 SDValue Cmp = 11006 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 11007 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); 11008 11009 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 11010 } 11011 11012 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, 11013 SelectionDAG &DAG) const { 11014 unsigned Opc = Op.getOpcode(); 11015 SDLoc DL(Op); 11016 SDValue Chain = Op.getOperand(0); 11017 SDValue Op1 = Op.getOperand(1); 11018 SDValue Op2 = Op.getOperand(2); 11019 SDValue CC = Op.getOperand(3); 11020 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 11021 MVT VT = Op.getSimpleValueType(); 11022 MVT InVT = Op1.getSimpleValueType(); 11023 11024 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE 11025 // condition code. 11026 if (Opc == ISD::STRICT_FSETCCS) { 11027 // Expand strict_fsetccs(x, oeq) to 11028 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole)) 11029 SDVTList VTList = Op->getVTList(); 11030 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { 11031 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE); 11032 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, 11033 Op2, OLECCVal); 11034 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2, 11035 Op1, OLECCVal); 11036 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 11037 Tmp1.getValue(1), Tmp2.getValue(1)); 11038 // Tmp1 and Tmp2 might be the same node. 
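// (For instance, if Op1 and Op2 are the same value, the two swapped-operand
// compares CSE to a single node and the AND below would be redundant.)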
11039 if (Tmp1 != Tmp2)
11040 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11041 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11042 }
11043 
11044 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11045 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11046 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11047 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11048 Op2, OEQCCVal);
11049 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11050 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11051 }
11052 }
11053 
11054 MVT ContainerInVT = InVT;
11055 if (InVT.isFixedLengthVector()) {
11056 ContainerInVT = getContainerForFixedLengthVector(InVT);
11057 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11058 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11059 }
11060 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11061 
11062 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11063 
11064 SDValue Res;
11065 if (Opc == ISD::STRICT_FSETCC &&
11066 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11067 CCVal == ISD::SETOLE)) {
11068 // VMFLT/VMFLE/VMFGT/VMFGE raise exceptions for qNan. Generate a mask that
11069 // is only active when both input elements are ordered.
11070 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11071 SDValue OrderMask1 = DAG.getNode(
11072 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11073 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11074 True, VL});
11075 SDValue OrderMask2 = DAG.getNode(
11076 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11077 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11078 True, VL});
11079 Mask =
11080 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11081 // Use Mask as the merge operand to let the result be 0 if either of the
11082 // inputs is unordered.
11083 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11084 DAG.getVTList(MaskVT, MVT::Other),
11085 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11086 } else {
11087 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11088 : RISCVISD::STRICT_FSETCCS_VL;
11089 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11090 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11091 }
11092 
11093 if (VT.isFixedLengthVector()) {
11094 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11095 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11096 }
11097 return Res;
11098 }
11099 
11100 // Lower vector ABS to smax(X, sub(0, X)).
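// For example, abs(-7) becomes smax(-7, 0 - (-7)) = smax(-7, 7) = 7; note that
// INT_MIN maps to itself since its negation wraps back to INT_MIN.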
11101 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 11102 SDLoc DL(Op); 11103 MVT VT = Op.getSimpleValueType(); 11104 SDValue X = Op.getOperand(0); 11105 11106 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && 11107 "Unexpected type for ISD::ABS"); 11108 11109 MVT ContainerVT = VT; 11110 if (VT.isFixedLengthVector()) { 11111 ContainerVT = getContainerForFixedLengthVector(VT); 11112 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 11113 } 11114 11115 SDValue Mask, VL; 11116 if (Op->getOpcode() == ISD::VP_ABS) { 11117 Mask = Op->getOperand(1); 11118 if (VT.isFixedLengthVector()) 11119 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 11120 Subtarget); 11121 VL = Op->getOperand(2); 11122 } else 11123 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 11124 11125 SDValue SplatZero = DAG.getNode( 11126 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 11127 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 11128 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, 11129 DAG.getUNDEF(ContainerVT), Mask, VL); 11130 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, 11131 DAG.getUNDEF(ContainerVT), Mask, VL); 11132 11133 if (VT.isFixedLengthVector()) 11134 Max = convertFromScalableVector(VT, Max, DAG, Subtarget); 11135 return Max; 11136 } 11137 11138 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 11139 SDValue Op, SelectionDAG &DAG) const { 11140 SDLoc DL(Op); 11141 MVT VT = Op.getSimpleValueType(); 11142 SDValue Mag = Op.getOperand(0); 11143 SDValue Sign = Op.getOperand(1); 11144 assert(Mag.getValueType() == Sign.getValueType() && 11145 "Can only handle COPYSIGN with matching types."); 11146 11147 MVT ContainerVT = getContainerForFixedLengthVector(VT); 11148 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 11149 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 11150 11151 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 11152 11153 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, 11154 Sign, DAG.getUNDEF(ContainerVT), Mask, VL); 11155 11156 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 11157 } 11158 11159 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 11160 SDValue Op, SelectionDAG &DAG) const { 11161 MVT VT = Op.getSimpleValueType(); 11162 MVT ContainerVT = getContainerForFixedLengthVector(VT); 11163 11164 MVT I1ContainerVT = 11165 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 11166 11167 SDValue CC = 11168 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 11169 SDValue Op1 = 11170 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 11171 SDValue Op2 = 11172 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 11173 11174 SDLoc DL(Op); 11175 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11176 11177 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, 11178 Op2, DAG.getUNDEF(ContainerVT), VL); 11179 11180 return convertFromScalableVector(VT, Select, DAG, Subtarget); 11181 } 11182 11183 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, 11184 SelectionDAG &DAG) const { 11185 unsigned NewOpc = getRISCVVLOp(Op); 11186 bool HasMergeOp = hasMergeOp(NewOpc); 11187 bool HasMask = hasMaskOp(NewOpc); 11188 11189 MVT VT = Op.getSimpleValueType(); 11190 MVT 
ContainerVT = getContainerForFixedLengthVector(VT); 11191 11192 // Create list of operands by converting existing ones to scalable types. 11193 SmallVector<SDValue, 6> Ops; 11194 for (const SDValue &V : Op->op_values()) { 11195 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 11196 11197 // Pass through non-vector operands. 11198 if (!V.getValueType().isVector()) { 11199 Ops.push_back(V); 11200 continue; 11201 } 11202 11203 // "cast" fixed length vector to a scalable vector. 11204 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 11205 "Only fixed length vectors are supported!"); 11206 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 11207 } 11208 11209 SDLoc DL(Op); 11210 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 11211 if (HasMergeOp) 11212 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11213 if (HasMask) 11214 Ops.push_back(Mask); 11215 Ops.push_back(VL); 11216 11217 // StrictFP operations have two result values. Their lowered result should 11218 // have same result count. 11219 if (Op->isStrictFPOpcode()) { 11220 SDValue ScalableRes = 11221 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, 11222 Op->getFlags()); 11223 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 11224 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL); 11225 } 11226 11227 SDValue ScalableRes = 11228 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); 11229 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 11230 } 11231 11232 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 11233 // * Operands of each node are assumed to be in the same order. 11234 // * The EVL operand is promoted from i32 to i64 on RV64. 11235 // * Fixed-length vectors are converted to their scalable-vector container 11236 // types. 11237 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { 11238 unsigned RISCVISDOpc = getRISCVVLOp(Op); 11239 bool HasMergeOp = hasMergeOp(RISCVISDOpc); 11240 11241 SDLoc DL(Op); 11242 MVT VT = Op.getSimpleValueType(); 11243 SmallVector<SDValue, 4> Ops; 11244 11245 MVT ContainerVT = VT; 11246 if (VT.isFixedLengthVector()) 11247 ContainerVT = getContainerForFixedLengthVector(VT); 11248 11249 for (const auto &OpIdx : enumerate(Op->ops())) { 11250 SDValue V = OpIdx.value(); 11251 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 11252 // Add dummy merge value before the mask. Or if there isn't a mask, before 11253 // EVL. 11254 if (HasMergeOp) { 11255 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode()); 11256 if (MaskIdx) { 11257 if (*MaskIdx == OpIdx.index()) 11258 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11259 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == 11260 OpIdx.index()) { 11261 if (Op.getOpcode() == ISD::VP_MERGE) { 11262 // For VP_MERGE, copy the false operand instead of an undef value. 11263 Ops.push_back(Ops.back()); 11264 } else { 11265 assert(Op.getOpcode() == ISD::VP_SELECT); 11266 // For VP_SELECT, add an undef value. 11267 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11268 } 11269 } 11270 } 11271 // Pass through operands which aren't fixed-length vectors. 11272 if (!V.getValueType().isFixedLengthVector()) { 11273 Ops.push_back(V); 11274 continue; 11275 } 11276 // "cast" fixed length vector to a scalable vector. 
11277 MVT OpVT = V.getSimpleValueType(); 11278 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 11279 assert(useRVVForFixedLengthVectorVT(OpVT) && 11280 "Only fixed length vectors are supported!"); 11281 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 11282 } 11283 11284 if (!VT.isFixedLengthVector()) 11285 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); 11286 11287 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); 11288 11289 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 11290 } 11291 11292 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, 11293 SelectionDAG &DAG) const { 11294 SDLoc DL(Op); 11295 MVT VT = Op.getSimpleValueType(); 11296 11297 SDValue Src = Op.getOperand(0); 11298 // NOTE: Mask is dropped. 11299 SDValue VL = Op.getOperand(2); 11300 11301 MVT ContainerVT = VT; 11302 if (VT.isFixedLengthVector()) { 11303 ContainerVT = getContainerForFixedLengthVector(VT); 11304 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 11305 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 11306 } 11307 11308 MVT XLenVT = Subtarget.getXLenVT(); 11309 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 11310 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11311 DAG.getUNDEF(ContainerVT), Zero, VL); 11312 11313 SDValue SplatValue = DAG.getConstant( 11314 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); 11315 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11316 DAG.getUNDEF(ContainerVT), SplatValue, VL); 11317 11318 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat, 11319 ZeroSplat, DAG.getUNDEF(ContainerVT), VL); 11320 if (!VT.isFixedLengthVector()) 11321 return Result; 11322 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11323 } 11324 11325 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, 11326 SelectionDAG &DAG) const { 11327 SDLoc DL(Op); 11328 MVT VT = Op.getSimpleValueType(); 11329 11330 SDValue Op1 = Op.getOperand(0); 11331 SDValue Op2 = Op.getOperand(1); 11332 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 11333 // NOTE: Mask is dropped. 
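// (Lanes disabled by the mask of a VP operation are not required to hold any
// particular value, so it is fine to compute them with unmasked mask-logic
// nodes here.)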
11334 SDValue VL = Op.getOperand(4); 11335 11336 MVT ContainerVT = VT; 11337 if (VT.isFixedLengthVector()) { 11338 ContainerVT = getContainerForFixedLengthVector(VT); 11339 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 11340 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 11341 } 11342 11343 SDValue Result; 11344 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 11345 11346 switch (Condition) { 11347 default: 11348 break; 11349 // X != Y --> (X^Y) 11350 case ISD::SETNE: 11351 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 11352 break; 11353 // X == Y --> ~(X^Y) 11354 case ISD::SETEQ: { 11355 SDValue Temp = 11356 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 11357 Result = 11358 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); 11359 break; 11360 } 11361 // X >s Y --> X == 0 & Y == 1 --> ~X & Y 11362 // X <u Y --> X == 0 & Y == 1 --> ~X & Y 11363 case ISD::SETGT: 11364 case ISD::SETULT: { 11365 SDValue Temp = 11366 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 11367 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); 11368 break; 11369 } 11370 // X <s Y --> X == 1 & Y == 0 --> ~Y & X 11371 // X >u Y --> X == 1 & Y == 0 --> ~Y & X 11372 case ISD::SETLT: 11373 case ISD::SETUGT: { 11374 SDValue Temp = 11375 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 11376 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); 11377 break; 11378 } 11379 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 11380 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 11381 case ISD::SETGE: 11382 case ISD::SETULE: { 11383 SDValue Temp = 11384 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 11385 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); 11386 break; 11387 } 11388 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 11389 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 11390 case ISD::SETLE: 11391 case ISD::SETUGE: { 11392 SDValue Temp = 11393 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 11394 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); 11395 break; 11396 } 11397 } 11398 11399 if (!VT.isFixedLengthVector()) 11400 return Result; 11401 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11402 } 11403 11404 // Lower Floating-Point/Integer Type-Convert VP SDNodes 11405 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, 11406 SelectionDAG &DAG) const { 11407 SDLoc DL(Op); 11408 11409 SDValue Src = Op.getOperand(0); 11410 SDValue Mask = Op.getOperand(1); 11411 SDValue VL = Op.getOperand(2); 11412 unsigned RISCVISDOpc = getRISCVVLOp(Op); 11413 11414 MVT DstVT = Op.getSimpleValueType(); 11415 MVT SrcVT = Src.getSimpleValueType(); 11416 if (DstVT.isFixedLengthVector()) { 11417 DstVT = getContainerForFixedLengthVector(DstVT); 11418 SrcVT = getContainerForFixedLengthVector(SrcVT); 11419 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 11420 MVT MaskVT = getMaskTypeFor(DstVT); 11421 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11422 } 11423 11424 unsigned DstEltSize = DstVT.getScalarSizeInBits(); 11425 unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); 11426 11427 SDValue Result; 11428 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. 
11429 if (SrcVT.isInteger()) { 11430 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 11431 11432 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL 11433 ? RISCVISD::VSEXT_VL 11434 : RISCVISD::VZEXT_VL; 11435 11436 // Do we need to do any pre-widening before converting? 11437 if (SrcEltSize == 1) { 11438 MVT IntVT = DstVT.changeVectorElementTypeToInteger(); 11439 MVT XLenVT = Subtarget.getXLenVT(); 11440 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 11441 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 11442 DAG.getUNDEF(IntVT), Zero, VL); 11443 SDValue One = DAG.getConstant( 11444 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); 11445 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 11446 DAG.getUNDEF(IntVT), One, VL); 11447 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat, 11448 ZeroSplat, DAG.getUNDEF(IntVT), VL); 11449 } else if (DstEltSize > (2 * SrcEltSize)) { 11450 // Widen before converting. 11451 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), 11452 DstVT.getVectorElementCount()); 11453 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); 11454 } 11455 11456 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 11457 } else { 11458 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 11459 "Wrong input/output vector types"); 11460 11461 // Convert f16 to f32 then convert f32 to i64. 11462 if (DstEltSize > (2 * SrcEltSize)) { 11463 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 11464 MVT InterimFVT = 11465 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 11466 Src = 11467 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); 11468 } 11469 11470 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 11471 } 11472 } else { // Narrowing + Conversion 11473 if (SrcVT.isInteger()) { 11474 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 11475 // First do a narrowing convert to an FP type half the size, then round 11476 // the FP type to a small FP type if needed. 11477 11478 MVT InterimFVT = DstVT; 11479 if (SrcEltSize > (2 * DstEltSize)) { 11480 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); 11481 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 11482 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 11483 } 11484 11485 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); 11486 11487 if (InterimFVT != DstVT) { 11488 Src = Result; 11489 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); 11490 } 11491 } else { 11492 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 11493 "Wrong input/output vector types"); 11494 // First do a narrowing conversion to an integer half the size, then 11495 // truncate if needed. 11496 11497 if (DstEltSize == 1) { 11498 // First convert to the same size integer, then convert to mask using 11499 // setcc. 11500 assert(SrcEltSize >= 16 && "Unexpected FP type!"); 11501 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), 11502 DstVT.getVectorElementCount()); 11503 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 11504 11505 // Compare the integer result to 0. The integer should be 0 or 1/-1, 11506 // otherwise the conversion was undefined. 
11507 MVT XLenVT = Subtarget.getXLenVT(); 11508 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 11509 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, 11510 DAG.getUNDEF(InterimIVT), SplatZero, VL); 11511 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, 11512 {Result, SplatZero, DAG.getCondCode(ISD::SETNE), 11513 DAG.getUNDEF(DstVT), Mask, VL}); 11514 } else { 11515 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 11516 DstVT.getVectorElementCount()); 11517 11518 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 11519 11520 while (InterimIVT != DstVT) { 11521 SrcEltSize /= 2; 11522 Src = Result; 11523 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 11524 DstVT.getVectorElementCount()); 11525 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, 11526 Src, Mask, VL); 11527 } 11528 } 11529 } 11530 } 11531 11532 MVT VT = Op.getSimpleValueType(); 11533 if (!VT.isFixedLengthVector()) 11534 return Result; 11535 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11536 } 11537 11538 SDValue 11539 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, 11540 SelectionDAG &DAG) const { 11541 SDLoc DL(Op); 11542 11543 SDValue Op1 = Op.getOperand(0); 11544 SDValue Op2 = Op.getOperand(1); 11545 SDValue Offset = Op.getOperand(2); 11546 SDValue Mask = Op.getOperand(3); 11547 SDValue EVL1 = Op.getOperand(4); 11548 SDValue EVL2 = Op.getOperand(5); 11549 11550 const MVT XLenVT = Subtarget.getXLenVT(); 11551 MVT VT = Op.getSimpleValueType(); 11552 MVT ContainerVT = VT; 11553 if (VT.isFixedLengthVector()) { 11554 ContainerVT = getContainerForFixedLengthVector(VT); 11555 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 11556 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 11557 MVT MaskVT = getMaskTypeFor(ContainerVT); 11558 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11559 } 11560 11561 // EVL1 may need to be extended to XLenVT with RV64LegalI32. 11562 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1); 11563 11564 bool IsMaskVector = VT.getVectorElementType() == MVT::i1; 11565 if (IsMaskVector) { 11566 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); 11567 11568 // Expand input operands 11569 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11570 DAG.getUNDEF(ContainerVT), 11571 DAG.getConstant(1, DL, XLenVT), EVL1); 11572 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11573 DAG.getUNDEF(ContainerVT), 11574 DAG.getConstant(0, DL, XLenVT), EVL1); 11575 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1, 11576 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1); 11577 11578 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11579 DAG.getUNDEF(ContainerVT), 11580 DAG.getConstant(1, DL, XLenVT), EVL2); 11581 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11582 DAG.getUNDEF(ContainerVT), 11583 DAG.getConstant(0, DL, XLenVT), EVL2); 11584 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2, 11585 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); 11586 } 11587 11588 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); 11589 SDValue DownOffset, UpOffset; 11590 if (ImmValue >= 0) { 11591 // The operand is a TargetConstant, we need to rebuild it as a regular 11592 // constant. 
11593 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 11594 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset); 11595 } else { 11596 // The operand is a TargetConstant, we need to rebuild it as a regular 11597 // constant rather than negating the original operand. 11598 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 11599 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset); 11600 } 11601 11602 SDValue SlideDown = 11603 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 11604 Op1, DownOffset, Mask, UpOffset); 11605 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2, 11606 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC); 11607 11608 if (IsMaskVector) { 11609 // Truncate Result back to a mask vector (Result has same EVL as Op2) 11610 Result = DAG.getNode( 11611 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), 11612 {Result, DAG.getConstant(0, DL, ContainerVT), 11613 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), 11614 Mask, EVL2}); 11615 } 11616 11617 if (!VT.isFixedLengthVector()) 11618 return Result; 11619 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11620 } 11621 11622 SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op, 11623 SelectionDAG &DAG) const { 11624 SDLoc DL(Op); 11625 SDValue Val = Op.getOperand(0); 11626 SDValue Mask = Op.getOperand(1); 11627 SDValue VL = Op.getOperand(2); 11628 MVT VT = Op.getSimpleValueType(); 11629 11630 MVT ContainerVT = VT; 11631 if (VT.isFixedLengthVector()) { 11632 ContainerVT = getContainerForFixedLengthVector(VT); 11633 MVT MaskVT = getMaskTypeFor(ContainerVT); 11634 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11635 } 11636 11637 SDValue Result = 11638 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget); 11639 11640 if (!VT.isFixedLengthVector()) 11641 return Result; 11642 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11643 } 11644 11645 SDValue 11646 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, 11647 SelectionDAG &DAG) const { 11648 SDLoc DL(Op); 11649 MVT VT = Op.getSimpleValueType(); 11650 MVT XLenVT = Subtarget.getXLenVT(); 11651 11652 SDValue Op1 = Op.getOperand(0); 11653 SDValue Mask = Op.getOperand(1); 11654 SDValue EVL = Op.getOperand(2); 11655 11656 MVT ContainerVT = VT; 11657 if (VT.isFixedLengthVector()) { 11658 ContainerVT = getContainerForFixedLengthVector(VT); 11659 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 11660 MVT MaskVT = getMaskTypeFor(ContainerVT); 11661 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11662 } 11663 11664 MVT GatherVT = ContainerVT; 11665 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); 11666 // Check if we are working with mask vectors 11667 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; 11668 if (IsMaskVector) { 11669 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); 11670 11671 // Expand input operand 11672 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 11673 DAG.getUNDEF(IndicesVT), 11674 DAG.getConstant(1, DL, XLenVT), EVL); 11675 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 11676 DAG.getUNDEF(IndicesVT), 11677 DAG.getConstant(0, DL, XLenVT), EVL); 11678 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne, 11679 SplatZero, DAG.getUNDEF(IndicesVT), EVL); 11680 } 11681 11682 unsigned EltSize = GatherVT.getScalarSizeInBits(); 11683 unsigned MinSize 
= GatherVT.getSizeInBits().getKnownMinValue(); 11684 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 11685 unsigned MaxVLMAX = 11686 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 11687 11688 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 11689 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 11690 // to use vrgatherei16.vv. 11691 // TODO: It's also possible to use vrgatherei16.vv for other types to 11692 // decrease register width for the index calculation. 11693 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 11694 if (MaxVLMAX > 256 && EltSize == 8) { 11695 // If this is LMUL=8, we have to split before using vrgatherei16.vv. 11696 // Split the vector in half and reverse each half using a full register 11697 // reverse. 11698 // Swap the halves and concatenate them. 11699 // Slide the concatenated result by (VLMax - VL). 11700 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 11701 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); 11702 auto [Lo, Hi] = DAG.SplitVector(Op1, DL); 11703 11704 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 11705 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 11706 11707 // Reassemble the low and high pieces reversed. 11708 // NOTE: this Result is unmasked (because we do not need masks for 11709 // shuffles). If in the future this has to change, we can use a SELECT_VL 11710 // between Result and UNDEF using the mask originally passed to VP_REVERSE 11711 SDValue Result = 11712 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); 11713 11714 // Slide off any elements from past EVL that were reversed into the low 11715 // elements. 11716 unsigned MinElts = GatherVT.getVectorMinNumElements(); 11717 SDValue VLMax = 11718 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts)); 11719 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); 11720 11721 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, 11722 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); 11723 11724 if (IsMaskVector) { 11725 // Truncate Result back to a mask vector 11726 Result = 11727 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 11728 {Result, DAG.getConstant(0, DL, GatherVT), 11729 DAG.getCondCode(ISD::SETNE), 11730 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 11731 } 11732 11733 if (!VT.isFixedLengthVector()) 11734 return Result; 11735 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11736 } 11737 11738 // Just promote the int type to i16 which will double the LMUL. 
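// For example, nxv32i8 indices are promoted to nxv32i16 here so that
// vrgatherei16.vv can address every element when VLMAX may exceed 256 (the
// LMUL=8 SEW=8 case was already split above).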
11739 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); 11740 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 11741 } 11742 11743 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); 11744 SDValue VecLen = 11745 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); 11746 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 11747 DAG.getUNDEF(IndicesVT), VecLen, EVL); 11748 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, 11749 DAG.getUNDEF(IndicesVT), Mask, EVL); 11750 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, 11751 DAG.getUNDEF(GatherVT), Mask, EVL); 11752 11753 if (IsMaskVector) { 11754 // Truncate Result back to a mask vector 11755 Result = DAG.getNode( 11756 RISCVISD::SETCC_VL, DL, ContainerVT, 11757 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), 11758 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 11759 } 11760 11761 if (!VT.isFixedLengthVector()) 11762 return Result; 11763 return convertFromScalableVector(VT, Result, DAG, Subtarget); 11764 } 11765 11766 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, 11767 SelectionDAG &DAG) const { 11768 MVT VT = Op.getSimpleValueType(); 11769 if (VT.getVectorElementType() != MVT::i1) 11770 return lowerVPOp(Op, DAG); 11771 11772 // It is safe to drop mask parameter as masked-off elements are undef. 11773 SDValue Op1 = Op->getOperand(0); 11774 SDValue Op2 = Op->getOperand(1); 11775 SDValue VL = Op->getOperand(3); 11776 11777 MVT ContainerVT = VT; 11778 const bool IsFixed = VT.isFixedLengthVector(); 11779 if (IsFixed) { 11780 ContainerVT = getContainerForFixedLengthVector(VT); 11781 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 11782 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 11783 } 11784 11785 SDLoc DL(Op); 11786 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL); 11787 if (!IsFixed) 11788 return Val; 11789 return convertFromScalableVector(VT, Val, DAG, Subtarget); 11790 } 11791 11792 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, 11793 SelectionDAG &DAG) const { 11794 SDLoc DL(Op); 11795 MVT XLenVT = Subtarget.getXLenVT(); 11796 MVT VT = Op.getSimpleValueType(); 11797 MVT ContainerVT = VT; 11798 if (VT.isFixedLengthVector()) 11799 ContainerVT = getContainerForFixedLengthVector(VT); 11800 11801 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 11802 11803 auto *VPNode = cast<VPStridedLoadSDNode>(Op); 11804 // Check if the mask is known to be all ones 11805 SDValue Mask = VPNode->getMask(); 11806 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11807 11808 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
Intrinsic::riscv_vlse 11809 : Intrinsic::riscv_vlse_mask, 11810 DL, XLenVT); 11811 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, 11812 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), 11813 VPNode->getStride()}; 11814 if (!IsUnmasked) { 11815 if (VT.isFixedLengthVector()) { 11816 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11817 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11818 } 11819 Ops.push_back(Mask); 11820 } 11821 Ops.push_back(VPNode->getVectorLength()); 11822 if (!IsUnmasked) { 11823 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 11824 Ops.push_back(Policy); 11825 } 11826 11827 SDValue Result = 11828 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 11829 VPNode->getMemoryVT(), VPNode->getMemOperand()); 11830 SDValue Chain = Result.getValue(1); 11831 11832 if (VT.isFixedLengthVector()) 11833 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11834 11835 return DAG.getMergeValues({Result, Chain}, DL); 11836 } 11837 11838 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, 11839 SelectionDAG &DAG) const { 11840 SDLoc DL(Op); 11841 MVT XLenVT = Subtarget.getXLenVT(); 11842 11843 auto *VPNode = cast<VPStridedStoreSDNode>(Op); 11844 SDValue StoreVal = VPNode->getValue(); 11845 MVT VT = StoreVal.getSimpleValueType(); 11846 MVT ContainerVT = VT; 11847 if (VT.isFixedLengthVector()) { 11848 ContainerVT = getContainerForFixedLengthVector(VT); 11849 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 11850 } 11851 11852 // Check if the mask is known to be all ones 11853 SDValue Mask = VPNode->getMask(); 11854 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11855 11856 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse 11857 : Intrinsic::riscv_vsse_mask, 11858 DL, XLenVT); 11859 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, 11860 VPNode->getBasePtr(), VPNode->getStride()}; 11861 if (!IsUnmasked) { 11862 if (VT.isFixedLengthVector()) { 11863 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11864 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11865 } 11866 Ops.push_back(Mask); 11867 } 11868 Ops.push_back(VPNode->getVectorLength()); 11869 11870 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), 11871 Ops, VPNode->getMemoryVT(), 11872 VPNode->getMemOperand()); 11873 } 11874 11875 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 11876 // matched to a RVV indexed load. The RVV indexed load instructions only 11877 // support the "unsigned unscaled" addressing mode; indices are implicitly 11878 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 11879 // signed or scaled indexing is extended to the XLEN value type and scaled 11880 // accordingly. 
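// For example, a vp.gather of i32 elements with i64 indices on RV32 has its
// indices truncated to XLEN below and is emitted as the riscv_vluxei (or
// riscv_vluxei_mask) intrinsic.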
11881 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 11882 SelectionDAG &DAG) const { 11883 SDLoc DL(Op); 11884 MVT VT = Op.getSimpleValueType(); 11885 11886 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11887 EVT MemVT = MemSD->getMemoryVT(); 11888 MachineMemOperand *MMO = MemSD->getMemOperand(); 11889 SDValue Chain = MemSD->getChain(); 11890 SDValue BasePtr = MemSD->getBasePtr(); 11891 11892 [[maybe_unused]] ISD::LoadExtType LoadExtType; 11893 SDValue Index, Mask, PassThru, VL; 11894 11895 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 11896 Index = VPGN->getIndex(); 11897 Mask = VPGN->getMask(); 11898 PassThru = DAG.getUNDEF(VT); 11899 VL = VPGN->getVectorLength(); 11900 // VP doesn't support extending loads. 11901 LoadExtType = ISD::NON_EXTLOAD; 11902 } else { 11903 // Else it must be a MGATHER. 11904 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 11905 Index = MGN->getIndex(); 11906 Mask = MGN->getMask(); 11907 PassThru = MGN->getPassThru(); 11908 LoadExtType = MGN->getExtensionType(); 11909 } 11910 11911 MVT IndexVT = Index.getSimpleValueType(); 11912 MVT XLenVT = Subtarget.getXLenVT(); 11913 11914 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 11915 "Unexpected VTs!"); 11916 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 11917 // Targets have to explicitly opt-in for extending vector loads. 11918 assert(LoadExtType == ISD::NON_EXTLOAD && 11919 "Unexpected extending MGATHER/VP_GATHER"); 11920 11921 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 11922 // the selection of the masked intrinsics doesn't do this for us. 11923 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11924 11925 MVT ContainerVT = VT; 11926 if (VT.isFixedLengthVector()) { 11927 ContainerVT = getContainerForFixedLengthVector(VT); 11928 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 11929 ContainerVT.getVectorElementCount()); 11930 11931 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 11932 11933 if (!IsUnmasked) { 11934 MVT MaskVT = getMaskTypeFor(ContainerVT); 11935 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11936 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 11937 } 11938 } 11939 11940 if (!VL) 11941 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11942 11943 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 11944 IndexVT = IndexVT.changeVectorElementType(XLenVT); 11945 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 11946 } 11947 11948 unsigned IntID = 11949 IsUnmasked ? 
Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 11950 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 11951 if (IsUnmasked) 11952 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11953 else 11954 Ops.push_back(PassThru); 11955 Ops.push_back(BasePtr); 11956 Ops.push_back(Index); 11957 if (!IsUnmasked) 11958 Ops.push_back(Mask); 11959 Ops.push_back(VL); 11960 if (!IsUnmasked) 11961 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 11962 11963 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 11964 SDValue Result = 11965 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 11966 Chain = Result.getValue(1); 11967 11968 if (VT.isFixedLengthVector()) 11969 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11970 11971 return DAG.getMergeValues({Result, Chain}, DL); 11972 } 11973 11974 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 11975 // matched to a RVV indexed store. The RVV indexed store instructions only 11976 // support the "unsigned unscaled" addressing mode; indices are implicitly 11977 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 11978 // signed or scaled indexing is extended to the XLEN value type and scaled 11979 // accordingly. 11980 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 11981 SelectionDAG &DAG) const { 11982 SDLoc DL(Op); 11983 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11984 EVT MemVT = MemSD->getMemoryVT(); 11985 MachineMemOperand *MMO = MemSD->getMemOperand(); 11986 SDValue Chain = MemSD->getChain(); 11987 SDValue BasePtr = MemSD->getBasePtr(); 11988 11989 [[maybe_unused]] bool IsTruncatingStore = false; 11990 SDValue Index, Mask, Val, VL; 11991 11992 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 11993 Index = VPSN->getIndex(); 11994 Mask = VPSN->getMask(); 11995 Val = VPSN->getValue(); 11996 VL = VPSN->getVectorLength(); 11997 // VP doesn't support truncating stores. 11998 IsTruncatingStore = false; 11999 } else { 12000 // Else it must be a MSCATTER. 12001 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 12002 Index = MSN->getIndex(); 12003 Mask = MSN->getMask(); 12004 Val = MSN->getValue(); 12005 IsTruncatingStore = MSN->isTruncatingStore(); 12006 } 12007 12008 MVT VT = Val.getSimpleValueType(); 12009 MVT IndexVT = Index.getSimpleValueType(); 12010 MVT XLenVT = Subtarget.getXLenVT(); 12011 12012 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 12013 "Unexpected VTs!"); 12014 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 12015 // Targets have to explicitly opt-in for extending vector loads and 12016 // truncating vector stores. 12017 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 12018 12019 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 12020 // the selection of the masked intrinsics doesn't do this for us. 
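// An all-ones mask lets us emit riscv_vsoxei instead of riscv_vsoxei_mask
// below and drop the mask operand entirely.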
12021 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 12022 12023 MVT ContainerVT = VT; 12024 if (VT.isFixedLengthVector()) { 12025 ContainerVT = getContainerForFixedLengthVector(VT); 12026 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 12027 ContainerVT.getVectorElementCount()); 12028 12029 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 12030 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 12031 12032 if (!IsUnmasked) { 12033 MVT MaskVT = getMaskTypeFor(ContainerVT); 12034 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 12035 } 12036 } 12037 12038 if (!VL) 12039 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 12040 12041 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 12042 IndexVT = IndexVT.changeVectorElementType(XLenVT); 12043 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 12044 } 12045 12046 unsigned IntID = 12047 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 12048 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 12049 Ops.push_back(Val); 12050 Ops.push_back(BasePtr); 12051 Ops.push_back(Index); 12052 if (!IsUnmasked) 12053 Ops.push_back(Mask); 12054 Ops.push_back(VL); 12055 12056 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 12057 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 12058 } 12059 12060 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 12061 SelectionDAG &DAG) const { 12062 const MVT XLenVT = Subtarget.getXLenVT(); 12063 SDLoc DL(Op); 12064 SDValue Chain = Op->getOperand(0); 12065 SDValue SysRegNo = DAG.getTargetConstant( 12066 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 12067 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 12068 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 12069 12070 // Encoding used for rounding mode in RISC-V differs from that used in 12071 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 12072 // table, which consists of a sequence of 4-bit fields, each representing 12073 // corresponding FLT_ROUNDS mode. 12074 static const int Table = 12075 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 12076 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 12077 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 12078 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 12079 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 12080 12081 SDValue Shift = 12082 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 12083 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 12084 DAG.getConstant(Table, DL, XLenVT), Shift); 12085 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 12086 DAG.getConstant(7, DL, XLenVT)); 12087 12088 return DAG.getMergeValues({Masked, Chain}, DL); 12089 } 12090 12091 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 12092 SelectionDAG &DAG) const { 12093 const MVT XLenVT = Subtarget.getXLenVT(); 12094 SDLoc DL(Op); 12095 SDValue Chain = Op->getOperand(0); 12096 SDValue RMValue = Op->getOperand(1); 12097 SDValue SysRegNo = DAG.getTargetConstant( 12098 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 12099 12100 // Encoding used for rounding mode in RISC-V differs from that used in 12101 // FLT_ROUNDS. 
To convert it, the C rounding mode is used as an index in
12102 // a table, which consists of a sequence of 4-bit fields, each representing
12103 // the corresponding RISC-V mode.
12104 static const unsigned Table =
12105 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12106 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12107 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12108 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12109 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12110
12111 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12112
12113 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12114 DAG.getConstant(2, DL, XLenVT));
12115 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12116 DAG.getConstant(Table, DL, XLenVT), Shift);
12117 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12118 DAG.getConstant(0x7, DL, XLenVT));
12119 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12120 RMValue);
12121 }
12122
12123 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12124 SelectionDAG &DAG) const {
12125 MachineFunction &MF = DAG.getMachineFunction();
12126
12127 bool isRISCV64 = Subtarget.is64Bit();
12128 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12129
12130 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12131 return DAG.getFrameIndex(FI, PtrVT);
12132 }
12133
12134 // Returns the opcode of the target-specific SDNode that implements the 32-bit
12135 // form of the given Opcode.
12136 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12137 switch (Opcode) {
12138 default:
12139 llvm_unreachable("Unexpected opcode");
12140 case ISD::SHL:
12141 return RISCVISD::SLLW;
12142 case ISD::SRA:
12143 return RISCVISD::SRAW;
12144 case ISD::SRL:
12145 return RISCVISD::SRLW;
12146 case ISD::SDIV:
12147 return RISCVISD::DIVW;
12148 case ISD::UDIV:
12149 return RISCVISD::DIVUW;
12150 case ISD::UREM:
12151 return RISCVISD::REMUW;
12152 case ISD::ROTL:
12153 return RISCVISD::ROLW;
12154 case ISD::ROTR:
12155 return RISCVISD::RORW;
12156 }
12157 }
12158
12159 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12160 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12161 // otherwise be promoted to i64, making it difficult to select the
12162 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally
12163 // of type i8/i16/i32 is lost.
12164 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12165 unsigned ExtOpc = ISD::ANY_EXTEND) {
12166 SDLoc DL(N);
12167 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12168 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12169 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12170 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12171 // ReplaceNodeResults requires that we maintain the same type for the return value.
12172 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12173 }
12174
12175 // Converts the given 32-bit operation to an i64 operation with signed extension
12176 // semantics to reduce the number of sign extension instructions.
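// For example, an i32 ADD on RV64 becomes: any-extend both operands to i64,
// perform a 64-bit ADD, sign_extend_inreg from i32 (matching ADDW), then
// truncate back to i32 for ReplaceNodeResults.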
12177 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12178 SDLoc DL(N);
12179 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12180 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12181 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12182 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12183 DAG.getValueType(MVT::i32));
12184 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12185 }
12186
12187 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12188 SmallVectorImpl<SDValue> &Results,
12189 SelectionDAG &DAG) const {
12190 SDLoc DL(N);
12191 switch (N->getOpcode()) {
12192 default:
12193 llvm_unreachable("Don't know how to custom type legalize this operation!");
12194 case ISD::STRICT_FP_TO_SINT:
12195 case ISD::STRICT_FP_TO_UINT:
12196 case ISD::FP_TO_SINT:
12197 case ISD::FP_TO_UINT: {
12198 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12199 "Unexpected custom legalisation");
12200 bool IsStrict = N->isStrictFPOpcode();
12201 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12202 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12203 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12204 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12205 TargetLowering::TypeSoftenFloat) {
12206 if (!isTypeLegal(Op0.getValueType()))
12207 return;
12208 if (IsStrict) {
12209 SDValue Chain = N->getOperand(0);
12210 // In absence of Zfh, promote f16 to f32, then convert.
12211 if (Op0.getValueType() == MVT::f16 &&
12212 !Subtarget.hasStdExtZfhOrZhinx()) {
12213 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12214 {Chain, Op0});
12215 Chain = Op0.getValue(1);
12216 }
12217 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12218 : RISCVISD::STRICT_FCVT_WU_RV64;
12219 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12220 SDValue Res = DAG.getNode(
12221 Opc, DL, VTs, Chain, Op0,
12222 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12223 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12224 Results.push_back(Res.getValue(1));
12225 return;
12226 }
12227 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12228 // convert.
12229 if ((Op0.getValueType() == MVT::f16 &&
12230 !Subtarget.hasStdExtZfhOrZhinx()) ||
12231 Op0.getValueType() == MVT::bf16)
12232 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12233
12234 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12235 SDValue Res =
12236 DAG.getNode(Opc, DL, MVT::i64, Op0,
12237 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12238 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12239 return;
12240 }
12241 // If the FP type needs to be softened, emit a library call using the 'si'
12242 // version. If we left it to default legalization we'd end up with 'di'. If
12243 // the FP type doesn't need to be softened just let generic type
12244 // legalization promote the result type.
12245 RTLIB::Libcall LC;
12246 if (IsSigned)
12247 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12248 else
12249 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12250 MakeLibCallOptions CallOptions;
12251 EVT OpVT = Op0.getValueType();
12252 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12253 SDValue Chain = IsStrict ?
N->getOperand(0) : SDValue(); 12254 SDValue Result; 12255 std::tie(Result, Chain) = 12256 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 12257 Results.push_back(Result); 12258 if (IsStrict) 12259 Results.push_back(Chain); 12260 break; 12261 } 12262 case ISD::LROUND: { 12263 SDValue Op0 = N->getOperand(0); 12264 EVT Op0VT = Op0.getValueType(); 12265 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 12266 TargetLowering::TypeSoftenFloat) { 12267 if (!isTypeLegal(Op0VT)) 12268 return; 12269 12270 // In absense of Zfh, promote f16 to f32, then convert. 12271 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) 12272 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 12273 12274 SDValue Res = 12275 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, 12276 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); 12277 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12278 return; 12279 } 12280 // If the FP type needs to be softened, emit a library call to lround. We'll 12281 // need to truncate the result. We assume any value that doesn't fit in i32 12282 // is allowed to return an unspecified value. 12283 RTLIB::Libcall LC = 12284 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; 12285 MakeLibCallOptions CallOptions; 12286 EVT OpVT = Op0.getValueType(); 12287 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); 12288 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; 12289 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); 12290 Results.push_back(Result); 12291 break; 12292 } 12293 case ISD::READCYCLECOUNTER: 12294 case ISD::READSTEADYCOUNTER: { 12295 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only " 12296 "has custom type legalization on riscv32"); 12297 12298 SDValue LoCounter, HiCounter; 12299 MVT XLenVT = Subtarget.getXLenVT(); 12300 if (N->getOpcode() == ISD::READCYCLECOUNTER) { 12301 LoCounter = DAG.getTargetConstant( 12302 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT); 12303 HiCounter = DAG.getTargetConstant( 12304 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT); 12305 } else { 12306 LoCounter = DAG.getTargetConstant( 12307 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT); 12308 HiCounter = DAG.getTargetConstant( 12309 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT); 12310 } 12311 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 12312 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs, 12313 N->getOperand(0), LoCounter, HiCounter); 12314 12315 Results.push_back( 12316 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 12317 Results.push_back(RCW.getValue(2)); 12318 break; 12319 } 12320 case ISD::LOAD: { 12321 if (!ISD::isNON_EXTLoad(N)) 12322 return; 12323 12324 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 12325 // sext_inreg we emit for ADD/SUB/MUL/SLLI. 
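// On RV64, LW sign-extends into the upper 32 bits for free, so a SEXTLOAD
// costs nothing and keeps those bits in a known state.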
12326 LoadSDNode *Ld = cast<LoadSDNode>(N); 12327 12328 SDLoc dl(N); 12329 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), 12330 Ld->getBasePtr(), Ld->getMemoryVT(), 12331 Ld->getMemOperand()); 12332 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); 12333 Results.push_back(Res.getValue(1)); 12334 return; 12335 } 12336 case ISD::MUL: { 12337 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 12338 unsigned XLen = Subtarget.getXLen(); 12339 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 12340 if (Size > XLen) { 12341 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 12342 SDValue LHS = N->getOperand(0); 12343 SDValue RHS = N->getOperand(1); 12344 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 12345 12346 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 12347 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 12348 // We need exactly one side to be unsigned. 12349 if (LHSIsU == RHSIsU) 12350 return; 12351 12352 auto MakeMULPair = [&](SDValue S, SDValue U) { 12353 MVT XLenVT = Subtarget.getXLenVT(); 12354 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 12355 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 12356 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 12357 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 12358 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 12359 }; 12360 12361 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 12362 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 12363 12364 // The other operand should be signed, but still prefer MULH when 12365 // possible. 12366 if (RHSIsU && LHSIsS && !RHSIsS) 12367 Results.push_back(MakeMULPair(LHS, RHS)); 12368 else if (LHSIsU && RHSIsS && !LHSIsS) 12369 Results.push_back(MakeMULPair(RHS, LHS)); 12370 12371 return; 12372 } 12373 [[fallthrough]]; 12374 } 12375 case ISD::ADD: 12376 case ISD::SUB: 12377 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12378 "Unexpected custom legalisation"); 12379 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 12380 break; 12381 case ISD::SHL: 12382 case ISD::SRA: 12383 case ISD::SRL: 12384 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12385 "Unexpected custom legalisation"); 12386 if (N->getOperand(1).getOpcode() != ISD::Constant) { 12387 // If we can use a BSET instruction, allow default promotion to apply. 12388 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && 12389 isOneConstant(N->getOperand(0))) 12390 break; 12391 Results.push_back(customLegalizeToWOp(N, DAG)); 12392 break; 12393 } 12394 12395 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 12396 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 12397 // shift amount. 
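// An any-extended amount could leave garbage in the upper bits and produce
// an out-of-range i64 shift amount, hence the zero-extension of operand 1.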
12398 if (N->getOpcode() == ISD::SHL) { 12399 SDLoc DL(N); 12400 SDValue NewOp0 = 12401 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 12402 SDValue NewOp1 = 12403 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 12404 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 12405 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 12406 DAG.getValueType(MVT::i32)); 12407 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 12408 } 12409 12410 break; 12411 case ISD::ROTL: 12412 case ISD::ROTR: 12413 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12414 "Unexpected custom legalisation"); 12415 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 12416 Subtarget.hasVendorXTHeadBb()) && 12417 "Unexpected custom legalization"); 12418 if (!isa<ConstantSDNode>(N->getOperand(1)) && 12419 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) 12420 return; 12421 Results.push_back(customLegalizeToWOp(N, DAG)); 12422 break; 12423 case ISD::CTTZ: 12424 case ISD::CTTZ_ZERO_UNDEF: 12425 case ISD::CTLZ: 12426 case ISD::CTLZ_ZERO_UNDEF: { 12427 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12428 "Unexpected custom legalisation"); 12429 12430 SDValue NewOp0 = 12431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 12432 bool IsCTZ = 12433 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 12434 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 12435 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 12436 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12437 return; 12438 } 12439 case ISD::SDIV: 12440 case ISD::UDIV: 12441 case ISD::UREM: { 12442 MVT VT = N->getSimpleValueType(0); 12443 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 12444 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 12445 "Unexpected custom legalisation"); 12446 // Don't promote division/remainder by constant since we should expand those 12447 // to multiply by magic constant. 12448 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 12449 if (N->getOperand(1).getOpcode() == ISD::Constant && 12450 !isIntDivCheap(N->getValueType(0), Attr)) 12451 return; 12452 12453 // If the input is i32, use ANY_EXTEND since the W instructions don't read 12454 // the upper 32 bits. For other types we need to sign or zero extend 12455 // based on the opcode. 12456 unsigned ExtOpc = ISD::ANY_EXTEND; 12457 if (VT != MVT::i32) 12458 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 12459 : ISD::ZERO_EXTEND; 12460 12461 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 12462 break; 12463 } 12464 case ISD::SADDO: { 12465 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12466 "Unexpected custom legalisation"); 12467 12468 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 12469 // use the default legalization. 
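// With a constant RHS, the (RHS < 0) setcc below folds to a constant, so the
// overflow check reduces to (at most) one compare of the sign-extended sum
// against LHS.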
12470 if (!isa<ConstantSDNode>(N->getOperand(1))) 12471 return; 12472 12473 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 12474 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 12475 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); 12476 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 12477 DAG.getValueType(MVT::i32)); 12478 12479 SDValue Zero = DAG.getConstant(0, DL, MVT::i64); 12480 12481 // For an addition, the result should be less than one of the operands (LHS) 12482 // if and only if the other operand (RHS) is negative, otherwise there will 12483 // be overflow. 12484 // For a subtraction, the result should be less than one of the operands 12485 // (LHS) if and only if the other operand (RHS) is (non-zero) positive, 12486 // otherwise there will be overflow. 12487 EVT OType = N->getValueType(1); 12488 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT); 12489 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT); 12490 12491 SDValue Overflow = 12492 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS); 12493 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12494 Results.push_back(Overflow); 12495 return; 12496 } 12497 case ISD::UADDO: 12498 case ISD::USUBO: { 12499 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12500 "Unexpected custom legalisation"); 12501 bool IsAdd = N->getOpcode() == ISD::UADDO; 12502 // Create an ADDW or SUBW. 12503 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 12504 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12505 SDValue Res = 12506 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 12507 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 12508 DAG.getValueType(MVT::i32)); 12509 12510 SDValue Overflow; 12511 if (IsAdd && isOneConstant(RHS)) { 12512 // Special case uaddo X, 1 overflowed if the addition result is 0. 12513 // The general case (X + C) < C is not necessarily beneficial. Although we 12514 // reduce the live range of X, we may introduce the materialization of 12515 // constant C, especially when the setcc result is used by branch. We have 12516 // no compare with constant and branch instructions. 12517 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, 12518 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); 12519 } else if (IsAdd && isAllOnesConstant(RHS)) { 12520 // Special case uaddo X, -1 overflowed if X != 0. 12521 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), 12522 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); 12523 } else { 12524 // Sign extend the LHS and perform an unsigned compare with the ADDW 12525 // result. Since the inputs are sign extended from i32, this is equivalent 12526 // to comparing the lower 32 bits. 12527 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 12528 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 12529 IsAdd ? ISD::SETULT : ISD::SETUGT); 12530 } 12531 12532 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12533 Results.push_back(Overflow); 12534 return; 12535 } 12536 case ISD::UADDSAT: 12537 case ISD::USUBSAT: { 12538 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12539 "Unexpected custom legalisation"); 12540 if (Subtarget.hasStdExtZbb()) { 12541 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. 
Using 12542 // sign extend allows overflow of the lower 32 bits to be detected on 12543 // the promoted size. 12544 SDValue LHS = 12545 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 12546 SDValue RHS = 12547 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 12548 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 12549 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12550 return; 12551 } 12552 12553 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 12554 // promotion for UADDO/USUBO. 12555 Results.push_back(expandAddSubSat(N, DAG)); 12556 return; 12557 } 12558 case ISD::SADDSAT: 12559 case ISD::SSUBSAT: { 12560 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12561 "Unexpected custom legalisation"); 12562 Results.push_back(expandAddSubSat(N, DAG)); 12563 return; 12564 } 12565 case ISD::ABS: { 12566 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 12567 "Unexpected custom legalisation"); 12568 12569 if (Subtarget.hasStdExtZbb()) { 12570 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. 12571 // This allows us to remember that the result is sign extended. Expanding 12572 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. 12573 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, 12574 N->getOperand(0)); 12575 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); 12576 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); 12577 return; 12578 } 12579 12580 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) 12581 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 12582 12583 // Freeze the source so we can increase it's use count. 12584 Src = DAG.getFreeze(Src); 12585 12586 // Copy sign bit to all bits using the sraiw pattern. 12587 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, 12588 DAG.getValueType(MVT::i32)); 12589 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, 12590 DAG.getConstant(31, DL, MVT::i64)); 12591 12592 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); 12593 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); 12594 12595 // NOTE: The result is only required to be anyextended, but sext is 12596 // consistent with type legalization of sub. 
12597 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, 12598 DAG.getValueType(MVT::i32)); 12599 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 12600 return; 12601 } 12602 case ISD::BITCAST: { 12603 EVT VT = N->getValueType(0); 12604 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 12605 SDValue Op0 = N->getOperand(0); 12606 EVT Op0VT = Op0.getValueType(); 12607 MVT XLenVT = Subtarget.getXLenVT(); 12608 if (VT == MVT::i16 && Op0VT == MVT::f16 && 12609 Subtarget.hasStdExtZfhminOrZhinxmin()) { 12610 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 12611 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 12612 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && 12613 Subtarget.hasStdExtZfbfmin()) { 12614 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 12615 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 12616 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 12617 Subtarget.hasStdExtFOrZfinx()) { 12618 SDValue FPConv = 12619 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 12620 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 12621 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() && 12622 Subtarget.hasStdExtDOrZdinx()) { 12623 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, 12624 DAG.getVTList(MVT::i32, MVT::i32), Op0); 12625 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 12626 NewReg.getValue(0), NewReg.getValue(1)); 12627 Results.push_back(RetReg); 12628 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 12629 isTypeLegal(Op0VT)) { 12630 // Custom-legalize bitcasts from fixed-length vector types to illegal 12631 // scalar types in order to improve codegen. Bitcast the vector to a 12632 // one-element vector type whose element type is the same as the result 12633 // type, and extract the first element. 12634 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 12635 if (isTypeLegal(BVT)) { 12636 SDValue BVec = DAG.getBitcast(BVT, Op0); 12637 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 12638 DAG.getVectorIdxConstant(0, DL))); 12639 } 12640 } 12641 break; 12642 } 12643 case RISCVISD::BREV8: 12644 case RISCVISD::ORC_B: { 12645 MVT VT = N->getSimpleValueType(0); 12646 MVT XLenVT = Subtarget.getXLenVT(); 12647 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && 12648 "Unexpected custom legalisation"); 12649 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) || 12650 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) && 12651 "Unexpected extension"); 12652 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 12653 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); 12654 // ReplaceNodeResults requires we maintain the same type for the return 12655 // value. 12656 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); 12657 break; 12658 } 12659 case ISD::EXTRACT_VECTOR_ELT: { 12660 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 12661 // type is illegal (currently only vXi64 RV32). 12662 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 12663 // transferred to the destination register. We issue two of these from the 12664 // upper- and lower- halves of the SEW-bit vector element, slid down to the 12665 // first element. 
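// For example, extracting element Idx from an nxv2i64 vector on RV32: slide
// the element down to index 0, read bits [31:0] with vmv.x.s, shift the
// element right by 32 with a vector shift and read bits [63:32] with a second
// vmv.x.s, then rebuild the i64 with BUILD_PAIR.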
12666 SDValue Vec = N->getOperand(0); 12667 SDValue Idx = N->getOperand(1); 12668 12669 // The vector type hasn't been legalized yet so we can't issue target 12670 // specific nodes if it needs legalization. 12671 // FIXME: We would manually legalize if it's important. 12672 if (!isTypeLegal(Vec.getValueType())) 12673 return; 12674 12675 MVT VecVT = Vec.getSimpleValueType(); 12676 12677 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 12678 VecVT.getVectorElementType() == MVT::i64 && 12679 "Unexpected EXTRACT_VECTOR_ELT legalization"); 12680 12681 // If this is a fixed vector, we need to convert it to a scalable vector. 12682 MVT ContainerVT = VecVT; 12683 if (VecVT.isFixedLengthVector()) { 12684 ContainerVT = getContainerForFixedLengthVector(VecVT); 12685 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 12686 } 12687 12688 MVT XLenVT = Subtarget.getXLenVT(); 12689 12690 // Use a VL of 1 to avoid processing more elements than we need. 12691 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 12692 12693 // Unless the index is known to be 0, we must slide the vector down to get 12694 // the desired element into index 0. 12695 if (!isNullConstant(Idx)) { 12696 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 12697 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 12698 } 12699 12700 // Extract the lower XLEN bits of the correct vector element. 12701 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 12702 12703 // To extract the upper XLEN bits of the vector element, shift the first 12704 // element right by 32 bits and re-extract the lower XLEN bits. 12705 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 12706 DAG.getUNDEF(ContainerVT), 12707 DAG.getConstant(32, DL, XLenVT), VL); 12708 SDValue LShr32 = 12709 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, 12710 DAG.getUNDEF(ContainerVT), Mask, VL); 12711 12712 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 12713 12714 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 12715 break; 12716 } 12717 case ISD::INTRINSIC_WO_CHAIN: { 12718 unsigned IntNo = N->getConstantOperandVal(0); 12719 switch (IntNo) { 12720 default: 12721 llvm_unreachable( 12722 "Don't know how to custom type legalize this intrinsic!"); 12723 case Intrinsic::experimental_get_vector_length: { 12724 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); 12725 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12726 return; 12727 } 12728 case Intrinsic::experimental_cttz_elts: { 12729 SDValue Res = lowerCttzElts(N, DAG, Subtarget); 12730 Results.push_back( 12731 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res)); 12732 return; 12733 } 12734 case Intrinsic::riscv_orc_b: 12735 case Intrinsic::riscv_brev8: 12736 case Intrinsic::riscv_sha256sig0: 12737 case Intrinsic::riscv_sha256sig1: 12738 case Intrinsic::riscv_sha256sum0: 12739 case Intrinsic::riscv_sha256sum1: 12740 case Intrinsic::riscv_sm3p0: 12741 case Intrinsic::riscv_sm3p1: { 12742 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 12743 return; 12744 unsigned Opc; 12745 switch (IntNo) { 12746 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 12747 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 12748 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 12749 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 12750 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 12751 
case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 12752 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 12753 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 12754 } 12755 12756 SDValue NewOp = 12757 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12758 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 12759 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12760 return; 12761 } 12762 case Intrinsic::riscv_sm4ks: 12763 case Intrinsic::riscv_sm4ed: { 12764 unsigned Opc = 12765 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 12766 SDValue NewOp0 = 12767 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12768 SDValue NewOp1 = 12769 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 12770 SDValue Res = 12771 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); 12772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12773 return; 12774 } 12775 case Intrinsic::riscv_mopr: { 12776 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 12777 return; 12778 SDValue NewOp = 12779 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12780 SDValue Res = DAG.getNode( 12781 RISCVISD::MOPR, DL, MVT::i64, NewOp, 12782 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64)); 12783 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12784 return; 12785 } 12786 case Intrinsic::riscv_moprr: { 12787 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 12788 return; 12789 SDValue NewOp0 = 12790 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12791 SDValue NewOp1 = 12792 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 12793 SDValue Res = DAG.getNode( 12794 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, 12795 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64)); 12796 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12797 return; 12798 } 12799 case Intrinsic::riscv_clmul: { 12800 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 12801 return; 12802 12803 SDValue NewOp0 = 12804 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12805 SDValue NewOp1 = 12806 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 12807 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 12808 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12809 return; 12810 } 12811 case Intrinsic::riscv_clmulh: 12812 case Intrinsic::riscv_clmulr: { 12813 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 12814 return; 12815 12816 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 12817 // to the full 128-bit clmul result of multiplying two xlen values. 12818 // Perform clmulr or clmulh on the shifted values. Finally, extract the 12819 // upper 32 bits. 12820 // 12821 // The alternative is to mask the inputs to 32 bits and use clmul, but 12822 // that requires two shifts to mask each input without zext.w. 12823 // FIXME: If the inputs are known zero extended or could be freely 12824 // zero extended, the mask form would be better. 
12825 SDValue NewOp0 = 12826 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 12827 SDValue NewOp1 = 12828 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 12829 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 12830 DAG.getConstant(32, DL, MVT::i64)); 12831 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 12832 DAG.getConstant(32, DL, MVT::i64)); 12833 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH 12834 : RISCVISD::CLMULR; 12835 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 12836 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 12837 DAG.getConstant(32, DL, MVT::i64)); 12838 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 12839 return; 12840 } 12841 case Intrinsic::riscv_vmv_x_s: { 12842 EVT VT = N->getValueType(0); 12843 MVT XLenVT = Subtarget.getXLenVT(); 12844 if (VT.bitsLT(XLenVT)) { 12845 // Simple case just extract using vmv.x.s and truncate. 12846 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 12847 Subtarget.getXLenVT(), N->getOperand(1)); 12848 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 12849 return; 12850 } 12851 12852 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 12853 "Unexpected custom legalization"); 12854 12855 // We need to do the move in two steps. 12856 SDValue Vec = N->getOperand(1); 12857 MVT VecVT = Vec.getSimpleValueType(); 12858 12859 // First extract the lower XLEN bits of the element. 12860 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 12861 12862 // To extract the upper XLEN bits of the vector element, shift the first 12863 // element right by 32 bits and re-extract the lower XLEN bits. 12864 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget); 12865 12866 SDValue ThirtyTwoV = 12867 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 12868 DAG.getConstant(32, DL, XLenVT), VL); 12869 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, 12870 DAG.getUNDEF(VecVT), Mask, VL); 12871 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 12872 12873 Results.push_back( 12874 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 12875 break; 12876 } 12877 } 12878 break; 12879 } 12880 case ISD::VECREDUCE_ADD: 12881 case ISD::VECREDUCE_AND: 12882 case ISD::VECREDUCE_OR: 12883 case ISD::VECREDUCE_XOR: 12884 case ISD::VECREDUCE_SMAX: 12885 case ISD::VECREDUCE_UMAX: 12886 case ISD::VECREDUCE_SMIN: 12887 case ISD::VECREDUCE_UMIN: 12888 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 12889 Results.push_back(V); 12890 break; 12891 case ISD::VP_REDUCE_ADD: 12892 case ISD::VP_REDUCE_AND: 12893 case ISD::VP_REDUCE_OR: 12894 case ISD::VP_REDUCE_XOR: 12895 case ISD::VP_REDUCE_SMAX: 12896 case ISD::VP_REDUCE_UMAX: 12897 case ISD::VP_REDUCE_SMIN: 12898 case ISD::VP_REDUCE_UMIN: 12899 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) 12900 Results.push_back(V); 12901 break; 12902 case ISD::GET_ROUNDING: { 12903 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); 12904 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0)); 12905 Results.push_back(Res.getValue(0)); 12906 Results.push_back(Res.getValue(1)); 12907 break; 12908 } 12909 } 12910 } 12911 12912 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP 12913 /// which corresponds to it. 
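/// For example, ISD::ADD maps to ISD::VECREDUCE_ADD and ISD::FADD maps to the
/// reassociating ISD::VECREDUCE_FADD rather than the ordered
/// ISD::VECREDUCE_SEQ_FADD form.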
12914 static unsigned getVecReduceOpcode(unsigned Opc) {
12915 switch (Opc) {
12916 default:
12917 llvm_unreachable("Unhandled binary to transform reduction");
12918 case ISD::ADD:
12919 return ISD::VECREDUCE_ADD;
12920 case ISD::UMAX:
12921 return ISD::VECREDUCE_UMAX;
12922 case ISD::SMAX:
12923 return ISD::VECREDUCE_SMAX;
12924 case ISD::UMIN:
12925 return ISD::VECREDUCE_UMIN;
12926 case ISD::SMIN:
12927 return ISD::VECREDUCE_SMIN;
12928 case ISD::AND:
12929 return ISD::VECREDUCE_AND;
12930 case ISD::OR:
12931 return ISD::VECREDUCE_OR;
12932 case ISD::XOR:
12933 return ISD::VECREDUCE_XOR;
12934 case ISD::FADD:
12935 // Note: This is the associative form of the generic reduction opcode.
12936 return ISD::VECREDUCE_FADD;
12937 }
12938 }
12939
12940 /// Perform two related transforms whose purpose is to incrementally recognize
12941 /// an explode_vector followed by scalar reduction as a vector reduction node.
12942 /// This exists to recover from a deficiency in SLP which can't handle
12943 /// forests with multiple roots sharing common nodes. In some cases, one
12944 /// of the trees will be vectorized, and the other will remain (unprofitably)
12945 /// scalarized.
12946 static SDValue
12947 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12948 const RISCVSubtarget &Subtarget) {
12949
12950 // This transform needs to run before all integer types have been legalized
12951 // to i64 (so that the vector element type matches the add type), and while
12952 // it's safe to introduce odd sized vector types.
12953 if (DAG.NewNodesMustHaveLegalTypes)
12954 return SDValue();
12955
12956 // Without V, this transform isn't useful. We could form the (illegal)
12957 // operations and let them be scalarized again, but there's really no point.
12958 if (!Subtarget.hasVInstructions())
12959 return SDValue();
12960
12961 const SDLoc DL(N);
12962 const EVT VT = N->getValueType(0);
12963 const unsigned Opc = N->getOpcode();
12964
12965 // For FADD, we only handle the case with reassociation allowed. We
12966 // could handle strict reduction order, but at the moment, there's no
12967 // known reason to, and the complexity isn't worth it.
12968 // TODO: Handle fminnum and fmaxnum here
12969 if (!VT.isInteger() &&
12970 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12971 return SDValue();
12972
12973 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12974 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12975 "Inconsistent mappings");
12976 SDValue LHS = N->getOperand(0);
12977 SDValue RHS = N->getOperand(1);
12978
12979 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12980 return SDValue();
12981
12982 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12983 std::swap(LHS, RHS);
12984
12985 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12986 !isa<ConstantSDNode>(RHS.getOperand(1)))
12987 return SDValue();
12988
12989 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12990 SDValue SrcVec = RHS.getOperand(0);
12991 EVT SrcVecVT = SrcVec.getValueType();
12992 assert(SrcVecVT.getVectorElementType() == VT);
12993 if (SrcVecVT.isScalableVector())
12994 return SDValue();
12995
12996 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12997 return SDValue();
12998
12999 // Match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13000 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13001 // root of our reduction tree. TODO: We could extend this to any two
13002 // adjacent aligned constant indices if desired.
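// e.g. (add (extract_vector_elt v4i32 V, 0), (extract_vector_elt v4i32 V, 1))
// becomes (vecreduce_add (extract_subvector v2i32 V, 0)).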
13003 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 13004 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) { 13005 uint64_t LHSIdx = 13006 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue(); 13007 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) { 13008 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2); 13009 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 13010 DAG.getVectorIdxConstant(0, DL)); 13011 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags()); 13012 } 13013 } 13014 13015 // Match (binop (reduce (extract_subvector V, 0), 13016 // (extract_vector_elt V, sizeof(SubVec)))) 13017 // into a reduction of one more element from the original vector V. 13018 if (LHS.getOpcode() != ReduceOpc) 13019 return SDValue(); 13020 13021 SDValue ReduceVec = LHS.getOperand(0); 13022 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && 13023 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) && 13024 isNullConstant(ReduceVec.getOperand(1)) && 13025 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { 13026 // For illegal types (e.g. 3xi32), most will be combined again into a 13027 // wider (hopefully legal) type. If this is a terminal state, we are 13028 // relying on type legalization here to produce something reasonable 13029 // and this lowering quality could probably be improved. (TODO) 13030 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1); 13031 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 13032 DAG.getVectorIdxConstant(0, DL)); 13033 auto Flags = ReduceVec->getFlags(); 13034 Flags.intersectWith(N->getFlags()); 13035 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags); 13036 } 13037 13038 return SDValue(); 13039 } 13040 13041 13042 // Try to fold (<bop> x, (reduction.<bop> vec, start)) 13043 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, 13044 const RISCVSubtarget &Subtarget) { 13045 auto BinOpToRVVReduce = [](unsigned Opc) { 13046 switch (Opc) { 13047 default: 13048 llvm_unreachable("Unhandled binary to transfrom reduction"); 13049 case ISD::ADD: 13050 return RISCVISD::VECREDUCE_ADD_VL; 13051 case ISD::UMAX: 13052 return RISCVISD::VECREDUCE_UMAX_VL; 13053 case ISD::SMAX: 13054 return RISCVISD::VECREDUCE_SMAX_VL; 13055 case ISD::UMIN: 13056 return RISCVISD::VECREDUCE_UMIN_VL; 13057 case ISD::SMIN: 13058 return RISCVISD::VECREDUCE_SMIN_VL; 13059 case ISD::AND: 13060 return RISCVISD::VECREDUCE_AND_VL; 13061 case ISD::OR: 13062 return RISCVISD::VECREDUCE_OR_VL; 13063 case ISD::XOR: 13064 return RISCVISD::VECREDUCE_XOR_VL; 13065 case ISD::FADD: 13066 return RISCVISD::VECREDUCE_FADD_VL; 13067 case ISD::FMAXNUM: 13068 return RISCVISD::VECREDUCE_FMAX_VL; 13069 case ISD::FMINNUM: 13070 return RISCVISD::VECREDUCE_FMIN_VL; 13071 } 13072 }; 13073 13074 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { 13075 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 13076 isNullConstant(V.getOperand(1)) && 13077 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); 13078 }; 13079 13080 unsigned Opc = N->getOpcode(); 13081 unsigned ReduceIdx; 13082 if (IsReduction(N->getOperand(0), Opc)) 13083 ReduceIdx = 0; 13084 else if (IsReduction(N->getOperand(1), Opc)) 13085 ReduceIdx = 1; 13086 else 13087 return SDValue(); 13088 13089 // Skip if FADD disallows reassociation but the combiner needs. 
13090 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) 13091 return SDValue(); 13092 13093 SDValue Extract = N->getOperand(ReduceIdx); 13094 SDValue Reduce = Extract.getOperand(0); 13095 if (!Extract.hasOneUse() || !Reduce.hasOneUse()) 13096 return SDValue(); 13097 13098 SDValue ScalarV = Reduce.getOperand(2); 13099 EVT ScalarVT = ScalarV.getValueType(); 13100 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && 13101 ScalarV.getOperand(0)->isUndef() && 13102 isNullConstant(ScalarV.getOperand(2))) 13103 ScalarV = ScalarV.getOperand(1); 13104 13105 // Make sure that ScalarV is a splat with VL=1. 13106 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && 13107 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && 13108 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) 13109 return SDValue(); 13110 13111 if (!isNonZeroAVL(ScalarV.getOperand(2))) 13112 return SDValue(); 13113 13114 // Check the scalar of ScalarV is neutral element 13115 // TODO: Deal with value other than neutral element. 13116 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), 13117 0)) 13118 return SDValue(); 13119 13120 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 13121 // FIXME: We might be able to improve this if operand 0 is undef. 13122 if (!isNonZeroAVL(Reduce.getOperand(5))) 13123 return SDValue(); 13124 13125 SDValue NewStart = N->getOperand(1 - ReduceIdx); 13126 13127 SDLoc DL(N); 13128 SDValue NewScalarV = 13129 lowerScalarInsert(NewStart, ScalarV.getOperand(2), 13130 ScalarV.getSimpleValueType(), DL, DAG, Subtarget); 13131 13132 // If we looked through an INSERT_SUBVECTOR we need to restore it. 13133 if (ScalarVT != ScalarV.getValueType()) 13134 NewScalarV = 13135 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), 13136 NewScalarV, DAG.getVectorIdxConstant(0, DL)); 13137 13138 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1), 13139 NewScalarV, Reduce.getOperand(3), 13140 Reduce.getOperand(4), Reduce.getOperand(5)}; 13141 SDValue NewReduce = 13142 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops); 13143 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, 13144 Extract.getOperand(1)); 13145 } 13146 13147 // Optimize (add (shl x, c0), (shl y, c1)) -> 13148 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 13149 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 13150 const RISCVSubtarget &Subtarget) { 13151 // Perform this optimization only in the zba extension. 13152 if (!Subtarget.hasStdExtZba()) 13153 return SDValue(); 13154 13155 // Skip for vector types and larger types. 13156 EVT VT = N->getValueType(0); 13157 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 13158 return SDValue(); 13159 13160 // The two operand nodes must be SHL and have no other use. 13161 SDValue N0 = N->getOperand(0); 13162 SDValue N1 = N->getOperand(1); 13163 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 13164 !N0->hasOneUse() || !N1->hasOneUse()) 13165 return SDValue(); 13166 13167 // Check c0 and c1. 13168 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 13169 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 13170 if (!N0C || !N1C) 13171 return SDValue(); 13172 int64_t C0 = N0C->getSExtValue(); 13173 int64_t C1 = N1C->getSExtValue(); 13174 if (C0 <= 0 || C1 <= 0) 13175 return SDValue(); 13176 13177 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 
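  // Illustrative example (shift amounts are hypothetical): with c0 = 5 and
  // c1 = 8 we have Diff = 3 and Bits = 5, so
  //   (add (shl x, 5), (shl y, 8)) -> (shl (sh3add y, x), 5)
  // i.e. ((y << 3) + x) << 5.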
13178 int64_t Bits = std::min(C0, C1); 13179 int64_t Diff = std::abs(C0 - C1); 13180 if (Diff != 1 && Diff != 2 && Diff != 3) 13181 return SDValue(); 13182 13183 // Build nodes. 13184 SDLoc DL(N); 13185 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 13186 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); 13187 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL, 13188 DAG.getConstant(Diff, DL, VT), NS); 13189 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); 13190 } 13191 13192 // Combine a constant select operand into its use: 13193 // 13194 // (and (select cond, -1, c), x) 13195 // -> (select cond, x, (and x, c)) [AllOnes=1] 13196 // (or (select cond, 0, c), x) 13197 // -> (select cond, x, (or x, c)) [AllOnes=0] 13198 // (xor (select cond, 0, c), x) 13199 // -> (select cond, x, (xor x, c)) [AllOnes=0] 13200 // (add (select cond, 0, c), x) 13201 // -> (select cond, x, (add x, c)) [AllOnes=0] 13202 // (sub x, (select cond, 0, c)) 13203 // -> (select cond, x, (sub x, c)) [AllOnes=0] 13204 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 13205 SelectionDAG &DAG, bool AllOnes, 13206 const RISCVSubtarget &Subtarget) { 13207 EVT VT = N->getValueType(0); 13208 13209 // Skip vectors. 13210 if (VT.isVector()) 13211 return SDValue(); 13212 13213 if (!Subtarget.hasConditionalMoveFusion()) { 13214 // (select cond, x, (and x, c)) has custom lowering with Zicond. 13215 if ((!Subtarget.hasStdExtZicond() && 13216 !Subtarget.hasVendorXVentanaCondOps()) || 13217 N->getOpcode() != ISD::AND) 13218 return SDValue(); 13219 13220 // Maybe harmful when condition code has multiple use. 13221 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse()) 13222 return SDValue(); 13223 13224 // Maybe harmful when VT is wider than XLen. 13225 if (VT.getSizeInBits() > Subtarget.getXLen()) 13226 return SDValue(); 13227 } 13228 13229 if ((Slct.getOpcode() != ISD::SELECT && 13230 Slct.getOpcode() != RISCVISD::SELECT_CC) || 13231 !Slct.hasOneUse()) 13232 return SDValue(); 13233 13234 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { 13235 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); 13236 }; 13237 13238 bool SwapSelectOps; 13239 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; 13240 SDValue TrueVal = Slct.getOperand(1 + OpOffset); 13241 SDValue FalseVal = Slct.getOperand(2 + OpOffset); 13242 SDValue NonConstantVal; 13243 if (isZeroOrAllOnes(TrueVal, AllOnes)) { 13244 SwapSelectOps = false; 13245 NonConstantVal = FalseVal; 13246 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { 13247 SwapSelectOps = true; 13248 NonConstantVal = TrueVal; 13249 } else 13250 return SDValue(); 13251 13252 // Slct is now know to be the desired identity constant when CC is true. 13253 TrueVal = OtherOp; 13254 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); 13255 // Unless SwapSelectOps says the condition should be false. 13256 if (SwapSelectOps) 13257 std::swap(TrueVal, FalseVal); 13258 13259 if (Slct.getOpcode() == RISCVISD::SELECT_CC) 13260 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, 13261 {Slct.getOperand(0), Slct.getOperand(1), 13262 Slct.getOperand(2), TrueVal, FalseVal}); 13263 13264 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 13265 {Slct.getOperand(0), TrueVal, FalseVal}); 13266 } 13267 13268 // Attempt combineSelectAndUse on each operand of a commutative operator N. 
13269 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, 13270 bool AllOnes, 13271 const RISCVSubtarget &Subtarget) { 13272 SDValue N0 = N->getOperand(0); 13273 SDValue N1 = N->getOperand(1); 13274 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget)) 13275 return Result; 13276 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget)) 13277 return Result; 13278 return SDValue(); 13279 } 13280 13281 // Transform (add (mul x, c0), c1) -> 13282 // (add (mul (add x, c1/c0), c0), c1%c0). 13283 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 13284 // that should be excluded is when c0*(c1/c0) is simm12, which will lead 13285 // to an infinite loop in DAGCombine if transformed. 13286 // Or transform (add (mul x, c0), c1) -> 13287 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 13288 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 13289 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will 13290 // lead to an infinite loop in DAGCombine if transformed. 13291 // Or transform (add (mul x, c0), c1) -> 13292 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 13293 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 13294 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will 13295 // lead to an infinite loop in DAGCombine if transformed. 13296 // Or transform (add (mul x, c0), c1) -> 13297 // (mul (add x, c1/c0), c0). 13298 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 13299 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 13300 const RISCVSubtarget &Subtarget) { 13301 // Skip for vector types and larger types. 13302 EVT VT = N->getValueType(0); 13303 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 13304 return SDValue(); 13305 // The first operand node must be a MUL and has no other use. 13306 SDValue N0 = N->getOperand(0); 13307 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 13308 return SDValue(); 13309 // Check if c0 and c1 match above conditions. 13310 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 13311 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 13312 if (!N0C || !N1C) 13313 return SDValue(); 13314 // If N0C has multiple uses it's possible one of the cases in 13315 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result 13316 // in an infinite loop. 13317 if (!N0C->hasOneUse()) 13318 return SDValue(); 13319 int64_t C0 = N0C->getSExtValue(); 13320 int64_t C1 = N1C->getSExtValue(); 13321 int64_t CA, CB; 13322 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 13323 return SDValue(); 13324 // Search for proper CA (non-zero) and CB that both are simm12. 13325 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 13326 !isInt<12>(C0 * (C1 / C0))) { 13327 CA = C1 / C0; 13328 CB = C1 % C0; 13329 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 13330 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 13331 CA = C1 / C0 + 1; 13332 CB = C1 % C0 - C0; 13333 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 13334 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 13335 CA = C1 / C0 - 1; 13336 CB = C1 % C0 + C0; 13337 } else 13338 return SDValue(); 13339 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 
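  // Worked example (constants chosen for exposition): with c0 = 1000 and
  // c1 = 100003, c1 is not simm12 but c1/c0 = 100 and c1%c0 = 3 are, and
  // c0*(c1/c0) = 100000 is not, so
  //   (add (mul x, 1000), 100003) -> (add (mul (add x, 100), 1000), 3).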
13340 SDLoc DL(N); 13341 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 13342 DAG.getConstant(CA, DL, VT)); 13343 SDValue New1 = 13344 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 13345 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 13346 } 13347 13348 // add (zext, zext) -> zext (add (zext, zext)) 13349 // sub (zext, zext) -> sext (sub (zext, zext)) 13350 // mul (zext, zext) -> zext (mul (zext, zext)) 13351 // sdiv (zext, zext) -> zext (sdiv (zext, zext)) 13352 // udiv (zext, zext) -> zext (udiv (zext, zext)) 13353 // srem (zext, zext) -> zext (srem (zext, zext)) 13354 // urem (zext, zext) -> zext (urem (zext, zext)) 13355 // 13356 // where the sum of the extend widths match, and the the range of the bin op 13357 // fits inside the width of the narrower bin op. (For profitability on rvv, we 13358 // use a power of two for both inner and outer extend.) 13359 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) { 13360 13361 EVT VT = N->getValueType(0); 13362 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) 13363 return SDValue(); 13364 13365 SDValue N0 = N->getOperand(0); 13366 SDValue N1 = N->getOperand(1); 13367 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND) 13368 return SDValue(); 13369 if (!N0.hasOneUse() || !N1.hasOneUse()) 13370 return SDValue(); 13371 13372 SDValue Src0 = N0.getOperand(0); 13373 SDValue Src1 = N1.getOperand(0); 13374 EVT SrcVT = Src0.getValueType(); 13375 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) || 13376 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 || 13377 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2) 13378 return SDValue(); 13379 13380 LLVMContext &C = *DAG.getContext(); 13381 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C); 13382 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount()); 13383 13384 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0); 13385 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1); 13386 13387 // Src0 and Src1 are zero extended, so they're always positive if signed. 13388 // 13389 // sub can produce a negative from two positive operands, so it needs sign 13390 // extended. Other nodes produce a positive from two positive operands, so 13391 // zero extend instead. 13392 unsigned OuterExtend = 13393 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 13394 13395 return DAG.getNode( 13396 OuterExtend, SDLoc(N), VT, 13397 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1)); 13398 } 13399 13400 // Try to turn (add (xor bool, 1) -1) into (neg bool). 13401 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { 13402 SDValue N0 = N->getOperand(0); 13403 SDValue N1 = N->getOperand(1); 13404 EVT VT = N->getValueType(0); 13405 SDLoc DL(N); 13406 13407 // RHS should be -1. 13408 if (!isAllOnesConstant(N1)) 13409 return SDValue(); 13410 13411 // Look for (xor X, 1). 13412 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1))) 13413 return SDValue(); 13414 13415 // First xor input should be 0 or 1. 13416 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 13417 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask)) 13418 return SDValue(); 13419 13420 // Emit a negate of the setcc. 
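  // e.g. when %b is known to be 0 or 1, (add (xor %b, 1), -1) becomes
  // (sub 0, %b): 0 maps to 0 and 1 maps to -1.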
13421 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), 13422 N0.getOperand(0)); 13423 } 13424 13425 static SDValue performADDCombine(SDNode *N, 13426 TargetLowering::DAGCombinerInfo &DCI, 13427 const RISCVSubtarget &Subtarget) { 13428 SelectionDAG &DAG = DCI.DAG; 13429 if (SDValue V = combineAddOfBooleanXor(N, DAG)) 13430 return V; 13431 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 13432 return V; 13433 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) 13434 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 13435 return V; 13436 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 13437 return V; 13438 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 13439 return V; 13440 if (SDValue V = combineBinOpOfZExt(N, DAG)) 13441 return V; 13442 13443 // fold (add (select lhs, rhs, cc, 0, y), x) -> 13444 // (select lhs, rhs, cc, x, (add x, y)) 13445 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 13446 } 13447 13448 // Try to turn a sub boolean RHS and constant LHS into an addi. 13449 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { 13450 SDValue N0 = N->getOperand(0); 13451 SDValue N1 = N->getOperand(1); 13452 EVT VT = N->getValueType(0); 13453 SDLoc DL(N); 13454 13455 // Require a constant LHS. 13456 auto *N0C = dyn_cast<ConstantSDNode>(N0); 13457 if (!N0C) 13458 return SDValue(); 13459 13460 // All our optimizations involve subtracting 1 from the immediate and forming 13461 // an ADDI. Make sure the new immediate is valid for an ADDI. 13462 APInt ImmValMinus1 = N0C->getAPIntValue() - 1; 13463 if (!ImmValMinus1.isSignedIntN(12)) 13464 return SDValue(); 13465 13466 SDValue NewLHS; 13467 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { 13468 // (sub constant, (setcc x, y, eq/neq)) -> 13469 // (add (setcc x, y, neq/eq), constant - 1) 13470 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 13471 EVT SetCCOpVT = N1.getOperand(0).getValueType(); 13472 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) 13473 return SDValue(); 13474 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 13475 NewLHS = 13476 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); 13477 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && 13478 N1.getOperand(0).getOpcode() == ISD::SETCC) { 13479 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 13480 // Since setcc returns a bool the xor is equivalent to 1-setcc. 13481 NewLHS = N1.getOperand(0); 13482 } else 13483 return SDValue(); 13484 13485 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); 13486 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); 13487 } 13488 13489 // Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are 13490 // non-zero. Replace with orc.b. 
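// For example (value chosen for illustration), with X = 0x00010000:
//   (shl X, 8) - X = 0x01000000 - 0x00010000 = 0x00ff0000,
// which matches orc.b(X), where every non-zero byte becomes 0xff.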
13491 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, 13492 const RISCVSubtarget &Subtarget) { 13493 if (!Subtarget.hasStdExtZbb()) 13494 return SDValue(); 13495 13496 EVT VT = N->getValueType(0); 13497 13498 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16) 13499 return SDValue(); 13500 13501 SDValue N0 = N->getOperand(0); 13502 SDValue N1 = N->getOperand(1); 13503 13504 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse()) 13505 return SDValue(); 13506 13507 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 13508 if (!ShAmtC || ShAmtC->getZExtValue() != 8) 13509 return SDValue(); 13510 13511 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe)); 13512 if (!DAG.MaskedValueIsZero(N1, Mask)) 13513 return SDValue(); 13514 13515 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1); 13516 } 13517 13518 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, 13519 const RISCVSubtarget &Subtarget) { 13520 if (SDValue V = combineSubOfBoolean(N, DAG)) 13521 return V; 13522 13523 EVT VT = N->getValueType(0); 13524 SDValue N0 = N->getOperand(0); 13525 SDValue N1 = N->getOperand(1); 13526 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 13527 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && 13528 isNullConstant(N1.getOperand(1))) { 13529 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 13530 if (CCVal == ISD::SETLT) { 13531 SDLoc DL(N); 13532 unsigned ShAmt = N0.getValueSizeInBits() - 1; 13533 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), 13534 DAG.getConstant(ShAmt, DL, VT)); 13535 } 13536 } 13537 13538 if (SDValue V = combineBinOpOfZExt(N, DAG)) 13539 return V; 13540 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget)) 13541 return V; 13542 13543 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 13544 // (select lhs, rhs, cc, x, (sub x, y)) 13545 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); 13546 } 13547 13548 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 13549 // Legalizing setcc can introduce xors like this. Doing this transform reduces 13550 // the number of xors and may allow the xor to fold into a branch condition. 13551 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { 13552 SDValue N0 = N->getOperand(0); 13553 SDValue N1 = N->getOperand(1); 13554 bool IsAnd = N->getOpcode() == ISD::AND; 13555 13556 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) 13557 return SDValue(); 13558 13559 if (!N0.hasOneUse() || !N1.hasOneUse()) 13560 return SDValue(); 13561 13562 SDValue N01 = N0.getOperand(1); 13563 SDValue N11 = N1.getOperand(1); 13564 13565 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 13566 // (xor X, -1) based on the upper bits of the other operand being 0. If the 13567 // operation is And, allow one of the Xors to use -1. 13568 if (isOneConstant(N01)) { 13569 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) 13570 return SDValue(); 13571 } else if (isOneConstant(N11)) { 13572 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 13573 if (!(IsAnd && isAllOnesConstant(N01))) 13574 return SDValue(); 13575 } else 13576 return SDValue(); 13577 13578 EVT VT = N->getValueType(0); 13579 13580 SDValue N00 = N0.getOperand(0); 13581 SDValue N10 = N1.getOperand(0); 13582 13583 // The LHS of the xors needs to be 0/1. 
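  // For the overall fold, e.g. (and (xor X, 1), (xor Y, 1)) with X and Y
  // known to be 0/1 becomes (xor (or X, Y), 1) once the opcode is inverted
  // below; the OR form becomes (xor (and X, Y), 1).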
13584 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 13585 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) 13586 return SDValue(); 13587 13588 // Invert the opcode and insert a new xor. 13589 SDLoc DL(N); 13590 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 13591 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); 13592 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); 13593 } 13594 13595 // Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to 13596 // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed 13597 // value to an unsigned value. This will be lowered to vmax and series of 13598 // vnclipu instructions later. This can be extended to other truncated types 13599 // other than i8 by replacing 256 and 255 with the equivalent constants for the 13600 // type. 13601 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) { 13602 EVT VT = N->getValueType(0); 13603 SDValue N0 = N->getOperand(0); 13604 EVT SrcVT = N0.getValueType(); 13605 13606 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13607 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT)) 13608 return SDValue(); 13609 13610 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse()) 13611 return SDValue(); 13612 13613 SDValue Cond = N0.getOperand(0); 13614 SDValue True = N0.getOperand(1); 13615 SDValue False = N0.getOperand(2); 13616 13617 if (Cond.getOpcode() != ISD::SETCC) 13618 return SDValue(); 13619 13620 // FIXME: Support the version of this pattern with the select operands 13621 // swapped. 13622 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 13623 if (CCVal != ISD::SETULT) 13624 return SDValue(); 13625 13626 SDValue CondLHS = Cond.getOperand(0); 13627 SDValue CondRHS = Cond.getOperand(1); 13628 13629 if (CondLHS != True) 13630 return SDValue(); 13631 13632 unsigned ScalarBits = VT.getScalarSizeInBits(); 13633 13634 // FIXME: Support other constants. 13635 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS); 13636 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits)) 13637 return SDValue(); 13638 13639 if (False.getOpcode() != ISD::SIGN_EXTEND) 13640 return SDValue(); 13641 13642 False = False.getOperand(0); 13643 13644 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True) 13645 return SDValue(); 13646 13647 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1)); 13648 if (!FalseRHSC || !FalseRHSC->isZero()) 13649 return SDValue(); 13650 13651 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get(); 13652 if (CCVal2 != ISD::SETGT) 13653 return SDValue(); 13654 13655 // Emit the signed to unsigned saturation pattern. 13656 SDLoc DL(N); 13657 SDValue Max = 13658 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT)); 13659 SDValue Min = 13660 DAG.getNode(ISD::SMIN, DL, SrcVT, Max, 13661 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT)); 13662 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min); 13663 } 13664 13665 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, 13666 const RISCVSubtarget &Subtarget) { 13667 SDValue N0 = N->getOperand(0); 13668 EVT VT = N->getValueType(0); 13669 13670 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 13671 // extending X. This is safe since we only need the LSB after the shift and 13672 // shift amounts larger than 31 would produce poison. 
If we wait until 13673 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 13674 // to use a BEXT instruction. 13675 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && 13676 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && 13677 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { 13678 SDLoc DL(N0); 13679 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 13680 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 13681 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 13682 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl); 13683 } 13684 13685 return combineTruncSelectToSMaxUSat(N, DAG); 13686 } 13687 13688 // Combines two comparison operation and logic operation to one selection 13689 // operation(min, max) and logic operation. Returns new constructed Node if 13690 // conditions for optimization are satisfied. 13691 static SDValue performANDCombine(SDNode *N, 13692 TargetLowering::DAGCombinerInfo &DCI, 13693 const RISCVSubtarget &Subtarget) { 13694 SelectionDAG &DAG = DCI.DAG; 13695 13696 SDValue N0 = N->getOperand(0); 13697 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 13698 // extending X. This is safe since we only need the LSB after the shift and 13699 // shift amounts larger than 31 would produce poison. If we wait until 13700 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 13701 // to use a BEXT instruction. 13702 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && 13703 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && 13704 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && 13705 N0.hasOneUse()) { 13706 SDLoc DL(N); 13707 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 13708 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 13709 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 13710 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, 13711 DAG.getConstant(1, DL, MVT::i64)); 13712 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); 13713 } 13714 13715 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 13716 return V; 13717 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 13718 return V; 13719 13720 if (DCI.isAfterLegalizeDAG()) 13721 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 13722 return V; 13723 13724 // fold (and (select lhs, rhs, cc, -1, y), x) -> 13725 // (select lhs, rhs, cc, x, (and x, y)) 13726 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); 13727 } 13728 13729 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. 13730 // FIXME: Generalize to other binary operators with same operand. 13731 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, 13732 SelectionDAG &DAG) { 13733 assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); 13734 13735 if (N0.getOpcode() != RISCVISD::CZERO_EQZ || 13736 N1.getOpcode() != RISCVISD::CZERO_NEZ || 13737 !N0.hasOneUse() || !N1.hasOneUse()) 13738 return SDValue(); 13739 13740 // Should have the same condition. 
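  // Illustrative shape of the fold (operand names are hypothetical):
  //   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
  //   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)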
  SDValue Cond = N0.getOperand(1);
  if (Cond != N1.getOperand(1))
    return SDValue();

  SDValue TrueV = N0.getOperand(0);
  SDValue FalseV = N1.getOperand(0);

  if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
      TrueV.getOperand(1) != FalseV.getOperand(1) ||
      !isOneConstant(TrueV.getOperand(1)) ||
      !TrueV.hasOneUse() || !FalseV.hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
                              Cond);
  SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
                              Cond);
  SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
  return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}

static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the
  // select idiom. We may be able to pull a common operation out of the true
  // and false values.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
    return V;
  if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
    return V;

  // fold (or (select cond, 0, y), x) ->
  //      (select cond, x, (or x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
      N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
      !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
  // NOTE: Assumes ROL being legal means ROLW is legal.
13814 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13815 if (N0.getOpcode() == RISCVISD::SLLW && 13816 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && 13817 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { 13818 SDLoc DL(N); 13819 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, 13820 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); 13821 } 13822 13823 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 13824 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { 13825 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 13826 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 13827 if (ConstN00 && CC == ISD::SETLT) { 13828 EVT VT = N0.getValueType(); 13829 SDLoc DL(N0); 13830 const APInt &Imm = ConstN00->getAPIntValue(); 13831 if ((Imm + 1).isSignedIntN(12)) 13832 return DAG.getSetCC(DL, VT, N0.getOperand(1), 13833 DAG.getConstant(Imm + 1, DL, VT), CC); 13834 } 13835 } 13836 13837 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with 13838 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor 13839 // would have been promoted to i32, but the setcc would have i64 result. 13840 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE && 13841 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) { 13842 SDValue N00 = N0.getOperand(0); 13843 SDLoc DL(N); 13844 SDValue LHS = N00.getOperand(0); 13845 SDValue RHS = N00.getOperand(1); 13846 SDValue CC = N00.getOperand(2); 13847 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 13848 LHS.getValueType()); 13849 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(), 13850 LHS, RHS, NotCC); 13851 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc); 13852 } 13853 13854 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 13855 return V; 13856 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 13857 return V; 13858 13859 // fold (xor (select cond, 0, y), x) -> 13860 // (select cond, x, (xor x, y)) 13861 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 13862 } 13863 13864 // Try to expand a scalar multiply to a faster sequence. 13865 static SDValue expandMul(SDNode *N, SelectionDAG &DAG, 13866 TargetLowering::DAGCombinerInfo &DCI, 13867 const RISCVSubtarget &Subtarget) { 13868 13869 EVT VT = N->getValueType(0); 13870 13871 // LI + MUL is usually smaller than the alternative sequence. 13872 if (DAG.getMachineFunction().getFunction().hasMinSize()) 13873 return SDValue(); 13874 13875 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 13876 return SDValue(); 13877 13878 if (VT != Subtarget.getXLenVT()) 13879 return SDValue(); 13880 13881 const bool HasShlAdd = 13882 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa(); 13883 13884 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1)); 13885 if (!CNode) 13886 return SDValue(); 13887 uint64_t MulAmt = CNode->getZExtValue(); 13888 13889 // WARNING: The code below is knowingly incorrect with regards to undef semantics. 13890 // We're adding additional uses of X here, and in principle, we should be freezing 13891 // X before doing so. However, adding freeze here causes real regressions, and no 13892 // other target properly freezes X in these cases either. 
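  // Example decompositions handled below (multiplier values illustrative):
  //   X * 20 == (X * 5) * 4  -> (shl (sh2add X, X), 2)
  //   X * 45 == (X * 5) * 9  -> (sh3add (sh2add X, X), (sh2add X, X))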
  SDValue X = N->getOperand(0);

  if (HasShlAdd) {
    for (uint64_t Divisor : {3, 5, 9}) {
      if (MulAmt % Divisor != 0)
        continue;
      uint64_t MulAmt2 = MulAmt / Divisor;
      // 3/5/9 * 2^N -> shl (shXadd X, X), N
      if (isPowerOf2_64(MulAmt2)) {
        SDLoc DL(N);
        SDValue X = N->getOperand(0);
        // Put the shift first if we can fold a zext into the
        // shift forming a slli.uw.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
            X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
          SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
                                    DAG.getConstant(Log2_64(MulAmt2), DL, VT));
          return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
                             DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
                             Shl);
        }
        // Otherwise, put the shl second so that it can fold with following
        // instructions (e.g. sext or add).
        SDValue Mul359 =
            DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                        DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
        return DAG.getNode(ISD::SHL, DL, VT, Mul359,
                           DAG.getConstant(Log2_64(MulAmt2), DL, VT));
      }

      // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
      if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
        SDLoc DL(N);
        SDValue Mul359 =
            DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                        DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
        return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
                           DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
                           Mul359);
      }
    }

    // If this is a power of 2 plus 2/4/8, we can use a shift followed by a
    // single shXadd. First check if this is a sum of two powers of 2 because
    // that's easy. Then count how many trailing zeros there are up to the
    // first set bit.
    if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
      unsigned ScaleShift = llvm::countr_zero(MulAmt);
      if (ScaleShift >= 1 && ScaleShift < 4) {
        unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
        SDLoc DL(N);
        SDValue Shift1 =
            DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
        return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                           DAG.getConstant(ScaleShift, DL, VT), Shift1);
      }
    }

    // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
    // This is the two-instruction form; there are also three-instruction
    // variants we could implement, e.g.
13953 // (2^(1,2,3) * 3,5,9 + 1) << C2 13954 // 2^(C1>3) * 3,5,9 +/- 1 13955 for (uint64_t Divisor : {3, 5, 9}) { 13956 uint64_t C = MulAmt - 1; 13957 if (C <= Divisor) 13958 continue; 13959 unsigned TZ = llvm::countr_zero(C); 13960 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { 13961 SDLoc DL(N); 13962 SDValue Mul359 = 13963 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, 13964 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); 13965 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, 13966 DAG.getConstant(TZ, DL, VT), X); 13967 } 13968 } 13969 13970 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) 13971 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { 13972 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1); 13973 if (ScaleShift >= 1 && ScaleShift < 4) { 13974 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); 13975 SDLoc DL(N); 13976 SDValue Shift1 = 13977 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); 13978 return DAG.getNode(ISD::ADD, DL, VT, Shift1, 13979 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, 13980 DAG.getConstant(ScaleShift, DL, VT), X)); 13981 } 13982 } 13983 13984 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) 13985 for (uint64_t Offset : {3, 5, 9}) { 13986 if (isPowerOf2_64(MulAmt + Offset)) { 13987 SDLoc DL(N); 13988 SDValue Shift1 = 13989 DAG.getNode(ISD::SHL, DL, VT, X, 13990 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT)); 13991 SDValue Mul359 = 13992 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, 13993 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X); 13994 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359); 13995 } 13996 } 13997 } 13998 13999 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)) 14000 uint64_t MulAmtLowBit = MulAmt & (-MulAmt); 14001 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) { 14002 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit; 14003 SDLoc DL(N); 14004 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), 14005 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT)); 14006 SDValue Shift2 = 14007 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), 14008 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT)); 14009 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2); 14010 } 14011 14012 return SDValue(); 14013 } 14014 14015 // Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> 14016 // (bitcast (sra (v2Xi16 (bitcast X)), 15)) 14017 // Same for other equivalent types with other equivalent constants. 14018 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) { 14019 EVT VT = N->getValueType(0); 14020 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 14021 14022 // Do this for legal vectors unless they are i1 or i8 vectors. 
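  // e.g. for the legal type v4i32 (chosen for illustration) the rewrite is
  //   (mul (and (srl X, 15), 0x00010001), 0x0000ffff)
  //   -> (bitcast (sra (v8i16 (bitcast X)), 15)),
  // i.e. each 16-bit half is filled with its own sign bit.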
14023 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16) 14024 return SDValue(); 14025 14026 if (N->getOperand(0).getOpcode() != ISD::AND || 14027 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL) 14028 return SDValue(); 14029 14030 SDValue And = N->getOperand(0); 14031 SDValue Srl = And.getOperand(0); 14032 14033 APInt V1, V2, V3; 14034 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) || 14035 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) || 14036 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3)) 14037 return SDValue(); 14038 14039 unsigned HalfSize = VT.getScalarSizeInBits() / 2; 14040 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) || 14041 V3 != (HalfSize - 1)) 14042 return SDValue(); 14043 14044 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), 14045 EVT::getIntegerVT(*DAG.getContext(), HalfSize), 14046 VT.getVectorElementCount() * 2); 14047 SDLoc DL(N); 14048 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0)); 14049 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast, 14050 DAG.getConstant(HalfSize - 1, DL, HalfVT)); 14051 return DAG.getNode(ISD::BITCAST, DL, VT, Sra); 14052 } 14053 14054 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 14055 TargetLowering::DAGCombinerInfo &DCI, 14056 const RISCVSubtarget &Subtarget) { 14057 EVT VT = N->getValueType(0); 14058 if (!VT.isVector()) 14059 return expandMul(N, DAG, DCI, Subtarget); 14060 14061 SDLoc DL(N); 14062 SDValue N0 = N->getOperand(0); 14063 SDValue N1 = N->getOperand(1); 14064 SDValue MulOper; 14065 unsigned AddSubOpc; 14066 14067 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) 14068 // (mul x, add (y, 1)) -> (add x, (mul x, y)) 14069 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) 14070 // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) 14071 auto IsAddSubWith1 = [&](SDValue V) -> bool { 14072 AddSubOpc = V->getOpcode(); 14073 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { 14074 SDValue Opnd = V->getOperand(1); 14075 MulOper = V->getOperand(0); 14076 if (AddSubOpc == ISD::SUB) 14077 std::swap(Opnd, MulOper); 14078 if (isOneOrOneSplat(Opnd)) 14079 return true; 14080 } 14081 return false; 14082 }; 14083 14084 if (IsAddSubWith1(N0)) { 14085 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); 14086 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); 14087 } 14088 14089 if (IsAddSubWith1(N1)) { 14090 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); 14091 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); 14092 } 14093 14094 if (SDValue V = combineBinOpOfZExt(N, DAG)) 14095 return V; 14096 14097 if (SDValue V = combineVectorMulToSraBitcast(N, DAG)) 14098 return V; 14099 14100 return SDValue(); 14101 } 14102 14103 /// According to the property that indexed load/store instructions zero-extend 14104 /// their indices, try to narrow the type of index operand. 14105 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { 14106 if (isIndexTypeSigned(IndexType)) 14107 return false; 14108 14109 if (!N->hasOneUse()) 14110 return false; 14111 14112 EVT VT = N.getValueType(); 14113 SDLoc DL(N); 14114 14115 // In general, what we're doing here is seeing if we can sink a truncate to 14116 // a smaller element type into the expression tree building our index. 14117 // TODO: We can generalize this and handle a bunch more cases if useful. 14118 14119 // Narrow a buildvector to the narrowest element type. 
This requires less 14120 // work and less register pressure at high LMUL, and creates smaller constants 14121 // which may be cheaper to materialize. 14122 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) { 14123 KnownBits Known = DAG.computeKnownBits(N); 14124 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits()); 14125 LLVMContext &C = *DAG.getContext(); 14126 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C); 14127 if (ResultVT.bitsLT(VT.getVectorElementType())) { 14128 N = DAG.getNode(ISD::TRUNCATE, DL, 14129 VT.changeVectorElementType(ResultVT), N); 14130 return true; 14131 } 14132 } 14133 14134 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). 14135 if (N.getOpcode() != ISD::SHL) 14136 return false; 14137 14138 SDValue N0 = N.getOperand(0); 14139 if (N0.getOpcode() != ISD::ZERO_EXTEND && 14140 N0.getOpcode() != RISCVISD::VZEXT_VL) 14141 return false; 14142 if (!N0->hasOneUse()) 14143 return false; 14144 14145 APInt ShAmt; 14146 SDValue N1 = N.getOperand(1); 14147 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) 14148 return false; 14149 14150 SDValue Src = N0.getOperand(0); 14151 EVT SrcVT = Src.getValueType(); 14152 unsigned SrcElen = SrcVT.getScalarSizeInBits(); 14153 unsigned ShAmtV = ShAmt.getZExtValue(); 14154 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); 14155 NewElen = std::max(NewElen, 8U); 14156 14157 // Skip if NewElen is not narrower than the original extended type. 14158 if (NewElen >= N0.getValueType().getScalarSizeInBits()) 14159 return false; 14160 14161 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); 14162 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); 14163 14164 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); 14165 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); 14166 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); 14167 return true; 14168 } 14169 14170 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with 14171 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 14172 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 14173 // can become a sext.w instead of a shift pair. 14174 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 14175 const RISCVSubtarget &Subtarget) { 14176 SDValue N0 = N->getOperand(0); 14177 SDValue N1 = N->getOperand(1); 14178 EVT VT = N->getValueType(0); 14179 EVT OpVT = N0.getValueType(); 14180 14181 if (OpVT != MVT::i64 || !Subtarget.is64Bit()) 14182 return SDValue(); 14183 14184 // RHS needs to be a constant. 14185 auto *N1C = dyn_cast<ConstantSDNode>(N1); 14186 if (!N1C) 14187 return SDValue(); 14188 14189 // LHS needs to be (and X, 0xffffffff). 14190 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 14191 !isa<ConstantSDNode>(N0.getOperand(1)) || 14192 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) 14193 return SDValue(); 14194 14195 // Looking for an equality compare. 14196 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); 14197 if (!isIntEqualitySetCC(Cond)) 14198 return SDValue(); 14199 14200 // Don't do this if the sign bit is provably zero, it will be turned back into 14201 // an AND. 14202 APInt SignMask = APInt::getOneBitSet(64, 31); 14203 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) 14204 return SDValue(); 14205 14206 const APInt &C1 = N1C->getAPIntValue(); 14207 14208 SDLoc dl(N); 14209 // If the constant is larger than 2^32 - 1 it is impossible for both sides 14210 // to be equal. 
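  // e.g. (seteq (and X, 0xffffffff), 0x100000000) folds directly to false
  // here (and the setne form to true); the constant is purely illustrative.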
14211 if (C1.getActiveBits() > 32) 14212 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); 14213 14214 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, 14215 N0.getOperand(0), DAG.getValueType(MVT::i32)); 14216 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), 14217 dl, OpVT), Cond); 14218 } 14219 14220 static SDValue 14221 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 14222 const RISCVSubtarget &Subtarget) { 14223 SDValue Src = N->getOperand(0); 14224 EVT VT = N->getValueType(0); 14225 14226 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 14227 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && 14228 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) 14229 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, 14230 Src.getOperand(0)); 14231 14232 return SDValue(); 14233 } 14234 14235 namespace { 14236 // Forward declaration of the structure holding the necessary information to 14237 // apply a combine. 14238 struct CombineResult; 14239 14240 enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 }; 14241 /// Helper class for folding sign/zero extensions. 14242 /// In particular, this class is used for the following combines: 14243 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w 14244 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w 14245 /// mul | mul_vl -> vwmul(u) | vwmul_su 14246 /// shl | shl_vl -> vwsll 14247 /// fadd -> vfwadd | vfwadd_w 14248 /// fsub -> vfwsub | vfwsub_w 14249 /// fmul -> vfwmul 14250 /// An object of this class represents an operand of the operation we want to 14251 /// combine. 14252 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 14253 /// NodeExtensionHelper for `a` and one for `b`. 14254 /// 14255 /// This class abstracts away how the extension is materialized and 14256 /// how its number of users affect the combines. 14257 /// 14258 /// In particular: 14259 /// - VWADD_W is conceptually == add(op0, sext(op1)) 14260 /// - VWADDU_W == add(op0, zext(op1)) 14261 /// - VWSUB_W == sub(op0, sext(op1)) 14262 /// - VWSUBU_W == sub(op0, zext(op1)) 14263 /// - VFWADD_W == fadd(op0, fpext(op1)) 14264 /// - VFWSUB_W == fsub(op0, fpext(op1)) 14265 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 14266 /// zext|sext(smaller_value). 14267 struct NodeExtensionHelper { 14268 /// Records if this operand is like being zero extended. 14269 bool SupportsZExt; 14270 /// Records if this operand is like being sign extended. 14271 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 14272 /// instance, a splat constant (e.g., 3), would support being both sign and 14273 /// zero extended. 14274 bool SupportsSExt; 14275 /// Records if this operand is like being floating-Point extended. 14276 bool SupportsFPExt; 14277 /// This boolean captures whether we care if this operand would still be 14278 /// around after the folding happens. 14279 bool EnforceOneUse; 14280 /// Original value that this NodeExtensionHelper represents. 14281 SDValue OrigOperand; 14282 14283 /// Get the value feeding the extension or the value itself. 14284 /// E.g., for zext(a), this would return a. 
14285 SDValue getSource() const { 14286 switch (OrigOperand.getOpcode()) { 14287 case ISD::ZERO_EXTEND: 14288 case ISD::SIGN_EXTEND: 14289 case RISCVISD::VSEXT_VL: 14290 case RISCVISD::VZEXT_VL: 14291 case RISCVISD::FP_EXTEND_VL: 14292 return OrigOperand.getOperand(0); 14293 default: 14294 return OrigOperand; 14295 } 14296 } 14297 14298 /// Check if this instance represents a splat. 14299 bool isSplat() const { 14300 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL || 14301 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR; 14302 } 14303 14304 /// Get the extended opcode. 14305 unsigned getExtOpc(ExtKind SupportsExt) const { 14306 switch (SupportsExt) { 14307 case ExtKind::SExt: 14308 return RISCVISD::VSEXT_VL; 14309 case ExtKind::ZExt: 14310 return RISCVISD::VZEXT_VL; 14311 case ExtKind::FPExt: 14312 return RISCVISD::FP_EXTEND_VL; 14313 } 14314 llvm_unreachable("Unknown ExtKind enum"); 14315 } 14316 14317 /// Get or create a value that can feed \p Root with the given extension \p 14318 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this 14319 /// operand. \see ::getSource(). 14320 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, 14321 const RISCVSubtarget &Subtarget, 14322 std::optional<ExtKind> SupportsExt) const { 14323 if (!SupportsExt.has_value()) 14324 return OrigOperand; 14325 14326 MVT NarrowVT = getNarrowType(Root, *SupportsExt); 14327 14328 SDValue Source = getSource(); 14329 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType())); 14330 if (Source.getValueType() == NarrowVT) 14331 return Source; 14332 14333 unsigned ExtOpc = getExtOpc(*SupportsExt); 14334 14335 // If we need an extension, we should be changing the type. 14336 SDLoc DL(OrigOperand); 14337 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); 14338 switch (OrigOperand.getOpcode()) { 14339 case ISD::ZERO_EXTEND: 14340 case ISD::SIGN_EXTEND: 14341 case RISCVISD::VSEXT_VL: 14342 case RISCVISD::VZEXT_VL: 14343 case RISCVISD::FP_EXTEND_VL: 14344 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); 14345 case ISD::SPLAT_VECTOR: 14346 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0)); 14347 case RISCVISD::VMV_V_X_VL: 14348 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, 14349 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); 14350 case RISCVISD::VFMV_V_F_VL: 14351 Source = Source.getOperand(1); 14352 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source"); 14353 Source = Source.getOperand(0); 14354 assert(Source.getValueType() == NarrowVT.getVectorElementType()); 14355 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT, 14356 DAG.getUNDEF(NarrowVT), Source, VL); 14357 default: 14358 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL 14359 // and that operand should already have the right NarrowVT so no 14360 // extension should be required at this point. 14361 llvm_unreachable("Unsupported opcode"); 14362 } 14363 } 14364 14365 /// Helper function to get the narrow type for \p Root. 14366 /// The narrow type is the type of \p Root where we divided the size of each 14367 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. 14368 /// \pre Both the narrow type and the original type should be legal. 14369 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) { 14370 MVT VT = Root->getSimpleValueType(0); 14371 14372 // Determine the narrow size. 14373 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 14374 14375 MVT EltVT = SupportsExt == ExtKind::FPExt 14376 ? 
MVT::getFloatingPointVT(NarrowSize) 14377 : MVT::getIntegerVT(NarrowSize); 14378 14379 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) && 14380 "Trying to extend something we can't represent"); 14381 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount()); 14382 return NarrowVT; 14383 } 14384 14385 /// Get the opcode to materialize: 14386 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b) 14387 static unsigned getSExtOpcode(unsigned Opcode) { 14388 switch (Opcode) { 14389 case ISD::ADD: 14390 case RISCVISD::ADD_VL: 14391 case RISCVISD::VWADD_W_VL: 14392 case RISCVISD::VWADDU_W_VL: 14393 case ISD::OR: 14394 return RISCVISD::VWADD_VL; 14395 case ISD::SUB: 14396 case RISCVISD::SUB_VL: 14397 case RISCVISD::VWSUB_W_VL: 14398 case RISCVISD::VWSUBU_W_VL: 14399 return RISCVISD::VWSUB_VL; 14400 case ISD::MUL: 14401 case RISCVISD::MUL_VL: 14402 return RISCVISD::VWMUL_VL; 14403 default: 14404 llvm_unreachable("Unexpected opcode"); 14405 } 14406 } 14407 14408 /// Get the opcode to materialize: 14409 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b) 14410 static unsigned getZExtOpcode(unsigned Opcode) { 14411 switch (Opcode) { 14412 case ISD::ADD: 14413 case RISCVISD::ADD_VL: 14414 case RISCVISD::VWADD_W_VL: 14415 case RISCVISD::VWADDU_W_VL: 14416 case ISD::OR: 14417 return RISCVISD::VWADDU_VL; 14418 case ISD::SUB: 14419 case RISCVISD::SUB_VL: 14420 case RISCVISD::VWSUB_W_VL: 14421 case RISCVISD::VWSUBU_W_VL: 14422 return RISCVISD::VWSUBU_VL; 14423 case ISD::MUL: 14424 case RISCVISD::MUL_VL: 14425 return RISCVISD::VWMULU_VL; 14426 case ISD::SHL: 14427 case RISCVISD::SHL_VL: 14428 return RISCVISD::VWSLL_VL; 14429 default: 14430 llvm_unreachable("Unexpected opcode"); 14431 } 14432 } 14433 14434 /// Get the opcode to materialize: 14435 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b) 14436 static unsigned getFPExtOpcode(unsigned Opcode) { 14437 switch (Opcode) { 14438 case RISCVISD::FADD_VL: 14439 case RISCVISD::VFWADD_W_VL: 14440 return RISCVISD::VFWADD_VL; 14441 case RISCVISD::FSUB_VL: 14442 case RISCVISD::VFWSUB_W_VL: 14443 return RISCVISD::VFWSUB_VL; 14444 case RISCVISD::FMUL_VL: 14445 return RISCVISD::VFWMUL_VL; 14446 default: 14447 llvm_unreachable("Unexpected opcode"); 14448 } 14449 } 14450 14451 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> 14452 /// newOpcode(a, b). 14453 static unsigned getSUOpcode(unsigned Opcode) { 14454 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && 14455 "SU is only supported for MUL"); 14456 return RISCVISD::VWMULSU_VL; 14457 } 14458 14459 /// Get the opcode to materialize 14460 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b). 14461 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) { 14462 switch (Opcode) { 14463 case ISD::ADD: 14464 case RISCVISD::ADD_VL: 14465 case ISD::OR: 14466 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL 14467 : RISCVISD::VWADDU_W_VL; 14468 case ISD::SUB: 14469 case RISCVISD::SUB_VL: 14470 return SupportsExt == ExtKind::SExt ? 
RISCVISD::VWSUB_W_VL 14471 : RISCVISD::VWSUBU_W_VL; 14472 case RISCVISD::FADD_VL: 14473 return RISCVISD::VFWADD_W_VL; 14474 case RISCVISD::FSUB_VL: 14475 return RISCVISD::VFWSUB_W_VL; 14476 default: 14477 llvm_unreachable("Unexpected opcode"); 14478 } 14479 } 14480 14481 using CombineToTry = std::function<std::optional<CombineResult>( 14482 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, 14483 const NodeExtensionHelper & /*RHS*/, SelectionDAG &, 14484 const RISCVSubtarget &)>; 14485 14486 /// Check if this node needs to be fully folded or extended for all users. 14487 bool needToPromoteOtherUsers() const { return EnforceOneUse; } 14488 14489 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG, 14490 const RISCVSubtarget &Subtarget) { 14491 unsigned Opc = OrigOperand.getOpcode(); 14492 MVT VT = OrigOperand.getSimpleValueType(); 14493 14494 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) && 14495 "Unexpected Opcode"); 14496 14497 // The pasthru must be undef for tail agnostic. 14498 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef()) 14499 return; 14500 14501 // Get the scalar value. 14502 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0) 14503 : OrigOperand.getOperand(1); 14504 14505 // See if we have enough sign bits or zero bits in the scalar to use a 14506 // widening opcode by splatting to smaller element size. 14507 unsigned EltBits = VT.getScalarSizeInBits(); 14508 unsigned ScalarBits = Op.getValueSizeInBits(); 14509 // If we're not getting all bits from the element, we need special handling. 14510 if (ScalarBits < EltBits) { 14511 // This should only occur on RV32. 14512 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 && 14513 !Subtarget.is64Bit() && "Unexpected splat"); 14514 // vmv.v.x sign extends narrow inputs. 14515 SupportsSExt = true; 14516 14517 // If the input is positive, then sign extend is also zero extend. 14518 if (DAG.SignBitIsZero(Op)) 14519 SupportsZExt = true; 14520 14521 EnforceOneUse = false; 14522 return; 14523 } 14524 14525 unsigned NarrowSize = EltBits / 2; 14526 // If the narrow type cannot be expressed with a legal VMV, 14527 // this is not a valid candidate. 14528 if (NarrowSize < 8) 14529 return; 14530 14531 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) 14532 SupportsSExt = true; 14533 14534 if (DAG.MaskedValueIsZero(Op, 14535 APInt::getBitsSetFrom(ScalarBits, NarrowSize))) 14536 SupportsZExt = true; 14537 14538 EnforceOneUse = false; 14539 } 14540 14541 /// Helper method to set the various fields of this struct based on the 14542 /// type of \p Root. 14543 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, 14544 const RISCVSubtarget &Subtarget) { 14545 SupportsZExt = false; 14546 SupportsSExt = false; 14547 SupportsFPExt = false; 14548 EnforceOneUse = true; 14549 unsigned Opc = OrigOperand.getOpcode(); 14550 // For the nodes we handle below, we end up using their inputs directly: see 14551 // getSource(). However since they either don't have a passthru or we check 14552 // that their passthru is undef, we can safely ignore their mask and VL. 14553 switch (Opc) { 14554 case ISD::ZERO_EXTEND: 14555 case ISD::SIGN_EXTEND: { 14556 MVT VT = OrigOperand.getSimpleValueType(); 14557 if (!VT.isVector()) 14558 break; 14559 14560 SDValue NarrowElt = OrigOperand.getOperand(0); 14561 MVT NarrowVT = NarrowElt.getSimpleValueType(); 14562 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. 
14563 if (NarrowVT.getVectorElementType() == MVT::i1) 14564 break; 14565 14566 SupportsZExt = Opc == ISD::ZERO_EXTEND; 14567 SupportsSExt = Opc == ISD::SIGN_EXTEND; 14568 break; 14569 } 14570 case RISCVISD::VZEXT_VL: 14571 SupportsZExt = true; 14572 break; 14573 case RISCVISD::VSEXT_VL: 14574 SupportsSExt = true; 14575 break; 14576 case RISCVISD::FP_EXTEND_VL: 14577 SupportsFPExt = true; 14578 break; 14579 case ISD::SPLAT_VECTOR: 14580 case RISCVISD::VMV_V_X_VL: 14581 fillUpExtensionSupportForSplat(Root, DAG, Subtarget); 14582 break; 14583 case RISCVISD::VFMV_V_F_VL: { 14584 MVT VT = OrigOperand.getSimpleValueType(); 14585 14586 if (!OrigOperand.getOperand(0).isUndef()) 14587 break; 14588 14589 SDValue Op = OrigOperand.getOperand(1); 14590 if (Op.getOpcode() != ISD::FP_EXTEND) 14591 break; 14592 14593 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 14594 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits(); 14595 if (NarrowSize != ScalarBits) 14596 break; 14597 14598 SupportsFPExt = true; 14599 break; 14600 } 14601 default: 14602 break; 14603 } 14604 } 14605 14606 /// Check if \p Root supports any extension folding combines. 14607 static bool isSupportedRoot(const SDNode *Root, 14608 const RISCVSubtarget &Subtarget) { 14609 switch (Root->getOpcode()) { 14610 case ISD::ADD: 14611 case ISD::SUB: 14612 case ISD::MUL: { 14613 return Root->getValueType(0).isScalableVector(); 14614 } 14615 case ISD::OR: { 14616 return Root->getValueType(0).isScalableVector() && 14617 Root->getFlags().hasDisjoint(); 14618 } 14619 // Vector Widening Integer Add/Sub/Mul Instructions 14620 case RISCVISD::ADD_VL: 14621 case RISCVISD::MUL_VL: 14622 case RISCVISD::VWADD_W_VL: 14623 case RISCVISD::VWADDU_W_VL: 14624 case RISCVISD::SUB_VL: 14625 case RISCVISD::VWSUB_W_VL: 14626 case RISCVISD::VWSUBU_W_VL: 14627 // Vector Widening Floating-Point Add/Sub/Mul Instructions 14628 case RISCVISD::FADD_VL: 14629 case RISCVISD::FSUB_VL: 14630 case RISCVISD::FMUL_VL: 14631 case RISCVISD::VFWADD_W_VL: 14632 case RISCVISD::VFWSUB_W_VL: 14633 return true; 14634 case ISD::SHL: 14635 return Root->getValueType(0).isScalableVector() && 14636 Subtarget.hasStdExtZvbb(); 14637 case RISCVISD::SHL_VL: 14638 return Subtarget.hasStdExtZvbb(); 14639 default: 14640 return false; 14641 } 14642 } 14643 14644 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 
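/// Illustrative sketch (operand lists abridged, not taken from a test): for a
/// root (vwaddu_w_vl X, (vzext_vl Y, M, VL), Passthru, M, VL), the helper
/// built for operand 1 records SupportsZExt = true with OrigOperand being the
/// vzext_vl node, and getSource() returns Y; the helper for operand 0 simply
/// wraps X.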
14645 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, 14646 const RISCVSubtarget &Subtarget) { 14647 assert(isSupportedRoot(Root, Subtarget) && 14648 "Trying to build an helper with an " 14649 "unsupported root"); 14650 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); 14651 assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0))); 14652 OrigOperand = Root->getOperand(OperandIdx); 14653 14654 unsigned Opc = Root->getOpcode(); 14655 switch (Opc) { 14656 // We consider 14657 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS)) 14658 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS)) 14659 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS)) 14660 case RISCVISD::VWADD_W_VL: 14661 case RISCVISD::VWADDU_W_VL: 14662 case RISCVISD::VWSUB_W_VL: 14663 case RISCVISD::VWSUBU_W_VL: 14664 case RISCVISD::VFWADD_W_VL: 14665 case RISCVISD::VFWSUB_W_VL: 14666 if (OperandIdx == 1) { 14667 SupportsZExt = 14668 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; 14669 SupportsSExt = 14670 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL; 14671 SupportsFPExt = 14672 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL; 14673 // There's no existing extension here, so we don't have to worry about 14674 // making sure it gets removed. 14675 EnforceOneUse = false; 14676 break; 14677 } 14678 [[fallthrough]]; 14679 default: 14680 fillUpExtensionSupport(Root, DAG, Subtarget); 14681 break; 14682 } 14683 } 14684 14685 /// Helper function to get the Mask and VL from \p Root. 14686 static std::pair<SDValue, SDValue> 14687 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, 14688 const RISCVSubtarget &Subtarget) { 14689 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root"); 14690 switch (Root->getOpcode()) { 14691 case ISD::ADD: 14692 case ISD::SUB: 14693 case ISD::MUL: 14694 case ISD::OR: 14695 case ISD::SHL: { 14696 SDLoc DL(Root); 14697 MVT VT = Root->getSimpleValueType(0); 14698 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 14699 } 14700 default: 14701 return std::make_pair(Root->getOperand(3), Root->getOperand(4)); 14702 } 14703 } 14704 14705 /// Helper function to check if \p N is commutative with respect to the 14706 /// foldings that are supported by this class. 14707 static bool isCommutative(const SDNode *N) { 14708 switch (N->getOpcode()) { 14709 case ISD::ADD: 14710 case ISD::MUL: 14711 case ISD::OR: 14712 case RISCVISD::ADD_VL: 14713 case RISCVISD::MUL_VL: 14714 case RISCVISD::VWADD_W_VL: 14715 case RISCVISD::VWADDU_W_VL: 14716 case RISCVISD::FADD_VL: 14717 case RISCVISD::FMUL_VL: 14718 case RISCVISD::VFWADD_W_VL: 14719 return true; 14720 case ISD::SUB: 14721 case RISCVISD::SUB_VL: 14722 case RISCVISD::VWSUB_W_VL: 14723 case RISCVISD::VWSUBU_W_VL: 14724 case RISCVISD::FSUB_VL: 14725 case RISCVISD::VFWSUB_W_VL: 14726 case ISD::SHL: 14727 case RISCVISD::SHL_VL: 14728 return false; 14729 default: 14730 llvm_unreachable("Unexpected opcode"); 14731 } 14732 } 14733 14734 /// Get a list of combine to try for folding extensions in \p Root. 14735 /// Note that each returned CombineToTry function doesn't actually modify 14736 /// anything. Instead they produce an optional CombineResult that if not None, 14737 /// need to be materialized for the combine to be applied. 14738 /// \see CombineResult::materialize. 14739 /// If the related CombineToTry function returns std::nullopt, that means the 14740 /// combine didn't match. 
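/// For instance (illustrative), an ISD::MUL root is given
/// canFoldToVWWithSameExtension (vwmul/vwmulu) and canFoldToVW_SU (vwmulsu)
/// as its strategies, tried in that order and for both operand orders since
/// MUL is commutative.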
14741 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); 14742 }; 14743 14744 /// Helper structure that holds all the necessary information to materialize a 14745 /// combine that does some extension folding. 14746 struct CombineResult { 14747 /// Opcode to be generated when materializing the combine. 14748 unsigned TargetOpcode; 14749 // No value means no extension is needed. 14750 std::optional<ExtKind> LHSExt; 14751 std::optional<ExtKind> RHSExt; 14752 /// Root of the combine. 14753 SDNode *Root; 14754 /// LHS of the TargetOpcode. 14755 NodeExtensionHelper LHS; 14756 /// RHS of the TargetOpcode. 14757 NodeExtensionHelper RHS; 14758 14759 CombineResult(unsigned TargetOpcode, SDNode *Root, 14760 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt, 14761 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt) 14762 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root), 14763 LHS(LHS), RHS(RHS) {} 14764 14765 /// Return a value that uses TargetOpcode and that can be used to replace 14766 /// Root. 14767 /// The actual replacement is *not* done in that method. 14768 SDValue materialize(SelectionDAG &DAG, 14769 const RISCVSubtarget &Subtarget) const { 14770 SDValue Mask, VL, Merge; 14771 std::tie(Mask, VL) = 14772 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); 14773 switch (Root->getOpcode()) { 14774 default: 14775 Merge = Root->getOperand(2); 14776 break; 14777 case ISD::ADD: 14778 case ISD::SUB: 14779 case ISD::MUL: 14780 case ISD::OR: 14781 case ISD::SHL: 14782 Merge = DAG.getUNDEF(Root->getValueType(0)); 14783 break; 14784 } 14785 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), 14786 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt), 14787 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt), 14788 Merge, Mask, VL); 14789 } 14790 }; 14791 14792 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 14793 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 14794 /// are zext) and LHS and RHS can be folded into Root. 14795 /// AllowExtMask define which form `ext` can take in this pattern. 14796 /// 14797 /// \note If the pattern can match with both zext and sext, the returned 14798 /// CombineResult will feature the zext result. 14799 /// 14800 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14801 /// can be used to apply the pattern. 
14802 static std::optional<CombineResult> 14803 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, 14804 const NodeExtensionHelper &RHS, 14805 uint8_t AllowExtMask, SelectionDAG &DAG, 14806 const RISCVSubtarget &Subtarget) { 14807 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt) 14808 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()), 14809 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS, 14810 /*RHSExt=*/{ExtKind::ZExt}); 14811 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt) 14812 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()), 14813 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, 14814 /*RHSExt=*/{ExtKind::SExt}); 14815 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt) 14816 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()), 14817 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS, 14818 /*RHSExt=*/{ExtKind::FPExt}); 14819 return std::nullopt; 14820 } 14821 14822 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 14823 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 14824 /// are zext) and LHS and RHS can be folded into Root. 14825 /// 14826 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14827 /// can be used to apply the pattern. 14828 static std::optional<CombineResult> 14829 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, 14830 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14831 const RISCVSubtarget &Subtarget) { 14832 return canFoldToVWWithSameExtensionImpl( 14833 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG, 14834 Subtarget); 14835 } 14836 14837 /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 14838 /// 14839 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14840 /// can be used to apply the pattern. 14841 static std::optional<CombineResult> 14842 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, 14843 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14844 const RISCVSubtarget &Subtarget) { 14845 if (RHS.SupportsFPExt) 14846 return CombineResult( 14847 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt), 14848 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt}); 14849 14850 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 14851 // sext/zext? 14852 // Control this behavior behind an option (AllowSplatInVW_W) for testing 14853 // purposes. 14854 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) 14855 return CombineResult( 14856 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root, 14857 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt}); 14858 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) 14859 return CombineResult( 14860 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root, 14861 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt}); 14862 return std::nullopt; 14863 } 14864 14865 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 14866 /// 14867 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14868 /// can be used to apply the pattern. 
14869 static std::optional<CombineResult> 14870 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, 14871 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14872 const RISCVSubtarget &Subtarget) { 14873 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG, 14874 Subtarget); 14875 } 14876 14877 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 14878 /// 14879 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14880 /// can be used to apply the pattern. 14881 static std::optional<CombineResult> 14882 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, 14883 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14884 const RISCVSubtarget &Subtarget) { 14885 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG, 14886 Subtarget); 14887 } 14888 14889 /// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS)) 14890 /// 14891 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14892 /// can be used to apply the pattern. 14893 static std::optional<CombineResult> 14894 canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS, 14895 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14896 const RISCVSubtarget &Subtarget) { 14897 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG, 14898 Subtarget); 14899 } 14900 14901 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 14902 /// 14903 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 14904 /// can be used to apply the pattern. 14905 static std::optional<CombineResult> 14906 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, 14907 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 14908 const RISCVSubtarget &Subtarget) { 14909 14910 if (!LHS.SupportsSExt || !RHS.SupportsZExt) 14911 return std::nullopt; 14912 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), 14913 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, 14914 /*RHSExt=*/{ExtKind::ZExt}); 14915 } 14916 14917 SmallVector<NodeExtensionHelper::CombineToTry> 14918 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { 14919 SmallVector<CombineToTry> Strategies; 14920 switch (Root->getOpcode()) { 14921 case ISD::ADD: 14922 case ISD::SUB: 14923 case ISD::OR: 14924 case RISCVISD::ADD_VL: 14925 case RISCVISD::SUB_VL: 14926 case RISCVISD::FADD_VL: 14927 case RISCVISD::FSUB_VL: 14928 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub 14929 Strategies.push_back(canFoldToVWWithSameExtension); 14930 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w 14931 Strategies.push_back(canFoldToVW_W); 14932 break; 14933 case RISCVISD::FMUL_VL: 14934 Strategies.push_back(canFoldToVWWithSameExtension); 14935 break; 14936 case ISD::MUL: 14937 case RISCVISD::MUL_VL: 14938 // mul -> vwmul(u) 14939 Strategies.push_back(canFoldToVWWithSameExtension); 14940 // mul -> vwmulsu 14941 Strategies.push_back(canFoldToVW_SU); 14942 break; 14943 case ISD::SHL: 14944 case RISCVISD::SHL_VL: 14945 // shl -> vwsll 14946 Strategies.push_back(canFoldToVWWithZEXT); 14947 break; 14948 case RISCVISD::VWADD_W_VL: 14949 case RISCVISD::VWSUB_W_VL: 14950 // vwadd_w|vwsub_w -> vwadd|vwsub 14951 Strategies.push_back(canFoldToVWWithSEXT); 14952 break; 14953 case RISCVISD::VWADDU_W_VL: 14954 case RISCVISD::VWSUBU_W_VL: 14955 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu 14956 Strategies.push_back(canFoldToVWWithZEXT); 14957 break; 14958 case RISCVISD::VFWADD_W_VL: 14959 case 
RISCVISD::VFWSUB_W_VL: 14960 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub 14961 Strategies.push_back(canFoldToVWWithFPEXT); 14962 break; 14963 default: 14964 llvm_unreachable("Unexpected opcode"); 14965 } 14966 return Strategies; 14967 } 14968 } // End anonymous namespace. 14969 14970 /// Combine a binary operation to its equivalent VW or VW_W form. 14971 /// The supported combines are: 14972 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w 14973 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w 14974 /// mul | mul_vl -> vwmul(u) | vwmul_su 14975 /// shl | shl_vl -> vwsll 14976 /// fadd_vl -> vfwadd | vfwadd_w 14977 /// fsub_vl -> vfwsub | vfwsub_w 14978 /// fmul_vl -> vfwmul 14979 /// vwadd_w(u) -> vwadd(u) 14980 /// vwsub_w(u) -> vwsub(u) 14981 /// vfwadd_w -> vfwadd 14982 /// vfwsub_w -> vfwsub 14983 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, 14984 TargetLowering::DAGCombinerInfo &DCI, 14985 const RISCVSubtarget &Subtarget) { 14986 SelectionDAG &DAG = DCI.DAG; 14987 if (DCI.isBeforeLegalize()) 14988 return SDValue(); 14989 14990 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget)) 14991 return SDValue(); 14992 14993 SmallVector<SDNode *> Worklist; 14994 SmallSet<SDNode *, 8> Inserted; 14995 Worklist.push_back(N); 14996 Inserted.insert(N); 14997 SmallVector<CombineResult> CombinesToApply; 14998 14999 while (!Worklist.empty()) { 15000 SDNode *Root = Worklist.pop_back_val(); 15001 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget)) 15002 return SDValue(); 15003 15004 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget); 15005 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget); 15006 auto AppendUsersIfNeeded = [&Worklist, 15007 &Inserted](const NodeExtensionHelper &Op) { 15008 if (Op.needToPromoteOtherUsers()) { 15009 for (SDNode *TheUse : Op.OrigOperand->uses()) { 15010 if (Inserted.insert(TheUse).second) 15011 Worklist.push_back(TheUse); 15012 } 15013 } 15014 }; 15015 15016 // Control the compile time by limiting the number of node we look at in 15017 // total. 15018 if (Inserted.size() > ExtensionMaxWebSize) 15019 return SDValue(); 15020 15021 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = 15022 NodeExtensionHelper::getSupportedFoldings(Root); 15023 15024 assert(!FoldingStrategies.empty() && "Nothing to be folded"); 15025 bool Matched = false; 15026 for (int Attempt = 0; 15027 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched; 15028 ++Attempt) { 15029 15030 for (NodeExtensionHelper::CombineToTry FoldingStrategy : 15031 FoldingStrategies) { 15032 std::optional<CombineResult> Res = 15033 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget); 15034 if (Res) { 15035 Matched = true; 15036 CombinesToApply.push_back(*Res); 15037 // All the inputs that are extended need to be folded, otherwise 15038 // we would be leaving the old input (since it is may still be used), 15039 // and the new one. 15040 if (Res->LHSExt.has_value()) 15041 AppendUsersIfNeeded(LHS); 15042 if (Res->RHSExt.has_value()) 15043 AppendUsersIfNeeded(RHS); 15044 break; 15045 } 15046 } 15047 std::swap(LHS, RHS); 15048 } 15049 // Right now we do an all or nothing approach. 15050 if (!Matched) 15051 return SDValue(); 15052 } 15053 // Store the value for the replacement of the input node separately. 15054 SDValue InputRootReplacement; 15055 // We do the RAUW after we materialize all the combines, because some replaced 15056 // nodes may be feeding some of the yet-to-be-replaced nodes. 
Put differently, 15057 // some of these nodes may appear in the NodeExtensionHelpers of some of the 15058 // yet-to-be-visited CombinesToApply roots. 15059 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; 15060 ValuesToReplace.reserve(CombinesToApply.size()); 15061 for (CombineResult Res : CombinesToApply) { 15062 SDValue NewValue = Res.materialize(DAG, Subtarget); 15063 if (!InputRootReplacement) { 15064 assert(Res.Root == N && 15065 "First element is expected to be the current node"); 15066 InputRootReplacement = NewValue; 15067 } else { 15068 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); 15069 } 15070 } 15071 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { 15072 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); 15073 DCI.AddToWorklist(OldNewValues.second.getNode()); 15074 } 15075 return InputRootReplacement; 15076 } 15077 15078 // Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond 15079 // (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond 15080 // y will be the Passthru and cond will be the Mask. 15081 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) { 15082 unsigned Opc = N->getOpcode(); 15083 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || 15084 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); 15085 15086 SDValue Y = N->getOperand(0); 15087 SDValue MergeOp = N->getOperand(1); 15088 unsigned MergeOpc = MergeOp.getOpcode(); 15089 15090 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT) 15091 return SDValue(); 15092 15093 SDValue X = MergeOp->getOperand(1); 15094 15095 if (!MergeOp.hasOneUse()) 15096 return SDValue(); 15097 15098 // Passthru should be undef 15099 SDValue Passthru = N->getOperand(2); 15100 if (!Passthru.isUndef()) 15101 return SDValue(); 15102 15103 // Mask should be all ones 15104 SDValue Mask = N->getOperand(3); 15105 if (Mask.getOpcode() != RISCVISD::VMSET_VL) 15106 return SDValue(); 15107 15108 // False value of MergeOp should be all zeros 15109 SDValue Z = MergeOp->getOperand(2); 15110 15111 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR && 15112 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef())) 15113 Z = Z.getOperand(1); 15114 15115 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode())) 15116 return SDValue(); 15117 15118 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), 15119 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)}, 15120 N->getFlags()); 15121 } 15122 15123 static SDValue performVWADDSUBW_VLCombine(SDNode *N, 15124 TargetLowering::DAGCombinerInfo &DCI, 15125 const RISCVSubtarget &Subtarget) { 15126 [[maybe_unused]] unsigned Opc = N->getOpcode(); 15127 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || 15128 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); 15129 15130 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15131 return V; 15132 15133 return combineVWADDSUBWSelect(N, DCI.DAG); 15134 } 15135 15136 // Helper function for performMemPairCombine. 15137 // Try to combine the memory loads/stores LSNode1 and LSNode2 15138 // into a single memory pair operation. 
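// Illustrative example (assembly syntax approximate): two adjacent loads such
// as
//   lw a0, 8(a1)
//   lw a2, 12(a1)
// can be merged into a single XTHeadMemPair instruction, roughly
//   th.lwd a0, a2, (a1), 1, 3
// where the 2-bit immediate is scaled by 8 to rebuild the base offset.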
15139 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, 15140 LSBaseSDNode *LSNode2, SDValue BasePtr, 15141 uint64_t Imm) { 15142 SmallPtrSet<const SDNode *, 32> Visited; 15143 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; 15144 15145 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) || 15146 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist)) 15147 return SDValue(); 15148 15149 MachineFunction &MF = DAG.getMachineFunction(); 15150 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 15151 15152 // The new operation has twice the width. 15153 MVT XLenVT = Subtarget.getXLenVT(); 15154 EVT MemVT = LSNode1->getMemoryVT(); 15155 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; 15156 MachineMemOperand *MMO = LSNode1->getMemOperand(); 15157 MachineMemOperand *NewMMO = MF.getMachineMemOperand( 15158 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16); 15159 15160 if (LSNode1->getOpcode() == ISD::LOAD) { 15161 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType(); 15162 unsigned Opcode; 15163 if (MemVT == MVT::i32) 15164 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; 15165 else 15166 Opcode = RISCVISD::TH_LDD; 15167 15168 SDValue Res = DAG.getMemIntrinsicNode( 15169 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), 15170 {LSNode1->getChain(), BasePtr, 15171 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 15172 NewMemVT, NewMMO); 15173 15174 SDValue Node1 = 15175 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1)); 15176 SDValue Node2 = 15177 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2)); 15178 15179 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode()); 15180 return Node1; 15181 } else { 15182 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; 15183 15184 SDValue Res = DAG.getMemIntrinsicNode( 15185 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), 15186 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), 15187 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 15188 NewMemVT, NewMMO); 15189 15190 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode()); 15191 return Res; 15192 } 15193 } 15194 15195 // Try to combine two adjacent loads/stores to a single pair instruction from 15196 // the XTHeadMemPair vendor extension. 15197 static SDValue performMemPairCombine(SDNode *N, 15198 TargetLowering::DAGCombinerInfo &DCI) { 15199 SelectionDAG &DAG = DCI.DAG; 15200 MachineFunction &MF = DAG.getMachineFunction(); 15201 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 15202 15203 // Target does not support load/store pair. 15204 if (!Subtarget.hasVendorXTHeadMemPair()) 15205 return SDValue(); 15206 15207 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N); 15208 EVT MemVT = LSNode1->getMemoryVT(); 15209 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; 15210 15211 // No volatile, indexed or atomic loads/stores. 15212 if (!LSNode1->isSimple() || LSNode1->isIndexed()) 15213 return SDValue(); 15214 15215 // Function to get a base + constant representation from a memory value. 
15216 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { 15217 if (Ptr->getOpcode() == ISD::ADD) 15218 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) 15219 return {Ptr->getOperand(0), C1->getZExtValue()}; 15220 return {Ptr, 0}; 15221 }; 15222 15223 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum)); 15224 15225 SDValue Chain = N->getOperand(0); 15226 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); 15227 UI != UE; ++UI) { 15228 SDUse &Use = UI.getUse(); 15229 if (Use.getUser() != N && Use.getResNo() == 0 && 15230 Use.getUser()->getOpcode() == N->getOpcode()) { 15231 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser()); 15232 15233 // No volatile, indexed or atomic loads/stores. 15234 if (!LSNode2->isSimple() || LSNode2->isIndexed()) 15235 continue; 15236 15237 // Check if LSNode1 and LSNode2 have the same type and extension. 15238 if (LSNode1->getOpcode() == ISD::LOAD) 15239 if (cast<LoadSDNode>(LSNode2)->getExtensionType() != 15240 cast<LoadSDNode>(LSNode1)->getExtensionType()) 15241 continue; 15242 15243 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) 15244 continue; 15245 15246 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum)); 15247 15248 // Check if the base pointer is the same for both instruction. 15249 if (Base1 != Base2) 15250 continue; 15251 15252 // Check if the offsets match the XTHeadMemPair encoding contraints. 15253 bool Valid = false; 15254 if (MemVT == MVT::i32) { 15255 // Check for adjacent i32 values and a 2-bit index. 15256 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1)) 15257 Valid = true; 15258 } else if (MemVT == MVT::i64) { 15259 // Check for adjacent i64 values and a 2-bit index. 15260 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1)) 15261 Valid = true; 15262 } 15263 15264 if (!Valid) 15265 continue; 15266 15267 // Try to combine. 15268 if (SDValue Res = 15269 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1)) 15270 return Res; 15271 } 15272 } 15273 15274 return SDValue(); 15275 } 15276 15277 // Fold 15278 // (fp_to_int (froundeven X)) -> fcvt X, rne 15279 // (fp_to_int (ftrunc X)) -> fcvt X, rtz 15280 // (fp_to_int (ffloor X)) -> fcvt X, rdn 15281 // (fp_to_int (fceil X)) -> fcvt X, rup 15282 // (fp_to_int (fround X)) -> fcvt X, rmm 15283 // (fp_to_int (frint X)) -> fcvt X 15284 static SDValue performFP_TO_INTCombine(SDNode *N, 15285 TargetLowering::DAGCombinerInfo &DCI, 15286 const RISCVSubtarget &Subtarget) { 15287 SelectionDAG &DAG = DCI.DAG; 15288 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 15289 MVT XLenVT = Subtarget.getXLenVT(); 15290 15291 SDValue Src = N->getOperand(0); 15292 15293 // Don't do this for strict-fp Src. 15294 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 15295 return SDValue(); 15296 15297 // Ensure the FP type is legal. 15298 if (!TLI.isTypeLegal(Src.getValueType())) 15299 return SDValue(); 15300 15301 // Don't do this for f16 with Zfhmin and not Zfh. 15302 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 15303 return SDValue(); 15304 15305 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 15306 // If the result is invalid, we didn't find a foldable instruction. 
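// As an illustrative scalar example: (i32 fp_to_sint (ffloor f32 X)) matches
// with FRM == RISCVFPRndMode::RDN and is rewritten below into an FCVT node
// carrying a static rounding-mode operand, which selects to roughly
// `fcvt.w.s rd, fs, rdn` instead of materializing the floor separately and
// then converting.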
15307 if (FRM == RISCVFPRndMode::Invalid) 15308 return SDValue(); 15309 15310 SDLoc DL(N); 15311 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; 15312 EVT VT = N->getValueType(0); 15313 15314 if (VT.isVector() && TLI.isTypeLegal(VT)) { 15315 MVT SrcVT = Src.getSimpleValueType(); 15316 MVT SrcContainerVT = SrcVT; 15317 MVT ContainerVT = VT.getSimpleVT(); 15318 SDValue XVal = Src.getOperand(0); 15319 15320 // For widening and narrowing conversions we just combine it into a 15321 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They 15322 // end up getting lowered to their appropriate pseudo instructions based on 15323 // their operand types 15324 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || 15325 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) 15326 return SDValue(); 15327 15328 // Make fixed-length vectors scalable first 15329 if (SrcVT.isFixedLengthVector()) { 15330 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 15331 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); 15332 ContainerVT = 15333 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 15334 } 15335 15336 auto [Mask, VL] = 15337 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); 15338 15339 SDValue FpToInt; 15340 if (FRM == RISCVFPRndMode::RTZ) { 15341 // Use the dedicated trunc static rounding mode if we're truncating so we 15342 // don't need to generate calls to fsrmi/fsrm 15343 unsigned Opc = 15344 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 15345 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 15346 } else if (FRM == RISCVFPRndMode::DYN) { 15347 unsigned Opc = 15348 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; 15349 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 15350 } else { 15351 unsigned Opc = 15352 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; 15353 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, 15354 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 15355 } 15356 15357 // If converted from fixed-length to scalable, convert back 15358 if (VT.isFixedLengthVector()) 15359 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); 15360 15361 return FpToInt; 15362 } 15363 15364 // Only handle XLen or i32 types. Other types narrower than XLen will 15365 // eventually be legalized to XLenVT. 15366 if (VT != MVT::i32 && VT != XLenVT) 15367 return SDValue(); 15368 15369 unsigned Opc; 15370 if (VT == XLenVT) 15371 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 15372 else 15373 Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 15374 15375 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), 15376 DAG.getTargetConstant(FRM, DL, XLenVT)); 15377 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); 15378 } 15379 15380 // Fold 15381 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 15382 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 15383 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 15384 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 15385 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 15386 // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) 15387 static SDValue performFP_TO_INT_SATCombine(SDNode *N, 15388 TargetLowering::DAGCombinerInfo &DCI, 15389 const RISCVSubtarget &Subtarget) { 15390 SelectionDAG &DAG = DCI.DAG; 15391 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 15392 MVT XLenVT = Subtarget.getXLenVT(); 15393 15394 // Only handle XLen types. Other types narrower than XLen will eventually be 15395 // legalized to XLenVT. 15396 EVT DstVT = N->getValueType(0); 15397 if (DstVT != XLenVT) 15398 return SDValue(); 15399 15400 SDValue Src = N->getOperand(0); 15401 15402 // Don't do this for strict-fp Src. 15403 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 15404 return SDValue(); 15405 15406 // Ensure the FP type is also legal. 15407 if (!TLI.isTypeLegal(Src.getValueType())) 15408 return SDValue(); 15409 15410 // Don't do this for f16 with Zfhmin and not Zfh. 15411 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 15412 return SDValue(); 15413 15414 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 15415 15416 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 15417 if (FRM == RISCVFPRndMode::Invalid) 15418 return SDValue(); 15419 15420 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; 15421 15422 unsigned Opc; 15423 if (SatVT == DstVT) 15424 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 15425 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 15426 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 15427 else 15428 return SDValue(); 15429 // FIXME: Support other SatVTs by clamping before or after the conversion. 15430 15431 Src = Src.getOperand(0); 15432 15433 SDLoc DL(N); 15434 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, 15435 DAG.getTargetConstant(FRM, DL, XLenVT)); 15436 15437 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 15438 // extend. 15439 if (Opc == RISCVISD::FCVT_WU_RV64) 15440 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 15441 15442 // RISC-V FP-to-int conversions saturate to the destination register size, but 15443 // don't produce 0 for nan. 15444 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 15445 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 15446 } 15447 15448 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 15449 // smaller than XLenVT. 
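// For example (i16, illustrative): byte-swapping the two bytes and then
// reversing all 16 bits leaves each byte in place with its bits reversed,
// which is exactly what a single `brev8` (Zbkb) produces.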
15450 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, 15451 const RISCVSubtarget &Subtarget) { 15452 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 15453 15454 SDValue Src = N->getOperand(0); 15455 if (Src.getOpcode() != ISD::BSWAP) 15456 return SDValue(); 15457 15458 EVT VT = N->getValueType(0); 15459 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || 15460 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) 15461 return SDValue(); 15462 15463 SDLoc DL(N); 15464 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); 15465 } 15466 15467 // Convert from one FMA opcode to another based on whether we are negating the 15468 // multiply result and/or the accumulator. 15469 // NOTE: Only supports RVV operations with VL. 15470 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { 15471 // Negating the multiply result changes ADD<->SUB and toggles 'N'. 15472 if (NegMul) { 15473 // clang-format off 15474 switch (Opcode) { 15475 default: llvm_unreachable("Unexpected opcode"); 15476 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 15477 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 15478 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 15479 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 15480 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 15481 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 15482 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 15483 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 15484 } 15485 // clang-format on 15486 } 15487 15488 // Negating the accumulator changes ADD<->SUB. 15489 if (NegAcc) { 15490 // clang-format off 15491 switch (Opcode) { 15492 default: llvm_unreachable("Unexpected opcode"); 15493 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 15494 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 15495 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 15496 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 15497 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 15498 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 15499 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 15500 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 15501 } 15502 // clang-format on 15503 } 15504 15505 return Opcode; 15506 } 15507 15508 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { 15509 // Fold FNEG_VL into FMA opcodes. 15510 // The first operand of strict-fp is chain. 15511 unsigned Offset = N->isTargetStrictFPOpcode(); 15512 SDValue A = N->getOperand(0 + Offset); 15513 SDValue B = N->getOperand(1 + Offset); 15514 SDValue C = N->getOperand(2 + Offset); 15515 SDValue Mask = N->getOperand(3 + Offset); 15516 SDValue VL = N->getOperand(4 + Offset); 15517 15518 auto invertIfNegative = [&Mask, &VL](SDValue &V) { 15519 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && 15520 V.getOperand(2) == VL) { 15521 // Return the negated input. 15522 V = V.getOperand(0); 15523 return true; 15524 } 15525 15526 return false; 15527 }; 15528 15529 bool NegA = invertIfNegative(A); 15530 bool NegB = invertIfNegative(B); 15531 bool NegC = invertIfNegative(C); 15532 15533 // If no operands are negated, we're done. 
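// Illustrative (operand lists abridged): (vfmadd_vl (fneg_vl A, Mask, VL), B,
// C, Mask, VL) sets NegA here and is rebuilt below as
// (vfnmsub_vl A, B, C, Mask, VL); if C were negated as well, the result would
// be vfnmadd_vl.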
15534 if (!NegA && !NegB && !NegC) 15535 return SDValue(); 15536 15537 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); 15538 if (N->isTargetStrictFPOpcode()) 15539 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(), 15540 {N->getOperand(0), A, B, C, Mask, VL}); 15541 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, 15542 VL); 15543 } 15544 15545 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, 15546 const RISCVSubtarget &Subtarget) { 15547 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) 15548 return V; 15549 15550 if (N->getValueType(0).getVectorElementType() == MVT::f32 && 15551 !Subtarget.hasVInstructionsF16()) 15552 return SDValue(); 15553 15554 // FIXME: Ignore strict opcodes for now. 15555 if (N->isTargetStrictFPOpcode()) 15556 return SDValue(); 15557 15558 // Try to form widening FMA. 15559 SDValue Op0 = N->getOperand(0); 15560 SDValue Op1 = N->getOperand(1); 15561 SDValue Mask = N->getOperand(3); 15562 SDValue VL = N->getOperand(4); 15563 15564 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 15565 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 15566 return SDValue(); 15567 15568 // TODO: Refactor to handle more complex cases similar to 15569 // combineBinOp_VLToVWBinOp_VL. 15570 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 15571 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 15572 return SDValue(); 15573 15574 // Check the mask and VL are the same. 15575 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 15576 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 15577 return SDValue(); 15578 15579 unsigned NewOpc; 15580 switch (N->getOpcode()) { 15581 default: 15582 llvm_unreachable("Unexpected opcode"); 15583 case RISCVISD::VFMADD_VL: 15584 NewOpc = RISCVISD::VFWMADD_VL; 15585 break; 15586 case RISCVISD::VFNMSUB_VL: 15587 NewOpc = RISCVISD::VFWNMSUB_VL; 15588 break; 15589 case RISCVISD::VFNMADD_VL: 15590 NewOpc = RISCVISD::VFWNMADD_VL; 15591 break; 15592 case RISCVISD::VFMSUB_VL: 15593 NewOpc = RISCVISD::VFWMSUB_VL; 15594 break; 15595 } 15596 15597 Op0 = Op0.getOperand(0); 15598 Op1 = Op1.getOperand(0); 15599 15600 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1, 15601 N->getOperand(2), Mask, VL); 15602 } 15603 15604 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 15605 const RISCVSubtarget &Subtarget) { 15606 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); 15607 15608 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) 15609 return SDValue(); 15610 15611 if (!isa<ConstantSDNode>(N->getOperand(1))) 15612 return SDValue(); 15613 uint64_t ShAmt = N->getConstantOperandVal(1); 15614 if (ShAmt > 32) 15615 return SDValue(); 15616 15617 SDValue N0 = N->getOperand(0); 15618 15619 // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> 15620 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of 15621 // SLLIW+SRAIW. SLLI+SRAI have compressed forms. 
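// Worked example (illustrative): with C1 = 3 and C2 = 1,
// (sra (sext_inreg (shl X, 3), i32), 1) becomes (sra (shl X, 35), 33);
// slli/srai both have compressed encodings (c.slli/c.srai), unlike
// slliw/sraiw.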
  if (ShAmt < 32 &&
      N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
      cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
      N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
    if (LShAmt < 32) {
      SDLoc ShlDL(N0.getOperand(0));
      SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
                                N0.getOperand(0).getOperand(0),
                                DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
                         DAG.getConstant(ShAmt + 32, DL, MVT::i64));
    }
  }

  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
  // FIXME: Should this be a generic combine? There's a similar combine on X86.
  //
  // Also try these folds where an add or sub is in the middle.
  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
  SDValue Shl;
  ConstantSDNode *AddC = nullptr;

  // We might have an ADD or SUB between the SRA and SHL.
  bool IsAdd = N0.getOpcode() == ISD::ADD;
  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
    // The other operand needs to be a constant we can modify.
    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
    if (!AddC)
      return SDValue();

    // AddC needs to have at least 32 trailing zeros.
    if (AddC->getAPIntValue().countr_zero() < 32)
      return SDValue();

    // All users should be a shift by constant less than or equal to 32. This
    // ensures we'll do this optimization for each of them to produce an
    // add/sub+sext_inreg they can all share.
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          U->getConstantOperandVal(1) > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free, so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
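  // For example (illustrative): (sra (add (shl X, 32), (C1 << 32)), 32)
  // reaches this point with AddC = C1 << 32; the constant is shifted right by
  // 32 below, so the whole expression becomes (sext_inreg (add X, C1), i32).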
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}

// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
// the result is used as the condition of a br_cc or select_cc that we can
// invert, inverting the setcc is free, and Z is 0/1. The caller will invert
// the br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}

// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // Since an arithmetic right shift always preserves the sign bit, the shift
  // can be omitted when comparing against zero:
15781 // Fold setlt (sra X, N), 0 -> setlt X, 0 and 15782 // setge (sra X, N), 0 -> setge X, 0 15783 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && 15784 LHS.getOpcode() == ISD::SRA) { 15785 LHS = LHS.getOperand(0); 15786 return true; 15787 } 15788 15789 if (!ISD::isIntEqualitySetCC(CCVal)) 15790 return false; 15791 15792 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) 15793 // Sometimes the setcc is introduced after br_cc/select_cc has been formed. 15794 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 15795 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 15796 // If we're looking for eq 0 instead of ne 0, we need to invert the 15797 // condition. 15798 bool Invert = CCVal == ISD::SETEQ; 15799 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 15800 if (Invert) 15801 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 15802 15803 RHS = LHS.getOperand(1); 15804 LHS = LHS.getOperand(0); 15805 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 15806 15807 CC = DAG.getCondCode(CCVal); 15808 return true; 15809 } 15810 15811 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 15812 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { 15813 RHS = LHS.getOperand(1); 15814 LHS = LHS.getOperand(0); 15815 return true; 15816 } 15817 15818 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 15819 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && 15820 LHS.getOperand(1).getOpcode() == ISD::Constant) { 15821 SDValue LHS0 = LHS.getOperand(0); 15822 if (LHS0.getOpcode() == ISD::AND && 15823 LHS0.getOperand(1).getOpcode() == ISD::Constant) { 15824 uint64_t Mask = LHS0.getConstantOperandVal(1); 15825 uint64_t ShAmt = LHS.getConstantOperandVal(1); 15826 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { 15827 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 15828 CC = DAG.getCondCode(CCVal); 15829 15830 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; 15831 LHS = LHS0.getOperand(0); 15832 if (ShAmt != 0) 15833 LHS = 15834 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), 15835 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 15836 return true; 15837 } 15838 } 15839 } 15840 15841 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. 15842 // This can occur when legalizing some floating point comparisons. 15843 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 15844 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 15845 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 15846 CC = DAG.getCondCode(CCVal); 15847 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 15848 return true; 15849 } 15850 15851 if (isNullConstant(RHS)) { 15852 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { 15853 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 15854 CC = DAG.getCondCode(CCVal); 15855 LHS = NewCond; 15856 return true; 15857 } 15858 } 15859 15860 return false; 15861 } 15862 15863 // Fold 15864 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). 15865 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). 15866 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). 15867 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). 
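// Illustrative: (select c, (add y, x), y) becomes (add y, (select c, x, 0)),
// and with Zicond or XVentanaCondOps the inner (select c, x, 0) can lower to
// a single conditional-zero operation (e.g. czero.eqz), avoiding a branch.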
15868 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, 15869 SDValue TrueVal, SDValue FalseVal, 15870 bool Swapped) { 15871 bool Commutative = true; 15872 unsigned Opc = TrueVal.getOpcode(); 15873 switch (Opc) { 15874 default: 15875 return SDValue(); 15876 case ISD::SHL: 15877 case ISD::SRA: 15878 case ISD::SRL: 15879 case ISD::SUB: 15880 Commutative = false; 15881 break; 15882 case ISD::ADD: 15883 case ISD::OR: 15884 case ISD::XOR: 15885 break; 15886 } 15887 15888 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) 15889 return SDValue(); 15890 15891 unsigned OpToFold; 15892 if (FalseVal == TrueVal.getOperand(0)) 15893 OpToFold = 0; 15894 else if (Commutative && FalseVal == TrueVal.getOperand(1)) 15895 OpToFold = 1; 15896 else 15897 return SDValue(); 15898 15899 EVT VT = N->getValueType(0); 15900 SDLoc DL(N); 15901 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); 15902 EVT OtherOpVT = OtherOp.getValueType(); 15903 SDValue IdentityOperand = 15904 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); 15905 if (!Commutative) 15906 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT); 15907 assert(IdentityOperand && "No identity operand!"); 15908 15909 if (Swapped) 15910 std::swap(OtherOp, IdentityOperand); 15911 SDValue NewSel = 15912 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand); 15913 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); 15914 } 15915 15916 // This tries to get rid of `select` and `icmp` that are being used to handle 15917 // `Targets` that do not support `cttz(0)`/`ctlz(0)`. 15918 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { 15919 SDValue Cond = N->getOperand(0); 15920 15921 // This represents either CTTZ or CTLZ instruction. 15922 SDValue CountZeroes; 15923 15924 SDValue ValOnZero; 15925 15926 if (Cond.getOpcode() != ISD::SETCC) 15927 return SDValue(); 15928 15929 if (!isNullConstant(Cond->getOperand(1))) 15930 return SDValue(); 15931 15932 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); 15933 if (CCVal == ISD::CondCode::SETEQ) { 15934 CountZeroes = N->getOperand(2); 15935 ValOnZero = N->getOperand(1); 15936 } else if (CCVal == ISD::CondCode::SETNE) { 15937 CountZeroes = N->getOperand(1); 15938 ValOnZero = N->getOperand(2); 15939 } else { 15940 return SDValue(); 15941 } 15942 15943 if (CountZeroes.getOpcode() == ISD::TRUNCATE || 15944 CountZeroes.getOpcode() == ISD::ZERO_EXTEND) 15945 CountZeroes = CountZeroes.getOperand(0); 15946 15947 if (CountZeroes.getOpcode() != ISD::CTTZ && 15948 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && 15949 CountZeroes.getOpcode() != ISD::CTLZ && 15950 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) 15951 return SDValue(); 15952 15953 if (!isNullConstant(ValOnZero)) 15954 return SDValue(); 15955 15956 SDValue CountZeroesArgument = CountZeroes->getOperand(0); 15957 if (Cond->getOperand(0) != CountZeroesArgument) 15958 return SDValue(); 15959 15960 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 15961 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), 15962 CountZeroes.getValueType(), CountZeroesArgument); 15963 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { 15964 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes), 15965 CountZeroes.getValueType(), CountZeroesArgument); 15966 } 15967 15968 unsigned BitWidth = CountZeroes.getValueSizeInBits(); 15969 SDValue BitWidthMinusOne = 15970 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); 15971 15972 auto AndNode = 
DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(), 15973 CountZeroes, BitWidthMinusOne); 15974 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); 15975 } 15976 15977 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, 15978 const RISCVSubtarget &Subtarget) { 15979 SDValue Cond = N->getOperand(0); 15980 SDValue True = N->getOperand(1); 15981 SDValue False = N->getOperand(2); 15982 SDLoc DL(N); 15983 EVT VT = N->getValueType(0); 15984 EVT CondVT = Cond.getValueType(); 15985 15986 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 15987 return SDValue(); 15988 15989 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate 15990 // BEXTI, where C is power of 2. 15991 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && 15992 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { 15993 SDValue LHS = Cond.getOperand(0); 15994 SDValue RHS = Cond.getOperand(1); 15995 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 15996 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && 15997 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) { 15998 const APInt &MaskVal = LHS.getConstantOperandAPInt(1); 15999 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12)) 16000 return DAG.getSelect(DL, VT, 16001 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), 16002 False, True); 16003 } 16004 } 16005 return SDValue(); 16006 } 16007 16008 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, 16009 const RISCVSubtarget &Subtarget) { 16010 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) 16011 return Folded; 16012 16013 if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) 16014 return V; 16015 16016 if (Subtarget.hasConditionalMoveFusion()) 16017 return SDValue(); 16018 16019 SDValue TrueVal = N->getOperand(1); 16020 SDValue FalseVal = N->getOperand(2); 16021 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) 16022 return V; 16023 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); 16024 } 16025 16026 /// If we have a build_vector where each lane is binop X, C, where C 16027 /// is a constant (but not necessarily the same constant on all lanes), 16028 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). 16029 /// We assume that materializing a constant build vector will be no more 16030 /// expensive that performing O(n) binops. 16031 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, 16032 const RISCVSubtarget &Subtarget, 16033 const RISCVTargetLowering &TLI) { 16034 SDLoc DL(N); 16035 EVT VT = N->getValueType(0); 16036 16037 assert(!VT.isScalableVector() && "unexpected build vector"); 16038 16039 if (VT.getVectorNumElements() == 1) 16040 return SDValue(); 16041 16042 const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); 16043 if (!TLI.isBinOp(Opcode)) 16044 return SDValue(); 16045 16046 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT)) 16047 return SDValue(); 16048 16049 // This BUILD_VECTOR involves an implicit truncation, and sinking 16050 // truncates through binops is non-trivial. 16051 if (N->op_begin()->getValueType() != VT.getVectorElementType()) 16052 return SDValue(); 16053 16054 SmallVector<SDValue> LHSOps; 16055 SmallVector<SDValue> RHSOps; 16056 for (SDValue Op : N->ops()) { 16057 if (Op.isUndef()) { 16058 // We can't form a divide or remainder from undef. 
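// For instance, keeping an undef lane for udiv would build
// (udiv undef, undef), whose divisor may be zero, so only opcodes that are
// safe to speculate (add, xor, etc.) may preserve the undef lane here.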
16059 if (!DAG.isSafeToSpeculativelyExecute(Opcode)) 16060 return SDValue(); 16061 16062 LHSOps.push_back(Op); 16063 RHSOps.push_back(Op); 16064 continue; 16065 } 16066 16067 // TODO: We can handle operations which have an neutral rhs value 16068 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track 16069 // of profit in a more explicit manner. 16070 if (Op.getOpcode() != Opcode || !Op.hasOneUse()) 16071 return SDValue(); 16072 16073 LHSOps.push_back(Op.getOperand(0)); 16074 if (!isa<ConstantSDNode>(Op.getOperand(1)) && 16075 !isa<ConstantFPSDNode>(Op.getOperand(1))) 16076 return SDValue(); 16077 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 16078 // have different LHS and RHS types. 16079 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) 16080 return SDValue(); 16081 16082 RHSOps.push_back(Op.getOperand(1)); 16083 } 16084 16085 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps), 16086 DAG.getBuildVector(VT, DL, RHSOps)); 16087 } 16088 16089 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, 16090 const RISCVSubtarget &Subtarget, 16091 const RISCVTargetLowering &TLI) { 16092 SDValue InVec = N->getOperand(0); 16093 SDValue InVal = N->getOperand(1); 16094 SDValue EltNo = N->getOperand(2); 16095 SDLoc DL(N); 16096 16097 EVT VT = InVec.getValueType(); 16098 if (VT.isScalableVector()) 16099 return SDValue(); 16100 16101 if (!InVec.hasOneUse()) 16102 return SDValue(); 16103 16104 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt 16105 // move the insert_vector_elts into the arms of the binop. Note that 16106 // the new RHS must be a constant. 16107 const unsigned InVecOpcode = InVec->getOpcode(); 16108 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) && 16109 InVal.hasOneUse()) { 16110 SDValue InVecLHS = InVec->getOperand(0); 16111 SDValue InVecRHS = InVec->getOperand(1); 16112 SDValue InValLHS = InVal->getOperand(0); 16113 SDValue InValRHS = InVal->getOperand(1); 16114 16115 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode())) 16116 return SDValue(); 16117 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS)) 16118 return SDValue(); 16119 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 16120 // have different LHS and RHS types. 16121 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType()) 16122 return SDValue(); 16123 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 16124 InVecLHS, InValLHS, EltNo); 16125 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 16126 InVecRHS, InValRHS, EltNo); 16127 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS); 16128 } 16129 16130 // Given insert_vector_elt (concat_vectors ...), InVal, Elt 16131 // move the insert_vector_elt to the source operand of the concat_vector. 
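// For example, (insert_vector_elt (concat_vectors v4i32:A, v4i32:B), x, 6)
// becomes (concat_vectors A, (insert_vector_elt B, x, 2)): operand 6/4 = 1 is
// updated and the element index is rewritten modulo the operand width.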
16132 if (InVec.getOpcode() != ISD::CONCAT_VECTORS) 16133 return SDValue(); 16134 16135 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); 16136 if (!IndexC) 16137 return SDValue(); 16138 unsigned Elt = IndexC->getZExtValue(); 16139 16140 EVT ConcatVT = InVec.getOperand(0).getValueType(); 16141 if (ConcatVT.getVectorElementType() != InVal.getValueType()) 16142 return SDValue(); 16143 unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); 16144 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL); 16145 16146 unsigned ConcatOpIdx = Elt / ConcatNumElts; 16147 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx); 16148 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT, 16149 ConcatOp, InVal, NewIdx); 16150 16151 SmallVector<SDValue> ConcatOps; 16152 ConcatOps.append(InVec->op_begin(), InVec->op_end()); 16153 ConcatOps[ConcatOpIdx] = ConcatOp; 16154 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); 16155 } 16156 16157 // If we're concatenating a series of vector loads like 16158 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 16159 // Then we can turn this into a strided load by widening the vector elements 16160 // vlse32 p, stride=n 16161 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, 16162 const RISCVSubtarget &Subtarget, 16163 const RISCVTargetLowering &TLI) { 16164 SDLoc DL(N); 16165 EVT VT = N->getValueType(0); 16166 16167 // Only perform this combine on legal MVTs. 16168 if (!TLI.isTypeLegal(VT)) 16169 return SDValue(); 16170 16171 // TODO: Potentially extend this to scalable vectors 16172 if (VT.isScalableVector()) 16173 return SDValue(); 16174 16175 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0)); 16176 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) || 16177 !SDValue(BaseLd, 0).hasOneUse()) 16178 return SDValue(); 16179 16180 EVT BaseLdVT = BaseLd->getValueType(0); 16181 16182 // Go through the loads and check that they're strided 16183 SmallVector<LoadSDNode *> Lds; 16184 Lds.push_back(BaseLd); 16185 Align Align = BaseLd->getAlign(); 16186 for (SDValue Op : N->ops().drop_front()) { 16187 auto *Ld = dyn_cast<LoadSDNode>(Op); 16188 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || 16189 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) || 16190 Ld->getValueType(0) != BaseLdVT) 16191 return SDValue(); 16192 16193 Lds.push_back(Ld); 16194 16195 // The common alignment is the most restrictive (smallest) of all the loads 16196 Align = std::min(Align, Ld->getAlign()); 16197 } 16198 16199 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; 16200 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, 16201 LoadSDNode *Ld2) -> std::optional<PtrDiff> { 16202 // If the load ptrs can be decomposed into a common (Base + Index) with a 16203 // common constant stride, then return the constant stride. 
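// The boolean in PtrDiff records whether the stride must be negated: if
// Ld1's pointer is (add Ld2_ptr, Stride), the distance from Ld1 to Ld2 is
// -Stride, e.g. loads at p+16 and p give a stride of -16.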
16204 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); 16205 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); 16206 if (BIO1.equalBaseIndex(BIO2, DAG)) 16207 return {{BIO2.getOffset() - BIO1.getOffset(), false}}; 16208 16209 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) 16210 SDValue P1 = Ld1->getBasePtr(); 16211 SDValue P2 = Ld2->getBasePtr(); 16212 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) 16213 return {{P2.getOperand(1), false}}; 16214 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) 16215 return {{P1.getOperand(1), true}}; 16216 16217 return std::nullopt; 16218 }; 16219 16220 // Get the distance between the first and second loads 16221 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); 16222 if (!BaseDiff) 16223 return SDValue(); 16224 16225 // Check all the loads are the same distance apart 16226 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) 16227 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) 16228 return SDValue(); 16229 16230 // TODO: At this point, we've successfully matched a generalized gather 16231 // load. Maybe we should emit that, and then move the specialized 16232 // matchers above and below into a DAG combine? 16233 16234 // Get the widened scalar type, e.g. v4i8 -> i64 16235 unsigned WideScalarBitWidth = 16236 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); 16237 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth); 16238 16239 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 16240 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands()); 16241 if (!TLI.isTypeLegal(WideVecVT)) 16242 return SDValue(); 16243 16244 // Check that the operation is legal 16245 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) 16246 return SDValue(); 16247 16248 auto [StrideVariant, MustNegateStride] = *BaseDiff; 16249 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant) 16250 ? 
std::get<SDValue>(StrideVariant) 16251 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL, 16252 Lds[0]->getOffset().getValueType()); 16253 if (MustNegateStride) 16254 Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); 16255 16256 SDValue AllOneMask = 16257 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, 16258 DAG.getConstant(1, DL, MVT::i1)); 16259 16260 uint64_t MemSize; 16261 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); 16262 ConstStride && ConstStride->getSExtValue() >= 0) 16263 // total size = (elsize * n) + (stride - elsize) * (n-1) 16264 // = elsize + stride * (n-1) 16265 MemSize = WideScalarVT.getSizeInBits() + 16266 ConstStride->getSExtValue() * (N->getNumOperands() - 1); 16267 else 16268 // If Stride isn't constant, then we can't know how much it will load 16269 MemSize = MemoryLocation::UnknownSize; 16270 16271 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 16272 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize, 16273 Align); 16274 16275 SDValue StridedLoad = DAG.getStridedLoadVP( 16276 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride, 16277 AllOneMask, 16278 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO); 16279 16280 for (SDValue Ld : N->ops()) 16281 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); 16282 16283 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad); 16284 } 16285 16286 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, 16287 const RISCVSubtarget &Subtarget) { 16288 16289 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); 16290 16291 if (N->getValueType(0).isFixedLengthVector()) 16292 return SDValue(); 16293 16294 SDValue Addend = N->getOperand(0); 16295 SDValue MulOp = N->getOperand(1); 16296 16297 if (N->getOpcode() == RISCVISD::ADD_VL) { 16298 SDValue AddMergeOp = N->getOperand(2); 16299 if (!AddMergeOp.isUndef()) 16300 return SDValue(); 16301 } 16302 16303 auto IsVWMulOpc = [](unsigned Opc) { 16304 switch (Opc) { 16305 case RISCVISD::VWMUL_VL: 16306 case RISCVISD::VWMULU_VL: 16307 case RISCVISD::VWMULSU_VL: 16308 return true; 16309 default: 16310 return false; 16311 } 16312 }; 16313 16314 if (!IsVWMulOpc(MulOp.getOpcode())) 16315 std::swap(Addend, MulOp); 16316 16317 if (!IsVWMulOpc(MulOp.getOpcode())) 16318 return SDValue(); 16319 16320 SDValue MulMergeOp = MulOp.getOperand(2); 16321 16322 if (!MulMergeOp.isUndef()) 16323 return SDValue(); 16324 16325 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, 16326 const RISCVSubtarget &Subtarget) { 16327 if (N->getOpcode() == ISD::ADD) { 16328 SDLoc DL(N); 16329 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, 16330 Subtarget); 16331 } 16332 return std::make_pair(N->getOperand(3), N->getOperand(4)); 16333 }(N, DAG, Subtarget); 16334 16335 SDValue MulMask = MulOp.getOperand(3); 16336 SDValue MulVL = MulOp.getOperand(4); 16337 16338 if (AddMask != MulMask || AddVL != MulVL) 16339 return SDValue(); 16340 16341 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; 16342 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, 16343 "Unexpected opcode after VWMACC_VL"); 16344 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, 16345 "Unexpected opcode after VWMACC_VL!"); 16346 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, 16347 "Unexpected opcode after VWMUL_VL!"); 16348 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, 16349 "Unexpected opcode after 
VWMUL_VL!"); 16350 16351 SDLoc DL(N); 16352 EVT VT = N->getValueType(0); 16353 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask, 16354 AddVL}; 16355 return DAG.getNode(Opc, DL, VT, Ops); 16356 } 16357 16358 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, 16359 ISD::MemIndexType &IndexType, 16360 RISCVTargetLowering::DAGCombinerInfo &DCI) { 16361 if (!DCI.isBeforeLegalize()) 16362 return false; 16363 16364 SelectionDAG &DAG = DCI.DAG; 16365 const MVT XLenVT = 16366 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); 16367 16368 const EVT IndexVT = Index.getValueType(); 16369 16370 // RISC-V indexed loads only support the "unsigned unscaled" addressing 16371 // mode, so anything else must be manually legalized. 16372 if (!isIndexTypeSigned(IndexType)) 16373 return false; 16374 16375 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 16376 // Any index legalization should first promote to XLenVT, so we don't lose 16377 // bits when scaling. This may create an illegal index type so we let 16378 // LLVM's legalization take care of the splitting. 16379 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 16380 Index = DAG.getNode(ISD::SIGN_EXTEND, DL, 16381 IndexVT.changeVectorElementType(XLenVT), Index); 16382 } 16383 IndexType = ISD::UNSIGNED_SCALED; 16384 return true; 16385 } 16386 16387 /// Match the index vector of a scatter or gather node as the shuffle mask 16388 /// which performs the rearrangement if possible. Will only match if 16389 /// all lanes are touched, and thus replacing the scatter or gather with 16390 /// a unit strided access and shuffle is legal. 16391 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, 16392 SmallVector<int> &ShuffleMask) { 16393 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) 16394 return false; 16395 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) 16396 return false; 16397 16398 const unsigned ElementSize = VT.getScalarStoreSize(); 16399 const unsigned NumElems = VT.getVectorNumElements(); 16400 16401 // Create the shuffle mask and check all bits active 16402 assert(ShuffleMask.empty()); 16403 BitVector ActiveLanes(NumElems); 16404 for (unsigned i = 0; i < Index->getNumOperands(); i++) { 16405 // TODO: We've found an active bit of UB, and could be 16406 // more aggressive here if desired. 16407 if (Index->getOperand(i)->isUndef()) 16408 return false; 16409 uint64_t C = Index->getConstantOperandVal(i); 16410 if (C % ElementSize != 0) 16411 return false; 16412 C = C / ElementSize; 16413 if (C >= NumElems) 16414 return false; 16415 ShuffleMask.push_back(C); 16416 ActiveLanes.set(C); 16417 } 16418 return ActiveLanes.all(); 16419 } 16420 16421 /// Match the index of a gather or scatter operation as an operation 16422 /// with twice the element width and half the number of elements. This is 16423 /// generally profitable (if legal) because these operations are linear 16424 /// in VL, so even if we cause some extract VTYPE/VL toggles, we still 16425 /// come out ahead. 16426 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, 16427 Align BaseAlign, const RISCVSubtarget &ST) { 16428 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) 16429 return false; 16430 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) 16431 return false; 16432 16433 // Attempt a doubling. If we can use a element type 4x or 8x in 16434 // size, this will happen via multiply iterations of the transform. 
16435 const unsigned NumElems = VT.getVectorNumElements(); 16436 if (NumElems % 2 != 0) 16437 return false; 16438 16439 const unsigned ElementSize = VT.getScalarStoreSize(); 16440 const unsigned WiderElementSize = ElementSize * 2; 16441 if (WiderElementSize > ST.getELen()/8) 16442 return false; 16443 16444 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize) 16445 return false; 16446 16447 for (unsigned i = 0; i < Index->getNumOperands(); i++) { 16448 // TODO: We've found an active bit of UB, and could be 16449 // more aggressive here if desired. 16450 if (Index->getOperand(i)->isUndef()) 16451 return false; 16452 // TODO: This offset check is too strict if we support fully 16453 // misaligned memory operations. 16454 uint64_t C = Index->getConstantOperandVal(i); 16455 if (i % 2 == 0) { 16456 if (C % WiderElementSize != 0) 16457 return false; 16458 continue; 16459 } 16460 uint64_t Last = Index->getConstantOperandVal(i-1); 16461 if (C != Last + ElementSize) 16462 return false; 16463 } 16464 return true; 16465 } 16466 16467 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) 16468 // This would be benefit for the cases where X and Y are both the same value 16469 // type of low precision vectors. Since the truncate would be lowered into 16470 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate 16471 // restriction, such pattern would be expanded into a series of "vsetvli" 16472 // and "vnsrl" instructions later to reach this point. 16473 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) { 16474 SDValue Mask = N->getOperand(1); 16475 SDValue VL = N->getOperand(2); 16476 16477 bool IsVLMAX = isAllOnesConstant(VL) || 16478 (isa<RegisterSDNode>(VL) && 16479 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); 16480 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL || 16481 Mask.getOperand(0) != VL) 16482 return SDValue(); 16483 16484 auto IsTruncNode = [&](SDValue V) { 16485 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL && 16486 V.getOperand(1) == Mask && V.getOperand(2) == VL; 16487 }; 16488 16489 SDValue Op = N->getOperand(0); 16490 16491 // We need to first find the inner level of TRUNCATE_VECTOR_VL node 16492 // to distinguish such pattern. 16493 while (IsTruncNode(Op)) { 16494 if (!Op.hasOneUse()) 16495 return SDValue(); 16496 Op = Op.getOperand(0); 16497 } 16498 16499 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse()) 16500 return SDValue(); 16501 16502 SDValue N0 = Op.getOperand(0); 16503 SDValue N1 = Op.getOperand(1); 16504 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() || 16505 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse()) 16506 return SDValue(); 16507 16508 SDValue N00 = N0.getOperand(0); 16509 SDValue N10 = N1.getOperand(0); 16510 if (!N00.getValueType().isVector() || 16511 N00.getValueType() != N10.getValueType() || 16512 N->getValueType(0) != N10.getValueType()) 16513 return SDValue(); 16514 16515 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; 16516 SDValue SMin = 16517 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, 16518 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); 16519 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); 16520 } 16521 16522 // Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the 16523 // maximum value for the truncated type. 
16524 // Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1 16525 // is the signed maximum value for the truncated type and C2 is the signed 16526 // minimum value. 16527 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, 16528 const RISCVSubtarget &Subtarget) { 16529 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL); 16530 16531 MVT VT = N->getSimpleValueType(0); 16532 16533 SDValue Mask = N->getOperand(1); 16534 SDValue VL = N->getOperand(2); 16535 16536 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL, 16537 APInt &SplatVal) { 16538 if (V.getOpcode() != Opc && 16539 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() && 16540 V.getOperand(3) == Mask && V.getOperand(4) == VL)) 16541 return SDValue(); 16542 16543 SDValue Op = V.getOperand(1); 16544 16545 // Peek through conversion between fixed and scalable vectors. 16546 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() && 16547 isNullConstant(Op.getOperand(2)) && 16548 Op.getOperand(1).getValueType().isFixedLengthVector() && 16549 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR && 16550 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() && 16551 isNullConstant(Op.getOperand(1).getOperand(1))) 16552 Op = Op.getOperand(1).getOperand(0); 16553 16554 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal)) 16555 return V.getOperand(0); 16556 16557 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() && 16558 Op.getOperand(2) == VL) { 16559 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 16560 SplatVal = 16561 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits()); 16562 return V.getOperand(0); 16563 } 16564 } 16565 16566 return SDValue(); 16567 }; 16568 16569 SDLoc DL(N); 16570 16571 auto DetectUSatPattern = [&](SDValue V) { 16572 APInt LoC, HiC; 16573 16574 // Simple case, V is a UMIN. 16575 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC)) 16576 if (HiC.isMask(VT.getScalarSizeInBits())) 16577 return UMinOp; 16578 16579 // If we have an SMAX that removes negative numbers first, then we can match 16580 // SMIN instead of UMIN. 16581 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) 16582 if (SDValue SMaxOp = 16583 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) 16584 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits())) 16585 return SMinOp; 16586 16587 // If we have an SMIN before an SMAX and the SMAX constant is less than or 16588 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX 16589 // first. 
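// For example, for an i16 -> i8 truncate, (smax (smin X, 255), 20) clamps X
// to [20, 255]; it can instead be emitted as vnclipu applied to (smax X, 20),
// since the unsigned clip already saturates at 255.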
16590 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) 16591 if (SDValue SMinOp = 16592 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) 16593 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) && 16594 HiC.uge(LoC)) 16595 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp, 16596 V.getOperand(1), DAG.getUNDEF(V.getValueType()), 16597 Mask, VL); 16598 16599 return SDValue(); 16600 }; 16601 16602 auto DetectSSatPattern = [&](SDValue V) { 16603 unsigned NumDstBits = VT.getScalarSizeInBits(); 16604 unsigned NumSrcBits = V.getScalarValueSizeInBits(); 16605 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits); 16606 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits); 16607 16608 APInt HiC, LoC; 16609 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) 16610 if (SDValue SMaxOp = 16611 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) 16612 if (HiC == SignedMax && LoC == SignedMin) 16613 return SMaxOp; 16614 16615 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) 16616 if (SDValue SMinOp = 16617 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) 16618 if (HiC == SignedMax && LoC == SignedMin) 16619 return SMinOp; 16620 16621 return SDValue(); 16622 }; 16623 16624 SDValue Src = N->getOperand(0); 16625 16626 // Look through multiple layers of truncates. 16627 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL && 16628 Src.getOperand(1) == Mask && Src.getOperand(2) == VL && 16629 Src.hasOneUse()) 16630 Src = Src.getOperand(0); 16631 16632 SDValue Val; 16633 unsigned ClipOpc; 16634 if ((Val = DetectUSatPattern(Src))) 16635 ClipOpc = RISCVISD::VNCLIPU_VL; 16636 else if ((Val = DetectSSatPattern(Src))) 16637 ClipOpc = RISCVISD::VNCLIP_VL; 16638 else 16639 return SDValue(); 16640 16641 MVT ValVT = Val.getSimpleValueType(); 16642 16643 do { 16644 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2); 16645 ValVT = ValVT.changeVectorElementType(ValEltVT); 16646 // Rounding mode here is arbitrary since we aren't shifting out any bits. 16647 Val = DAG.getNode( 16648 ClipOpc, DL, ValVT, 16649 {Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask, 16650 DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()), 16651 VL}); 16652 } while (ValVT != VT); 16653 16654 return Val; 16655 } 16656 16657 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 16658 DAGCombinerInfo &DCI) const { 16659 SelectionDAG &DAG = DCI.DAG; 16660 const MVT XLenVT = Subtarget.getXLenVT(); 16661 SDLoc DL(N); 16662 16663 // Helper to call SimplifyDemandedBits on an operand of N where only some low 16664 // bits are demanded. N will be added to the Worklist if it was not deleted. 16665 // Caller should return SDValue(N, 0) if this returns true. 16666 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { 16667 SDValue Op = N->getOperand(OpNo); 16668 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); 16669 if (!SimplifyDemandedBits(Op, Mask, DCI)) 16670 return false; 16671 16672 if (N->getOpcode() != ISD::DELETED_NODE) 16673 DCI.AddToWorklist(N); 16674 return true; 16675 }; 16676 16677 switch (N->getOpcode()) { 16678 default: 16679 break; 16680 case RISCVISD::SplitF64: { 16681 SDValue Op0 = N->getOperand(0); 16682 // If the input to SplitF64 is just BuildPairF64 then the operation is 16683 // redundant. Instead, use BuildPairF64's operands directly. 
16684 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 16685 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 16686 16687 if (Op0->isUndef()) { 16688 SDValue Lo = DAG.getUNDEF(MVT::i32); 16689 SDValue Hi = DAG.getUNDEF(MVT::i32); 16690 return DCI.CombineTo(N, Lo, Hi); 16691 } 16692 16693 // It's cheaper to materialise two 32-bit integers than to load a double 16694 // from the constant pool and transfer it to integer registers through the 16695 // stack. 16696 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 16697 APInt V = C->getValueAPF().bitcastToAPInt(); 16698 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 16699 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 16700 return DCI.CombineTo(N, Lo, Hi); 16701 } 16702 16703 // This is a target-specific version of a DAGCombine performed in 16704 // DAGCombiner::visitBITCAST. It performs the equivalent of: 16705 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 16706 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 16707 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 16708 !Op0.getNode()->hasOneUse()) 16709 break; 16710 SDValue NewSplitF64 = 16711 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 16712 Op0.getOperand(0)); 16713 SDValue Lo = NewSplitF64.getValue(0); 16714 SDValue Hi = NewSplitF64.getValue(1); 16715 APInt SignBit = APInt::getSignMask(32); 16716 if (Op0.getOpcode() == ISD::FNEG) { 16717 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 16718 DAG.getConstant(SignBit, DL, MVT::i32)); 16719 return DCI.CombineTo(N, Lo, NewHi); 16720 } 16721 assert(Op0.getOpcode() == ISD::FABS); 16722 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 16723 DAG.getConstant(~SignBit, DL, MVT::i32)); 16724 return DCI.CombineTo(N, Lo, NewHi); 16725 } 16726 case RISCVISD::SLLW: 16727 case RISCVISD::SRAW: 16728 case RISCVISD::SRLW: 16729 case RISCVISD::RORW: 16730 case RISCVISD::ROLW: { 16731 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 16732 if (SimplifyDemandedLowBitsHelper(0, 32) || 16733 SimplifyDemandedLowBitsHelper(1, 5)) 16734 return SDValue(N, 0); 16735 16736 break; 16737 } 16738 case RISCVISD::CLZW: 16739 case RISCVISD::CTZW: { 16740 // Only the lower 32 bits of the first operand are read 16741 if (SimplifyDemandedLowBitsHelper(0, 32)) 16742 return SDValue(N, 0); 16743 break; 16744 } 16745 case RISCVISD::FMV_W_X_RV64: { 16746 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the 16747 // conversion is unnecessary and can be replaced with the 16748 // FMV_X_ANYEXTW_RV64 operand. 16749 SDValue Op0 = N->getOperand(0); 16750 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) 16751 return Op0.getOperand(0); 16752 break; 16753 } 16754 case RISCVISD::FMV_X_ANYEXTH: 16755 case RISCVISD::FMV_X_ANYEXTW_RV64: { 16756 SDLoc DL(N); 16757 SDValue Op0 = N->getOperand(0); 16758 MVT VT = N->getSimpleValueType(0); 16759 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 16760 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 16761 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 
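// For example, (FMV_X_ANYEXTW_RV64 (FMV_W_X_RV64 X)) is simply X, removing a
// GPR -> FPR -> GPR round trip (an fmv.w.x followed by an fmv.x.w).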
16762 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && 16763 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || 16764 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && 16765 Op0->getOpcode() == RISCVISD::FMV_H_X)) { 16766 assert(Op0.getOperand(0).getValueType() == VT && 16767 "Unexpected value type!"); 16768 return Op0.getOperand(0); 16769 } 16770 16771 // This is a target-specific version of a DAGCombine performed in 16772 // DAGCombiner::visitBITCAST. It performs the equivalent of: 16773 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 16774 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 16775 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 16776 !Op0.getNode()->hasOneUse()) 16777 break; 16778 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 16779 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; 16780 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); 16781 if (Op0.getOpcode() == ISD::FNEG) 16782 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 16783 DAG.getConstant(SignBit, DL, VT)); 16784 16785 assert(Op0.getOpcode() == ISD::FABS); 16786 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 16787 DAG.getConstant(~SignBit, DL, VT)); 16788 } 16789 case ISD::ABS: { 16790 EVT VT = N->getValueType(0); 16791 SDValue N0 = N->getOperand(0); 16792 // abs (sext) -> zext (abs) 16793 // abs (zext) -> zext (handled elsewhere) 16794 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) { 16795 SDValue Src = N0.getOperand(0); 16796 SDLoc DL(N); 16797 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, 16798 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src)); 16799 } 16800 break; 16801 } 16802 case ISD::ADD: { 16803 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 16804 return V; 16805 if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) 16806 return V; 16807 return performADDCombine(N, DCI, Subtarget); 16808 } 16809 case ISD::SUB: { 16810 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 16811 return V; 16812 return performSUBCombine(N, DAG, Subtarget); 16813 } 16814 case ISD::AND: 16815 return performANDCombine(N, DCI, Subtarget); 16816 case ISD::OR: { 16817 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 16818 return V; 16819 return performORCombine(N, DCI, Subtarget); 16820 } 16821 case ISD::XOR: 16822 return performXORCombine(N, DAG, Subtarget); 16823 case ISD::MUL: 16824 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 16825 return V; 16826 return performMULCombine(N, DAG, DCI, Subtarget); 16827 case ISD::SDIV: 16828 case ISD::UDIV: 16829 case ISD::SREM: 16830 case ISD::UREM: 16831 if (SDValue V = combineBinOpOfZExt(N, DAG)) 16832 return V; 16833 break; 16834 case ISD::FADD: 16835 case ISD::UMAX: 16836 case ISD::UMIN: 16837 case ISD::SMAX: 16838 case ISD::SMIN: 16839 case ISD::FMAXNUM: 16840 case ISD::FMINNUM: { 16841 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 16842 return V; 16843 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 16844 return V; 16845 return SDValue(); 16846 } 16847 case ISD::SETCC: 16848 return performSETCCCombine(N, DAG, Subtarget); 16849 case ISD::SIGN_EXTEND_INREG: 16850 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); 16851 case ISD::ZERO_EXTEND: 16852 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 16853 // type legalization. This is safe because fp_to_uint produces poison if 16854 // it overflows. 
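// For example, on RV64 (zero_extend i64 (fp_to_uint i32 X:f32)) becomes
// (fp_to_uint i64 X), which can select to a single fcvt.lu.s rather than an
// fcvt.wu.s followed by a separate zero extension of the i32 result.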
16855 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { 16856 SDValue Src = N->getOperand(0); 16857 if (Src.getOpcode() == ISD::FP_TO_UINT && 16858 isTypeLegal(Src.getOperand(0).getValueType())) 16859 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 16860 Src.getOperand(0)); 16861 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && 16862 isTypeLegal(Src.getOperand(1).getValueType())) { 16863 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 16864 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, 16865 Src.getOperand(0), Src.getOperand(1)); 16866 DCI.CombineTo(N, Res); 16867 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); 16868 DCI.recursivelyDeleteUnusedNodes(Src.getNode()); 16869 return SDValue(N, 0); // Return N so it doesn't get rechecked. 16870 } 16871 } 16872 return SDValue(); 16873 case RISCVISD::TRUNCATE_VECTOR_VL: 16874 if (SDValue V = combineTruncOfSraSext(N, DAG)) 16875 return V; 16876 return combineTruncToVnclip(N, DAG, Subtarget); 16877 case ISD::TRUNCATE: 16878 return performTRUNCATECombine(N, DAG, Subtarget); 16879 case ISD::SELECT: 16880 return performSELECTCombine(N, DAG, Subtarget); 16881 case RISCVISD::CZERO_EQZ: 16882 case RISCVISD::CZERO_NEZ: { 16883 SDValue Val = N->getOperand(0); 16884 SDValue Cond = N->getOperand(1); 16885 16886 unsigned Opc = N->getOpcode(); 16887 16888 // czero_eqz x, x -> x 16889 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond) 16890 return Val; 16891 16892 unsigned InvOpc = 16893 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ; 16894 16895 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1. 16896 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1. 16897 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) { 16898 SDValue NewCond = Cond.getOperand(0); 16899 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1); 16900 if (DAG.MaskedValueIsZero(NewCond, Mask)) 16901 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond); 16902 } 16903 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y 16904 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y 16905 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y 16906 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y 16907 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) { 16908 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 16909 if (ISD::isIntEqualitySetCC(CCVal)) 16910 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N), 16911 N->getValueType(0), Val, Cond.getOperand(0)); 16912 } 16913 return SDValue(); 16914 } 16915 case RISCVISD::SELECT_CC: { 16916 // Transform 16917 SDValue LHS = N->getOperand(0); 16918 SDValue RHS = N->getOperand(1); 16919 SDValue CC = N->getOperand(2); 16920 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 16921 SDValue TrueV = N->getOperand(3); 16922 SDValue FalseV = N->getOperand(4); 16923 SDLoc DL(N); 16924 EVT VT = N->getValueType(0); 16925 16926 // If the True and False values are the same, we don't need a select_cc. 
16927 if (TrueV == FalseV) 16928 return TrueV; 16929 16930 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 16931 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 16932 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && 16933 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && 16934 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { 16935 if (CCVal == ISD::CondCode::SETGE) 16936 std::swap(TrueV, FalseV); 16937 16938 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); 16939 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); 16940 // Only handle simm12, if it is not in this range, it can be considered as 16941 // register. 16942 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && 16943 isInt<12>(TrueSImm - FalseSImm)) { 16944 SDValue SRA = 16945 DAG.getNode(ISD::SRA, DL, VT, LHS, 16946 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); 16947 SDValue AND = 16948 DAG.getNode(ISD::AND, DL, VT, SRA, 16949 DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); 16950 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); 16951 } 16952 16953 if (CCVal == ISD::CondCode::SETGE) 16954 std::swap(TrueV, FalseV); 16955 } 16956 16957 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 16958 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 16959 {LHS, RHS, CC, TrueV, FalseV}); 16960 16961 if (!Subtarget.hasConditionalMoveFusion()) { 16962 // (select c, -1, y) -> -c | y 16963 if (isAllOnesConstant(TrueV)) { 16964 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 16965 SDValue Neg = DAG.getNegative(C, DL, VT); 16966 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 16967 } 16968 // (select c, y, -1) -> -!c | y 16969 if (isAllOnesConstant(FalseV)) { 16970 SDValue C = 16971 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 16972 SDValue Neg = DAG.getNegative(C, DL, VT); 16973 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 16974 } 16975 16976 // (select c, 0, y) -> -!c & y 16977 if (isNullConstant(TrueV)) { 16978 SDValue C = 16979 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 16980 SDValue Neg = DAG.getNegative(C, DL, VT); 16981 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 16982 } 16983 // (select c, y, 0) -> -c & y 16984 if (isNullConstant(FalseV)) { 16985 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 16986 SDValue Neg = DAG.getNegative(C, DL, VT); 16987 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 16988 } 16989 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 16990 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 16991 if (((isOneConstant(FalseV) && LHS == TrueV && 16992 CCVal == ISD::CondCode::SETNE) || 16993 (isOneConstant(TrueV) && LHS == FalseV && 16994 CCVal == ISD::CondCode::SETEQ)) && 16995 isNullConstant(RHS)) { 16996 // freeze it to be safe. 16997 LHS = DAG.getFreeze(LHS); 16998 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ); 16999 return DAG.getNode(ISD::ADD, DL, VT, LHS, C); 17000 } 17001 } 17002 17003 // If both true/false are an xor with 1, pull through the select. 17004 // This can occur after op legalization if both operands are setccs that 17005 // require an xor to invert. 17006 // FIXME: Generalize to other binary ops with identical operand? 
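// For example, (select_cc a, b, cc, (xor p, 1), (xor q, 1)) becomes
// (xor (select_cc a, b, cc, p, q), 1), so a single xori re-inverts whichever
// setcc result is selected.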
17007 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && 17008 TrueV.getOperand(1) == FalseV.getOperand(1) && 17009 isOneConstant(TrueV.getOperand(1)) && 17010 TrueV.hasOneUse() && FalseV.hasOneUse()) { 17011 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC, 17012 TrueV.getOperand(0), FalseV.getOperand(0)); 17013 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1)); 17014 } 17015 17016 return SDValue(); 17017 } 17018 case RISCVISD::BR_CC: { 17019 SDValue LHS = N->getOperand(1); 17020 SDValue RHS = N->getOperand(2); 17021 SDValue CC = N->getOperand(3); 17022 SDLoc DL(N); 17023 17024 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 17025 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 17026 N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); 17027 17028 return SDValue(); 17029 } 17030 case ISD::BITREVERSE: 17031 return performBITREVERSECombine(N, DAG, Subtarget); 17032 case ISD::FP_TO_SINT: 17033 case ISD::FP_TO_UINT: 17034 return performFP_TO_INTCombine(N, DCI, Subtarget); 17035 case ISD::FP_TO_SINT_SAT: 17036 case ISD::FP_TO_UINT_SAT: 17037 return performFP_TO_INT_SATCombine(N, DCI, Subtarget); 17038 case ISD::FCOPYSIGN: { 17039 EVT VT = N->getValueType(0); 17040 if (!VT.isVector()) 17041 break; 17042 // There is a form of VFSGNJ which injects the negated sign of its second 17043 // operand. Try and bubble any FNEG up after the extend/round to produce 17044 // this optimized pattern. Avoid modifying cases where FP_ROUND and 17045 // TRUNC=1. 17046 SDValue In2 = N->getOperand(1); 17047 // Avoid cases where the extend/round has multiple uses, as duplicating 17048 // those is typically more expensive than removing a fneg. 17049 if (!In2.hasOneUse()) 17050 break; 17051 if (In2.getOpcode() != ISD::FP_EXTEND && 17052 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 17053 break; 17054 In2 = In2.getOperand(0); 17055 if (In2.getOpcode() != ISD::FNEG) 17056 break; 17057 SDLoc DL(N); 17058 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 17059 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 17060 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 17061 } 17062 case ISD::MGATHER: { 17063 const auto *MGN = cast<MaskedGatherSDNode>(N); 17064 const EVT VT = N->getValueType(0); 17065 SDValue Index = MGN->getIndex(); 17066 SDValue ScaleOp = MGN->getScale(); 17067 ISD::MemIndexType IndexType = MGN->getIndexType(); 17068 assert(!MGN->isIndexScaled() && 17069 "Scaled gather/scatter should not be formed"); 17070 17071 SDLoc DL(N); 17072 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 17073 return DAG.getMaskedGather( 17074 N->getVTList(), MGN->getMemoryVT(), DL, 17075 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 17076 MGN->getBasePtr(), Index, ScaleOp}, 17077 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 17078 17079 if (narrowIndex(Index, IndexType, DAG)) 17080 return DAG.getMaskedGather( 17081 N->getVTList(), MGN->getMemoryVT(), DL, 17082 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 17083 MGN->getBasePtr(), Index, ScaleOp}, 17084 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 17085 17086 if (Index.getOpcode() == ISD::BUILD_VECTOR && 17087 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { 17088 // The sequence will be XLenVT, not the type of Index. Tell 17089 // isSimpleVIDSequence this so we avoid overflow. 
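// For example, a non-extending gather of 4 x i32 whose index vector is the
// byte-offset sequence <16, 24, 32, 40> has addend 16 and step 8, so it can
// become a strided load from base+16 with stride 8 (a vlse32 with rs2 = 8).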
17090 if (std::optional<VIDSequence> SimpleVID = 17091 isSimpleVIDSequence(Index, Subtarget.getXLen()); 17092 SimpleVID && SimpleVID->StepDenominator == 1) { 17093 const int64_t StepNumerator = SimpleVID->StepNumerator; 17094 const int64_t Addend = SimpleVID->Addend; 17095 17096 // Note: We don't need to check alignment here since (by assumption 17097 // from the existance of the gather), our offsets must be sufficiently 17098 // aligned. 17099 17100 const EVT PtrVT = getPointerTy(DAG.getDataLayout()); 17101 assert(MGN->getBasePtr()->getValueType(0) == PtrVT); 17102 assert(IndexType == ISD::UNSIGNED_SCALED); 17103 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(), 17104 DAG.getConstant(Addend, DL, PtrVT)); 17105 17106 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(), 17107 VT.getVectorElementCount()); 17108 SDValue StridedLoad = 17109 DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr, 17110 DAG.getConstant(StepNumerator, DL, XLenVT), 17111 MGN->getMask(), EVL, MGN->getMemOperand()); 17112 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(), 17113 StridedLoad, MGN->getPassThru(), EVL); 17114 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)}, 17115 DL); 17116 } 17117 } 17118 17119 SmallVector<int> ShuffleMask; 17120 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 17121 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) { 17122 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(), 17123 MGN->getBasePtr(), DAG.getUNDEF(XLenVT), 17124 MGN->getMask(), DAG.getUNDEF(VT), 17125 MGN->getMemoryVT(), MGN->getMemOperand(), 17126 ISD::UNINDEXED, ISD::NON_EXTLOAD); 17127 SDValue Shuffle = 17128 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask); 17129 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL); 17130 } 17131 17132 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 17133 matchIndexAsWiderOp(VT, Index, MGN->getMask(), 17134 MGN->getMemOperand()->getBaseAlign(), Subtarget)) { 17135 SmallVector<SDValue> NewIndices; 17136 for (unsigned i = 0; i < Index->getNumOperands(); i += 2) 17137 NewIndices.push_back(Index.getOperand(i)); 17138 EVT IndexVT = Index.getValueType() 17139 .getHalfNumVectorElementsVT(*DAG.getContext()); 17140 Index = DAG.getBuildVector(IndexVT, DL, NewIndices); 17141 17142 unsigned ElementSize = VT.getScalarStoreSize(); 17143 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2); 17144 auto EltCnt = VT.getVectorElementCount(); 17145 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); 17146 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT, 17147 EltCnt.divideCoefficientBy(2)); 17148 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru()); 17149 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 17150 EltCnt.divideCoefficientBy(2)); 17151 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); 17152 17153 SDValue Gather = 17154 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, 17155 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), 17156 Index, ScaleOp}, 17157 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); 17158 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0)); 17159 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL); 17160 } 17161 break; 17162 } 17163 case ISD::MSCATTER:{ 17164 const auto *MSN = cast<MaskedScatterSDNode>(N); 17165 SDValue Index = MSN->getIndex(); 17166 SDValue ScaleOp = MSN->getScale(); 17167 ISD::MemIndexType IndexType = MSN->getIndexType(); 17168 
assert(!MSN->isIndexScaled() && 17169 "Scaled gather/scatter should not be formed"); 17170 17171 SDLoc DL(N); 17172 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 17173 return DAG.getMaskedScatter( 17174 N->getVTList(), MSN->getMemoryVT(), DL, 17175 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 17176 Index, ScaleOp}, 17177 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 17178 17179 if (narrowIndex(Index, IndexType, DAG)) 17180 return DAG.getMaskedScatter( 17181 N->getVTList(), MSN->getMemoryVT(), DL, 17182 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 17183 Index, ScaleOp}, 17184 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 17185 17186 EVT VT = MSN->getValue()->getValueType(0); 17187 SmallVector<int> ShuffleMask; 17188 if (!MSN->isTruncatingStore() && 17189 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) { 17190 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(), 17191 DAG.getUNDEF(VT), ShuffleMask); 17192 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(), 17193 DAG.getUNDEF(XLenVT), MSN->getMask(), 17194 MSN->getMemoryVT(), MSN->getMemOperand(), 17195 ISD::UNINDEXED, false); 17196 } 17197 break; 17198 } 17199 case ISD::VP_GATHER: { 17200 const auto *VPGN = cast<VPGatherSDNode>(N); 17201 SDValue Index = VPGN->getIndex(); 17202 SDValue ScaleOp = VPGN->getScale(); 17203 ISD::MemIndexType IndexType = VPGN->getIndexType(); 17204 assert(!VPGN->isIndexScaled() && 17205 "Scaled gather/scatter should not be formed"); 17206 17207 SDLoc DL(N); 17208 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 17209 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 17210 {VPGN->getChain(), VPGN->getBasePtr(), Index, 17211 ScaleOp, VPGN->getMask(), 17212 VPGN->getVectorLength()}, 17213 VPGN->getMemOperand(), IndexType); 17214 17215 if (narrowIndex(Index, IndexType, DAG)) 17216 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 17217 {VPGN->getChain(), VPGN->getBasePtr(), Index, 17218 ScaleOp, VPGN->getMask(), 17219 VPGN->getVectorLength()}, 17220 VPGN->getMemOperand(), IndexType); 17221 17222 break; 17223 } 17224 case ISD::VP_SCATTER: { 17225 const auto *VPSN = cast<VPScatterSDNode>(N); 17226 SDValue Index = VPSN->getIndex(); 17227 SDValue ScaleOp = VPSN->getScale(); 17228 ISD::MemIndexType IndexType = VPSN->getIndexType(); 17229 assert(!VPSN->isIndexScaled() && 17230 "Scaled gather/scatter should not be formed"); 17231 17232 SDLoc DL(N); 17233 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 17234 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 17235 {VPSN->getChain(), VPSN->getValue(), 17236 VPSN->getBasePtr(), Index, ScaleOp, 17237 VPSN->getMask(), VPSN->getVectorLength()}, 17238 VPSN->getMemOperand(), IndexType); 17239 17240 if (narrowIndex(Index, IndexType, DAG)) 17241 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 17242 {VPSN->getChain(), VPSN->getValue(), 17243 VPSN->getBasePtr(), Index, ScaleOp, 17244 VPSN->getMask(), VPSN->getVectorLength()}, 17245 VPSN->getMemOperand(), IndexType); 17246 break; 17247 } 17248 case RISCVISD::SHL_VL: 17249 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 17250 return V; 17251 [[fallthrough]]; 17252 case RISCVISD::SRA_VL: 17253 case RISCVISD::SRL_VL: { 17254 SDValue ShAmt = N->getOperand(1); 17255 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 17256 // We don't need the upper 32 bits of a 64-bit element for a shift 
amount. 17257 SDLoc DL(N); 17258 SDValue VL = N->getOperand(4); 17259 EVT VT = N->getValueType(0); 17260 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 17261 ShAmt.getOperand(1), VL); 17262 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 17263 N->getOperand(2), N->getOperand(3), N->getOperand(4)); 17264 } 17265 break; 17266 } 17267 case ISD::SRA: 17268 if (SDValue V = performSRACombine(N, DAG, Subtarget)) 17269 return V; 17270 [[fallthrough]]; 17271 case ISD::SRL: 17272 case ISD::SHL: { 17273 if (N->getOpcode() == ISD::SHL) { 17274 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 17275 return V; 17276 } 17277 SDValue ShAmt = N->getOperand(1); 17278 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 17279 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 17280 SDLoc DL(N); 17281 EVT VT = N->getValueType(0); 17282 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 17283 ShAmt.getOperand(1), 17284 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); 17285 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 17286 } 17287 break; 17288 } 17289 case RISCVISD::ADD_VL: 17290 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 17291 return V; 17292 return combineToVWMACC(N, DAG, Subtarget); 17293 case RISCVISD::VWADD_W_VL: 17294 case RISCVISD::VWADDU_W_VL: 17295 case RISCVISD::VWSUB_W_VL: 17296 case RISCVISD::VWSUBU_W_VL: 17297 return performVWADDSUBW_VLCombine(N, DCI, Subtarget); 17298 case RISCVISD::SUB_VL: 17299 case RISCVISD::MUL_VL: 17300 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); 17301 case RISCVISD::VFMADD_VL: 17302 case RISCVISD::VFNMADD_VL: 17303 case RISCVISD::VFMSUB_VL: 17304 case RISCVISD::VFNMSUB_VL: 17305 case RISCVISD::STRICT_VFMADD_VL: 17306 case RISCVISD::STRICT_VFNMADD_VL: 17307 case RISCVISD::STRICT_VFMSUB_VL: 17308 case RISCVISD::STRICT_VFNMSUB_VL: 17309 return performVFMADD_VLCombine(N, DAG, Subtarget); 17310 case RISCVISD::FADD_VL: 17311 case RISCVISD::FSUB_VL: 17312 case RISCVISD::FMUL_VL: 17313 case RISCVISD::VFWADD_W_VL: 17314 case RISCVISD::VFWSUB_W_VL: { 17315 if (N->getValueType(0).getVectorElementType() == MVT::f32 && 17316 !Subtarget.hasVInstructionsF16()) 17317 return SDValue(); 17318 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); 17319 } 17320 case ISD::LOAD: 17321 case ISD::STORE: { 17322 if (DCI.isAfterLegalizeDAG()) 17323 if (SDValue V = performMemPairCombine(N, DCI)) 17324 return V; 17325 17326 if (N->getOpcode() != ISD::STORE) 17327 break; 17328 17329 auto *Store = cast<StoreSDNode>(N); 17330 SDValue Chain = Store->getChain(); 17331 EVT MemVT = Store->getMemoryVT(); 17332 SDValue Val = Store->getValue(); 17333 SDLoc DL(N); 17334 17335 bool IsScalarizable = 17336 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) && 17337 Store->isSimple() && 17338 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && 17339 isPowerOf2_64(MemVT.getSizeInBits()) && 17340 MemVT.getSizeInBits() <= Subtarget.getXLen(); 17341 17342 // If sufficiently aligned we can scalarize stores of constant vectors of 17343 // any power-of-two size up to XLen bits, provided that they aren't too 17344 // expensive to materialize. 
17345 // vsetivli zero, 2, e8, m1, ta, ma 17346 // vmv.v.i v8, 4 17347 // vse64.v v8, (a0) 17348 // -> 17349 // li a1, 1028 17350 // sh a1, 0(a0) 17351 if (DCI.isBeforeLegalize() && IsScalarizable && 17352 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { 17353 // Get the constant vector bits 17354 APInt NewC(Val.getValueSizeInBits(), 0); 17355 uint64_t EltSize = Val.getScalarValueSizeInBits(); 17356 for (unsigned i = 0; i < Val.getNumOperands(); i++) { 17357 if (Val.getOperand(i).isUndef()) 17358 continue; 17359 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize), 17360 i * EltSize); 17361 } 17362 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 17363 17364 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, 17365 true) <= 2 && 17366 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 17367 NewVT, *Store->getMemOperand())) { 17368 SDValue NewV = DAG.getConstant(NewC, DL, NewVT); 17369 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), 17370 Store->getPointerInfo(), Store->getOriginalAlign(), 17371 Store->getMemOperand()->getFlags()); 17372 } 17373 } 17374 17375 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 17376 // vsetivli zero, 2, e16, m1, ta, ma 17377 // vle16.v v8, (a0) 17378 // vse16.v v8, (a1) 17379 if (auto *L = dyn_cast<LoadSDNode>(Val); 17380 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && 17381 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) && 17382 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) && 17383 L->getMemoryVT() == MemVT) { 17384 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 17385 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 17386 NewVT, *Store->getMemOperand()) && 17387 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 17388 NewVT, *L->getMemOperand())) { 17389 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(), 17390 L->getPointerInfo(), L->getOriginalAlign(), 17391 L->getMemOperand()->getFlags()); 17392 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(), 17393 Store->getPointerInfo(), Store->getOriginalAlign(), 17394 Store->getMemOperand()->getFlags()); 17395 } 17396 } 17397 17398 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 17399 // vfmv.f.s is represented as extract element from 0. Match it late to avoid 17400 // any illegal types. 17401 if (Val.getOpcode() == RISCVISD::VMV_X_S || 17402 (DCI.isAfterLegalizeDAG() && 17403 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 17404 isNullConstant(Val.getOperand(1)))) { 17405 SDValue Src = Val.getOperand(0); 17406 MVT VecVT = Src.getSimpleValueType(); 17407 // VecVT should be scalable and memory VT should match the element type. 17408 if (!Store->isIndexed() && VecVT.isScalableVector() && 17409 MemVT == VecVT.getVectorElementType()) { 17410 SDLoc DL(N); 17411 MVT MaskVT = getMaskTypeFor(VecVT); 17412 return DAG.getStoreVP( 17413 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), 17414 DAG.getConstant(1, DL, MaskVT), 17415 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, 17416 Store->getMemOperand(), Store->getAddressingMode(), 17417 Store->isTruncatingStore(), /*IsCompress*/ false); 17418 } 17419 } 17420 17421 break; 17422 } 17423 case ISD::SPLAT_VECTOR: { 17424 EVT VT = N->getValueType(0); 17425 // Only perform this combine on legal MVT types. 
17426 if (!isTypeLegal(VT)) 17427 break; 17428 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, 17429 DAG, Subtarget)) 17430 return Gather; 17431 break; 17432 } 17433 case ISD::BUILD_VECTOR: 17434 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) 17435 return V; 17436 break; 17437 case ISD::CONCAT_VECTORS: 17438 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) 17439 return V; 17440 break; 17441 case ISD::INSERT_VECTOR_ELT: 17442 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this)) 17443 return V; 17444 break; 17445 case RISCVISD::VFMV_V_F_VL: { 17446 const MVT VT = N->getSimpleValueType(0); 17447 SDValue Passthru = N->getOperand(0); 17448 SDValue Scalar = N->getOperand(1); 17449 SDValue VL = N->getOperand(2); 17450 17451 // If VL is 1, we can use vfmv.s.f. 17452 if (isOneConstant(VL)) 17453 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); 17454 break; 17455 } 17456 case RISCVISD::VMV_V_X_VL: { 17457 const MVT VT = N->getSimpleValueType(0); 17458 SDValue Passthru = N->getOperand(0); 17459 SDValue Scalar = N->getOperand(1); 17460 SDValue VL = N->getOperand(2); 17461 17462 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the 17463 // scalar input. 17464 unsigned ScalarSize = Scalar.getValueSizeInBits(); 17465 unsigned EltWidth = VT.getScalarSizeInBits(); 17466 if (ScalarSize > EltWidth && Passthru.isUndef()) 17467 if (SimplifyDemandedLowBitsHelper(1, EltWidth)) 17468 return SDValue(N, 0); 17469 17470 // If VL is 1 and the scalar value won't benefit from immediate, we can 17471 // use vmv.s.x. 17472 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 17473 if (isOneConstant(VL) && 17474 (!Const || Const->isZero() || 17475 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5))) 17476 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); 17477 17478 break; 17479 } 17480 case RISCVISD::VFMV_S_F_VL: { 17481 SDValue Src = N->getOperand(1); 17482 // Try to remove vector->scalar->vector if the scalar->vector is inserting 17483 // into an undef vector. 17484 // TODO: Could use a vslide or vmv.v.v for non-undef. 17485 if (N->getOperand(0).isUndef() && 17486 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 17487 isNullConstant(Src.getOperand(1)) && 17488 Src.getOperand(0).getValueType().isScalableVector()) { 17489 EVT VT = N->getValueType(0); 17490 EVT SrcVT = Src.getOperand(0).getValueType(); 17491 assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); 17492 // Widths match, just return the original vector. 17493 if (SrcVT == VT) 17494 return Src.getOperand(0); 17495 // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 
17496 } 17497 [[fallthrough]]; 17498 } 17499 case RISCVISD::VMV_S_X_VL: { 17500 const MVT VT = N->getSimpleValueType(0); 17501 SDValue Passthru = N->getOperand(0); 17502 SDValue Scalar = N->getOperand(1); 17503 SDValue VL = N->getOperand(2); 17504 17505 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() && 17506 Scalar.getOperand(0).getValueType() == N->getValueType(0)) 17507 return Scalar.getOperand(0); 17508 17509 // Use M1 or smaller to avoid over constraining register allocation 17510 const MVT M1VT = getLMUL1VT(VT); 17511 if (M1VT.bitsLT(VT)) { 17512 SDValue M1Passthru = 17513 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru, 17514 DAG.getVectorIdxConstant(0, DL)); 17515 SDValue Result = 17516 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL); 17517 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result, 17518 DAG.getVectorIdxConstant(0, DL)); 17519 return Result; 17520 } 17521 17522 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or 17523 // higher would involve overly constraining the register allocator for 17524 // no purpose. 17525 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 17526 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) && 17527 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef()) 17528 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 17529 17530 break; 17531 } 17532 case RISCVISD::VMV_X_S: { 17533 SDValue Vec = N->getOperand(0); 17534 MVT VecVT = N->getOperand(0).getSimpleValueType(); 17535 const MVT M1VT = getLMUL1VT(VecVT); 17536 if (M1VT.bitsLT(VecVT)) { 17537 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 17538 DAG.getVectorIdxConstant(0, DL)); 17539 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); 17540 } 17541 break; 17542 } 17543 case ISD::INTRINSIC_VOID: 17544 case ISD::INTRINSIC_W_CHAIN: 17545 case ISD::INTRINSIC_WO_CHAIN: { 17546 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; 17547 unsigned IntNo = N->getConstantOperandVal(IntOpNo); 17548 switch (IntNo) { 17549 // By default we do not combine any intrinsic. 17550 default: 17551 return SDValue(); 17552 case Intrinsic::riscv_masked_strided_load: { 17553 MVT VT = N->getSimpleValueType(0); 17554 auto *Load = cast<MemIntrinsicSDNode>(N); 17555 SDValue PassThru = N->getOperand(2); 17556 SDValue Base = N->getOperand(3); 17557 SDValue Stride = N->getOperand(4); 17558 SDValue Mask = N->getOperand(5); 17559 17560 // If the stride is equal to the element size in bytes, we can use 17561 // a masked.load. 17562 const unsigned ElementSize = VT.getScalarStoreSize(); 17563 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); 17564 StrideC && StrideC->getZExtValue() == ElementSize) 17565 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, 17566 DAG.getUNDEF(XLenVT), Mask, PassThru, 17567 Load->getMemoryVT(), Load->getMemOperand(), 17568 ISD::UNINDEXED, ISD::NON_EXTLOAD); 17569 return SDValue(); 17570 } 17571 case Intrinsic::riscv_masked_strided_store: { 17572 auto *Store = cast<MemIntrinsicSDNode>(N); 17573 SDValue Value = N->getOperand(2); 17574 SDValue Base = N->getOperand(3); 17575 SDValue Stride = N->getOperand(4); 17576 SDValue Mask = N->getOperand(5); 17577 17578 // If the stride is equal to the element size in bytes, we can use 17579 // a masked.store. 
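      // For example (illustrative), a strided store of 4 x i32 with a 4-byte
      // stride writes one contiguous 16-byte block, so it can be expressed as
      // a unit-stride masked store of the whole vector.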
      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
          StrideC && StrideC->getZExtValue() == ElementSize)
        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
                                  DAG.getUNDEF(XLenVT), Mask,
                                  Value.getValueType(), Store->getMemOperand(),
                                  ISD::UNINDEXED, false);
      return SDValue();
    }
    case Intrinsic::riscv_vcpop:
    case Intrinsic::riscv_vcpop_mask:
    case Intrinsic::riscv_vfirst:
    case Intrinsic::riscv_vfirst_mask: {
      SDValue VL = N->getOperand(2);
      if (IntNo == Intrinsic::riscv_vcpop_mask ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        VL = N->getOperand(3);
      if (!isNullConstant(VL))
        return SDValue();
      // If VL is 0, vcpop -> li 0, vfirst -> li -1.
      SDLoc DL(N);
      EVT VT = N->getValueType(0);
      if (IntNo == Intrinsic::riscv_vfirst ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        return DAG.getConstant(-1, DL, VT);
      return DAG.getConstant(0, DL, VT);
    }
    }
  }
  case ISD::BITCAST: {
    assert(Subtarget.useRVVForFixedLengthVectors());
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    EVT SrcVT = N0.getValueType();
    // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
    // type, widen both sides to avoid a trip through memory.
    if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
        VT.isScalarInteger()) {
      unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
      SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
      Ops[0] = N0;
      SDLoc DL(N);
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
      N0 = DAG.getBitcast(MVT::i8, N0);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    }

    return SDValue();
  }
  }

  return SDValue();
}

bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
    EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  if (XVT != MVT::i32 && XVT != MVT::i64)
    return false;

  // We can use sext.w for RV64 or an srai 31 on RV32.
  if (KeptBits == 32 || KeptBits == 64)
    return true;

  // With Zbb we can use sext.h/sext.b.
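  // For example (illustrative), with KeptBits == 16 a "does x fit in a signed
  // 16-bit value?" check can then be lowered roughly as
  //   sext.h t0, a0
  //   beq    t0, a0, <in-range>
  // instead of materializing the range-check constants.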
17648 return Subtarget.hasStdExtZbb() && 17649 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || 17650 KeptBits == 16); 17651 } 17652 17653 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 17654 const SDNode *N, CombineLevel Level) const { 17655 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 17656 N->getOpcode() == ISD::SRL) && 17657 "Expected shift op"); 17658 17659 // The following folds are only desirable if `(OP _, c1 << c2)` can be 17660 // materialised in fewer instructions than `(OP _, c1)`: 17661 // 17662 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 17663 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 17664 SDValue N0 = N->getOperand(0); 17665 EVT Ty = N0.getValueType(); 17666 if (Ty.isScalarInteger() && 17667 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 17668 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 17669 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 17670 if (C1 && C2) { 17671 const APInt &C1Int = C1->getAPIntValue(); 17672 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 17673 17674 // We can materialise `c1 << c2` into an add immediate, so it's "free", 17675 // and the combine should happen, to potentially allow further combines 17676 // later. 17677 if (ShiftedC1Int.getSignificantBits() <= 64 && 17678 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 17679 return true; 17680 17681 // We can materialise `c1` in an add immediate, so it's "free", and the 17682 // combine should be prevented. 17683 if (C1Int.getSignificantBits() <= 64 && 17684 isLegalAddImmediate(C1Int.getSExtValue())) 17685 return false; 17686 17687 // Neither constant will fit into an immediate, so find materialisation 17688 // costs. 17689 int C1Cost = 17690 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, 17691 /*CompressionCost*/ true); 17692 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 17693 ShiftedC1Int, Ty.getSizeInBits(), Subtarget, 17694 /*CompressionCost*/ true); 17695 17696 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 17697 // combine should be prevented. 17698 if (C1Cost < ShiftedC1Cost) 17699 return false; 17700 } 17701 } 17702 return true; 17703 } 17704 17705 bool RISCVTargetLowering::targetShrinkDemandedConstant( 17706 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 17707 TargetLoweringOpt &TLO) const { 17708 // Delay this optimization as late as possible. 17709 if (!TLO.LegalOps) 17710 return false; 17711 17712 EVT VT = Op.getValueType(); 17713 if (VT.isVector()) 17714 return false; 17715 17716 unsigned Opcode = Op.getOpcode(); 17717 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) 17718 return false; 17719 17720 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 17721 if (!C) 17722 return false; 17723 17724 const APInt &Mask = C->getAPIntValue(); 17725 17726 // Clear all non-demanded bits initially. 17727 APInt ShrunkMask = Mask & DemandedBits; 17728 17729 // Try to make a smaller immediate by setting undemanded bits. 
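  // For example (illustrative), for (and X, 0xffffffff00000000) where only the
  // upper 32 bits are demanded, additionally setting the undemanded bits
  // 11..31 gives 0xfffffffffffff800 (-2048), which fits in an ANDI immediate
  // and needs no separate constant materialization.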
17730 17731 APInt ExpandedMask = Mask | ~DemandedBits; 17732 17733 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 17734 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 17735 }; 17736 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { 17737 if (NewMask == Mask) 17738 return true; 17739 SDLoc DL(Op); 17740 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); 17741 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 17742 Op.getOperand(0), NewC); 17743 return TLO.CombineTo(Op, NewOp); 17744 }; 17745 17746 // If the shrunk mask fits in sign extended 12 bits, let the target 17747 // independent code apply it. 17748 if (ShrunkMask.isSignedIntN(12)) 17749 return false; 17750 17751 // And has a few special cases for zext. 17752 if (Opcode == ISD::AND) { 17753 // Preserve (and X, 0xffff), if zext.h exists use zext.h, 17754 // otherwise use SLLI + SRLI. 17755 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 17756 if (IsLegalMask(NewMask)) 17757 return UseMask(NewMask); 17758 17759 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 17760 if (VT == MVT::i64) { 17761 APInt NewMask = APInt(64, 0xffffffff); 17762 if (IsLegalMask(NewMask)) 17763 return UseMask(NewMask); 17764 } 17765 } 17766 17767 // For the remaining optimizations, we need to be able to make a negative 17768 // number through a combination of mask and undemanded bits. 17769 if (!ExpandedMask.isNegative()) 17770 return false; 17771 17772 // What is the fewest number of bits we need to represent the negative number. 17773 unsigned MinSignedBits = ExpandedMask.getSignificantBits(); 17774 17775 // Try to make a 12 bit negative immediate. If that fails try to make a 32 17776 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 17777 // If we can't create a simm12, we shouldn't change opaque constants. 17778 APInt NewMask = ShrunkMask; 17779 if (MinSignedBits <= 12) 17780 NewMask.setBitsFrom(11); 17781 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 17782 NewMask.setBitsFrom(31); 17783 else 17784 return false; 17785 17786 // Check that our new mask is a subset of the demanded mask. 
17787 assert(IsLegalMask(NewMask)); 17788 return UseMask(NewMask); 17789 } 17790 17791 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { 17792 static const uint64_t GREVMasks[] = { 17793 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 17794 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 17795 17796 for (unsigned Stage = 0; Stage != 6; ++Stage) { 17797 unsigned Shift = 1 << Stage; 17798 if (ShAmt & Shift) { 17799 uint64_t Mask = GREVMasks[Stage]; 17800 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); 17801 if (IsGORC) 17802 Res |= x; 17803 x = Res; 17804 } 17805 } 17806 17807 return x; 17808 } 17809 17810 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 17811 KnownBits &Known, 17812 const APInt &DemandedElts, 17813 const SelectionDAG &DAG, 17814 unsigned Depth) const { 17815 unsigned BitWidth = Known.getBitWidth(); 17816 unsigned Opc = Op.getOpcode(); 17817 assert((Opc >= ISD::BUILTIN_OP_END || 17818 Opc == ISD::INTRINSIC_WO_CHAIN || 17819 Opc == ISD::INTRINSIC_W_CHAIN || 17820 Opc == ISD::INTRINSIC_VOID) && 17821 "Should use MaskedValueIsZero if you don't know whether Op" 17822 " is a target node!"); 17823 17824 Known.resetAll(); 17825 switch (Opc) { 17826 default: break; 17827 case RISCVISD::SELECT_CC: { 17828 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 17829 // If we don't know any bits, early out. 17830 if (Known.isUnknown()) 17831 break; 17832 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 17833 17834 // Only known if known in both the LHS and RHS. 17835 Known = Known.intersectWith(Known2); 17836 break; 17837 } 17838 case RISCVISD::CZERO_EQZ: 17839 case RISCVISD::CZERO_NEZ: 17840 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 17841 // Result is either all zero or operand 0. We can propagate zeros, but not 17842 // ones. 17843 Known.One.clearAllBits(); 17844 break; 17845 case RISCVISD::REMUW: { 17846 KnownBits Known2; 17847 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 17848 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 17849 // We only care about the lower 32 bits. 17850 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 17851 // Restore the original width by sign extending. 17852 Known = Known.sext(BitWidth); 17853 break; 17854 } 17855 case RISCVISD::DIVUW: { 17856 KnownBits Known2; 17857 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 17858 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 17859 // We only care about the lower 32 bits. 17860 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 17861 // Restore the original width by sign extending. 17862 Known = Known.sext(BitWidth); 17863 break; 17864 } 17865 case RISCVISD::SLLW: { 17866 KnownBits Known2; 17867 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 17868 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 17869 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32)); 17870 // Restore the original width by sign extending. 
17871 Known = Known.sext(BitWidth); 17872 break; 17873 } 17874 case RISCVISD::CTZW: { 17875 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 17876 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 17877 unsigned LowBits = llvm::bit_width(PossibleTZ); 17878 Known.Zero.setBitsFrom(LowBits); 17879 break; 17880 } 17881 case RISCVISD::CLZW: { 17882 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 17883 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 17884 unsigned LowBits = llvm::bit_width(PossibleLZ); 17885 Known.Zero.setBitsFrom(LowBits); 17886 break; 17887 } 17888 case RISCVISD::BREV8: 17889 case RISCVISD::ORC_B: { 17890 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a 17891 // control value of 7 is equivalent to brev8 and orc.b. 17892 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 17893 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; 17894 // To compute zeros, we need to invert the value and invert it back after. 17895 Known.Zero = 17896 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); 17897 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); 17898 break; 17899 } 17900 case RISCVISD::READ_VLENB: { 17901 // We can use the minimum and maximum VLEN values to bound VLENB. We 17902 // know VLEN must be a power of two. 17903 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; 17904 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; 17905 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); 17906 Known.Zero.setLowBits(Log2_32(MinVLenB)); 17907 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); 17908 if (MaxVLenB == MinVLenB) 17909 Known.One.setBit(Log2_32(MinVLenB)); 17910 break; 17911 } 17912 case RISCVISD::FCLASS: { 17913 // fclass will only set one of the low 10 bits. 17914 Known.Zero.setBitsFrom(10); 17915 break; 17916 } 17917 case ISD::INTRINSIC_W_CHAIN: 17918 case ISD::INTRINSIC_WO_CHAIN: { 17919 unsigned IntNo = 17920 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); 17921 switch (IntNo) { 17922 default: 17923 // We can't do anything for most intrinsics. 17924 break; 17925 case Intrinsic::riscv_vsetvli: 17926 case Intrinsic::riscv_vsetvlimax: { 17927 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; 17928 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1); 17929 RISCVII::VLMUL VLMUL = 17930 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2)); 17931 unsigned SEW = RISCVVType::decodeVSEW(VSEW); 17932 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); 17933 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; 17934 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; 17935 17936 // Result of vsetvli must be not larger than AVL. 17937 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1))) 17938 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1)); 17939 17940 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; 17941 if (BitWidth > KnownZeroFirstBit) 17942 Known.Zero.setBitsFrom(KnownZeroFirstBit); 17943 break; 17944 } 17945 } 17946 break; 17947 } 17948 } 17949 } 17950 17951 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 17952 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 17953 unsigned Depth) const { 17954 switch (Op.getOpcode()) { 17955 default: 17956 break; 17957 case RISCVISD::SELECT_CC: { 17958 unsigned Tmp = 17959 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 17960 if (Tmp == 1) return 1; // Early out. 
17961 unsigned Tmp2 = 17962 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 17963 return std::min(Tmp, Tmp2); 17964 } 17965 case RISCVISD::CZERO_EQZ: 17966 case RISCVISD::CZERO_NEZ: 17967 // Output is either all zero or operand 0. We can propagate sign bit count 17968 // from operand 0. 17969 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 17970 case RISCVISD::ABSW: { 17971 // We expand this at isel to negw+max. The result will have 33 sign bits 17972 // if the input has at least 33 sign bits. 17973 unsigned Tmp = 17974 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 17975 if (Tmp < 33) return 1; 17976 return 33; 17977 } 17978 case RISCVISD::SLLW: 17979 case RISCVISD::SRAW: 17980 case RISCVISD::SRLW: 17981 case RISCVISD::DIVW: 17982 case RISCVISD::DIVUW: 17983 case RISCVISD::REMUW: 17984 case RISCVISD::ROLW: 17985 case RISCVISD::RORW: 17986 case RISCVISD::FCVT_W_RV64: 17987 case RISCVISD::FCVT_WU_RV64: 17988 case RISCVISD::STRICT_FCVT_W_RV64: 17989 case RISCVISD::STRICT_FCVT_WU_RV64: 17990 // TODO: As the result is sign-extended, this is conservatively correct. A 17991 // more precise answer could be calculated for SRAW depending on known 17992 // bits in the shift amount. 17993 return 33; 17994 case RISCVISD::VMV_X_S: { 17995 // The number of sign bits of the scalar result is computed by obtaining the 17996 // element type of the input vector operand, subtracting its width from the 17997 // XLEN, and then adding one (sign bit within the element type). If the 17998 // element type is wider than XLen, the least-significant XLEN bits are 17999 // taken. 18000 unsigned XLen = Subtarget.getXLen(); 18001 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); 18002 if (EltBits <= XLen) 18003 return XLen - EltBits + 1; 18004 break; 18005 } 18006 case ISD::INTRINSIC_W_CHAIN: { 18007 unsigned IntNo = Op.getConstantOperandVal(1); 18008 switch (IntNo) { 18009 default: 18010 break; 18011 case Intrinsic::riscv_masked_atomicrmw_xchg_i64: 18012 case Intrinsic::riscv_masked_atomicrmw_add_i64: 18013 case Intrinsic::riscv_masked_atomicrmw_sub_i64: 18014 case Intrinsic::riscv_masked_atomicrmw_nand_i64: 18015 case Intrinsic::riscv_masked_atomicrmw_max_i64: 18016 case Intrinsic::riscv_masked_atomicrmw_min_i64: 18017 case Intrinsic::riscv_masked_atomicrmw_umax_i64: 18018 case Intrinsic::riscv_masked_atomicrmw_umin_i64: 18019 case Intrinsic::riscv_masked_cmpxchg_i64: 18020 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated 18021 // narrow atomic operation. These are implemented using atomic 18022 // operations at the minimum supported atomicrmw/cmpxchg width whose 18023 // result is then sign extended to XLEN. With +A, the minimum width is 18024 // 32 for both 64 and 32. 18025 assert(Subtarget.getXLen() == 64); 18026 assert(getMinCmpXchgSizeInBits() == 32); 18027 assert(Subtarget.hasStdExtA()); 18028 return 33; 18029 } 18030 break; 18031 } 18032 } 18033 18034 return 1; 18035 } 18036 18037 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( 18038 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 18039 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { 18040 18041 // TODO: Add more target nodes. 18042 switch (Op.getOpcode()) { 18043 case RISCVISD::SELECT_CC: 18044 // Integer select_cc cannot create poison. 18045 // TODO: What are the FP poison semantics? 18046 // TODO: This instruction blocks poison from the unselected operand, can 18047 // we do anything with that? 
18048 return !Op.getValueType().isInteger(); 18049 } 18050 return TargetLowering::canCreateUndefOrPoisonForTargetNode( 18051 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth); 18052 } 18053 18054 const Constant * 18055 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { 18056 assert(Ld && "Unexpected null LoadSDNode"); 18057 if (!ISD::isNormalLoad(Ld)) 18058 return nullptr; 18059 18060 SDValue Ptr = Ld->getBasePtr(); 18061 18062 // Only constant pools with no offset are supported. 18063 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { 18064 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); 18065 if (!CNode || CNode->isMachineConstantPoolEntry() || 18066 CNode->getOffset() != 0) 18067 return nullptr; 18068 18069 return CNode; 18070 }; 18071 18072 // Simple case, LLA. 18073 if (Ptr.getOpcode() == RISCVISD::LLA) { 18074 auto *CNode = GetSupportedConstantPool(Ptr); 18075 if (!CNode || CNode->getTargetFlags() != 0) 18076 return nullptr; 18077 18078 return CNode->getConstVal(); 18079 } 18080 18081 // Look for a HI and ADD_LO pair. 18082 if (Ptr.getOpcode() != RISCVISD::ADD_LO || 18083 Ptr.getOperand(0).getOpcode() != RISCVISD::HI) 18084 return nullptr; 18085 18086 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); 18087 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); 18088 18089 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || 18090 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) 18091 return nullptr; 18092 18093 if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) 18094 return nullptr; 18095 18096 return CNodeLo->getConstVal(); 18097 } 18098 18099 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI, 18100 MachineBasicBlock *BB) { 18101 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction"); 18102 18103 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves. 18104 // Should the count have wrapped while it was being read, we need to try 18105 // again. 18106 // For example: 18107 // ``` 18108 // read: 18109 // csrrs x3, counterh # load high word of counter 18110 // csrrs x2, counter # load low word of counter 18111 // csrrs x4, counterh # load high word of counter 18112 // bne x3, x4, read # check if high word reads match, otherwise try again 18113 // ``` 18114 18115 MachineFunction &MF = *BB->getParent(); 18116 const BasicBlock *LLVMBB = BB->getBasicBlock(); 18117 MachineFunction::iterator It = ++BB->getIterator(); 18118 18119 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB); 18120 MF.insert(It, LoopMBB); 18121 18122 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB); 18123 MF.insert(It, DoneMBB); 18124 18125 // Transfer the remainder of BB and its successor edges to DoneMBB. 
18126 DoneMBB->splice(DoneMBB->begin(), BB, 18127 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 18128 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 18129 18130 BB->addSuccessor(LoopMBB); 18131 18132 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 18133 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 18134 Register LoReg = MI.getOperand(0).getReg(); 18135 Register HiReg = MI.getOperand(1).getReg(); 18136 int64_t LoCounter = MI.getOperand(2).getImm(); 18137 int64_t HiCounter = MI.getOperand(3).getImm(); 18138 DebugLoc DL = MI.getDebugLoc(); 18139 18140 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 18141 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 18142 .addImm(HiCounter) 18143 .addReg(RISCV::X0); 18144 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 18145 .addImm(LoCounter) 18146 .addReg(RISCV::X0); 18147 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 18148 .addImm(HiCounter) 18149 .addReg(RISCV::X0); 18150 18151 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 18152 .addReg(HiReg) 18153 .addReg(ReadAgainReg) 18154 .addMBB(LoopMBB); 18155 18156 LoopMBB->addSuccessor(LoopMBB); 18157 LoopMBB->addSuccessor(DoneMBB); 18158 18159 MI.eraseFromParent(); 18160 18161 return DoneMBB; 18162 } 18163 18164 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 18165 MachineBasicBlock *BB, 18166 const RISCVSubtarget &Subtarget) { 18167 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 18168 18169 MachineFunction &MF = *BB->getParent(); 18170 DebugLoc DL = MI.getDebugLoc(); 18171 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 18172 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 18173 Register LoReg = MI.getOperand(0).getReg(); 18174 Register HiReg = MI.getOperand(1).getReg(); 18175 Register SrcReg = MI.getOperand(2).getReg(); 18176 18177 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 18178 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 18179 18180 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 18181 RI, Register()); 18182 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 18183 MachineMemOperand *MMOLo = 18184 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 18185 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 18186 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 18187 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 18188 .addFrameIndex(FI) 18189 .addImm(0) 18190 .addMemOperand(MMOLo); 18191 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 18192 .addFrameIndex(FI) 18193 .addImm(4) 18194 .addMemOperand(MMOHi); 18195 MI.eraseFromParent(); // The pseudo instruction is gone now. 
18196 return BB; 18197 } 18198 18199 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 18200 MachineBasicBlock *BB, 18201 const RISCVSubtarget &Subtarget) { 18202 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 18203 "Unexpected instruction"); 18204 18205 MachineFunction &MF = *BB->getParent(); 18206 DebugLoc DL = MI.getDebugLoc(); 18207 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 18208 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 18209 Register DstReg = MI.getOperand(0).getReg(); 18210 Register LoReg = MI.getOperand(1).getReg(); 18211 Register HiReg = MI.getOperand(2).getReg(); 18212 18213 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 18214 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 18215 18216 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 18217 MachineMemOperand *MMOLo = 18218 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 18219 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 18220 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 18221 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 18222 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 18223 .addFrameIndex(FI) 18224 .addImm(0) 18225 .addMemOperand(MMOLo); 18226 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 18227 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 18228 .addFrameIndex(FI) 18229 .addImm(4) 18230 .addMemOperand(MMOHi); 18231 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); 18232 MI.eraseFromParent(); // The pseudo instruction is gone now. 18233 return BB; 18234 } 18235 18236 static bool isSelectPseudo(MachineInstr &MI) { 18237 switch (MI.getOpcode()) { 18238 default: 18239 return false; 18240 case RISCV::Select_GPR_Using_CC_GPR: 18241 case RISCV::Select_GPR_Using_CC_Imm: 18242 case RISCV::Select_FPR16_Using_CC_GPR: 18243 case RISCV::Select_FPR16INX_Using_CC_GPR: 18244 case RISCV::Select_FPR32_Using_CC_GPR: 18245 case RISCV::Select_FPR32INX_Using_CC_GPR: 18246 case RISCV::Select_FPR64_Using_CC_GPR: 18247 case RISCV::Select_FPR64INX_Using_CC_GPR: 18248 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 18249 return true; 18250 } 18251 } 18252 18253 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, 18254 unsigned RelOpcode, unsigned EqOpcode, 18255 const RISCVSubtarget &Subtarget) { 18256 DebugLoc DL = MI.getDebugLoc(); 18257 Register DstReg = MI.getOperand(0).getReg(); 18258 Register Src1Reg = MI.getOperand(1).getReg(); 18259 Register Src2Reg = MI.getOperand(2).getReg(); 18260 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 18261 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); 18262 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 18263 18264 // Save the current FFLAGS. 18265 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); 18266 18267 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) 18268 .addReg(Src1Reg) 18269 .addReg(Src2Reg); 18270 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 18271 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 18272 18273 // Restore the FFLAGS. 18274 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 18275 .addReg(SavedFFlags, RegState::Kill); 18276 18277 // Issue a dummy FEQ opcode to raise exception for signaling NaNs. 
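  // Together with the FEQ below, the whole pseudo expands roughly as
  // (illustrative, for PseudoQuietFLT_S):
  //   frflags a1
  //   flt.s   a0, fa0, fa1
  //   fsflags a1
  //   feq.s   zero, fa0, fa1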
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}

static MachineBasicBlock *
EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // Without this, custom-inserter would have generated:

  //   A
  //   | \
  //   |  B
  //   | /
  //   C
  //   | \
  //   |  D
  //   | /
  //   E

  // A: X = ...; Y = ...
  // B: empty
  // C: Z = PHI [X, A], [Y, B]
  // D: empty
  // E: PHI [X, C], [Z, D]

  // If we lower both Select_FPRX_ in a single step, we can instead generate:

  //   A
  //   | \
  //   |  C
  //   | /|
  //   |/ |
  //   |  |
  //   |  D
  //   | /
  //   E

  // A: X = ...; Y = ...
  // D: empty
  // E: PHI [X, A], [X, C], [Y, D]

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(First)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();
  return SinkMBB;
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |   IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.
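  //
  // For example (illustrative), a block containing
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  // shares a single conditional branch and becomes two PHIs in TailMBB.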
18424 18425 auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); 18426 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && 18427 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) && 18428 Next != BB->end() && Next->getOpcode() == MI.getOpcode() && 18429 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && 18430 Next->getOperand(5).isKill()) 18431 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); 18432 18433 Register LHS = MI.getOperand(1).getReg(); 18434 Register RHS; 18435 if (MI.getOperand(2).isReg()) 18436 RHS = MI.getOperand(2).getReg(); 18437 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 18438 18439 SmallVector<MachineInstr *, 4> SelectDebugValues; 18440 SmallSet<Register, 4> SelectDests; 18441 SelectDests.insert(MI.getOperand(0).getReg()); 18442 18443 MachineInstr *LastSelectPseudo = &MI; 18444 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 18445 SequenceMBBI != E; ++SequenceMBBI) { 18446 if (SequenceMBBI->isDebugInstr()) 18447 continue; 18448 if (isSelectPseudo(*SequenceMBBI)) { 18449 if (SequenceMBBI->getOperand(1).getReg() != LHS || 18450 !SequenceMBBI->getOperand(2).isReg() || 18451 SequenceMBBI->getOperand(2).getReg() != RHS || 18452 SequenceMBBI->getOperand(3).getImm() != CC || 18453 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 18454 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 18455 break; 18456 LastSelectPseudo = &*SequenceMBBI; 18457 SequenceMBBI->collectDebugValues(SelectDebugValues); 18458 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 18459 continue; 18460 } 18461 if (SequenceMBBI->hasUnmodeledSideEffects() || 18462 SequenceMBBI->mayLoadOrStore() || 18463 SequenceMBBI->usesCustomInsertionHook()) 18464 break; 18465 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 18466 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 18467 })) 18468 break; 18469 } 18470 18471 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 18472 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 18473 DebugLoc DL = MI.getDebugLoc(); 18474 MachineFunction::iterator I = ++BB->getIterator(); 18475 18476 MachineBasicBlock *HeadMBB = BB; 18477 MachineFunction *F = BB->getParent(); 18478 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 18479 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 18480 18481 F->insert(I, IfFalseMBB); 18482 F->insert(I, TailMBB); 18483 18484 // Set the call frame size on entry to the new basic blocks. 18485 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo); 18486 IfFalseMBB->setCallFrameSize(CallFrameSize); 18487 TailMBB->setCallFrameSize(CallFrameSize); 18488 18489 // Transfer debug instructions associated with the selects to TailMBB. 18490 for (MachineInstr *DebugInstr : SelectDebugValues) { 18491 TailMBB->push_back(DebugInstr->removeFromParent()); 18492 } 18493 18494 // Move all instructions after the sequence to TailMBB. 18495 TailMBB->splice(TailMBB->end(), HeadMBB, 18496 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 18497 // Update machine-CFG edges by transferring all successors of the current 18498 // block to the new block which will contain the Phi nodes for the selects. 18499 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 18500 // Set the successors for HeadMBB. 18501 HeadMBB->addSuccessor(IfFalseMBB); 18502 HeadMBB->addSuccessor(TailMBB); 18503 18504 // Insert appropriate branch. 
18505 if (MI.getOperand(2).isImm()) 18506 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm())) 18507 .addReg(LHS) 18508 .addImm(MI.getOperand(2).getImm()) 18509 .addMBB(TailMBB); 18510 else 18511 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 18512 .addReg(LHS) 18513 .addReg(RHS) 18514 .addMBB(TailMBB); 18515 18516 // IfFalseMBB just falls through to TailMBB. 18517 IfFalseMBB->addSuccessor(TailMBB); 18518 18519 // Create PHIs for all of the select pseudo-instructions. 18520 auto SelectMBBI = MI.getIterator(); 18521 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 18522 auto InsertionPoint = TailMBB->begin(); 18523 while (SelectMBBI != SelectEnd) { 18524 auto Next = std::next(SelectMBBI); 18525 if (isSelectPseudo(*SelectMBBI)) { 18526 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 18527 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 18528 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 18529 .addReg(SelectMBBI->getOperand(4).getReg()) 18530 .addMBB(HeadMBB) 18531 .addReg(SelectMBBI->getOperand(5).getReg()) 18532 .addMBB(IfFalseMBB); 18533 SelectMBBI->eraseFromParent(); 18534 } 18535 SelectMBBI = Next; 18536 } 18537 18538 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 18539 return TailMBB; 18540 } 18541 18542 // Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW. 18543 static const RISCV::RISCVMaskedPseudoInfo * 18544 lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) { 18545 const RISCVVInversePseudosTable::PseudoInfo *Inverse = 18546 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW); 18547 assert(Inverse && "Unexpected LMUL and SEW pair for instruction"); 18548 const RISCV::RISCVMaskedPseudoInfo *Masked = 18549 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo); 18550 assert(Masked && "Could not find masked instruction for LMUL and SEW pair"); 18551 return Masked; 18552 } 18553 18554 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, 18555 MachineBasicBlock *BB, 18556 unsigned CVTXOpc) { 18557 DebugLoc DL = MI.getDebugLoc(); 18558 18559 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 18560 18561 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 18562 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); 18563 18564 // Save the old value of FFLAGS. 18565 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); 18566 18567 assert(MI.getNumOperands() == 7); 18568 18569 // Emit a VFCVT_X_F 18570 const TargetRegisterInfo *TRI = 18571 BB->getParent()->getSubtarget().getRegisterInfo(); 18572 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); 18573 Register Tmp = MRI.createVirtualRegister(RC); 18574 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) 18575 .add(MI.getOperand(1)) 18576 .add(MI.getOperand(2)) 18577 .add(MI.getOperand(3)) 18578 .add(MachineOperand::CreateImm(7)) // frm = DYN 18579 .add(MI.getOperand(4)) 18580 .add(MI.getOperand(5)) 18581 .add(MI.getOperand(6)) 18582 .add(MachineOperand::CreateReg(RISCV::FRM, 18583 /*IsDef*/ false, 18584 /*IsImp*/ true)); 18585 18586 // Emit a VFCVT_F_X 18587 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags); 18588 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); 18589 // There is no E8 variant for VFCVT_F_X. 
18590 assert(Log2SEW >= 4); 18591 unsigned CVTFOpc = 18592 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW) 18593 ->MaskedPseudo; 18594 18595 BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) 18596 .add(MI.getOperand(0)) 18597 .add(MI.getOperand(1)) 18598 .addReg(Tmp) 18599 .add(MI.getOperand(3)) 18600 .add(MachineOperand::CreateImm(7)) // frm = DYN 18601 .add(MI.getOperand(4)) 18602 .add(MI.getOperand(5)) 18603 .add(MI.getOperand(6)) 18604 .add(MachineOperand::CreateReg(RISCV::FRM, 18605 /*IsDef*/ false, 18606 /*IsImp*/ true)); 18607 18608 // Restore FFLAGS. 18609 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 18610 .addReg(SavedFFLAGS, RegState::Kill); 18611 18612 // Erase the pseudoinstruction. 18613 MI.eraseFromParent(); 18614 return BB; 18615 } 18616 18617 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, 18618 const RISCVSubtarget &Subtarget) { 18619 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; 18620 const TargetRegisterClass *RC; 18621 switch (MI.getOpcode()) { 18622 default: 18623 llvm_unreachable("Unexpected opcode"); 18624 case RISCV::PseudoFROUND_H: 18625 CmpOpc = RISCV::FLT_H; 18626 F2IOpc = RISCV::FCVT_W_H; 18627 I2FOpc = RISCV::FCVT_H_W; 18628 FSGNJOpc = RISCV::FSGNJ_H; 18629 FSGNJXOpc = RISCV::FSGNJX_H; 18630 RC = &RISCV::FPR16RegClass; 18631 break; 18632 case RISCV::PseudoFROUND_H_INX: 18633 CmpOpc = RISCV::FLT_H_INX; 18634 F2IOpc = RISCV::FCVT_W_H_INX; 18635 I2FOpc = RISCV::FCVT_H_W_INX; 18636 FSGNJOpc = RISCV::FSGNJ_H_INX; 18637 FSGNJXOpc = RISCV::FSGNJX_H_INX; 18638 RC = &RISCV::GPRF16RegClass; 18639 break; 18640 case RISCV::PseudoFROUND_S: 18641 CmpOpc = RISCV::FLT_S; 18642 F2IOpc = RISCV::FCVT_W_S; 18643 I2FOpc = RISCV::FCVT_S_W; 18644 FSGNJOpc = RISCV::FSGNJ_S; 18645 FSGNJXOpc = RISCV::FSGNJX_S; 18646 RC = &RISCV::FPR32RegClass; 18647 break; 18648 case RISCV::PseudoFROUND_S_INX: 18649 CmpOpc = RISCV::FLT_S_INX; 18650 F2IOpc = RISCV::FCVT_W_S_INX; 18651 I2FOpc = RISCV::FCVT_S_W_INX; 18652 FSGNJOpc = RISCV::FSGNJ_S_INX; 18653 FSGNJXOpc = RISCV::FSGNJX_S_INX; 18654 RC = &RISCV::GPRF32RegClass; 18655 break; 18656 case RISCV::PseudoFROUND_D: 18657 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 18658 CmpOpc = RISCV::FLT_D; 18659 F2IOpc = RISCV::FCVT_L_D; 18660 I2FOpc = RISCV::FCVT_D_L; 18661 FSGNJOpc = RISCV::FSGNJ_D; 18662 FSGNJXOpc = RISCV::FSGNJX_D; 18663 RC = &RISCV::FPR64RegClass; 18664 break; 18665 case RISCV::PseudoFROUND_D_INX: 18666 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 18667 CmpOpc = RISCV::FLT_D_INX; 18668 F2IOpc = RISCV::FCVT_L_D_INX; 18669 I2FOpc = RISCV::FCVT_D_L_INX; 18670 FSGNJOpc = RISCV::FSGNJ_D_INX; 18671 FSGNJXOpc = RISCV::FSGNJX_D_INX; 18672 RC = &RISCV::GPRRegClass; 18673 break; 18674 } 18675 18676 const BasicBlock *BB = MBB->getBasicBlock(); 18677 DebugLoc DL = MI.getDebugLoc(); 18678 MachineFunction::iterator I = ++MBB->getIterator(); 18679 18680 MachineFunction *F = MBB->getParent(); 18681 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); 18682 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); 18683 18684 F->insert(I, CvtMBB); 18685 F->insert(I, DoneMBB); 18686 // Move all instructions after the sequence to DoneMBB. 18687 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), 18688 MBB->end()); 18689 // Update machine-CFG edges by transferring all successors of the current 18690 // block to the new block which will contain the Phi nodes for the selects. 18691 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 18692 // Set the successors for MBB. 
18693 MBB->addSuccessor(CvtMBB); 18694 MBB->addSuccessor(DoneMBB); 18695 18696 Register DstReg = MI.getOperand(0).getReg(); 18697 Register SrcReg = MI.getOperand(1).getReg(); 18698 Register MaxReg = MI.getOperand(2).getReg(); 18699 int64_t FRM = MI.getOperand(3).getImm(); 18700 18701 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 18702 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 18703 18704 Register FabsReg = MRI.createVirtualRegister(RC); 18705 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); 18706 18707 // Compare the FP value to the max value. 18708 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 18709 auto MIB = 18710 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); 18711 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 18712 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 18713 18714 // Insert branch. 18715 BuildMI(MBB, DL, TII.get(RISCV::BEQ)) 18716 .addReg(CmpReg) 18717 .addReg(RISCV::X0) 18718 .addMBB(DoneMBB); 18719 18720 CvtMBB->addSuccessor(DoneMBB); 18721 18722 // Convert to integer. 18723 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 18724 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); 18725 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 18726 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 18727 18728 // Convert back to FP. 18729 Register I2FReg = MRI.createVirtualRegister(RC); 18730 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); 18731 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 18732 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 18733 18734 // Restore the sign bit. 18735 Register CvtReg = MRI.createVirtualRegister(RC); 18736 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); 18737 18738 // Merge the results. 
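  // The PHI picks the original value when |Src| >= Max (Src is already
  // integral, or is NaN/Inf, and passes through unchanged), and the converted,
  // sign-restored value otherwise.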
18739 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) 18740 .addReg(SrcReg) 18741 .addMBB(MBB) 18742 .addReg(CvtReg) 18743 .addMBB(CvtMBB); 18744 18745 MI.eraseFromParent(); 18746 return DoneMBB; 18747 } 18748 18749 MachineBasicBlock * 18750 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 18751 MachineBasicBlock *BB) const { 18752 switch (MI.getOpcode()) { 18753 default: 18754 llvm_unreachable("Unexpected instr type to insert"); 18755 case RISCV::ReadCounterWide: 18756 assert(!Subtarget.is64Bit() && 18757 "ReadCounterWide is only to be used on riscv32"); 18758 return emitReadCounterWidePseudo(MI, BB); 18759 case RISCV::Select_GPR_Using_CC_GPR: 18760 case RISCV::Select_GPR_Using_CC_Imm: 18761 case RISCV::Select_FPR16_Using_CC_GPR: 18762 case RISCV::Select_FPR16INX_Using_CC_GPR: 18763 case RISCV::Select_FPR32_Using_CC_GPR: 18764 case RISCV::Select_FPR32INX_Using_CC_GPR: 18765 case RISCV::Select_FPR64_Using_CC_GPR: 18766 case RISCV::Select_FPR64INX_Using_CC_GPR: 18767 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 18768 return emitSelectPseudo(MI, BB, Subtarget); 18769 case RISCV::BuildPairF64Pseudo: 18770 return emitBuildPairF64Pseudo(MI, BB, Subtarget); 18771 case RISCV::SplitF64Pseudo: 18772 return emitSplitF64Pseudo(MI, BB, Subtarget); 18773 case RISCV::PseudoQuietFLE_H: 18774 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); 18775 case RISCV::PseudoQuietFLE_H_INX: 18776 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); 18777 case RISCV::PseudoQuietFLT_H: 18778 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); 18779 case RISCV::PseudoQuietFLT_H_INX: 18780 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); 18781 case RISCV::PseudoQuietFLE_S: 18782 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); 18783 case RISCV::PseudoQuietFLE_S_INX: 18784 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); 18785 case RISCV::PseudoQuietFLT_S: 18786 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); 18787 case RISCV::PseudoQuietFLT_S_INX: 18788 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); 18789 case RISCV::PseudoQuietFLE_D: 18790 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); 18791 case RISCV::PseudoQuietFLE_D_INX: 18792 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); 18793 case RISCV::PseudoQuietFLE_D_IN32X: 18794 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, 18795 Subtarget); 18796 case RISCV::PseudoQuietFLT_D: 18797 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); 18798 case RISCV::PseudoQuietFLT_D_INX: 18799 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); 18800 case RISCV::PseudoQuietFLT_D_IN32X: 18801 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, 18802 Subtarget); 18803 18804 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: 18805 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK); 18806 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: 18807 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK); 18808 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: 18809 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK); 18810 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: 18811 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK); 18812 case 
RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: 18813 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK); 18814 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: 18815 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK); 18816 case RISCV::PseudoFROUND_H: 18817 case RISCV::PseudoFROUND_H_INX: 18818 case RISCV::PseudoFROUND_S: 18819 case RISCV::PseudoFROUND_S_INX: 18820 case RISCV::PseudoFROUND_D: 18821 case RISCV::PseudoFROUND_D_INX: 18822 case RISCV::PseudoFROUND_D_IN32X: 18823 return emitFROUND(MI, BB, Subtarget); 18824 case TargetOpcode::STATEPOINT: 18825 // STATEPOINT is a pseudo instruction which has no implicit defs/uses 18826 // while jal call instruction (where statepoint will be lowered at the end) 18827 // has implicit def. This def is early-clobber as it will be set at 18828 // the moment of the call and earlier than any use is read. 18829 // Add this implicit dead def here as a workaround. 18830 MI.addOperand(*MI.getMF(), 18831 MachineOperand::CreateReg( 18832 RISCV::X1, /*isDef*/ true, 18833 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, 18834 /*isUndef*/ false, /*isEarlyClobber*/ true)); 18835 [[fallthrough]]; 18836 case TargetOpcode::STACKMAP: 18837 case TargetOpcode::PATCHPOINT: 18838 if (!Subtarget.is64Bit()) 18839 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " 18840 "supported on 64-bit targets"); 18841 return emitPatchPoint(MI, BB); 18842 } 18843 } 18844 18845 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, 18846 SDNode *Node) const { 18847 // Add FRM dependency to any instructions with dynamic rounding mode. 18848 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); 18849 if (Idx < 0) { 18850 // Vector pseudos have FRM index indicated by TSFlags. 18851 Idx = RISCVII::getFRMOpNum(MI.getDesc()); 18852 if (Idx < 0) 18853 return; 18854 } 18855 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) 18856 return; 18857 // If the instruction already reads FRM, don't add another read. 18858 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) 18859 return; 18860 MI.addOperand( 18861 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); 18862 } 18863 18864 // Calling Convention Implementation. 18865 // The expectations for frontend ABI lowering vary from target to target. 18866 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 18867 // details, but this is a longer term goal. For now, we simply try to keep the 18868 // role of the frontend as simple and well-defined as possible. The rules can 18869 // be summarised as: 18870 // * Never split up large scalar arguments. We handle them here. 18871 // * If a hardfloat calling convention is being used, and the struct may be 18872 // passed in a pair of registers (fp+fp, int+fp), and both registers are 18873 // available, then pass as two separate arguments. If either the GPRs or FPRs 18874 // are exhausted, then pass according to the rule below. 18875 // * If a struct could never be passed in registers or directly in a stack 18876 // slot (as it is larger than 2*XLEN and the floating point rules don't 18877 // apply), then pass it using a pointer with the byval attribute. 18878 // * If a struct is less than 2*XLEN, then coerce to either a two-element 18879 // word-sized array or a 2*XLEN scalar (depending on alignment). 18880 // * The frontend can determine whether a struct is returned by reference or 18881 // not based on its size and fields. 
If it will be returned by reference, the 18882 // frontend must modify the prototype so a pointer with the sret annotation is 18883 // passed as the first argument. This is not necessary for large scalar 18884 // returns. 18885 // * Struct return values and varargs should be coerced to structs containing 18886 // register-size fields in the same situations they would be for fixed 18887 // arguments. 18888 18889 static const MCPhysReg ArgFPR16s[] = { 18890 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 18891 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 18892 }; 18893 static const MCPhysReg ArgFPR32s[] = { 18894 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 18895 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 18896 }; 18897 static const MCPhysReg ArgFPR64s[] = { 18898 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 18899 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 18900 }; 18901 // This is an interim calling convention and it may be changed in the future. 18902 static const MCPhysReg ArgVRs[] = { 18903 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 18904 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 18905 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 18906 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 18907 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 18908 RISCV::V20M2, RISCV::V22M2}; 18909 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 18910 RISCV::V20M4}; 18911 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 18912 18913 ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { 18914 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except 18915 // the ILP32E ABI. 18916 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 18917 RISCV::X13, RISCV::X14, RISCV::X15, 18918 RISCV::X16, RISCV::X17}; 18919 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs. 18920 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 18921 RISCV::X13, RISCV::X14, RISCV::X15}; 18922 18923 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 18924 return ArrayRef(ArgEGPRs); 18925 18926 return ArrayRef(ArgIGPRs); 18927 } 18928 18929 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { 18930 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used 18931 // for the save-restore libcall, so we don't use them. 18932 // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register. 18933 static const MCPhysReg FastCCIGPRs[] = { 18934 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, 18935 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31}; 18936 18937 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E. 18938 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 18939 RISCV::X13, RISCV::X14, RISCV::X15}; 18940 18941 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 18942 return ArrayRef(FastCCEGPRs); 18943 18944 return ArrayRef(FastCCIGPRs); 18945 } 18946 18947 // Pass a 2*XLEN argument that has been split into two XLEN values through 18948 // registers or the stack as necessary.
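// As an illustrative sketch (not a normative ABI statement): an i64 argument
// on RV32 that has been split into two i32 halves can end up in one of three
// placements, depending on how many argument GPRs are still free when it is
// assigned:
//   * two GPRs free -> low half in the first free GPR, high half in the next
//   * one GPR free  -> low half in that GPR, high half in the first stack slot
//   * no GPRs free  -> both halves in consecutive stack slots
// The function below implements this cascade.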
18949 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 18950 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 18951 MVT ValVT2, MVT LocVT2, 18952 ISD::ArgFlagsTy ArgFlags2, bool EABI) { 18953 unsigned XLenInBytes = XLen / 8; 18954 const RISCVSubtarget &STI = 18955 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 18956 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); 18957 18958 if (Register Reg = State.AllocateReg(ArgGPRs)) { 18959 // At least one half can be passed via register. 18960 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 18961 VA1.getLocVT(), CCValAssign::Full)); 18962 } else { 18963 // Both halves must be passed on the stack, with proper alignment. 18964 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte 18965 // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 18966 Align StackAlign(XLenInBytes); 18967 if (!EABI || XLen != 32) 18968 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); 18969 State.addLoc( 18970 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 18971 State.AllocateStack(XLenInBytes, StackAlign), 18972 VA1.getLocVT(), CCValAssign::Full)); 18973 State.addLoc(CCValAssign::getMem( 18974 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 18975 LocVT2, CCValAssign::Full)); 18976 return false; 18977 } 18978 18979 if (Register Reg = State.AllocateReg(ArgGPRs)) { 18980 // The second half can also be passed via register. 18981 State.addLoc( 18982 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 18983 } else { 18984 // The second half is passed via the stack, without additional alignment. 18985 State.addLoc(CCValAssign::getMem( 18986 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 18987 LocVT2, CCValAssign::Full)); 18988 } 18989 18990 return false; 18991 } 18992 18993 // Implements the RISC-V calling convention. Returns true upon failure. 18994 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 18995 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 18996 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 18997 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 18998 RVVArgDispatcher &RVVDispatcher) { 18999 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 19000 assert(XLen == 32 || XLen == 64); 19001 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 19002 19003 // Static chain parameter must not be passed in normal argument registers, 19004 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain 19005 if (ArgFlags.isNest()) { 19006 if (unsigned Reg = State.AllocateReg(RISCV::X7)) { 19007 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19008 return false; 19009 } 19010 } 19011 19012 // Any return value split in to more than two values can't be returned 19013 // directly. Vectors are returned via the available vector registers. 19014 if (!LocVT.isVector() && IsRet && ValNo > 1) 19015 return true; 19016 19017 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 19018 // variadic argument, or if no F16/F32 argument registers are available. 19019 bool UseGPRForF16_F32 = true; 19020 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 19021 // variadic argument, or if no F64 argument registers are available. 
19022 bool UseGPRForF64 = true; 19023 19024 switch (ABI) { 19025 default: 19026 llvm_unreachable("Unexpected ABI"); 19027 case RISCVABI::ABI_ILP32: 19028 case RISCVABI::ABI_ILP32E: 19029 case RISCVABI::ABI_LP64: 19030 case RISCVABI::ABI_LP64E: 19031 break; 19032 case RISCVABI::ABI_ILP32F: 19033 case RISCVABI::ABI_LP64F: 19034 UseGPRForF16_F32 = !IsFixed; 19035 break; 19036 case RISCVABI::ABI_ILP32D: 19037 case RISCVABI::ABI_LP64D: 19038 UseGPRForF16_F32 = !IsFixed; 19039 UseGPRForF64 = !IsFixed; 19040 break; 19041 } 19042 19043 // FPR16, FPR32, and FPR64 alias each other. 19044 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { 19045 UseGPRForF16_F32 = true; 19046 UseGPRForF64 = true; 19047 } 19048 19049 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 19050 // similar local variables rather than directly checking against the target 19051 // ABI. 19052 19053 if (UseGPRForF16_F32 && 19054 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { 19055 LocVT = XLenVT; 19056 LocInfo = CCValAssign::BCvt; 19057 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 19058 LocVT = MVT::i64; 19059 LocInfo = CCValAssign::BCvt; 19060 } 19061 19062 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); 19063 19064 // If this is a variadic argument, the RISC-V calling convention requires 19065 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 19066 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 19067 // be used regardless of whether the original argument was split during 19068 // legalisation or not. The argument will not be passed by registers if the 19069 // original type is larger than 2*XLEN, so the register alignment rule does 19070 // not apply. 19071 // TODO: To be compatible with GCC's behaviors, we don't align registers 19072 // currently if we are using ILP32E calling convention. This behavior may be 19073 // changed when RV32E/ILP32E is ratified. 19074 unsigned TwoXLenInBytes = (2 * XLen) / 8; 19075 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 19076 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && 19077 ABI != RISCVABI::ABI_ILP32E) { 19078 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 19079 // Skip 'odd' register if necessary. 19080 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 19081 State.AllocateReg(ArgGPRs); 19082 } 19083 19084 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 19085 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 19086 State.getPendingArgFlags(); 19087 19088 assert(PendingLocs.size() == PendingArgFlags.size() && 19089 "PendingLocs and PendingArgFlags out of sync"); 19090 19091 // Handle passing f64 on RV32D with a soft float ABI or when floating point 19092 // registers are exhausted. 19093 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 19094 assert(PendingLocs.empty() && "Can't lower f64 if it is split"); 19095 // Depending on available argument GPRS, f64 may be passed in a pair of 19096 // GPRs, split between a GPR and the stack, or passed completely on the 19097 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 19098 // cases. 
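// Illustrative example only (assuming the ilp32 ABI): for "void f(double d)"
// the low half of 'd' is assigned to a0 and the high half to a1, whereas a
// double preceded by seven i32 arguments has only a7 left, so its low half
// goes in a7 and its high half spills to the first stack slot.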
19099 Register Reg = State.AllocateReg(ArgGPRs); 19100 if (!Reg) { 19101 unsigned StackOffset = State.AllocateStack(8, Align(8)); 19102 State.addLoc( 19103 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 19104 return false; 19105 } 19106 LocVT = MVT::i32; 19107 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19108 Register HiReg = State.AllocateReg(ArgGPRs); 19109 if (HiReg) { 19110 State.addLoc( 19111 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); 19112 } else { 19113 unsigned StackOffset = State.AllocateStack(4, Align(4)); 19114 State.addLoc( 19115 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 19116 } 19117 return false; 19118 } 19119 19120 // Fixed-length vectors are located in the corresponding scalable-vector 19121 // container types. 19122 if (ValVT.isFixedLengthVector()) 19123 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 19124 19125 // Split arguments might be passed indirectly, so keep track of the pending 19126 // values. Split vectors are passed via a mix of registers and indirectly, so 19127 // treat them as we would any other argument. 19128 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 19129 LocVT = XLenVT; 19130 LocInfo = CCValAssign::Indirect; 19131 PendingLocs.push_back( 19132 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 19133 PendingArgFlags.push_back(ArgFlags); 19134 if (!ArgFlags.isSplitEnd()) { 19135 return false; 19136 } 19137 } 19138 19139 // If the split argument only had two elements, it should be passed directly 19140 // in registers or on the stack. 19141 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 19142 PendingLocs.size() <= 2) { 19143 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 19144 // Apply the normal calling convention rules to the first half of the 19145 // split argument. 19146 CCValAssign VA = PendingLocs[0]; 19147 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 19148 PendingLocs.clear(); 19149 PendingArgFlags.clear(); 19150 return CC_RISCVAssign2XLen( 19151 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, 19152 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); 19153 } 19154 19155 // Allocate to a register if possible, or else a stack slot. 19156 Register Reg; 19157 unsigned StoreSizeBytes = XLen / 8; 19158 Align StackAlign = Align(XLen / 8); 19159 19160 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) 19161 Reg = State.AllocateReg(ArgFPR16s); 19162 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 19163 Reg = State.AllocateReg(ArgFPR32s); 19164 else if (ValVT == MVT::f64 && !UseGPRForF64) 19165 Reg = State.AllocateReg(ArgFPR64s); 19166 else if (ValVT.isVector()) { 19167 Reg = RVVDispatcher.getNextPhysReg(); 19168 if (!Reg) { 19169 // For return values, the vector must be passed fully via registers or 19170 // via the stack. 19171 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 19172 // but we're using all of them. 19173 if (IsRet) 19174 return true; 19175 // Try using a GPR to pass the address 19176 if ((Reg = State.AllocateReg(ArgGPRs))) { 19177 LocVT = XLenVT; 19178 LocInfo = CCValAssign::Indirect; 19179 } else if (ValVT.isScalableVector()) { 19180 LocVT = XLenVT; 19181 LocInfo = CCValAssign::Indirect; 19182 } else { 19183 // Pass fixed-length vectors on the stack. 19184 LocVT = ValVT; 19185 StoreSizeBytes = ValVT.getStoreSize(); 19186 // Align vectors to their element sizes, being careful for vXi1 19187 // vectors. 
19188 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 19189 } 19190 } 19191 } else { 19192 Reg = State.AllocateReg(ArgGPRs); 19193 } 19194 19195 unsigned StackOffset = 19196 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 19197 19198 // If we reach this point and PendingLocs is non-empty, we must be at the 19199 // end of a split argument that must be passed indirectly. 19200 if (!PendingLocs.empty()) { 19201 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 19202 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 19203 19204 for (auto &It : PendingLocs) { 19205 if (Reg) 19206 It.convertToReg(Reg); 19207 else 19208 It.convertToMem(StackOffset); 19209 State.addLoc(It); 19210 } 19211 PendingLocs.clear(); 19212 PendingArgFlags.clear(); 19213 return false; 19214 } 19215 19216 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 19217 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 19218 "Expected an XLenVT or vector types at this stage"); 19219 19220 if (Reg) { 19221 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19222 return false; 19223 } 19224 19225 // When a scalar floating-point value is passed on the stack, no 19226 // bit-conversion is needed. 19227 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { 19228 assert(!ValVT.isVector()); 19229 LocVT = ValVT; 19230 LocInfo = CCValAssign::Full; 19231 } 19232 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 19233 return false; 19234 } 19235 19236 template <typename ArgTy> 19237 static std::optional<unsigned> preAssignMask(const ArgTy &Args) { 19238 for (const auto &ArgIdx : enumerate(Args)) { 19239 MVT ArgVT = ArgIdx.value().VT; 19240 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 19241 return ArgIdx.index(); 19242 } 19243 return std::nullopt; 19244 } 19245 19246 void RISCVTargetLowering::analyzeInputArgs( 19247 MachineFunction &MF, CCState &CCInfo, 19248 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 19249 RISCVCCAssignFn Fn) const { 19250 unsigned NumArgs = Ins.size(); 19251 FunctionType *FType = MF.getFunction().getFunctionType(); 19252 19253 RVVArgDispatcher Dispatcher; 19254 if (IsRet) { 19255 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)}; 19256 } else { 19257 SmallVector<Type *, 4> TypeList; 19258 for (const Argument &Arg : MF.getFunction().args()) 19259 TypeList.push_back(Arg.getType()); 19260 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)}; 19261 } 19262 19263 for (unsigned i = 0; i != NumArgs; ++i) { 19264 MVT ArgVT = Ins[i].VT; 19265 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 19266 19267 Type *ArgTy = nullptr; 19268 if (IsRet) 19269 ArgTy = FType->getReturnType(); 19270 else if (Ins[i].isOrigArg()) 19271 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 19272 19273 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 19274 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 19275 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 19276 Dispatcher)) { 19277 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 19278 << ArgVT << '\n'); 19279 llvm_unreachable(nullptr); 19280 } 19281 } 19282 } 19283 19284 void RISCVTargetLowering::analyzeOutputArgs( 19285 MachineFunction &MF, CCState &CCInfo, 19286 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 19287 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 19288 unsigned NumArgs = Outs.size(); 19289 19290 SmallVector<Type *, 
4> TypeList; 19291 if (IsRet) 19292 TypeList.push_back(MF.getFunction().getReturnType()); 19293 else if (CLI) 19294 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs()) 19295 TypeList.push_back(Arg.Ty); 19296 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)}; 19297 19298 for (unsigned i = 0; i != NumArgs; i++) { 19299 MVT ArgVT = Outs[i].VT; 19300 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 19301 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 19302 19303 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 19304 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 19305 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 19306 Dispatcher)) { 19307 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 19308 << ArgVT << "\n"); 19309 llvm_unreachable(nullptr); 19310 } 19311 } 19312 } 19313 19314 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 19315 // values. 19316 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 19317 const CCValAssign &VA, const SDLoc &DL, 19318 const RISCVSubtarget &Subtarget) { 19319 switch (VA.getLocInfo()) { 19320 default: 19321 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 19322 case CCValAssign::Full: 19323 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 19324 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 19325 break; 19326 case CCValAssign::BCvt: 19327 if (VA.getLocVT().isInteger() && 19328 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 19329 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); 19330 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 19331 if (RV64LegalI32) { 19332 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); 19333 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 19334 } else { 19335 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 19336 } 19337 } else { 19338 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 19339 } 19340 break; 19341 } 19342 return Val; 19343 } 19344 19345 // The caller is responsible for loading the full value if the argument is 19346 // passed with CCValAssign::Indirect. 19347 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 19348 const CCValAssign &VA, const SDLoc &DL, 19349 const ISD::InputArg &In, 19350 const RISCVTargetLowering &TLI) { 19351 MachineFunction &MF = DAG.getMachineFunction(); 19352 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 19353 EVT LocVT = VA.getLocVT(); 19354 SDValue Val; 19355 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 19356 Register VReg = RegInfo.createVirtualRegister(RC); 19357 RegInfo.addLiveIn(VA.getLocReg(), VReg); 19358 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 19359 19360 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. 19361 if (In.isOrigArg()) { 19362 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 19363 if (OrigArg->getType()->isIntegerTy()) { 19364 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 19365 // An input zero extended from i31 can also be considered sign extended. 
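// (Brief reasoning note: if the argument was zero extended from fewer than
// 32 bits, bit 31 and everything above it are known to be zero, so the
// register value is also sign extended from 32 bits and can be recorded for
// the SExtWRemoval pass just like an explicitly sign-extended input.)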
19366 if ((BitWidth <= 32 && In.Flags.isSExt()) || 19367 (BitWidth < 32 && In.Flags.isZExt())) { 19368 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 19369 RVFI->addSExt32Register(VReg); 19370 } 19371 } 19372 } 19373 19374 if (VA.getLocInfo() == CCValAssign::Indirect) 19375 return Val; 19376 19377 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 19378 } 19379 19380 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 19381 const CCValAssign &VA, const SDLoc &DL, 19382 const RISCVSubtarget &Subtarget) { 19383 EVT LocVT = VA.getLocVT(); 19384 19385 switch (VA.getLocInfo()) { 19386 default: 19387 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 19388 case CCValAssign::Full: 19389 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 19390 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 19391 break; 19392 case CCValAssign::BCvt: 19393 if (LocVT.isInteger() && 19394 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 19395 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); 19396 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { 19397 if (RV64LegalI32) { 19398 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); 19399 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); 19400 } else { 19401 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 19402 } 19403 } else { 19404 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 19405 } 19406 break; 19407 } 19408 return Val; 19409 } 19410 19411 // The caller is responsible for loading the full value if the argument is 19412 // passed with CCValAssign::Indirect. 19413 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 19414 const CCValAssign &VA, const SDLoc &DL) { 19415 MachineFunction &MF = DAG.getMachineFunction(); 19416 MachineFrameInfo &MFI = MF.getFrameInfo(); 19417 EVT LocVT = VA.getLocVT(); 19418 EVT ValVT = VA.getValVT(); 19419 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 19420 if (ValVT.isScalableVector()) { 19421 // When the value is a scalable vector, we save the pointer which points to 19422 // the scalable vector value in the stack. The ValVT will be the pointer 19423 // type, instead of the scalable vector type. 
19424 ValVT = LocVT; 19425 } 19426 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 19427 /*IsImmutable=*/true); 19428 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 19429 SDValue Val; 19430 19431 ISD::LoadExtType ExtType; 19432 switch (VA.getLocInfo()) { 19433 default: 19434 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 19435 case CCValAssign::Full: 19436 case CCValAssign::Indirect: 19437 case CCValAssign::BCvt: 19438 ExtType = ISD::NON_EXTLOAD; 19439 break; 19440 } 19441 Val = DAG.getExtLoad( 19442 ExtType, DL, LocVT, Chain, FIN, 19443 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 19444 return Val; 19445 } 19446 19447 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 19448 const CCValAssign &VA, 19449 const CCValAssign &HiVA, 19450 const SDLoc &DL) { 19451 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 19452 "Unexpected VA"); 19453 MachineFunction &MF = DAG.getMachineFunction(); 19454 MachineFrameInfo &MFI = MF.getFrameInfo(); 19455 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 19456 19457 assert(VA.isRegLoc() && "Expected register VA assignment"); 19458 19459 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 19460 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 19461 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 19462 SDValue Hi; 19463 if (HiVA.isMemLoc()) { 19464 // Second half of f64 is passed on the stack. 19465 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), 19466 /*IsImmutable=*/true); 19467 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 19468 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 19469 MachinePointerInfo::getFixedStack(MF, FI)); 19470 } else { 19471 // Second half of f64 is passed in another GPR. 19472 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 19473 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); 19474 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 19475 } 19476 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 19477 } 19478 19479 // FastCC shows less than a 1% performance improvement on some particular 19480 // benchmarks, but it may theoretically be beneficial in some cases.
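// A rough sketch of the allocation order implemented below (descriptive
// only, not a normative ABI statement):
//   * i32/i64 values -> a0-a7, then t3-t6, then the stack
//   * f16/f32/f64    -> fa0-fa7, then the FP temporaries (ft0-ft7 and
//                       ft8-ft11), then GPRs for the Z*inx configurations,
//                       then the stack
//   * vector values  -> vector argument registers via RVVDispatcher, else
//                       indirectly through a GPR, else (fixed-length vectors
//                       only) the stack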
19481 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, 19482 unsigned ValNo, MVT ValVT, MVT LocVT, 19483 CCValAssign::LocInfo LocInfo, 19484 ISD::ArgFlagsTy ArgFlags, CCState &State, 19485 bool IsFixed, bool IsRet, Type *OrigTy, 19486 const RISCVTargetLowering &TLI, 19487 RVVArgDispatcher &RVVDispatcher) { 19488 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 19489 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 19490 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19491 return false; 19492 } 19493 } 19494 19495 const RISCVSubtarget &Subtarget = TLI.getSubtarget(); 19496 19497 if (LocVT == MVT::f16 && 19498 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { 19499 static const MCPhysReg FPR16List[] = { 19500 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 19501 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 19502 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 19503 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 19504 if (unsigned Reg = State.AllocateReg(FPR16List)) { 19505 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19506 return false; 19507 } 19508 } 19509 19510 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 19511 static const MCPhysReg FPR32List[] = { 19512 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 19513 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 19514 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 19515 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 19516 if (unsigned Reg = State.AllocateReg(FPR32List)) { 19517 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19518 return false; 19519 } 19520 } 19521 19522 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 19523 static const MCPhysReg FPR64List[] = { 19524 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 19525 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 19526 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 19527 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 19528 if (unsigned Reg = State.AllocateReg(FPR64List)) { 19529 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19530 return false; 19531 } 19532 } 19533 19534 // Check if there is an available GPR before hitting the stack. 
19535 if ((LocVT == MVT::f16 && 19536 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || 19537 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 19538 (LocVT == MVT::f64 && Subtarget.is64Bit() && 19539 Subtarget.hasStdExtZdinx())) { 19540 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 19541 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19542 return false; 19543 } 19544 } 19545 19546 if (LocVT == MVT::f16) { 19547 unsigned Offset2 = State.AllocateStack(2, Align(2)); 19548 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo)); 19549 return false; 19550 } 19551 19552 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 19553 unsigned Offset4 = State.AllocateStack(4, Align(4)); 19554 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 19555 return false; 19556 } 19557 19558 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 19559 unsigned Offset5 = State.AllocateStack(8, Align(8)); 19560 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 19561 return false; 19562 } 19563 19564 if (LocVT.isVector()) { 19565 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg(); 19566 if (AllocatedVReg) { 19567 // Fixed-length vectors are located in the corresponding scalable-vector 19568 // container types. 19569 if (ValVT.isFixedLengthVector()) 19570 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 19571 State.addLoc( 19572 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo)); 19573 } else { 19574 // Try and pass the address via a "fast" GPR. 19575 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 19576 LocInfo = CCValAssign::Indirect; 19577 LocVT = TLI.getSubtarget().getXLenVT(); 19578 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 19579 } else if (ValVT.isFixedLengthVector()) { 19580 auto StackAlign = 19581 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 19582 unsigned StackOffset = 19583 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 19584 State.addLoc( 19585 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 19586 } else { 19587 // Can't pass scalable vectors on the stack. 19588 return true; 19589 } 19590 } 19591 19592 return false; 19593 } 19594 19595 return true; // CC didn't match. 19596 } 19597 19598 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 19599 CCValAssign::LocInfo LocInfo, 19600 ISD::ArgFlagsTy ArgFlags, CCState &State) { 19601 if (ArgFlags.isNest()) { 19602 report_fatal_error( 19603 "Attribute 'nest' is not supported in GHC calling convention"); 19604 } 19605 19606 static const MCPhysReg GPRList[] = { 19607 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 19608 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 19609 19610 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 19611 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 19612 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 19613 if (unsigned Reg = State.AllocateReg(GPRList)) { 19614 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19615 return false; 19616 } 19617 } 19618 19619 const RISCVSubtarget &Subtarget = 19620 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 19621 19622 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 19623 // Pass in STG registers: F1, ..., F6 19624 // fs0 ... 
fs5 19625 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 19626 RISCV::F18_F, RISCV::F19_F, 19627 RISCV::F20_F, RISCV::F21_F}; 19628 if (unsigned Reg = State.AllocateReg(FPR32List)) { 19629 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19630 return false; 19631 } 19632 } 19633 19634 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 19635 // Pass in STG registers: D1, ..., D6 19636 // fs6 ... fs11 19637 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 19638 RISCV::F24_D, RISCV::F25_D, 19639 RISCV::F26_D, RISCV::F27_D}; 19640 if (unsigned Reg = State.AllocateReg(FPR64List)) { 19641 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19642 return false; 19643 } 19644 } 19645 19646 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 19647 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && 19648 Subtarget.is64Bit())) { 19649 if (unsigned Reg = State.AllocateReg(GPRList)) { 19650 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 19651 return false; 19652 } 19653 } 19654 19655 report_fatal_error("No registers left in GHC calling convention"); 19656 return true; 19657 } 19658 19659 // Transform physical registers into virtual registers. 19660 SDValue RISCVTargetLowering::LowerFormalArguments( 19661 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 19662 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 19663 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 19664 19665 MachineFunction &MF = DAG.getMachineFunction(); 19666 19667 switch (CallConv) { 19668 default: 19669 report_fatal_error("Unsupported calling convention"); 19670 case CallingConv::C: 19671 case CallingConv::Fast: 19672 case CallingConv::SPIR_KERNEL: 19673 case CallingConv::GRAAL: 19674 case CallingConv::RISCV_VectorCall: 19675 break; 19676 case CallingConv::GHC: 19677 if (Subtarget.hasStdExtE()) 19678 report_fatal_error("GHC calling convention is not supported on RVE!"); 19679 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) 19680 report_fatal_error("GHC calling convention requires the (Zfinx/F) and " 19681 "(Zdinx/D) instruction set extensions"); 19682 } 19683 19684 const Function &Func = MF.getFunction(); 19685 if (Func.hasFnAttribute("interrupt")) { 19686 if (!Func.arg_empty()) 19687 report_fatal_error( 19688 "Functions with the interrupt attribute cannot have arguments!"); 19689 19690 StringRef Kind = 19691 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 19692 19693 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 19694 report_fatal_error( 19695 "Function interrupt attribute argument not supported!"); 19696 } 19697 19698 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 19699 MVT XLenVT = Subtarget.getXLenVT(); 19700 unsigned XLenInBytes = Subtarget.getXLen() / 8; 19701 // Used with varargs to accumulate store chains. 19702 std::vector<SDValue> OutChains; 19703 19704 // Assign locations to all of the incoming arguments. 19705 SmallVector<CCValAssign, 16> ArgLocs; 19706 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 19707 19708 if (CallConv == CallingConv::GHC) 19709 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC); 19710 else 19711 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, 19712 CallConv == CallingConv::Fast ?
RISCV::CC_RISCV_FastCC 19713 : RISCV::CC_RISCV); 19714 19715 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { 19716 CCValAssign &VA = ArgLocs[i]; 19717 SDValue ArgValue; 19718 // Passing f64 on RV32D with a soft float ABI must be handled as a special 19719 // case. 19720 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 19721 assert(VA.needsCustom()); 19722 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); 19723 } else if (VA.isRegLoc()) 19724 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); 19725 else 19726 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 19727 19728 if (VA.getLocInfo() == CCValAssign::Indirect) { 19729 // If the original argument was split and passed by reference (e.g. i128 19730 // on RV32), we need to load all parts of it here (using the same 19731 // address). Vectors may be partly split to registers and partly to the 19732 // stack, in which case the base address is partly offset and subsequent 19733 // stores are relative to that. 19734 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 19735 MachinePointerInfo())); 19736 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; 19737 unsigned ArgPartOffset = Ins[InsIdx].PartOffset; 19738 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 19739 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { 19740 CCValAssign &PartVA = ArgLocs[i + 1]; 19741 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; 19742 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 19743 if (PartVA.getValVT().isScalableVector()) 19744 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 19745 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 19746 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 19747 MachinePointerInfo())); 19748 ++i; 19749 ++InsIdx; 19750 } 19751 continue; 19752 } 19753 InVals.push_back(ArgValue); 19754 } 19755 19756 if (any_of(ArgLocs, 19757 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 19758 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 19759 19760 if (IsVarArg) { 19761 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); 19762 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 19763 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 19764 MachineFrameInfo &MFI = MF.getFrameInfo(); 19765 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 19766 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 19767 19768 // Size of the vararg save area. For now, the varargs save area is either 19769 // zero or large enough to hold a0-a7. 19770 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 19771 int FI; 19772 19773 // If all registers are allocated, then all varargs must be passed on the 19774 // stack and we don't need to save any argregs. 19775 if (VarArgsSaveSize == 0) { 19776 int VaArgOffset = CCInfo.getStackSize(); 19777 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 19778 } else { 19779 int VaArgOffset = -VarArgsSaveSize; 19780 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); 19781 19782 // If saving an odd number of registers, then create an extra stack slot to 19783 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 19784 // offsets to even-numbered registers remain 2*XLEN-aligned.
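// For example (illustrative): if only a0 carries a fixed argument, Idx == 1
// and the seven registers a1-a7 are saved, an odd multiple of XLEN bytes.
// The extra slot created below pads the save area to 8*XLEN bytes so that
// the frame pointer, and hence the offsets of a2, a4 and a6 within the save
// area, stay 2*XLEN-aligned.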
19785 if (Idx % 2) { 19786 MFI.CreateFixedObject( 19787 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true); 19788 VarArgsSaveSize += XLenInBytes; 19789 } 19790 19791 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 19792 19793 // Copy the integer registers that may have been used for passing varargs 19794 // to the vararg save area. 19795 for (unsigned I = Idx; I < ArgRegs.size(); ++I) { 19796 const Register Reg = RegInfo.createVirtualRegister(RC); 19797 RegInfo.addLiveIn(ArgRegs[I], Reg); 19798 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 19799 SDValue Store = DAG.getStore( 19800 Chain, DL, ArgValue, FIN, 19801 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes)); 19802 OutChains.push_back(Store); 19803 FIN = 19804 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL); 19805 } 19806 } 19807 19808 // Record the frame index of the first variable argument, 19809 // which is needed when lowering VASTART. 19810 RVFI->setVarArgsFrameIndex(FI); 19811 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 19812 } 19813 19814 // All stores are grouped in one node to allow the matching between 19815 // the size of Ins and InVals. This only happens for vararg functions. 19816 if (!OutChains.empty()) { 19817 OutChains.push_back(Chain); 19818 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 19819 } 19820 19821 return Chain; 19822 } 19823 19824 /// isEligibleForTailCallOptimization - Check whether the call is eligible 19825 /// for tail call optimization. 19826 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 19827 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 19828 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 19829 const SmallVector<CCValAssign, 16> &ArgLocs) const { 19830 19831 auto CalleeCC = CLI.CallConv; 19832 auto &Outs = CLI.Outs; 19833 auto &Caller = MF.getFunction(); 19834 auto CallerCC = Caller.getCallingConv(); 19835 19836 // Exception-handling functions need a special set of instructions to 19837 // indicate a return to the hardware. Tail-calling another function would 19838 // probably break this. 19839 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 19840 // should be expanded as new function attributes are introduced. 19841 if (Caller.hasFnAttribute("interrupt")) 19842 return false; 19843 19844 // Do not tail call opt if the stack is used to pass parameters. 19845 if (CCInfo.getStackSize() != 0) 19846 return false; 19847 19848 // Do not tail call opt if any parameters need to be passed indirectly. 19849 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 19850 // passed indirectly. So the address of the value will be passed in a 19851 // register, or if not available, then the address is put on the stack. In 19852 // order to pass indirectly, space on the stack often needs to be allocated 19853 // in order to store the value. In this case the CCInfo.getStackSize() 19854 // != 0 check is not enough and we need to check whether any of the 19855 // CCValAssigns in ArgLocs are passed CCValAssign::Indirect. 19856 for (auto &VA : ArgLocs) 19857 if (VA.getLocInfo() == CCValAssign::Indirect) 19858 return false; 19859 19860 // Do not tail call opt if either caller or callee uses struct return 19861 // semantics. 19862 auto IsCallerStructRet = Caller.hasStructRetAttr(); 19863 auto IsCalleeStructRet = Outs.empty() ?
false : Outs[0].Flags.isSRet(); 19864 if (IsCallerStructRet || IsCalleeStructRet) 19865 return false; 19866 19867 // The callee has to preserve all registers the caller needs to preserve. 19868 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 19869 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 19870 if (CalleeCC != CallerCC) { 19871 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 19872 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 19873 return false; 19874 } 19875 19876 // Byval parameters hand the function a pointer directly into the stack area 19877 // we want to reuse during a tail call. Working around this *is* possible 19878 // but less efficient and uglier in LowerCall. 19879 for (auto &Arg : Outs) 19880 if (Arg.Flags.isByVal()) 19881 return false; 19882 19883 return true; 19884 } 19885 19886 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 19887 return DAG.getDataLayout().getPrefTypeAlign( 19888 VT.getTypeForEVT(*DAG.getContext())); 19889 } 19890 19891 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 19892 // and output parameter nodes. 19893 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 19894 SmallVectorImpl<SDValue> &InVals) const { 19895 SelectionDAG &DAG = CLI.DAG; 19896 SDLoc &DL = CLI.DL; 19897 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 19898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 19899 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 19900 SDValue Chain = CLI.Chain; 19901 SDValue Callee = CLI.Callee; 19902 bool &IsTailCall = CLI.IsTailCall; 19903 CallingConv::ID CallConv = CLI.CallConv; 19904 bool IsVarArg = CLI.IsVarArg; 19905 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 19906 MVT XLenVT = Subtarget.getXLenVT(); 19907 19908 MachineFunction &MF = DAG.getMachineFunction(); 19909 19910 // Analyze the operands of the call, assigning locations to each operand. 19911 SmallVector<CCValAssign, 16> ArgLocs; 19912 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 19913 19914 if (CallConv == CallingConv::GHC) { 19915 if (Subtarget.hasStdExtE()) 19916 report_fatal_error("GHC calling convention is not supported on RVE!"); 19917 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); 19918 } else 19919 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 19920 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC 19921 : RISCV::CC_RISCV); 19922 19923 // Check if it's really possible to do a tail call. 19924 if (IsTailCall) 19925 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 19926 19927 if (IsTailCall) 19928 ++NumTailCalls; 19929 else if (CLI.CB && CLI.CB->isMustTailCall()) 19930 report_fatal_error("failed to perform tail call elimination on a call " 19931 "site marked musttail"); 19932 19933 // Get a count of how many bytes are to be pushed on the stack. 
19934 unsigned NumBytes = ArgCCInfo.getStackSize(); 19935 19936 // Create local copies for byval args 19937 SmallVector<SDValue, 8> ByValArgs; 19938 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 19939 ISD::ArgFlagsTy Flags = Outs[i].Flags; 19940 if (!Flags.isByVal()) 19941 continue; 19942 19943 SDValue Arg = OutVals[i]; 19944 unsigned Size = Flags.getByValSize(); 19945 Align Alignment = Flags.getNonZeroByValAlign(); 19946 19947 int FI = 19948 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 19949 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 19950 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 19951 19952 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 19953 /*IsVolatile=*/false, 19954 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall, 19955 MachinePointerInfo(), MachinePointerInfo()); 19956 ByValArgs.push_back(FIPtr); 19957 } 19958 19959 if (!IsTailCall) 19960 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 19961 19962 // Copy argument values to their designated locations. 19963 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 19964 SmallVector<SDValue, 8> MemOpChains; 19965 SDValue StackPtr; 19966 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; 19967 ++i, ++OutIdx) { 19968 CCValAssign &VA = ArgLocs[i]; 19969 SDValue ArgValue = OutVals[OutIdx]; 19970 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; 19971 19972 // Handle passing f64 on RV32D with a soft float ABI as a special case. 19973 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 19974 assert(VA.isRegLoc() && "Expected register VA assignment"); 19975 assert(VA.needsCustom()); 19976 SDValue SplitF64 = DAG.getNode( 19977 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 19978 SDValue Lo = SplitF64.getValue(0); 19979 SDValue Hi = SplitF64.getValue(1); 19980 19981 Register RegLo = VA.getLocReg(); 19982 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 19983 19984 // Get the CCValAssign for the Hi part. 19985 CCValAssign &HiVA = ArgLocs[++i]; 19986 19987 if (HiVA.isMemLoc()) { 19988 // Second half of f64 is passed on the stack. 19989 if (!StackPtr.getNode()) 19990 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 19991 SDValue Address = 19992 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 19993 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); 19994 // Emit the store. 19995 MemOpChains.push_back( 19996 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); 19997 } else { 19998 // Second half of f64 is passed in another GPR. 19999 Register RegHigh = HiVA.getLocReg(); 20000 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 20001 } 20002 continue; 20003 } 20004 20005 // Promote the value if needed. 20006 // For now, only handle fully promoted and indirect arguments. 20007 if (VA.getLocInfo() == CCValAssign::Indirect) { 20008 // Store the argument in a stack slot and pass its address. 20009 Align StackAlign = 20010 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), 20011 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 20012 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 20013 // If the original argument was split (e.g. i128), we need 20014 // to store the required parts of it here (and pass just one address). 20015 // Vectors may be partly split to registers and partly to the stack, in 20016 // which case the base address is partly offset and subsequent stores are 20017 // relative to that. 
20018 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; 20019 unsigned ArgPartOffset = Outs[OutIdx].PartOffset; 20020 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 20021 // Calculate the total size to store. We don't have access to what we're 20022 // actually storing other than performing the loop and collecting the 20023 // info. 20024 SmallVector<std::pair<SDValue, SDValue>> Parts; 20025 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { 20026 SDValue PartValue = OutVals[OutIdx + 1]; 20027 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; 20028 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 20029 EVT PartVT = PartValue.getValueType(); 20030 if (PartVT.isScalableVector()) 20031 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 20032 StoredSize += PartVT.getStoreSize(); 20033 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 20034 Parts.push_back(std::make_pair(PartValue, Offset)); 20035 ++i; 20036 ++OutIdx; 20037 } 20038 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 20039 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 20040 MemOpChains.push_back( 20041 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 20042 MachinePointerInfo::getFixedStack(MF, FI))); 20043 for (const auto &Part : Parts) { 20044 SDValue PartValue = Part.first; 20045 SDValue PartOffset = Part.second; 20046 SDValue Address = 20047 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 20048 MemOpChains.push_back( 20049 DAG.getStore(Chain, DL, PartValue, Address, 20050 MachinePointerInfo::getFixedStack(MF, FI))); 20051 } 20052 ArgValue = SpillSlot; 20053 } else { 20054 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 20055 } 20056 20057 // Use local copy if it is a byval arg. 20058 if (Flags.isByVal()) 20059 ArgValue = ByValArgs[j++]; 20060 20061 if (VA.isRegLoc()) { 20062 // Queue up the argument copies and emit them at the end. 20063 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 20064 } else { 20065 assert(VA.isMemLoc() && "Argument not register or memory"); 20066 assert(!IsTailCall && "Tail call not allowed if stack is used " 20067 "for passing parameters"); 20068 20069 // Work out the address of the stack slot. 20070 if (!StackPtr.getNode()) 20071 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 20072 SDValue Address = 20073 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 20074 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 20075 20076 // Emit the store. 20077 MemOpChains.push_back( 20078 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 20079 } 20080 } 20081 20082 // Join the stores, which are independent of one another. 20083 if (!MemOpChains.empty()) 20084 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 20085 20086 SDValue Glue; 20087 20088 // Build a sequence of copy-to-reg nodes, chained and glued together. 20089 for (auto &Reg : RegsToPass) { 20090 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 20091 Glue = Chain.getValue(1); 20092 } 20093 20094 // Validate that none of the argument registers have been marked as 20095 // reserved, if so report an error. Do the same for the return address if this 20096 // is not a tailcall. 
20097 validateCCReservedRegs(RegsToPass, MF); 20098 if (!IsTailCall && 20099 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 20100 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 20101 MF.getFunction(), 20102 "Return address register required, but has been reserved."}); 20103 20104 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 20105 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 20106 // split it and then direct call can be matched by PseudoCALL. 20107 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 20108 const GlobalValue *GV = S->getGlobal(); 20109 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); 20110 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 20111 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL); 20112 } 20113 20114 // The first call operand is the chain and the second is the target address. 20115 SmallVector<SDValue, 8> Ops; 20116 Ops.push_back(Chain); 20117 Ops.push_back(Callee); 20118 20119 // Add argument registers to the end of the list so that they are 20120 // known live into the call. 20121 for (auto &Reg : RegsToPass) 20122 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 20123 20124 if (!IsTailCall) { 20125 // Add a register mask operand representing the call-preserved registers. 20126 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 20127 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 20128 assert(Mask && "Missing call preserved mask for calling convention"); 20129 Ops.push_back(DAG.getRegisterMask(Mask)); 20130 } 20131 20132 // Glue the call to the argument copies, if any. 20133 if (Glue.getNode()) 20134 Ops.push_back(Glue); 20135 20136 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && 20137 "Unexpected CFI type for a direct call"); 20138 20139 // Emit the call. 20140 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 20141 20142 if (IsTailCall) { 20143 MF.getFrameInfo().setHasTailCall(); 20144 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 20145 if (CLI.CFIType) 20146 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 20147 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 20148 return Ret; 20149 } 20150 20151 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 20152 if (CLI.CFIType) 20153 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 20154 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 20155 Glue = Chain.getValue(1); 20156 20157 // Mark the end of the call, which is glued to the call itself. 20158 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 20159 Glue = Chain.getValue(1); 20160 20161 // Assign locations to each value returned by this call. 20162 SmallVector<CCValAssign, 16> RVLocs; 20163 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 20164 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); 20165 20166 // Copy all of the result registers out of their specified physreg. 
20167 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 20168 auto &VA = RVLocs[i]; 20169 // Copy the value out 20170 SDValue RetValue = 20171 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 20172 // Glue the RetValue to the end of the call sequence 20173 Chain = RetValue.getValue(1); 20174 Glue = RetValue.getValue(2); 20175 20176 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 20177 assert(VA.needsCustom()); 20178 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), 20179 MVT::i32, Glue); 20180 Chain = RetValue2.getValue(1); 20181 Glue = RetValue2.getValue(2); 20182 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 20183 RetValue2); 20184 } 20185 20186 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 20187 20188 InVals.push_back(RetValue); 20189 } 20190 20191 return Chain; 20192 } 20193 20194 bool RISCVTargetLowering::CanLowerReturn( 20195 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 20196 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 20197 SmallVector<CCValAssign, 16> RVLocs; 20198 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 20199 20200 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)}; 20201 20202 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 20203 MVT VT = Outs[i].VT; 20204 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 20205 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 20206 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 20207 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 20208 nullptr, *this, Dispatcher)) 20209 return false; 20210 } 20211 return true; 20212 } 20213 20214 SDValue 20215 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 20216 bool IsVarArg, 20217 const SmallVectorImpl<ISD::OutputArg> &Outs, 20218 const SmallVectorImpl<SDValue> &OutVals, 20219 const SDLoc &DL, SelectionDAG &DAG) const { 20220 MachineFunction &MF = DAG.getMachineFunction(); 20221 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 20222 20223 // Stores the assignment of the return value to a location. 20224 SmallVector<CCValAssign, 16> RVLocs; 20225 20226 // Info about the registers and stack slot. 20227 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 20228 *DAG.getContext()); 20229 20230 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 20231 nullptr, RISCV::CC_RISCV); 20232 20233 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 20234 report_fatal_error("GHC functions return void only"); 20235 20236 SDValue Glue; 20237 SmallVector<SDValue, 4> RetOps(1, Chain); 20238 20239 // Copy the result values into the output registers. 20240 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { 20241 SDValue Val = OutVals[OutIdx]; 20242 CCValAssign &VA = RVLocs[i]; 20243 assert(VA.isRegLoc() && "Can only return in registers!"); 20244 20245 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 20246 // Handle returning f64 on RV32D with a soft float ABI. 
20247 assert(VA.isRegLoc() && "Expected return via registers"); 20248 assert(VA.needsCustom()); 20249 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 20250 DAG.getVTList(MVT::i32, MVT::i32), Val); 20251 SDValue Lo = SplitF64.getValue(0); 20252 SDValue Hi = SplitF64.getValue(1); 20253 Register RegLo = VA.getLocReg(); 20254 Register RegHi = RVLocs[++i].getLocReg(); 20255 20256 if (STI.isRegisterReservedByUser(RegLo) || 20257 STI.isRegisterReservedByUser(RegHi)) 20258 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 20259 MF.getFunction(), 20260 "Return value register required, but has been reserved."}); 20261 20262 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 20263 Glue = Chain.getValue(1); 20264 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 20265 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 20266 Glue = Chain.getValue(1); 20267 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 20268 } else { 20269 // Handle a 'normal' return. 20270 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 20271 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 20272 20273 if (STI.isRegisterReservedByUser(VA.getLocReg())) 20274 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 20275 MF.getFunction(), 20276 "Return value register required, but has been reserved."}); 20277 20278 // Guarantee that all emitted copies are stuck together. 20279 Glue = Chain.getValue(1); 20280 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 20281 } 20282 } 20283 20284 RetOps[0] = Chain; // Update chain. 20285 20286 // Add the glue node if we have it. 20287 if (Glue.getNode()) { 20288 RetOps.push_back(Glue); 20289 } 20290 20291 if (any_of(RVLocs, 20292 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 20293 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 20294 20295 unsigned RetOpc = RISCVISD::RET_GLUE; 20296 // Interrupt service routines use different return instructions. 20297 const Function &Func = DAG.getMachineFunction().getFunction(); 20298 if (Func.hasFnAttribute("interrupt")) { 20299 if (!Func.getReturnType()->isVoidTy()) 20300 report_fatal_error( 20301 "Functions with the interrupt attribute must have void return type!"); 20302 20303 MachineFunction &MF = DAG.getMachineFunction(); 20304 StringRef Kind = 20305 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 20306 20307 if (Kind == "supervisor") 20308 RetOpc = RISCVISD::SRET_GLUE; 20309 else 20310 RetOpc = RISCVISD::MRET_GLUE; 20311 } 20312 20313 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 20314 } 20315 20316 void RISCVTargetLowering::validateCCReservedRegs( 20317 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 20318 MachineFunction &MF) const { 20319 const Function &F = MF.getFunction(); 20320 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 20321 20322 if (llvm::any_of(Regs, [&STI](auto Reg) { 20323 return STI.isRegisterReservedByUser(Reg.first); 20324 })) 20325 F.getContext().diagnose(DiagnosticInfoUnsupported{ 20326 F, "Argument register required, but has been reserved."}); 20327 } 20328 20329 // Check if the result of the node is only used as a return value, as 20330 // otherwise we can't perform a tail-call. 
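// Informally, the pattern accepted below looks like (possibly reached
// through a BITCAST):
//   t1: ch = CopyToReg t0, Register $x10, (the value produced by N)
//   t2: ch = RISCVISD::RET_GLUE t1, ...
// i.e. the sole user of N is a glue-free CopyToReg whose only users are
// RET_GLUE nodes; Chain is then set to the CopyToReg's input chain.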
20331 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 20332 if (N->getNumValues() != 1) 20333 return false; 20334 if (!N->hasNUsesOfValue(1, 0)) 20335 return false; 20336 20337 SDNode *Copy = *N->use_begin(); 20338 20339 if (Copy->getOpcode() == ISD::BITCAST) { 20340 return isUsedByReturnOnly(Copy, Chain); 20341 } 20342 20343 // TODO: Handle additional opcodes in order to support tail-calling libcalls 20344 // with soft float ABIs. 20345 if (Copy->getOpcode() != ISD::CopyToReg) { 20346 return false; 20347 } 20348 20349 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 20350 // isn't safe to perform a tail call. 20351 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) 20352 return false; 20353 20354 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. 20355 bool HasRet = false; 20356 for (SDNode *Node : Copy->uses()) { 20357 if (Node->getOpcode() != RISCVISD::RET_GLUE) 20358 return false; 20359 HasRet = true; 20360 } 20361 if (!HasRet) 20362 return false; 20363 20364 Chain = Copy->getOperand(0); 20365 return true; 20366 } 20367 20368 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 20369 return CI->isTailCall(); 20370 } 20371 20372 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 20373 #define NODE_NAME_CASE(NODE) \ 20374 case RISCVISD::NODE: \ 20375 return "RISCVISD::" #NODE; 20376 // clang-format off 20377 switch ((RISCVISD::NodeType)Opcode) { 20378 case RISCVISD::FIRST_NUMBER: 20379 break; 20380 NODE_NAME_CASE(RET_GLUE) 20381 NODE_NAME_CASE(SRET_GLUE) 20382 NODE_NAME_CASE(MRET_GLUE) 20383 NODE_NAME_CASE(CALL) 20384 NODE_NAME_CASE(SELECT_CC) 20385 NODE_NAME_CASE(BR_CC) 20386 NODE_NAME_CASE(BuildPairF64) 20387 NODE_NAME_CASE(SplitF64) 20388 NODE_NAME_CASE(TAIL) 20389 NODE_NAME_CASE(ADD_LO) 20390 NODE_NAME_CASE(HI) 20391 NODE_NAME_CASE(LLA) 20392 NODE_NAME_CASE(ADD_TPREL) 20393 NODE_NAME_CASE(MULHSU) 20394 NODE_NAME_CASE(SHL_ADD) 20395 NODE_NAME_CASE(SLLW) 20396 NODE_NAME_CASE(SRAW) 20397 NODE_NAME_CASE(SRLW) 20398 NODE_NAME_CASE(DIVW) 20399 NODE_NAME_CASE(DIVUW) 20400 NODE_NAME_CASE(REMUW) 20401 NODE_NAME_CASE(ROLW) 20402 NODE_NAME_CASE(RORW) 20403 NODE_NAME_CASE(CLZW) 20404 NODE_NAME_CASE(CTZW) 20405 NODE_NAME_CASE(ABSW) 20406 NODE_NAME_CASE(FMV_H_X) 20407 NODE_NAME_CASE(FMV_X_ANYEXTH) 20408 NODE_NAME_CASE(FMV_X_SIGNEXTH) 20409 NODE_NAME_CASE(FMV_W_X_RV64) 20410 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 20411 NODE_NAME_CASE(FCVT_X) 20412 NODE_NAME_CASE(FCVT_XU) 20413 NODE_NAME_CASE(FCVT_W_RV64) 20414 NODE_NAME_CASE(FCVT_WU_RV64) 20415 NODE_NAME_CASE(STRICT_FCVT_W_RV64) 20416 NODE_NAME_CASE(STRICT_FCVT_WU_RV64) 20417 NODE_NAME_CASE(FP_ROUND_BF16) 20418 NODE_NAME_CASE(FP_EXTEND_BF16) 20419 NODE_NAME_CASE(FROUND) 20420 NODE_NAME_CASE(FCLASS) 20421 NODE_NAME_CASE(FMAX) 20422 NODE_NAME_CASE(FMIN) 20423 NODE_NAME_CASE(READ_COUNTER_WIDE) 20424 NODE_NAME_CASE(BREV8) 20425 NODE_NAME_CASE(ORC_B) 20426 NODE_NAME_CASE(ZIP) 20427 NODE_NAME_CASE(UNZIP) 20428 NODE_NAME_CASE(CLMUL) 20429 NODE_NAME_CASE(CLMULH) 20430 NODE_NAME_CASE(CLMULR) 20431 NODE_NAME_CASE(MOPR) 20432 NODE_NAME_CASE(MOPRR) 20433 NODE_NAME_CASE(SHA256SIG0) 20434 NODE_NAME_CASE(SHA256SIG1) 20435 NODE_NAME_CASE(SHA256SUM0) 20436 NODE_NAME_CASE(SHA256SUM1) 20437 NODE_NAME_CASE(SM4KS) 20438 NODE_NAME_CASE(SM4ED) 20439 NODE_NAME_CASE(SM3P0) 20440 NODE_NAME_CASE(SM3P1) 20441 NODE_NAME_CASE(TH_LWD) 20442 NODE_NAME_CASE(TH_LWUD) 20443 NODE_NAME_CASE(TH_LDD) 20444 NODE_NAME_CASE(TH_SWD) 20445 
NODE_NAME_CASE(TH_SDD) 20446 NODE_NAME_CASE(VMV_V_V_VL) 20447 NODE_NAME_CASE(VMV_V_X_VL) 20448 NODE_NAME_CASE(VFMV_V_F_VL) 20449 NODE_NAME_CASE(VMV_X_S) 20450 NODE_NAME_CASE(VMV_S_X_VL) 20451 NODE_NAME_CASE(VFMV_S_F_VL) 20452 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) 20453 NODE_NAME_CASE(READ_VLENB) 20454 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 20455 NODE_NAME_CASE(VSLIDEUP_VL) 20456 NODE_NAME_CASE(VSLIDE1UP_VL) 20457 NODE_NAME_CASE(VSLIDEDOWN_VL) 20458 NODE_NAME_CASE(VSLIDE1DOWN_VL) 20459 NODE_NAME_CASE(VFSLIDE1UP_VL) 20460 NODE_NAME_CASE(VFSLIDE1DOWN_VL) 20461 NODE_NAME_CASE(VID_VL) 20462 NODE_NAME_CASE(VFNCVT_ROD_VL) 20463 NODE_NAME_CASE(VECREDUCE_ADD_VL) 20464 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 20465 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 20466 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 20467 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 20468 NODE_NAME_CASE(VECREDUCE_AND_VL) 20469 NODE_NAME_CASE(VECREDUCE_OR_VL) 20470 NODE_NAME_CASE(VECREDUCE_XOR_VL) 20471 NODE_NAME_CASE(VECREDUCE_FADD_VL) 20472 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 20473 NODE_NAME_CASE(VECREDUCE_FMIN_VL) 20474 NODE_NAME_CASE(VECREDUCE_FMAX_VL) 20475 NODE_NAME_CASE(ADD_VL) 20476 NODE_NAME_CASE(AND_VL) 20477 NODE_NAME_CASE(MUL_VL) 20478 NODE_NAME_CASE(OR_VL) 20479 NODE_NAME_CASE(SDIV_VL) 20480 NODE_NAME_CASE(SHL_VL) 20481 NODE_NAME_CASE(SREM_VL) 20482 NODE_NAME_CASE(SRA_VL) 20483 NODE_NAME_CASE(SRL_VL) 20484 NODE_NAME_CASE(ROTL_VL) 20485 NODE_NAME_CASE(ROTR_VL) 20486 NODE_NAME_CASE(SUB_VL) 20487 NODE_NAME_CASE(UDIV_VL) 20488 NODE_NAME_CASE(UREM_VL) 20489 NODE_NAME_CASE(XOR_VL) 20490 NODE_NAME_CASE(AVGFLOORS_VL) 20491 NODE_NAME_CASE(AVGFLOORU_VL) 20492 NODE_NAME_CASE(AVGCEILS_VL) 20493 NODE_NAME_CASE(AVGCEILU_VL) 20494 NODE_NAME_CASE(SADDSAT_VL) 20495 NODE_NAME_CASE(UADDSAT_VL) 20496 NODE_NAME_CASE(SSUBSAT_VL) 20497 NODE_NAME_CASE(USUBSAT_VL) 20498 NODE_NAME_CASE(VNCLIP_VL) 20499 NODE_NAME_CASE(VNCLIPU_VL) 20500 NODE_NAME_CASE(FADD_VL) 20501 NODE_NAME_CASE(FSUB_VL) 20502 NODE_NAME_CASE(FMUL_VL) 20503 NODE_NAME_CASE(FDIV_VL) 20504 NODE_NAME_CASE(FNEG_VL) 20505 NODE_NAME_CASE(FABS_VL) 20506 NODE_NAME_CASE(FSQRT_VL) 20507 NODE_NAME_CASE(FCLASS_VL) 20508 NODE_NAME_CASE(VFMADD_VL) 20509 NODE_NAME_CASE(VFNMADD_VL) 20510 NODE_NAME_CASE(VFMSUB_VL) 20511 NODE_NAME_CASE(VFNMSUB_VL) 20512 NODE_NAME_CASE(VFWMADD_VL) 20513 NODE_NAME_CASE(VFWNMADD_VL) 20514 NODE_NAME_CASE(VFWMSUB_VL) 20515 NODE_NAME_CASE(VFWNMSUB_VL) 20516 NODE_NAME_CASE(FCOPYSIGN_VL) 20517 NODE_NAME_CASE(SMIN_VL) 20518 NODE_NAME_CASE(SMAX_VL) 20519 NODE_NAME_CASE(UMIN_VL) 20520 NODE_NAME_CASE(UMAX_VL) 20521 NODE_NAME_CASE(BITREVERSE_VL) 20522 NODE_NAME_CASE(BSWAP_VL) 20523 NODE_NAME_CASE(CTLZ_VL) 20524 NODE_NAME_CASE(CTTZ_VL) 20525 NODE_NAME_CASE(CTPOP_VL) 20526 NODE_NAME_CASE(VFMIN_VL) 20527 NODE_NAME_CASE(VFMAX_VL) 20528 NODE_NAME_CASE(MULHS_VL) 20529 NODE_NAME_CASE(MULHU_VL) 20530 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) 20531 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) 20532 NODE_NAME_CASE(VFCVT_RM_X_F_VL) 20533 NODE_NAME_CASE(VFCVT_RM_XU_F_VL) 20534 NODE_NAME_CASE(VFCVT_X_F_VL) 20535 NODE_NAME_CASE(VFCVT_XU_F_VL) 20536 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) 20537 NODE_NAME_CASE(SINT_TO_FP_VL) 20538 NODE_NAME_CASE(UINT_TO_FP_VL) 20539 NODE_NAME_CASE(VFCVT_RM_F_XU_VL) 20540 NODE_NAME_CASE(VFCVT_RM_F_X_VL) 20541 NODE_NAME_CASE(FP_EXTEND_VL) 20542 NODE_NAME_CASE(FP_ROUND_VL) 20543 NODE_NAME_CASE(STRICT_FADD_VL) 20544 NODE_NAME_CASE(STRICT_FSUB_VL) 20545 NODE_NAME_CASE(STRICT_FMUL_VL) 20546 NODE_NAME_CASE(STRICT_FDIV_VL) 20547 NODE_NAME_CASE(STRICT_FSQRT_VL) 20548 NODE_NAME_CASE(STRICT_VFMADD_VL) 20549 
NODE_NAME_CASE(STRICT_VFNMADD_VL) 20550 NODE_NAME_CASE(STRICT_VFMSUB_VL) 20551 NODE_NAME_CASE(STRICT_VFNMSUB_VL) 20552 NODE_NAME_CASE(STRICT_FP_ROUND_VL) 20553 NODE_NAME_CASE(STRICT_FP_EXTEND_VL) 20554 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) 20555 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) 20556 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) 20557 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) 20558 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) 20559 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) 20560 NODE_NAME_CASE(STRICT_FSETCC_VL) 20561 NODE_NAME_CASE(STRICT_FSETCCS_VL) 20562 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) 20563 NODE_NAME_CASE(VWMUL_VL) 20564 NODE_NAME_CASE(VWMULU_VL) 20565 NODE_NAME_CASE(VWMULSU_VL) 20566 NODE_NAME_CASE(VWADD_VL) 20567 NODE_NAME_CASE(VWADDU_VL) 20568 NODE_NAME_CASE(VWSUB_VL) 20569 NODE_NAME_CASE(VWSUBU_VL) 20570 NODE_NAME_CASE(VWADD_W_VL) 20571 NODE_NAME_CASE(VWADDU_W_VL) 20572 NODE_NAME_CASE(VWSUB_W_VL) 20573 NODE_NAME_CASE(VWSUBU_W_VL) 20574 NODE_NAME_CASE(VWSLL_VL) 20575 NODE_NAME_CASE(VFWMUL_VL) 20576 NODE_NAME_CASE(VFWADD_VL) 20577 NODE_NAME_CASE(VFWSUB_VL) 20578 NODE_NAME_CASE(VFWADD_W_VL) 20579 NODE_NAME_CASE(VFWSUB_W_VL) 20580 NODE_NAME_CASE(VWMACC_VL) 20581 NODE_NAME_CASE(VWMACCU_VL) 20582 NODE_NAME_CASE(VWMACCSU_VL) 20583 NODE_NAME_CASE(VNSRL_VL) 20584 NODE_NAME_CASE(SETCC_VL) 20585 NODE_NAME_CASE(VMERGE_VL) 20586 NODE_NAME_CASE(VMAND_VL) 20587 NODE_NAME_CASE(VMOR_VL) 20588 NODE_NAME_CASE(VMXOR_VL) 20589 NODE_NAME_CASE(VMCLR_VL) 20590 NODE_NAME_CASE(VMSET_VL) 20591 NODE_NAME_CASE(VRGATHER_VX_VL) 20592 NODE_NAME_CASE(VRGATHER_VV_VL) 20593 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 20594 NODE_NAME_CASE(VSEXT_VL) 20595 NODE_NAME_CASE(VZEXT_VL) 20596 NODE_NAME_CASE(VCPOP_VL) 20597 NODE_NAME_CASE(VFIRST_VL) 20598 NODE_NAME_CASE(READ_CSR) 20599 NODE_NAME_CASE(WRITE_CSR) 20600 NODE_NAME_CASE(SWAP_CSR) 20601 NODE_NAME_CASE(CZERO_EQZ) 20602 NODE_NAME_CASE(CZERO_NEZ) 20603 NODE_NAME_CASE(SW_GUARDED_BRIND) 20604 NODE_NAME_CASE(SF_VC_XV_SE) 20605 NODE_NAME_CASE(SF_VC_IV_SE) 20606 NODE_NAME_CASE(SF_VC_VV_SE) 20607 NODE_NAME_CASE(SF_VC_FV_SE) 20608 NODE_NAME_CASE(SF_VC_XVV_SE) 20609 NODE_NAME_CASE(SF_VC_IVV_SE) 20610 NODE_NAME_CASE(SF_VC_VVV_SE) 20611 NODE_NAME_CASE(SF_VC_FVV_SE) 20612 NODE_NAME_CASE(SF_VC_XVW_SE) 20613 NODE_NAME_CASE(SF_VC_IVW_SE) 20614 NODE_NAME_CASE(SF_VC_VVW_SE) 20615 NODE_NAME_CASE(SF_VC_FVW_SE) 20616 NODE_NAME_CASE(SF_VC_V_X_SE) 20617 NODE_NAME_CASE(SF_VC_V_I_SE) 20618 NODE_NAME_CASE(SF_VC_V_XV_SE) 20619 NODE_NAME_CASE(SF_VC_V_IV_SE) 20620 NODE_NAME_CASE(SF_VC_V_VV_SE) 20621 NODE_NAME_CASE(SF_VC_V_FV_SE) 20622 NODE_NAME_CASE(SF_VC_V_XVV_SE) 20623 NODE_NAME_CASE(SF_VC_V_IVV_SE) 20624 NODE_NAME_CASE(SF_VC_V_VVV_SE) 20625 NODE_NAME_CASE(SF_VC_V_FVV_SE) 20626 NODE_NAME_CASE(SF_VC_V_XVW_SE) 20627 NODE_NAME_CASE(SF_VC_V_IVW_SE) 20628 NODE_NAME_CASE(SF_VC_V_VVW_SE) 20629 NODE_NAME_CASE(SF_VC_V_FVW_SE) 20630 } 20631 // clang-format on 20632 return nullptr; 20633 #undef NODE_NAME_CASE 20634 } 20635 20636 /// getConstraintType - Given a constraint letter, return the type of 20637 /// constraint it is for this target. 
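/// For example (an illustrative snippet, not from this file):
///   asm volatile("addi %0, %1, %2" : "=r"(d) : "r"(s), "I"(17));
/// classifies the 'I' operand as C_Immediate, while 'f', 'vr' and 'vm' map to
/// C_RegisterClass and 'A' to C_Memory.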
20638 RISCVTargetLowering::ConstraintType 20639 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 20640 if (Constraint.size() == 1) { 20641 switch (Constraint[0]) { 20642 default: 20643 break; 20644 case 'f': 20645 return C_RegisterClass; 20646 case 'I': 20647 case 'J': 20648 case 'K': 20649 return C_Immediate; 20650 case 'A': 20651 return C_Memory; 20652 case 's': 20653 case 'S': // A symbolic address 20654 return C_Other; 20655 } 20656 } else { 20657 if (Constraint == "vr" || Constraint == "vm") 20658 return C_RegisterClass; 20659 } 20660 return TargetLowering::getConstraintType(Constraint); 20661 } 20662 20663 std::pair<unsigned, const TargetRegisterClass *> 20664 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 20665 StringRef Constraint, 20666 MVT VT) const { 20667 // First, see if this is a constraint that directly corresponds to a RISC-V 20668 // register class. 20669 if (Constraint.size() == 1) { 20670 switch (Constraint[0]) { 20671 case 'r': 20672 // TODO: Support fixed vectors up to XLen for P extension? 20673 if (VT.isVector()) 20674 break; 20675 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) 20676 return std::make_pair(0U, &RISCV::GPRF16RegClass); 20677 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) 20678 return std::make_pair(0U, &RISCV::GPRF32RegClass); 20679 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) 20680 return std::make_pair(0U, &RISCV::GPRPairRegClass); 20681 return std::make_pair(0U, &RISCV::GPRNoX0RegClass); 20682 case 'f': 20683 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) 20684 return std::make_pair(0U, &RISCV::FPR16RegClass); 20685 if (Subtarget.hasStdExtF() && VT == MVT::f32) 20686 return std::make_pair(0U, &RISCV::FPR32RegClass); 20687 if (Subtarget.hasStdExtD() && VT == MVT::f64) 20688 return std::make_pair(0U, &RISCV::FPR64RegClass); 20689 break; 20690 default: 20691 break; 20692 } 20693 } else if (Constraint == "vr") { 20694 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, 20695 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 20696 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 20697 return std::make_pair(0U, RC); 20698 } 20699 } else if (Constraint == "vm") { 20700 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) 20701 return std::make_pair(0U, &RISCV::VMV0RegClass); 20702 } 20703 20704 // Clang will correctly decode the usage of register name aliases into their 20705 // official names. However, other frontends like `rustc` do not. This allows 20706 // users of these frontends to use the ABI names for registers in LLVM-style 20707 // register constraints. 
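  // For example (an illustrative snippet): a frontend may emit
  //   asm ("mv %0, %1" : "={a0}"(Dst) : "{t0}"(Src));
  // and the "{a0}"/"{t0}" aliases below resolve to X10 and X5 respectively.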
20708 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 20709 .Case("{zero}", RISCV::X0) 20710 .Case("{ra}", RISCV::X1) 20711 .Case("{sp}", RISCV::X2) 20712 .Case("{gp}", RISCV::X3) 20713 .Case("{tp}", RISCV::X4) 20714 .Case("{t0}", RISCV::X5) 20715 .Case("{t1}", RISCV::X6) 20716 .Case("{t2}", RISCV::X7) 20717 .Cases("{s0}", "{fp}", RISCV::X8) 20718 .Case("{s1}", RISCV::X9) 20719 .Case("{a0}", RISCV::X10) 20720 .Case("{a1}", RISCV::X11) 20721 .Case("{a2}", RISCV::X12) 20722 .Case("{a3}", RISCV::X13) 20723 .Case("{a4}", RISCV::X14) 20724 .Case("{a5}", RISCV::X15) 20725 .Case("{a6}", RISCV::X16) 20726 .Case("{a7}", RISCV::X17) 20727 .Case("{s2}", RISCV::X18) 20728 .Case("{s3}", RISCV::X19) 20729 .Case("{s4}", RISCV::X20) 20730 .Case("{s5}", RISCV::X21) 20731 .Case("{s6}", RISCV::X22) 20732 .Case("{s7}", RISCV::X23) 20733 .Case("{s8}", RISCV::X24) 20734 .Case("{s9}", RISCV::X25) 20735 .Case("{s10}", RISCV::X26) 20736 .Case("{s11}", RISCV::X27) 20737 .Case("{t3}", RISCV::X28) 20738 .Case("{t4}", RISCV::X29) 20739 .Case("{t5}", RISCV::X30) 20740 .Case("{t6}", RISCV::X31) 20741 .Default(RISCV::NoRegister); 20742 if (XRegFromAlias != RISCV::NoRegister) 20743 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 20744 20745 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 20746 // TableGen record rather than the AsmName to choose registers for InlineAsm 20747 // constraints, plus we want to match those names to the widest floating point 20748 // register type available, manually select floating point registers here. 20749 // 20750 // The second case is the ABI name of the register, so that frontends can also 20751 // use the ABI names in register constraint lists. 20752 if (Subtarget.hasStdExtF()) { 20753 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 20754 .Cases("{f0}", "{ft0}", RISCV::F0_F) 20755 .Cases("{f1}", "{ft1}", RISCV::F1_F) 20756 .Cases("{f2}", "{ft2}", RISCV::F2_F) 20757 .Cases("{f3}", "{ft3}", RISCV::F3_F) 20758 .Cases("{f4}", "{ft4}", RISCV::F4_F) 20759 .Cases("{f5}", "{ft5}", RISCV::F5_F) 20760 .Cases("{f6}", "{ft6}", RISCV::F6_F) 20761 .Cases("{f7}", "{ft7}", RISCV::F7_F) 20762 .Cases("{f8}", "{fs0}", RISCV::F8_F) 20763 .Cases("{f9}", "{fs1}", RISCV::F9_F) 20764 .Cases("{f10}", "{fa0}", RISCV::F10_F) 20765 .Cases("{f11}", "{fa1}", RISCV::F11_F) 20766 .Cases("{f12}", "{fa2}", RISCV::F12_F) 20767 .Cases("{f13}", "{fa3}", RISCV::F13_F) 20768 .Cases("{f14}", "{fa4}", RISCV::F14_F) 20769 .Cases("{f15}", "{fa5}", RISCV::F15_F) 20770 .Cases("{f16}", "{fa6}", RISCV::F16_F) 20771 .Cases("{f17}", "{fa7}", RISCV::F17_F) 20772 .Cases("{f18}", "{fs2}", RISCV::F18_F) 20773 .Cases("{f19}", "{fs3}", RISCV::F19_F) 20774 .Cases("{f20}", "{fs4}", RISCV::F20_F) 20775 .Cases("{f21}", "{fs5}", RISCV::F21_F) 20776 .Cases("{f22}", "{fs6}", RISCV::F22_F) 20777 .Cases("{f23}", "{fs7}", RISCV::F23_F) 20778 .Cases("{f24}", "{fs8}", RISCV::F24_F) 20779 .Cases("{f25}", "{fs9}", RISCV::F25_F) 20780 .Cases("{f26}", "{fs10}", RISCV::F26_F) 20781 .Cases("{f27}", "{fs11}", RISCV::F27_F) 20782 .Cases("{f28}", "{ft8}", RISCV::F28_F) 20783 .Cases("{f29}", "{ft9}", RISCV::F29_F) 20784 .Cases("{f30}", "{ft10}", RISCV::F30_F) 20785 .Cases("{f31}", "{ft11}", RISCV::F31_F) 20786 .Default(RISCV::NoRegister); 20787 if (FReg != RISCV::NoRegister) { 20788 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 20789 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { 20790 unsigned RegNo = FReg - RISCV::F0_F; 20791 unsigned DReg = 
RISCV::F0_D + RegNo; 20792 return std::make_pair(DReg, &RISCV::FPR64RegClass); 20793 } 20794 if (VT == MVT::f32 || VT == MVT::Other) 20795 return std::make_pair(FReg, &RISCV::FPR32RegClass); 20796 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { 20797 unsigned RegNo = FReg - RISCV::F0_F; 20798 unsigned HReg = RISCV::F0_H + RegNo; 20799 return std::make_pair(HReg, &RISCV::FPR16RegClass); 20800 } 20801 } 20802 } 20803 20804 if (Subtarget.hasVInstructions()) { 20805 Register VReg = StringSwitch<Register>(Constraint.lower()) 20806 .Case("{v0}", RISCV::V0) 20807 .Case("{v1}", RISCV::V1) 20808 .Case("{v2}", RISCV::V2) 20809 .Case("{v3}", RISCV::V3) 20810 .Case("{v4}", RISCV::V4) 20811 .Case("{v5}", RISCV::V5) 20812 .Case("{v6}", RISCV::V6) 20813 .Case("{v7}", RISCV::V7) 20814 .Case("{v8}", RISCV::V8) 20815 .Case("{v9}", RISCV::V9) 20816 .Case("{v10}", RISCV::V10) 20817 .Case("{v11}", RISCV::V11) 20818 .Case("{v12}", RISCV::V12) 20819 .Case("{v13}", RISCV::V13) 20820 .Case("{v14}", RISCV::V14) 20821 .Case("{v15}", RISCV::V15) 20822 .Case("{v16}", RISCV::V16) 20823 .Case("{v17}", RISCV::V17) 20824 .Case("{v18}", RISCV::V18) 20825 .Case("{v19}", RISCV::V19) 20826 .Case("{v20}", RISCV::V20) 20827 .Case("{v21}", RISCV::V21) 20828 .Case("{v22}", RISCV::V22) 20829 .Case("{v23}", RISCV::V23) 20830 .Case("{v24}", RISCV::V24) 20831 .Case("{v25}", RISCV::V25) 20832 .Case("{v26}", RISCV::V26) 20833 .Case("{v27}", RISCV::V27) 20834 .Case("{v28}", RISCV::V28) 20835 .Case("{v29}", RISCV::V29) 20836 .Case("{v30}", RISCV::V30) 20837 .Case("{v31}", RISCV::V31) 20838 .Default(RISCV::NoRegister); 20839 if (VReg != RISCV::NoRegister) { 20840 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 20841 return std::make_pair(VReg, &RISCV::VMRegClass); 20842 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 20843 return std::make_pair(VReg, &RISCV::VRRegClass); 20844 for (const auto *RC : 20845 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 20846 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 20847 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 20848 return std::make_pair(VReg, RC); 20849 } 20850 } 20851 } 20852 } 20853 20854 std::pair<Register, const TargetRegisterClass *> Res = 20855 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 20856 20857 // If we picked one of the Zfinx register classes, remap it to the GPR class. 20858 // FIXME: When Zfinx is supported in CodeGen this will need to take the 20859 // Subtarget into account. 20860 if (Res.second == &RISCV::GPRF16RegClass || 20861 Res.second == &RISCV::GPRF32RegClass || 20862 Res.second == &RISCV::GPRPairRegClass) 20863 return std::make_pair(Res.first, &RISCV::GPRRegClass); 20864 20865 return Res; 20866 } 20867 20868 InlineAsm::ConstraintCode 20869 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 20870 // Currently only support length 1 constraints. 20871 if (ConstraintCode.size() == 1) { 20872 switch (ConstraintCode[0]) { 20873 case 'A': 20874 return InlineAsm::ConstraintCode::A; 20875 default: 20876 break; 20877 } 20878 } 20879 20880 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 20881 } 20882 20883 void RISCVTargetLowering::LowerAsmOperandForConstraint( 20884 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 20885 SelectionDAG &DAG) const { 20886 // Currently only support length 1 constraints. 
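  // A few examples of what is accepted below (illustrative): "I" takes a
  // 12-bit signed immediate in [-2048, 2047], "J" only the integer constant
  // zero, and "K" a 5-bit unsigned immediate in [0, 31].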
20887 if (Constraint.size() == 1) { 20888 switch (Constraint[0]) { 20889 case 'I': 20890 // Validate & create a 12-bit signed immediate operand. 20891 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 20892 uint64_t CVal = C->getSExtValue(); 20893 if (isInt<12>(CVal)) 20894 Ops.push_back( 20895 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 20896 } 20897 return; 20898 case 'J': 20899 // Validate & create an integer zero operand. 20900 if (isNullConstant(Op)) 20901 Ops.push_back( 20902 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 20903 return; 20904 case 'K': 20905 // Validate & create a 5-bit unsigned immediate operand. 20906 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 20907 uint64_t CVal = C->getZExtValue(); 20908 if (isUInt<5>(CVal)) 20909 Ops.push_back( 20910 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 20911 } 20912 return; 20913 case 'S': 20914 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG); 20915 return; 20916 default: 20917 break; 20918 } 20919 } 20920 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 20921 } 20922 20923 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, 20924 Instruction *Inst, 20925 AtomicOrdering Ord) const { 20926 if (Subtarget.hasStdExtZtso()) { 20927 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 20928 return Builder.CreateFence(Ord); 20929 return nullptr; 20930 } 20931 20932 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 20933 return Builder.CreateFence(Ord); 20934 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 20935 return Builder.CreateFence(AtomicOrdering::Release); 20936 return nullptr; 20937 } 20938 20939 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 20940 Instruction *Inst, 20941 AtomicOrdering Ord) const { 20942 if (Subtarget.hasStdExtZtso()) { 20943 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 20944 return Builder.CreateFence(Ord); 20945 return nullptr; 20946 } 20947 20948 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 20949 return Builder.CreateFence(AtomicOrdering::Acquire); 20950 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) && 20951 Ord == AtomicOrdering::SequentiallyConsistent) 20952 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); 20953 return nullptr; 20954 } 20955 20956 TargetLowering::AtomicExpansionKind 20957 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 20958 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 20959 // point operations can't be used in an lr/sc sequence without breaking the 20960 // forward-progress guarantee. 20961 if (AI->isFloatingPointOperation() || 20962 AI->getOperation() == AtomicRMWInst::UIncWrap || 20963 AI->getOperation() == AtomicRMWInst::UDecWrap) 20964 return AtomicExpansionKind::CmpXChg; 20965 20966 // Don't expand forced atomics, we want to have __sync libcalls instead. 
20967 if (Subtarget.hasForcedAtomics()) 20968 return AtomicExpansionKind::None; 20969 20970 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 20971 if (AI->getOperation() == AtomicRMWInst::Nand) { 20972 if (Subtarget.hasStdExtZacas() && 20973 (Size >= 32 || Subtarget.hasStdExtZabha())) 20974 return AtomicExpansionKind::CmpXChg; 20975 if (Size < 32) 20976 return AtomicExpansionKind::MaskedIntrinsic; 20977 } 20978 20979 if (Size < 32 && !Subtarget.hasStdExtZabha()) 20980 return AtomicExpansionKind::MaskedIntrinsic; 20981 20982 return AtomicExpansionKind::None; 20983 } 20984 20985 static Intrinsic::ID 20986 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 20987 if (XLen == 32) { 20988 switch (BinOp) { 20989 default: 20990 llvm_unreachable("Unexpected AtomicRMW BinOp"); 20991 case AtomicRMWInst::Xchg: 20992 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 20993 case AtomicRMWInst::Add: 20994 return Intrinsic::riscv_masked_atomicrmw_add_i32; 20995 case AtomicRMWInst::Sub: 20996 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 20997 case AtomicRMWInst::Nand: 20998 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 20999 case AtomicRMWInst::Max: 21000 return Intrinsic::riscv_masked_atomicrmw_max_i32; 21001 case AtomicRMWInst::Min: 21002 return Intrinsic::riscv_masked_atomicrmw_min_i32; 21003 case AtomicRMWInst::UMax: 21004 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 21005 case AtomicRMWInst::UMin: 21006 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 21007 } 21008 } 21009 21010 if (XLen == 64) { 21011 switch (BinOp) { 21012 default: 21013 llvm_unreachable("Unexpected AtomicRMW BinOp"); 21014 case AtomicRMWInst::Xchg: 21015 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 21016 case AtomicRMWInst::Add: 21017 return Intrinsic::riscv_masked_atomicrmw_add_i64; 21018 case AtomicRMWInst::Sub: 21019 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 21020 case AtomicRMWInst::Nand: 21021 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 21022 case AtomicRMWInst::Max: 21023 return Intrinsic::riscv_masked_atomicrmw_max_i64; 21024 case AtomicRMWInst::Min: 21025 return Intrinsic::riscv_masked_atomicrmw_min_i64; 21026 case AtomicRMWInst::UMax: 21027 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 21028 case AtomicRMWInst::UMin: 21029 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 21030 } 21031 } 21032 21033 llvm_unreachable("Unexpected XLen\n"); 21034 } 21035 21036 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 21037 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 21038 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 21039 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 21040 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 21041 // mask, as this produces better code than the LR/SC loop emitted by 21042 // int_riscv_masked_atomicrmw_xchg. 
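  // For example (a sketch):
  //   %old = atomicrmw xchg ptr %p, i8 0 monotonic
  // can clear the addressed byte with an "atomicrmw and" on the aligned word
  // using the inverted mask, avoiding the LR/SC loop entirely.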
21043 if (AI->getOperation() == AtomicRMWInst::Xchg && 21044 isa<ConstantInt>(AI->getValOperand())) { 21045 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 21046 if (CVal->isZero()) 21047 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 21048 Builder.CreateNot(Mask, "Inv_Mask"), 21049 AI->getAlign(), Ord); 21050 if (CVal->isMinusOne()) 21051 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 21052 AI->getAlign(), Ord); 21053 } 21054 21055 unsigned XLen = Subtarget.getXLen(); 21056 Value *Ordering = 21057 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 21058 Type *Tys[] = {AlignedAddr->getType()}; 21059 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 21060 AI->getModule(), 21061 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 21062 21063 if (XLen == 64) { 21064 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 21065 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 21066 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 21067 } 21068 21069 Value *Result; 21070 21071 // Must pass the shift amount needed to sign extend the loaded value prior 21072 // to performing a signed comparison for min/max. ShiftAmt is the number of 21073 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 21074 // is the number of bits to left+right shift the value in order to 21075 // sign-extend. 21076 if (AI->getOperation() == AtomicRMWInst::Min || 21077 AI->getOperation() == AtomicRMWInst::Max) { 21078 const DataLayout &DL = AI->getDataLayout(); 21079 unsigned ValWidth = 21080 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 21081 Value *SextShamt = 21082 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 21083 Result = Builder.CreateCall(LrwOpScwLoop, 21084 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 21085 } else { 21086 Result = 21087 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 21088 } 21089 21090 if (XLen == 64) 21091 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 21092 return Result; 21093 } 21094 21095 TargetLowering::AtomicExpansionKind 21096 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 21097 AtomicCmpXchgInst *CI) const { 21098 // Don't expand forced atomics, we want to have __sync libcalls instead. 
21099 if (Subtarget.hasForcedAtomics()) 21100 return AtomicExpansionKind::None; 21101 21102 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 21103 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) && 21104 (Size == 8 || Size == 16)) 21105 return AtomicExpansionKind::MaskedIntrinsic; 21106 return AtomicExpansionKind::None; 21107 } 21108 21109 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 21110 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 21111 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 21112 unsigned XLen = Subtarget.getXLen(); 21113 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 21114 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 21115 if (XLen == 64) { 21116 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 21117 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 21118 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 21119 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 21120 } 21121 Type *Tys[] = {AlignedAddr->getType()}; 21122 Function *MaskedCmpXchg = 21123 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 21124 Value *Result = Builder.CreateCall( 21125 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 21126 if (XLen == 64) 21127 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 21128 return Result; 21129 } 21130 21131 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, 21132 EVT DataVT) const { 21133 // We have indexed loads for all supported EEW types. Indices are always 21134 // zero extended. 21135 return Extend.getOpcode() == ISD::ZERO_EXTEND && 21136 isTypeLegal(Extend.getValueType()) && 21137 isTypeLegal(Extend.getOperand(0).getValueType()) && 21138 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1; 21139 } 21140 21141 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, 21142 EVT VT) const { 21143 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) 21144 return false; 21145 21146 switch (FPVT.getSimpleVT().SimpleTy) { 21147 case MVT::f16: 21148 return Subtarget.hasStdExtZfhmin(); 21149 case MVT::f32: 21150 return Subtarget.hasStdExtF(); 21151 case MVT::f64: 21152 return Subtarget.hasStdExtD(); 21153 default: 21154 return false; 21155 } 21156 } 21157 21158 unsigned RISCVTargetLowering::getJumpTableEncoding() const { 21159 // If we are using the small code model, we can reduce size of jump table 21160 // entry to 4 bytes. 21161 if (Subtarget.is64Bit() && !isPositionIndependent() && 21162 getTargetMachine().getCodeModel() == CodeModel::Small) { 21163 return MachineJumpTableInfo::EK_Custom32; 21164 } 21165 return TargetLowering::getJumpTableEncoding(); 21166 } 21167 21168 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( 21169 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, 21170 unsigned uid, MCContext &Ctx) const { 21171 assert(Subtarget.is64Bit() && !isPositionIndependent() && 21172 getTargetMachine().getCodeModel() == CodeModel::Small); 21173 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); 21174 } 21175 21176 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { 21177 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 21178 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 21179 // a power of two as well. 21180 // FIXME: This doesn't work for zve32, but that's already broken 21181 // elsewhere for the same reason. 
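  // Worked example: with VLEN = 128, vscale = 128 / RVVBitsPerBlock = 2; with
  // VLEN = 256, vscale = 4. Both are powers of two, as required here.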
21182 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); 21183 static_assert(RISCV::RVVBitsPerBlock == 64, 21184 "RVVBitsPerBlock changed, audit needed"); 21185 return true; 21186 } 21187 21188 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, 21189 SDValue &Offset, 21190 ISD::MemIndexedMode &AM, 21191 SelectionDAG &DAG) const { 21192 // Target does not support indexed loads. 21193 if (!Subtarget.hasVendorXTHeadMemIdx()) 21194 return false; 21195 21196 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) 21197 return false; 21198 21199 Base = Op->getOperand(0); 21200 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { 21201 int64_t RHSC = RHS->getSExtValue(); 21202 if (Op->getOpcode() == ISD::SUB) 21203 RHSC = -(uint64_t)RHSC; 21204 21205 // The constants that can be encoded in the THeadMemIdx instructions 21206 // are of the form (sign_extend(imm5) << imm2). 21207 bool isLegalIndexedOffset = false; 21208 for (unsigned i = 0; i < 4; i++) 21209 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { 21210 isLegalIndexedOffset = true; 21211 break; 21212 } 21213 21214 if (!isLegalIndexedOffset) 21215 return false; 21216 21217 Offset = Op->getOperand(1); 21218 return true; 21219 } 21220 21221 return false; 21222 } 21223 21224 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 21225 SDValue &Offset, 21226 ISD::MemIndexedMode &AM, 21227 SelectionDAG &DAG) const { 21228 EVT VT; 21229 SDValue Ptr; 21230 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 21231 VT = LD->getMemoryVT(); 21232 Ptr = LD->getBasePtr(); 21233 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 21234 VT = ST->getMemoryVT(); 21235 Ptr = ST->getBasePtr(); 21236 } else 21237 return false; 21238 21239 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG)) 21240 return false; 21241 21242 AM = ISD::PRE_INC; 21243 return true; 21244 } 21245 21246 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 21247 SDValue &Base, 21248 SDValue &Offset, 21249 ISD::MemIndexedMode &AM, 21250 SelectionDAG &DAG) const { 21251 if (Subtarget.hasVendorXCVmem()) { 21252 if (Op->getOpcode() != ISD::ADD) 21253 return false; 21254 21255 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) 21256 Base = LS->getBasePtr(); 21257 else 21258 return false; 21259 21260 if (Base == Op->getOperand(0)) 21261 Offset = Op->getOperand(1); 21262 else if (Base == Op->getOperand(1)) 21263 Offset = Op->getOperand(0); 21264 else 21265 return false; 21266 21267 AM = ISD::POST_INC; 21268 return true; 21269 } 21270 21271 EVT VT; 21272 SDValue Ptr; 21273 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 21274 VT = LD->getMemoryVT(); 21275 Ptr = LD->getBasePtr(); 21276 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 21277 VT = ST->getMemoryVT(); 21278 Ptr = ST->getBasePtr(); 21279 } else 21280 return false; 21281 21282 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) 21283 return false; 21284 // Post-indexing updates the base, so it's not a valid transform 21285 // if that's not the same as the load's pointer. 21286 if (Ptr != Base) 21287 return false; 21288 21289 AM = ISD::POST_INC; 21290 return true; 21291 } 21292 21293 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 21294 EVT VT) const { 21295 EVT SVT = VT.getScalarType(); 21296 21297 if (!SVT.isSimple()) 21298 return false; 21299 21300 switch (SVT.getSimpleVT().SimpleTy) { 21301 case MVT::f16: 21302 return VT.isVector() ? 
Subtarget.hasVInstructionsF16() 21303 : Subtarget.hasStdExtZfhOrZhinx(); 21304 case MVT::f32: 21305 return Subtarget.hasStdExtFOrZfinx(); 21306 case MVT::f64: 21307 return Subtarget.hasStdExtDOrZdinx(); 21308 default: 21309 break; 21310 } 21311 21312 return false; 21313 } 21314 21315 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { 21316 // Zacas will use amocas.w which does not require extension. 21317 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; 21318 } 21319 21320 Register RISCVTargetLowering::getExceptionPointerRegister( 21321 const Constant *PersonalityFn) const { 21322 return RISCV::X10; 21323 } 21324 21325 Register RISCVTargetLowering::getExceptionSelectorRegister( 21326 const Constant *PersonalityFn) const { 21327 return RISCV::X11; 21328 } 21329 21330 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 21331 // Return false to suppress the unnecessary extensions if the LibCall 21332 // arguments or return value is a float narrower than XLEN on a soft FP ABI. 21333 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && 21334 Type.getSizeInBits() < Subtarget.getXLen())) 21335 return false; 21336 21337 return true; 21338 } 21339 21340 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 21341 if (Subtarget.is64Bit() && Type == MVT::i32) 21342 return true; 21343 21344 return IsSigned; 21345 } 21346 21347 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 21348 SDValue C) const { 21349 // Check integral scalar types. 21350 const bool HasZmmul = Subtarget.hasStdExtZmmul(); 21351 if (!VT.isScalarInteger()) 21352 return false; 21353 21354 // Omit the optimization if the sub target has the M extension and the data 21355 // size exceeds XLen. 21356 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen()) 21357 return false; 21358 21359 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 21360 // Break the MUL to a SLLI and an ADD/SUB. 21361 const APInt &Imm = ConstNode->getAPIntValue(); 21362 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 21363 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 21364 return true; 21365 21366 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 21367 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && 21368 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 21369 (Imm - 8).isPowerOf2())) 21370 return true; 21371 21372 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 21373 // a pair of LUI/ADDI. 21374 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 && 21375 ConstNode->hasOneUse()) { 21376 APInt ImmS = Imm.ashr(Imm.countr_zero()); 21377 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 21378 (1 - ImmS).isPowerOf2()) 21379 return true; 21380 } 21381 } 21382 21383 return false; 21384 } 21385 21386 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, 21387 SDValue ConstNode) const { 21388 // Let the DAGCombiner decide for vectors. 21389 EVT VT = AddNode.getValueType(); 21390 if (VT.isVector()) 21391 return true; 21392 21393 // Let the DAGCombiner decide for larger types. 21394 if (VT.getScalarSizeInBits() > Subtarget.getXLen()) 21395 return true; 21396 21397 // It is worse if c1 is simm12 while c1*c2 is not. 
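  // For example: c1 = 2047 fits in a single addi, but with c2 = 3 the folded
  // constant c1 * c2 = 6141 is no longer simm12 and would have to be
  // materialized separately, so folding the add into the multiply is avoided.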
21398 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); 21399 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); 21400 const APInt &C1 = C1Node->getAPIntValue(); 21401 const APInt &C2 = C2Node->getAPIntValue(); 21402 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) 21403 return false; 21404 21405 // Default to true and let the DAGCombiner decide. 21406 return true; 21407 } 21408 21409 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 21410 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 21411 unsigned *Fast) const { 21412 if (!VT.isVector()) { 21413 if (Fast) 21414 *Fast = Subtarget.enableUnalignedScalarMem(); 21415 return Subtarget.enableUnalignedScalarMem(); 21416 } 21417 21418 // All vector implementations must support element alignment 21419 EVT ElemVT = VT.getVectorElementType(); 21420 if (Alignment >= ElemVT.getStoreSize()) { 21421 if (Fast) 21422 *Fast = 1; 21423 return true; 21424 } 21425 21426 // Note: We lower an unmasked unaligned vector access to an equally sized 21427 // e8 element type access. Given this, we effectively support all unmasked 21428 // misaligned accesses. TODO: Work through the codegen implications of 21429 // allowing such accesses to be formed, and considered fast. 21430 if (Fast) 21431 *Fast = Subtarget.enableUnalignedVectorMem(); 21432 return Subtarget.enableUnalignedVectorMem(); 21433 } 21434 21435 21436 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, 21437 const AttributeList &FuncAttributes) const { 21438 if (!Subtarget.hasVInstructions()) 21439 return MVT::Other; 21440 21441 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) 21442 return MVT::Other; 21443 21444 // We use LMUL1 memory operations here for a non-obvious reason. Our caller 21445 // has an expansion threshold, and we want the number of hardware memory 21446 // operations to correspond roughly to that threshold. LMUL>1 operations 21447 // are typically expanded linearly internally, and thus correspond to more 21448 // than one actual memory operation. Note that store merging and load 21449 // combining will typically form larger LMUL operations from the LMUL1 21450 // operations emitted here, and that's okay because combining isn't 21451 // introducing new memory operations; it's just merging existing ones. 21452 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; 21453 if (Op.size() < MinVLenInBytes) 21454 // TODO: Figure out short memops. For the moment, do the default thing 21455 // which ends up using scalar sequences. 21456 return MVT::Other; 21457 21458 // Prefer i8 for non-zero memset as it allows us to avoid materializing 21459 // a large scalar constant and instead use vmv.v.x/i to do the 21460 // broadcast. For everything else, prefer ELenVT to minimize VL and thus 21461 // maximize the chance we can encode the size in the vsetvli. 21462 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen()); 21463 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; 21464 21465 // Do we have sufficient alignment for our preferred VT? If not, revert 21466 // to largest size allowed by our alignment criteria. 
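  // For example (assuming ELEN = 64): an 8-byte-aligned memcpy keeps the i64
  // element type, but if the destination or source is only 4-byte aligned the
  // code below clamps the element type to i32 (and to i8 for 1-byte alignment).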
  if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type
      // of PartVT first.
      // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>
      // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
      // insert_subvector, and can then bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of elements should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
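    // This is the inverse of the boxing in splitValueIntoRegisterParts above;
    // e.g. (illustrative) an f16 passed as the f32 bit pattern 0xFFFF3C00
    // comes back as the i16 pattern 0x3C00, i.e. 1.0 in half precision.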
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16> we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8>, and can then extract the <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing a splatted zero_ext or sign_ext can prevent it from matching
  // a widening instruction in some situations, so keep those as splats.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  // Android provides a fixed TLS slot for the stack cookie.
See the definition 21611 // of TLS_SLOT_STACK_GUARD in 21612 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h 21613 if (Subtarget.isTargetAndroid()) 21614 return useTpOffset(IRB, -0x18); 21615 21616 return TargetLowering::getIRStackGuard(IRB); 21617 } 21618 21619 bool RISCVTargetLowering::isLegalInterleavedAccessType( 21620 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, 21621 const DataLayout &DL) const { 21622 EVT VT = getValueType(DL, VTy); 21623 // Don't lower vlseg/vsseg for vector types that can't be split. 21624 if (!isTypeLegal(VT)) 21625 return false; 21626 21627 if (!isLegalElementTypeForRVV(VT.getScalarType()) || 21628 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, 21629 Alignment)) 21630 return false; 21631 21632 MVT ContainerVT = VT.getSimpleVT(); 21633 21634 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 21635 if (!Subtarget.useRVVForFixedLengthVectors()) 21636 return false; 21637 // Sometimes the interleaved access pass picks up splats as interleaves of 21638 // one element. Don't lower these. 21639 if (FVTy->getNumElements() < 2) 21640 return false; 21641 21642 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT()); 21643 } else { 21644 // The intrinsics for scalable vectors are not overloaded on pointer type 21645 // and can only handle the default address space. 21646 if (AddrSpace) 21647 return false; 21648 } 21649 21650 // Need to make sure that EMUL * NFIELDS ≤ 8 21651 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT)); 21652 if (Fractional) 21653 return true; 21654 return Factor * LMUL <= 8; 21655 } 21656 21657 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, 21658 Align Alignment) const { 21659 if (!Subtarget.hasVInstructions()) 21660 return false; 21661 21662 // Only support fixed vectors if we know the minimum vector size. 21663 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors()) 21664 return false; 21665 21666 EVT ScalarType = DataType.getScalarType(); 21667 if (!isLegalElementTypeForRVV(ScalarType)) 21668 return false; 21669 21670 if (!Subtarget.enableUnalignedVectorMem() && 21671 Alignment < ScalarType.getStoreSize()) 21672 return false; 21673 21674 return true; 21675 } 21676 21677 static const Intrinsic::ID FixedVlsegIntrIds[] = { 21678 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, 21679 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, 21680 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, 21681 Intrinsic::riscv_seg8_load}; 21682 21683 /// Lower an interleaved load into a vlsegN intrinsic. 21684 /// 21685 /// E.g. 
Lower an interleaved load (Factor = 2): 21686 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr 21687 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements 21688 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements 21689 /// 21690 /// Into: 21691 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( 21692 /// %ptr, i64 4) 21693 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 21694 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 21695 bool RISCVTargetLowering::lowerInterleavedLoad( 21696 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, 21697 ArrayRef<unsigned> Indices, unsigned Factor) const { 21698 IRBuilder<> Builder(LI); 21699 21700 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); 21701 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), 21702 LI->getPointerAddressSpace(), 21703 LI->getDataLayout())) 21704 return false; 21705 21706 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 21707 21708 Function *VlsegNFunc = 21709 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2], 21710 {VTy, LI->getPointerOperandType(), XLenTy}); 21711 21712 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 21713 21714 CallInst *VlsegN = 21715 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); 21716 21717 for (unsigned i = 0; i < Shuffles.size(); i++) { 21718 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); 21719 Shuffles[i]->replaceAllUsesWith(SubVec); 21720 } 21721 21722 return true; 21723 } 21724 21725 static const Intrinsic::ID FixedVssegIntrIds[] = { 21726 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, 21727 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, 21728 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, 21729 Intrinsic::riscv_seg8_store}; 21730 21731 /// Lower an interleaved store into a vssegN intrinsic. 21732 /// 21733 /// E.g. Lower an interleaved store (Factor = 3): 21734 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 21735 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 21736 /// store <12 x i32> %i.vec, <12 x i32>* %ptr 21737 /// 21738 /// Into: 21739 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> 21740 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> 21741 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> 21742 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, 21743 /// %ptr, i32 4) 21744 /// 21745 /// Note that the new shufflevectors will be removed and we'll only generate one 21746 /// vsseg3 instruction in CodeGen. 
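/// Only factors with an entry in FixedVssegIntrIds (2 through 8) can be
/// lowered this way; per-factor type legality is checked via
/// isLegalInterleavedAccessType.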
21747 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, 21748 ShuffleVectorInst *SVI, 21749 unsigned Factor) const { 21750 IRBuilder<> Builder(SI); 21751 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); 21752 // Given SVI : <n*factor x ty>, then VTy : <n x ty> 21753 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), 21754 ShuffleVTy->getNumElements() / Factor); 21755 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), 21756 SI->getPointerAddressSpace(), 21757 SI->getDataLayout())) 21758 return false; 21759 21760 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 21761 21762 Function *VssegNFunc = 21763 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2], 21764 {VTy, SI->getPointerOperandType(), XLenTy}); 21765 21766 auto Mask = SVI->getShuffleMask(); 21767 SmallVector<Value *, 10> Ops; 21768 21769 for (unsigned i = 0; i < Factor; i++) { 21770 Value *Shuffle = Builder.CreateShuffleVector( 21771 SVI->getOperand(0), SVI->getOperand(1), 21772 createSequentialMask(Mask[i], VTy->getNumElements(), 0)); 21773 Ops.push_back(Shuffle); 21774 } 21775 // This VL should be OK (should be executable in one vsseg instruction, 21776 // potentially under larger LMULs) because we checked that the fixed vector 21777 // type fits in isLegalInterleavedAccessType 21778 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 21779 Ops.append({SI->getPointerOperand(), VL}); 21780 21781 Builder.CreateCall(VssegNFunc, Ops); 21782 21783 return true; 21784 } 21785 21786 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, 21787 LoadInst *LI) const { 21788 assert(LI->isSimple()); 21789 IRBuilder<> Builder(LI); 21790 21791 // Only deinterleave2 supported at present. 
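  // A sketch of the pattern handled here:
  //   %wide = load <vscale x 8 x i32>, ptr %p
  //   %deint = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
  //       @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide)
  // which is rewritten into a single riscv.vlseg2 segment load that produces
  // the two result halves directly.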
21792 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2) 21793 return false; 21794 21795 unsigned Factor = 2; 21796 21797 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType()); 21798 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0)); 21799 21800 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), 21801 LI->getPointerAddressSpace(), 21802 LI->getDataLayout())) 21803 return false; 21804 21805 Function *VlsegNFunc; 21806 Value *VL; 21807 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 21808 SmallVector<Value *, 10> Ops; 21809 21810 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 21811 VlsegNFunc = Intrinsic::getDeclaration( 21812 LI->getModule(), FixedVlsegIntrIds[Factor - 2], 21813 {ResVTy, LI->getPointerOperandType(), XLenTy}); 21814 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 21815 } else { 21816 static const Intrinsic::ID IntrIds[] = { 21817 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 21818 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 21819 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 21820 Intrinsic::riscv_vlseg8}; 21821 21822 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], 21823 {ResVTy, XLenTy}); 21824 VL = Constant::getAllOnesValue(XLenTy); 21825 Ops.append(Factor, PoisonValue::get(ResVTy)); 21826 } 21827 21828 Ops.append({LI->getPointerOperand(), VL}); 21829 21830 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); 21831 DI->replaceAllUsesWith(Vlseg); 21832 21833 return true; 21834 } 21835 21836 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, 21837 StoreInst *SI) const { 21838 assert(SI->isSimple()); 21839 IRBuilder<> Builder(SI); 21840 21841 // Only interleave2 supported at present. 
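  // This is the store-side mirror of the deinterleave2 handling above
  // (a sketch): a vector.interleave2 feeding a simple store is rewritten into
  // a riscv.vsseg2 segment store of the two input operands.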
  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}

MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);

  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 is the default value and works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {

  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);
  return TargetFlags;
}

bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}

bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}

bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {

  // GISel support is in progress or complete for these opcodes.
  unsigned Op = Inst.getOpcode();
  if (Op == Instruction::Add || Op == Instruction::Sub ||
      Op == Instruction::And || Op == Instruction::Or ||
      Op == Instruction::Xor || Op == Instruction::InsertElement ||
      Op == Instruction::ShuffleVector || Op == Instruction::Load ||
      Op == Instruction::Freeze || Op == Instruction::Store)
    return false;

  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy() &&
        !isa<ReturnInst>(&Inst))
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}

SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
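  // (addi/addiw only take a 12-bit signed immediate, so the rounding
  // adjustment 2**k-1 must be at most 2047. For example, a divisor of 8 needs
  // the constant 7, which fits, while a divisor of 4096 would need 4095,
  // which does not.)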
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();
  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}

bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}

unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

// Handle a single argument, such as a return value.
template <typename Arg>
void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
  // This lambda determines whether the argument is made up of homogeneous
  // scalable vector types, i.e. every part has the same scalable vector MVT
  // and none of them needs to be split.
  auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
    // First, look at the first element of the argument type. Return false if
    // there is no element (e.g. a void return) or that element needs to be
    // split.
    auto It = ArgList.begin();
    if (It == ArgList.end() || It->Flags.isSplit())
      return false;

    MVT FirstArgRegType = It->VT;
    ++It;

    // Return false if this argument type contains only one element, or if it
    // is not a scalable vector type.
    if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
      return false;

    // Second, check that the remaining elements of this argument type are all
    // the same as the first one.
    for (; It != ArgList.end(); ++It)
      if (It->Flags.isSplit() || It->VT != FirstArgRegType)
        return false;

    return true;
  };

  if (isHomogeneousScalableVectorType(ArgList)) {
    // Handle as tuple type
    RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
  } else {
    // Handle as normal vector type
    bool FirstVMaskAssigned = false;
    for (const auto &OutArg : ArgList) {
      MVT RegisterVT = OutArg.VT;

      // Skip non-RVV register type
      if (!RegisterVT.isVector())
        continue;

      if (RegisterVT.isFixedLengthVector())
        RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

      if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
        RVVArgInfos.push_back({1, RegisterVT, true});
        FirstVMaskAssigned = true;
        continue;
      }

      RVVArgInfos.push_back({1, RegisterVT, false});
    }
  }
}

// Handle multiple args.
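// For a hypothetical signature such as
//   void f(<vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i64 %x)
// this specialization would (roughly, assuming both vector types are already
// legal register types) record {1, nxv4i32, false} for %v and
// {1, nxv4i1, true} for the first mask %m, while %x is skipped because its
// register type is not a vector.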
template <>
void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
  const DataLayout &DL = MF->getDataLayout();
  const Function &F = MF->getFunction();
  LLVMContext &Context = F.getContext();

  bool FirstVMaskAssigned = false;
  for (Type *Ty : TypeList) {
    StructType *STy = dyn_cast<StructType>(Ty);
    if (STy && STy->containsHomogeneousScalableVectorTypes()) {
      Type *ElemTy = STy->getTypeAtIndex(0U);
      EVT VT = TLI->getValueType(DL, ElemTy);
      MVT RegisterVT =
          TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
      unsigned NumRegs =
          TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

      RVVArgInfos.push_back(
          {NumRegs * STy->getNumElements(), RegisterVT, false});
    } else {
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*TLI, DL, Ty, ValueVTs);

      for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
           ++Value) {
        EVT VT = ValueVTs[Value];
        MVT RegisterVT =
            TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
        unsigned NumRegs =
            TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

        // Skip non-RVV register type
        if (!RegisterVT.isVector())
          continue;

        if (RegisterVT.isFixedLengthVector())
          RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

        if (!FirstVMaskAssigned &&
            RegisterVT.getVectorElementType() == MVT::i1) {
          RVVArgInfos.push_back({1, RegisterVT, true});
          FirstVMaskAssigned = true;
          --NumRegs;
        }

        RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
      }
    }
  }
}

void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
                                       unsigned StartReg) {
  assert((StartReg % LMul) == 0 &&
         "Start register number should be a multiple of lmul");
  const MCPhysReg *VRArrays;
  switch (LMul) {
  default:
    report_fatal_error("Invalid lmul");
  case 1:
    VRArrays = ArgVRs;
    break;
  case 2:
    VRArrays = ArgVRM2s;
    break;
  case 4:
    VRArrays = ArgVRM4s;
    break;
  case 8:
    VRArrays = ArgVRM8s;
    break;
  }

  for (unsigned i = 0; i < NF; ++i)
    if (StartReg)
      AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
    else
      AllocatedPhysRegs.push_back(MCPhysReg());
}

/// This function determines whether each RVV argument is passed by register.
/// If the argument can be assigned to a VR, give it a specific register;
/// otherwise, assign it 0, which is an invalid MCPhysReg.
void RVVArgDispatcher::compute() {
  uint32_t AssignedMap = 0;
  auto allocate = [&](const RVVArgInfo &ArgInfo) {
    // Allocate the first vector mask argument to V0.
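    // Other arguments are tried at each suitably aligned offset into the
    // argument VRs (v8 upward): e.g. (illustrative) an LMUL-2 argument whose
    // first candidate register pair is already marked in AssignedMap is
    // retried at the next aligned pair, and an argument that fits nowhere is
    // recorded as 0 (an invalid MCPhysReg) by calling allocatePhysReg with a
    // start register of 0.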
    if (ArgInfo.FirstVMask) {
      AllocatedPhysRegs.push_back(RISCV::V0);
      return;
    }

    unsigned RegsNeeded = divideCeil(
        ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
    unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
    for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
         StartReg += RegsNeeded) {
      uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
      if ((AssignedMap & Map) == 0) {
        allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
        AssignedMap |= Map;
        return;
      }
    }

    allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
  };

  for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
    allocate(RVVArgInfos[i]);
}

MCPhysReg RVVArgDispatcher::getNextPhysReg() {
  assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
  return AllocatedPhysRegs[CurIdx++];
}

SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
                                                    SDValue Value, SDValue Addr,
                                                    int JTI,
                                                    SelectionDAG &DAG) const {
  if (Subtarget.hasStdExtZicfilp()) {
    // When Zicfilp is enabled, we need to use a software-guarded branch for
    // the jump table branch.
    SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
    return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
                       Addr);
  }
  return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable