//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
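  // Scalar integer values live in GPRs as XLenVT; each scalar FP type is only
  // made legal when its extension (Zfh, F, D) is present. For RVV below, types
  // with LMUL of 2, 4 or 8 are assigned to the VRM2/VRM4/VRM8 register-group
  // classes, while LMUL <= 1 types and mask types use the plain VR class.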
84 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 85 86 if (Subtarget.hasStdExtZfh()) 87 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 88 if (Subtarget.hasStdExtF()) 89 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 90 if (Subtarget.hasStdExtD()) 91 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 92 93 if (Subtarget.hasStdExtV()) { 94 addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass); 95 addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass); 96 addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass); 97 addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass); 98 addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass); 99 addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass); 100 addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass); 101 102 addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass); 103 addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass); 104 addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass); 105 addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass); 106 addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass); 107 addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass); 108 addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass); 109 110 addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass); 111 addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass); 112 addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass); 113 addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass); 114 addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass); 115 addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass); 116 117 addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass); 118 addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass); 119 addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass); 120 addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass); 121 addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass); 122 123 addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass); 124 addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass); 125 addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass); 126 addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass); 127 128 if (Subtarget.hasStdExtZfh()) { 129 addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass); 130 addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass); 131 addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass); 132 addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass); 133 addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass); 134 addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass); 135 } 136 137 if (Subtarget.hasStdExtF()) { 138 addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass); 139 addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass); 140 addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass); 141 addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass); 142 addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass); 143 } 144 145 if (Subtarget.hasStdExtD()) { 146 addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass); 147 addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass); 148 addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass); 149 addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass); 150 } 151 } 152 153 // Compute derived properties from the register classes. 
154 computeRegisterProperties(STI.getRegisterInfo()); 155 156 setStackPointerRegisterToSaveRestore(RISCV::X2); 157 158 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 159 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 160 161 // TODO: add all necessary setOperationAction calls. 162 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 163 164 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 165 setOperationAction(ISD::BR_CC, XLenVT, Expand); 166 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 167 168 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 169 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 170 171 setOperationAction(ISD::VASTART, MVT::Other, Custom); 172 setOperationAction(ISD::VAARG, MVT::Other, Expand); 173 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 174 setOperationAction(ISD::VAEND, MVT::Other, Expand); 175 176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 177 if (!Subtarget.hasStdExtZbb()) { 178 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 179 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 180 } 181 182 if (Subtarget.is64Bit()) { 183 setOperationAction(ISD::ADD, MVT::i32, Custom); 184 setOperationAction(ISD::SUB, MVT::i32, Custom); 185 setOperationAction(ISD::SHL, MVT::i32, Custom); 186 setOperationAction(ISD::SRA, MVT::i32, Custom); 187 setOperationAction(ISD::SRL, MVT::i32, Custom); 188 } 189 190 if (!Subtarget.hasStdExtM()) { 191 setOperationAction(ISD::MUL, XLenVT, Expand); 192 setOperationAction(ISD::MULHS, XLenVT, Expand); 193 setOperationAction(ISD::MULHU, XLenVT, Expand); 194 setOperationAction(ISD::SDIV, XLenVT, Expand); 195 setOperationAction(ISD::UDIV, XLenVT, Expand); 196 setOperationAction(ISD::SREM, XLenVT, Expand); 197 setOperationAction(ISD::UREM, XLenVT, Expand); 198 } 199 200 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 201 setOperationAction(ISD::MUL, MVT::i32, Custom); 202 203 setOperationAction(ISD::SDIV, MVT::i8, Custom); 204 setOperationAction(ISD::UDIV, MVT::i8, Custom); 205 setOperationAction(ISD::UREM, MVT::i8, Custom); 206 setOperationAction(ISD::SDIV, MVT::i16, Custom); 207 setOperationAction(ISD::UDIV, MVT::i16, Custom); 208 setOperationAction(ISD::UREM, MVT::i16, Custom); 209 setOperationAction(ISD::SDIV, MVT::i32, Custom); 210 setOperationAction(ISD::UDIV, MVT::i32, Custom); 211 setOperationAction(ISD::UREM, MVT::i32, Custom); 212 } 213 214 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 215 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 216 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 217 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 218 219 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 220 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 221 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 222 223 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 224 if (Subtarget.is64Bit()) { 225 setOperationAction(ISD::ROTL, MVT::i32, Custom); 226 setOperationAction(ISD::ROTR, MVT::i32, Custom); 227 } 228 } else { 229 setOperationAction(ISD::ROTL, XLenVT, Expand); 230 setOperationAction(ISD::ROTR, XLenVT, Expand); 231 } 232 233 if (Subtarget.hasStdExtZbp()) { 234 // Custom lower bswap/bitreverse so we can convert them to GREVI to enable 235 // more combining. 
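    // (GREVI is the generalised reverse with an immediate control value; bswap
    // and bitreverse correspond to particular control values, so canonicalising
    // to GREVI lets adjacent bit permutations be merged.)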
236 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 237 setOperationAction(ISD::BSWAP, XLenVT, Custom); 238 239 if (Subtarget.is64Bit()) { 240 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 241 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 242 } 243 } else { 244 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 245 // pattern match it directly in isel. 246 setOperationAction(ISD::BSWAP, XLenVT, 247 Subtarget.hasStdExtZbb() ? Legal : Expand); 248 } 249 250 if (Subtarget.hasStdExtZbb()) { 251 setOperationAction(ISD::SMIN, XLenVT, Legal); 252 setOperationAction(ISD::SMAX, XLenVT, Legal); 253 setOperationAction(ISD::UMIN, XLenVT, Legal); 254 setOperationAction(ISD::UMAX, XLenVT, Legal); 255 } else { 256 setOperationAction(ISD::CTTZ, XLenVT, Expand); 257 setOperationAction(ISD::CTLZ, XLenVT, Expand); 258 setOperationAction(ISD::CTPOP, XLenVT, Expand); 259 } 260 261 if (Subtarget.hasStdExtZbt()) { 262 setOperationAction(ISD::FSHL, XLenVT, Legal); 263 setOperationAction(ISD::FSHR, XLenVT, Legal); 264 setOperationAction(ISD::SELECT, XLenVT, Legal); 265 266 if (Subtarget.is64Bit()) { 267 setOperationAction(ISD::FSHL, MVT::i32, Custom); 268 setOperationAction(ISD::FSHR, MVT::i32, Custom); 269 } 270 } else { 271 setOperationAction(ISD::SELECT, XLenVT, Custom); 272 } 273 274 ISD::CondCode FPCCToExpand[] = { 275 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 276 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 277 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 278 279 ISD::NodeType FPOpToExpand[] = { 280 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 281 ISD::FP_TO_FP16}; 282 283 if (Subtarget.hasStdExtZfh()) 284 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 285 286 if (Subtarget.hasStdExtZfh()) { 287 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 288 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 289 for (auto CC : FPCCToExpand) 290 setCondCodeAction(CC, MVT::f16, Expand); 291 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 292 setOperationAction(ISD::SELECT, MVT::f16, Custom); 293 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 294 for (auto Op : FPOpToExpand) 295 setOperationAction(Op, MVT::f16, Expand); 296 } 297 298 if (Subtarget.hasStdExtF()) { 299 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 300 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 301 for (auto CC : FPCCToExpand) 302 setCondCodeAction(CC, MVT::f32, Expand); 303 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 304 setOperationAction(ISD::SELECT, MVT::f32, Custom); 305 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 306 for (auto Op : FPOpToExpand) 307 setOperationAction(Op, MVT::f32, Expand); 308 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 309 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 310 } 311 312 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 313 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 314 315 if (Subtarget.hasStdExtD()) { 316 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 317 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 318 for (auto CC : FPCCToExpand) 319 setCondCodeAction(CC, MVT::f64, Expand); 320 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 321 setOperationAction(ISD::SELECT, MVT::f64, Custom); 322 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 323 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 324 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 325 for (auto Op : FPOpToExpand) 326 setOperationAction(Op, MVT::f64, 
Expand); 327 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 328 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 329 } 330 331 if (Subtarget.is64Bit()) { 332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 333 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 334 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 335 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 336 } 337 338 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 339 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 340 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 341 setOperationAction(ISD::JumpTable, XLenVT, Custom); 342 343 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 344 345 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 346 // Unfortunately this can't be determined just from the ISA naming string. 347 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 348 Subtarget.is64Bit() ? Legal : Custom); 349 350 setOperationAction(ISD::TRAP, MVT::Other, Legal); 351 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 352 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 353 354 if (Subtarget.hasStdExtA()) { 355 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 356 setMinCmpXchgSizeInBits(32); 357 } else { 358 setMaxAtomicSizeInBitsSupported(0); 359 } 360 361 setBooleanContents(ZeroOrOneBooleanContent); 362 363 if (Subtarget.hasStdExtV()) { 364 setBooleanVectorContents(ZeroOrOneBooleanContent); 365 366 setOperationAction(ISD::VSCALE, XLenVT, Custom); 367 368 // RVV intrinsics may have illegal operands. 369 // We also need to custom legalize vmv.x.s. 370 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 371 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 372 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 373 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 374 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 375 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 376 377 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 378 379 if (Subtarget.is64Bit()) { 380 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 381 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 382 } 383 384 for (auto VT : MVT::integer_scalable_vector_valuetypes()) { 385 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 386 387 setOperationAction(ISD::SMIN, VT, Legal); 388 setOperationAction(ISD::SMAX, VT, Legal); 389 setOperationAction(ISD::UMIN, VT, Legal); 390 setOperationAction(ISD::UMAX, VT, Legal); 391 392 setOperationAction(ISD::ROTL, VT, Expand); 393 setOperationAction(ISD::ROTR, VT, Expand); 394 395 if (isTypeLegal(VT)) { 396 // Custom-lower extensions and truncations from/to mask types. 397 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 398 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 399 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 400 401 // We custom-lower all legally-typed vector truncates: 402 // 1. Mask VTs are custom-expanded into a series of standard nodes 403 // 2. Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR" 404 // nodes which truncate by one power of two at a time. 405 setOperationAction(ISD::TRUNCATE, VT, Custom); 406 407 // Custom-lower insert/extract operations to simplify patterns. 
408 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 409 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 410 } 411 } 412 413 // We must custom-lower certain vXi64 operations on RV32 due to the vector 414 // element type being illegal. 415 if (!Subtarget.is64Bit()) { 416 setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom); 417 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); 418 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); 419 } 420 421 // Expand various CCs to best match the RVV ISA, which natively supports UNE 422 // but no other unordered comparisons, and supports all ordered comparisons 423 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 424 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 425 // and we pattern-match those back to the "original", swapping operands once 426 // more. This way we catch both operations and both "vf" and "fv" forms with 427 // fewer patterns. 428 ISD::CondCode VFPCCToExpand[] = { 429 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 430 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 431 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 432 }; 433 434 // Sets common operation actions on RVV floating-point vector types. 435 const auto SetCommonVFPActions = [&](MVT VT) { 436 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 437 // Custom-lower insert/extract operations to simplify patterns. 438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 439 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 440 for (auto CC : VFPCCToExpand) 441 setCondCodeAction(CC, VT, Expand); 442 }; 443 444 if (Subtarget.hasStdExtZfh()) { 445 for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t, 446 RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t, 447 RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) 448 SetCommonVFPActions(VT); 449 } 450 451 if (Subtarget.hasStdExtF()) { 452 for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t, 453 RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t, 454 RISCVVMVTs::vfloat32m8_t}) 455 SetCommonVFPActions(VT); 456 } 457 458 if (Subtarget.hasStdExtD()) { 459 for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t, 460 RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) 461 SetCommonVFPActions(VT); 462 } 463 } 464 465 // Function alignments. 466 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); 467 setMinFunctionAlignment(FunctionAlignment); 468 setPrefFunctionAlignment(FunctionAlignment); 469 470 setMinimumJumpTableEntries(5); 471 472 // Jumps are expensive, compared to logic 473 setJumpIsExpensive(); 474 475 // We can use any register for comparisons 476 setHasMultipleConditionRegisters(); 477 478 setTargetDAGCombine(ISD::SETCC); 479 if (Subtarget.hasStdExtZbp()) { 480 setTargetDAGCombine(ISD::OR); 481 } 482 } 483 484 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 485 EVT VT) const { 486 if (!VT.isVector()) 487 return getPointerTy(DL); 488 if (Subtarget.hasStdExtV()) 489 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 490 return VT.changeVectorElementTypeToInteger(); 491 } 492 493 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 494 const CallInst &I, 495 MachineFunction &MF, 496 unsigned Intrinsic) const { 497 switch (Intrinsic) { 498 default: 499 return false; 500 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 501 case Intrinsic::riscv_masked_atomicrmw_add_i32: 502 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 503 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 504 case Intrinsic::riscv_masked_atomicrmw_max_i32: 505 case Intrinsic::riscv_masked_atomicrmw_min_i32: 506 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 507 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 508 case Intrinsic::riscv_masked_cmpxchg_i32: 509 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 510 Info.opc = ISD::INTRINSIC_W_CHAIN; 511 Info.memVT = MVT::getVT(PtrTy->getElementType()); 512 Info.ptrVal = I.getArgOperand(0); 513 Info.offset = 0; 514 Info.align = Align(4); 515 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 516 MachineMemOperand::MOVolatile; 517 return true; 518 } 519 } 520 521 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 522 const AddrMode &AM, Type *Ty, 523 unsigned AS, 524 Instruction *I) const { 525 // No global is ever allowed as a base. 526 if (AM.BaseGV) 527 return false; 528 529 // Require a 12-bit signed offset. 530 if (!isInt<12>(AM.BaseOffs)) 531 return false; 532 533 switch (AM.Scale) { 534 case 0: // "r+i" or just "i", depending on HasBaseReg. 535 break; 536 case 1: 537 if (!AM.HasBaseReg) // allow "r+i". 538 break; 539 return false; // disallow "r+r" or "r+r+i". 540 default: 541 return false; 542 } 543 544 return true; 545 } 546 547 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 548 return isInt<12>(Imm); 549 } 550 551 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 552 return isInt<12>(Imm); 553 } 554 555 // On RV32, 64-bit integers are split into their high and low parts and held 556 // in two different registers, so the trunc is free since the low register can 557 // just be used. 
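// For example, on RV32 truncating an i64 value to i32 just reads the register
// that already holds the low 32 bits; no instruction is needed.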
558 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 559 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 560 return false; 561 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 562 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 563 return (SrcBits == 64 && DestBits == 32); 564 } 565 566 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 567 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 568 !SrcVT.isInteger() || !DstVT.isInteger()) 569 return false; 570 unsigned SrcBits = SrcVT.getSizeInBits(); 571 unsigned DestBits = DstVT.getSizeInBits(); 572 return (SrcBits == 64 && DestBits == 32); 573 } 574 575 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 576 // Zexts are free if they can be combined with a load. 577 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 578 EVT MemVT = LD->getMemoryVT(); 579 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 580 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 581 (LD->getExtensionType() == ISD::NON_EXTLOAD || 582 LD->getExtensionType() == ISD::ZEXTLOAD)) 583 return true; 584 } 585 586 return TargetLowering::isZExtFree(Val, VT2); 587 } 588 589 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 590 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 591 } 592 593 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 594 return Subtarget.hasStdExtZbb(); 595 } 596 597 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 598 return Subtarget.hasStdExtZbb(); 599 } 600 601 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 602 bool ForCodeSize) const { 603 if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) 604 return false; 605 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 606 return false; 607 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 608 return false; 609 if (Imm.isNegZero()) 610 return false; 611 return Imm.isZero(); 612 } 613 614 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 615 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 616 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 617 (VT == MVT::f64 && Subtarget.hasStdExtD()); 618 } 619 620 // Changes the condition code and swaps operands if necessary, so the SetCC 621 // operation matches one of the comparisons supported directly in the RISC-V 622 // ISA. 623 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 624 switch (CC) { 625 default: 626 break; 627 case ISD::SETGT: 628 case ISD::SETLE: 629 case ISD::SETUGT: 630 case ISD::SETULE: 631 CC = ISD::getSetCCSwappedOperands(CC); 632 std::swap(LHS, RHS); 633 break; 634 } 635 } 636 637 // Return the RISC-V branch opcode that matches the given DAG integer 638 // condition code. The CondCode must be one of those supported by the RISC-V 639 // ISA (see normaliseSetCC). 
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
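    // e.g. for XLEN=64 this leaves 63 (0b111111) for BITREVERSE, while BSWAP
    // uses 56 (0b111000): the byte-and-above swap stages run, but the bits
    // within each byte stay in place.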
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
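    // For example, with VLEN=128 VLENB reads as 16, so vscale = 16 >> 3 = 2 and
    // an LMUL=1 type such as <vscale x 2 x i32> holds 4 x i32 (one register).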
784 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 785 DAG.getConstant(3, DL, VT)); 786 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 787 } 788 } 789 } 790 791 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 792 SelectionDAG &DAG, unsigned Flags) { 793 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 794 } 795 796 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 797 SelectionDAG &DAG, unsigned Flags) { 798 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 799 Flags); 800 } 801 802 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 803 SelectionDAG &DAG, unsigned Flags) { 804 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 805 N->getOffset(), Flags); 806 } 807 808 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 809 SelectionDAG &DAG, unsigned Flags) { 810 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 811 } 812 813 template <class NodeTy> 814 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 815 bool IsLocal) const { 816 SDLoc DL(N); 817 EVT Ty = getPointerTy(DAG.getDataLayout()); 818 819 if (isPositionIndependent()) { 820 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 821 if (IsLocal) 822 // Use PC-relative addressing to access the symbol. This generates the 823 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 824 // %pcrel_lo(auipc)). 825 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 826 827 // Use PC-relative addressing to access the GOT for this symbol, then load 828 // the address from the GOT. This generates the pattern (PseudoLA sym), 829 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 830 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 831 } 832 833 switch (getTargetMachine().getCodeModel()) { 834 default: 835 report_fatal_error("Unsupported code model for lowering"); 836 case CodeModel::Small: { 837 // Generate a sequence for accessing addresses within the first 2 GiB of 838 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 839 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 840 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 841 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 842 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 843 } 844 case CodeModel::Medium: { 845 // Generate a sequence for accessing addresses within any 2GiB range within 846 // the address space. This generates the pattern (PseudoLLA sym), which 847 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 
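    // Unlike the Small model's lui/addi pair, the auipc-based sequence is
    // relative to the code address, so it works at any load address provided
    // the symbol is within +/-2 GiB of the access.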
848 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 849 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 850 } 851 } 852 } 853 854 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 855 SelectionDAG &DAG) const { 856 SDLoc DL(Op); 857 EVT Ty = Op.getValueType(); 858 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 859 int64_t Offset = N->getOffset(); 860 MVT XLenVT = Subtarget.getXLenVT(); 861 862 const GlobalValue *GV = N->getGlobal(); 863 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 864 SDValue Addr = getAddr(N, DAG, IsLocal); 865 866 // In order to maximise the opportunity for common subexpression elimination, 867 // emit a separate ADD node for the global address offset instead of folding 868 // it in the global address node. Later peephole optimisations may choose to 869 // fold it back in when profitable. 870 if (Offset != 0) 871 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 872 DAG.getConstant(Offset, DL, XLenVT)); 873 return Addr; 874 } 875 876 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 877 SelectionDAG &DAG) const { 878 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 879 880 return getAddr(N, DAG); 881 } 882 883 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 884 SelectionDAG &DAG) const { 885 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 886 887 return getAddr(N, DAG); 888 } 889 890 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 891 SelectionDAG &DAG) const { 892 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 893 894 return getAddr(N, DAG); 895 } 896 897 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 898 SelectionDAG &DAG, 899 bool UseGOT) const { 900 SDLoc DL(N); 901 EVT Ty = getPointerTy(DAG.getDataLayout()); 902 const GlobalValue *GV = N->getGlobal(); 903 MVT XLenVT = Subtarget.getXLenVT(); 904 905 if (UseGOT) { 906 // Use PC-relative addressing to access the GOT for this TLS symbol, then 907 // load the address from the GOT and add the thread pointer. This generates 908 // the pattern (PseudoLA_TLS_IE sym), which expands to 909 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 910 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 911 SDValue Load = 912 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 913 914 // Add the thread pointer. 915 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 916 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 917 } 918 919 // Generate a sequence for accessing the address relative to the thread 920 // pointer, with the appropriate adjustment for the thread pointer offset. 
921 // This generates the pattern 922 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 923 SDValue AddrHi = 924 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 925 SDValue AddrAdd = 926 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 927 SDValue AddrLo = 928 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 929 930 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 931 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 932 SDValue MNAdd = SDValue( 933 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 934 0); 935 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 936 } 937 938 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 939 SelectionDAG &DAG) const { 940 SDLoc DL(N); 941 EVT Ty = getPointerTy(DAG.getDataLayout()); 942 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 943 const GlobalValue *GV = N->getGlobal(); 944 945 // Use a PC-relative addressing mode to access the global dynamic GOT address. 946 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 947 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 948 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 949 SDValue Load = 950 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 951 952 // Prepare argument list to generate call. 953 ArgListTy Args; 954 ArgListEntry Entry; 955 Entry.Node = Load; 956 Entry.Ty = CallTy; 957 Args.push_back(Entry); 958 959 // Setup call to __tls_get_addr. 960 TargetLowering::CallLoweringInfo CLI(DAG); 961 CLI.setDebugLoc(DL) 962 .setChain(DAG.getEntryNode()) 963 .setLibCallee(CallingConv::C, CallTy, 964 DAG.getExternalSymbol("__tls_get_addr", Ty), 965 std::move(Args)); 966 967 return LowerCallTo(CLI).first; 968 } 969 970 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 971 SelectionDAG &DAG) const { 972 SDLoc DL(Op); 973 EVT Ty = Op.getValueType(); 974 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 975 int64_t Offset = N->getOffset(); 976 MVT XLenVT = Subtarget.getXLenVT(); 977 978 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 979 980 if (DAG.getMachineFunction().getFunction().getCallingConv() == 981 CallingConv::GHC) 982 report_fatal_error("In GHC calling convention TLS is not supported"); 983 984 SDValue Addr; 985 switch (Model) { 986 case TLSModel::LocalExec: 987 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 988 break; 989 case TLSModel::InitialExec: 990 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 991 break; 992 case TLSModel::LocalDynamic: 993 case TLSModel::GeneralDynamic: 994 Addr = getDynamicTLSAddr(N, DAG); 995 break; 996 } 997 998 // In order to maximise the opportunity for common subexpression elimination, 999 // emit a separate ADD node for the global address offset instead of folding 1000 // it in the global address node. Later peephole optimisations may choose to 1001 // fold it back in when profitable. 
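  // For example, a reference to (sym + 16) is emitted as (add addr(sym), 16)
  // so that addr(sym) can be CSE'd with other uses of the symbol.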
1002 if (Offset != 0) 1003 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1004 DAG.getConstant(Offset, DL, XLenVT)); 1005 return Addr; 1006 } 1007 1008 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1009 SDValue CondV = Op.getOperand(0); 1010 SDValue TrueV = Op.getOperand(1); 1011 SDValue FalseV = Op.getOperand(2); 1012 SDLoc DL(Op); 1013 MVT XLenVT = Subtarget.getXLenVT(); 1014 1015 // If the result type is XLenVT and CondV is the output of a SETCC node 1016 // which also operated on XLenVT inputs, then merge the SETCC node into the 1017 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1018 // compare+branch instructions. i.e.: 1019 // (select (setcc lhs, rhs, cc), truev, falsev) 1020 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1021 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1022 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1023 SDValue LHS = CondV.getOperand(0); 1024 SDValue RHS = CondV.getOperand(1); 1025 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1026 ISD::CondCode CCVal = CC->get(); 1027 1028 normaliseSetCC(LHS, RHS, CCVal); 1029 1030 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1031 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1032 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1033 } 1034 1035 // Otherwise: 1036 // (select condv, truev, falsev) 1037 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1038 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1039 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1040 1041 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1042 1043 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1044 } 1045 1046 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1047 MachineFunction &MF = DAG.getMachineFunction(); 1048 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1049 1050 SDLoc DL(Op); 1051 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1052 getPointerTy(MF.getDataLayout())); 1053 1054 // vastart just stores the address of the VarArgsFrameIndex slot into the 1055 // memory location argument. 
1056 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1057 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1058 MachinePointerInfo(SV)); 1059 } 1060 1061 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1062 SelectionDAG &DAG) const { 1063 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1064 MachineFunction &MF = DAG.getMachineFunction(); 1065 MachineFrameInfo &MFI = MF.getFrameInfo(); 1066 MFI.setFrameAddressIsTaken(true); 1067 Register FrameReg = RI.getFrameRegister(MF); 1068 int XLenInBytes = Subtarget.getXLen() / 8; 1069 1070 EVT VT = Op.getValueType(); 1071 SDLoc DL(Op); 1072 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1073 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1074 while (Depth--) { 1075 int Offset = -(XLenInBytes * 2); 1076 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1077 DAG.getIntPtrConstant(Offset, DL)); 1078 FrameAddr = 1079 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1080 } 1081 return FrameAddr; 1082 } 1083 1084 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1085 SelectionDAG &DAG) const { 1086 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1087 MachineFunction &MF = DAG.getMachineFunction(); 1088 MachineFrameInfo &MFI = MF.getFrameInfo(); 1089 MFI.setReturnAddressIsTaken(true); 1090 MVT XLenVT = Subtarget.getXLenVT(); 1091 int XLenInBytes = Subtarget.getXLen() / 8; 1092 1093 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1094 return SDValue(); 1095 1096 EVT VT = Op.getValueType(); 1097 SDLoc DL(Op); 1098 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1099 if (Depth) { 1100 int Off = -XLenInBytes; 1101 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1102 SDValue Offset = DAG.getConstant(Off, DL, VT); 1103 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1104 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1105 MachinePointerInfo()); 1106 } 1107 1108 // Return the value of the return address register, marking it an implicit 1109 // live-in. 
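  // Depth 0: the return address is just the current value of ra (x1).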
1110 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1111 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1112 } 1113 1114 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1115 SelectionDAG &DAG) const { 1116 SDLoc DL(Op); 1117 SDValue Lo = Op.getOperand(0); 1118 SDValue Hi = Op.getOperand(1); 1119 SDValue Shamt = Op.getOperand(2); 1120 EVT VT = Lo.getValueType(); 1121 1122 // if Shamt-XLEN < 0: // Shamt < XLEN 1123 // Lo = Lo << Shamt 1124 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1125 // else: 1126 // Lo = 0 1127 // Hi = Lo << (Shamt-XLEN) 1128 1129 SDValue Zero = DAG.getConstant(0, DL, VT); 1130 SDValue One = DAG.getConstant(1, DL, VT); 1131 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1132 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1133 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1134 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1135 1136 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1137 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1138 SDValue ShiftRightLo = 1139 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1140 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1141 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1142 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1143 1144 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1145 1146 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1147 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1148 1149 SDValue Parts[2] = {Lo, Hi}; 1150 return DAG.getMergeValues(Parts, DL); 1151 } 1152 1153 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1154 bool IsSRA) const { 1155 SDLoc DL(Op); 1156 SDValue Lo = Op.getOperand(0); 1157 SDValue Hi = Op.getOperand(1); 1158 SDValue Shamt = Op.getOperand(2); 1159 EVT VT = Lo.getValueType(); 1160 1161 // SRA expansion: 1162 // if Shamt-XLEN < 0: // Shamt < XLEN 1163 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1164 // Hi = Hi >>s Shamt 1165 // else: 1166 // Lo = Hi >>s (Shamt-XLEN); 1167 // Hi = Hi >>s (XLEN-1) 1168 // 1169 // SRL expansion: 1170 // if Shamt-XLEN < 0: // Shamt < XLEN 1171 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1172 // Hi = Hi >>u Shamt 1173 // else: 1174 // Lo = Hi >>u (Shamt-XLEN); 1175 // Hi = 0; 1176 1177 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 1178 1179 SDValue Zero = DAG.getConstant(0, DL, VT); 1180 SDValue One = DAG.getConstant(1, DL, VT); 1181 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1182 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1183 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1184 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1185 1186 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1187 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1188 SDValue ShiftLeftHi = 1189 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1190 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1191 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1192 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1193 SDValue HiFalse = 1194 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1195 1196 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1197 1198 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1199 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1200 1201 SDValue Parts[2] = {Lo, Hi}; 1202 return DAG.getMergeValues(Parts, DL); 1203 } 1204 1205 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1206 // illegal (currently only vXi64 RV32). 1207 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1208 // them to SPLAT_VECTOR_I64 1209 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1210 SelectionDAG &DAG) const { 1211 SDLoc DL(Op); 1212 EVT VecVT = Op.getValueType(); 1213 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1214 "Unexpected SPLAT_VECTOR lowering"); 1215 SDValue SplatVal = Op.getOperand(0); 1216 1217 // If we can prove that the value is a sign-extended 32-bit value, lower this 1218 // as a custom node in order to try and match RVV vector/scalar instructions. 1219 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1220 if (isInt<32>(CVal->getSExtValue())) 1221 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1222 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1223 } 1224 1225 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1226 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1227 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1228 SplatVal.getOperand(0)); 1229 } 1230 1231 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1232 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1233 // vmv.v.x vX, hi 1234 // vsll.vx vX, vX, /*32*/ 1235 // vmv.v.x vY, lo 1236 // vsll.vx vY, vY, /*32*/ 1237 // vsrl.vx vY, vY, /*32*/ 1238 // vor.vv vX, vX, vY 1239 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1240 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1241 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1242 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1243 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1244 1245 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1246 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1247 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1248 1249 if (isNullConstant(Hi)) 1250 return Lo; 1251 1252 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1253 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1254 1255 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1256 } 1257 1258 // Custom-lower extensions from mask vectors by using a vselect either with 1 1259 // for zero/any-extension or -1 for sign-extension: 1260 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1261 // Note that any-extension is lowered identically to zero-extension. 1262 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1263 int64_t ExtTrueVal) const { 1264 SDLoc DL(Op); 1265 EVT VecVT = Op.getValueType(); 1266 SDValue Src = Op.getOperand(0); 1267 // Only custom-lower extensions from mask types 1268 if (!Src.getValueType().isVector() || 1269 Src.getValueType().getVectorElementType() != MVT::i1) 1270 return Op; 1271 1272 // Be careful not to introduce illegal scalar types at this stage, and be 1273 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1274 // illegal and must be expanded. 
Since we know that the constants are 1275 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1276 bool IsRV32E64 = 1277 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1278 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1279 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT()); 1280 1281 if (!IsRV32E64) { 1282 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1283 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1284 } else { 1285 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1286 SplatTrueVal = 1287 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1288 } 1289 1290 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1291 } 1292 1293 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1294 // setcc operation: 1295 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1296 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1297 SelectionDAG &DAG) const { 1298 SDLoc DL(Op); 1299 EVT MaskVT = Op.getValueType(); 1300 // Only expect to custom-lower truncations to mask types 1301 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1302 "Unexpected type for vector mask lowering"); 1303 SDValue Src = Op.getOperand(0); 1304 EVT VecVT = Src.getValueType(); 1305 1306 // Be careful not to introduce illegal scalar types at this stage, and be 1307 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1308 // illegal and must be expanded. Since we know that the constants are 1309 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1310 bool IsRV32E64 = 1311 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1312 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1313 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1314 1315 if (!IsRV32E64) { 1316 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1317 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1318 } else { 1319 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1320 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1321 } 1322 1323 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1324 1325 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1326 } 1327 1328 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1329 SelectionDAG &DAG) const { 1330 SDLoc DL(Op); 1331 EVT VecVT = Op.getValueType(); 1332 SDValue Vec = Op.getOperand(0); 1333 SDValue Val = Op.getOperand(1); 1334 SDValue Idx = Op.getOperand(2); 1335 1336 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1337 // first slid down into position, the value is inserted into the first 1338 // position, and the vector is slid back up. We do this to simplify patterns. 
  // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
  if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
    if (isNullConstant(Idx))
      return Op;
    SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
                                    DAG.getUNDEF(VecVT), Vec, Idx);
    SDValue InsertElt0 =
        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
                    DAG.getConstant(0, DL, Subtarget.getXLenVT()));

    return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
  }

  // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
  // is illegal (currently only vXi64 RV32).
  // Since there is no easy way of getting a single element into a vector when
  // XLEN<SEW, we lower the operation to the following sequence:
  //   splat      vVal, rVal
  //   vid.v      vVid
  //   vmseq.vx   mMask, vVid, rIdx
  //   vmerge.vvm vDest, vSrc, vVal, mMask
  // This essentially merges the original vector with the inserted element by
  // using a mask whose only set bit is that corresponding to the insert
  // index.
  SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
  SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);

  SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
  auto SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
  SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);

  return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). This is
// done to maintain parity with the RV32 vXi64 legalization.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  if (isNullConstant(Idx))
    return Op;

  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  EVT VecVT = Vec.getValueType();
  SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
                                  DAG.getUNDEF(VecVT), Vec, Idx);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  }
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  unsigned NF = 1;
  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
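  // For the vlsegNff intrinsics handled below, NF is computed by falling
  // through the case labels: every case from the matched one down to vlseg2ff
  // increments it once, leaving NF equal to the intrinsic's segment count.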
1476 case Intrinsic::riscv_vleff: { 1477 SDLoc DL(Op); 1478 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1479 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), 1480 Op.getOperand(2), Op.getOperand(3)); 1481 VTs = DAG.getVTList(Op->getValueType(1), MVT::Other); 1482 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2)); 1483 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1484 } 1485 case Intrinsic::riscv_vleff_mask: { 1486 SDLoc DL(Op); 1487 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1488 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), 1489 Op.getOperand(2), Op.getOperand(3), 1490 Op.getOperand(4), Op.getOperand(5)); 1491 VTs = DAG.getVTList(Op->getValueType(1), MVT::Other); 1492 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2)); 1493 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1494 } 1495 case Intrinsic::riscv_vlseg8ff: 1496 NF++; 1497 LLVM_FALLTHROUGH; 1498 case Intrinsic::riscv_vlseg7ff: 1499 NF++; 1500 LLVM_FALLTHROUGH; 1501 case Intrinsic::riscv_vlseg6ff: 1502 NF++; 1503 LLVM_FALLTHROUGH; 1504 case Intrinsic::riscv_vlseg5ff: 1505 NF++; 1506 LLVM_FALLTHROUGH; 1507 case Intrinsic::riscv_vlseg4ff: 1508 NF++; 1509 LLVM_FALLTHROUGH; 1510 case Intrinsic::riscv_vlseg3ff: 1511 NF++; 1512 LLVM_FALLTHROUGH; 1513 case Intrinsic::riscv_vlseg2ff: { 1514 NF++; 1515 SDLoc DL(Op); 1516 SmallVector<EVT, 8> EVTs(NF, Op.getValueType()); 1517 EVTs.push_back(MVT::Other); 1518 EVTs.push_back(MVT::Glue); 1519 SDVTList VTs = DAG.getVTList(EVTs); 1520 SDValue Load = 1521 DAG.getNode(RISCVISD::VLSEGFF, DL, VTs, Op.getOperand(0), 1522 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 1523 VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other); 1524 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, 1525 /*Glue*/ Load.getValue(NF + 1)); 1526 SmallVector<SDValue, 8> Results; 1527 for (unsigned i = 0; i < NF; ++i) 1528 Results.push_back(Load.getValue(i)); 1529 Results.push_back(ReadVL); 1530 Results.push_back(Load.getValue(NF)); // Chain. 1531 return DAG.getMergeValues(Results, DL); 1532 } 1533 case Intrinsic::riscv_vlseg8ff_mask: 1534 NF++; 1535 LLVM_FALLTHROUGH; 1536 case Intrinsic::riscv_vlseg7ff_mask: 1537 NF++; 1538 LLVM_FALLTHROUGH; 1539 case Intrinsic::riscv_vlseg6ff_mask: 1540 NF++; 1541 LLVM_FALLTHROUGH; 1542 case Intrinsic::riscv_vlseg5ff_mask: 1543 NF++; 1544 LLVM_FALLTHROUGH; 1545 case Intrinsic::riscv_vlseg4ff_mask: 1546 NF++; 1547 LLVM_FALLTHROUGH; 1548 case Intrinsic::riscv_vlseg3ff_mask: 1549 NF++; 1550 LLVM_FALLTHROUGH; 1551 case Intrinsic::riscv_vlseg2ff_mask: { 1552 NF++; 1553 SDLoc DL(Op); 1554 SmallVector<EVT, 8> EVTs(NF, Op.getValueType()); 1555 EVTs.push_back(MVT::Other); 1556 EVTs.push_back(MVT::Glue); 1557 SDVTList VTs = DAG.getVTList(EVTs); 1558 SmallVector<SDValue, 13> LoadOps; 1559 LoadOps.push_back(Op.getOperand(0)); // Chain. 1560 LoadOps.push_back(Op.getOperand(1)); // Intrinsic ID. 1561 for (unsigned i = 0; i < NF; ++i) 1562 LoadOps.push_back(Op.getOperand(2 + i)); // MaskedOff. 1563 LoadOps.push_back(Op.getOperand(2 + NF)); // Base. 1564 LoadOps.push_back(Op.getOperand(3 + NF)); // Mask. 1565 LoadOps.push_back(Op.getOperand(4 + NF)); // VL. 
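    // The last value produced by VLSEGFF_MASK is glue; the READ_VL node below
    // consumes it to recover the vl written by the fault-only-first load.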
1566     SDValue Load = DAG.getNode(RISCVISD::VLSEGFF_MASK, DL, VTs, LoadOps);
1567     VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
1568     SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
1569                                  /*Glue*/ Load.getValue(NF + 1));
1570     SmallVector<SDValue, 8> Results;
1571     for (unsigned i = 0; i < NF; ++i)
1572       Results.push_back(Load.getValue(i));
1573     Results.push_back(ReadVL);
1574     Results.push_back(Load.getValue(NF)); // Chain.
1575     return DAG.getMergeValues(Results, DL);
1576   }
1577   }
1578 }
1579
1580 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1581 // form of the given Opcode.
1582 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1583   switch (Opcode) {
1584   default:
1585     llvm_unreachable("Unexpected opcode");
1586   case ISD::SHL:
1587     return RISCVISD::SLLW;
1588   case ISD::SRA:
1589     return RISCVISD::SRAW;
1590   case ISD::SRL:
1591     return RISCVISD::SRLW;
1592   case ISD::SDIV:
1593     return RISCVISD::DIVW;
1594   case ISD::UDIV:
1595     return RISCVISD::DIVUW;
1596   case ISD::UREM:
1597     return RISCVISD::REMUW;
1598   case ISD::ROTL:
1599     return RISCVISD::ROLW;
1600   case ISD::ROTR:
1601     return RISCVISD::RORW;
1602   case RISCVISD::GREVI:
1603     return RISCVISD::GREVIW;
1604   case RISCVISD::GORCI:
1605     return RISCVISD::GORCIW;
1606   }
1607 }
1608
1609 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1610 // Because i32 isn't a legal type for RV64, these operations would otherwise
1611 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1612 // later on, because the fact that the operation was originally of type i32 is
1613 // lost.
1614 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
1615                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1616   SDLoc DL(N);
1617   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1618   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1619   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1620   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1621   // ReplaceNodeResults requires we maintain the same type for the return value.
1622   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1623 }
1624
1625 // Converts the given 32-bit operation to an i64 operation with sign-extension
1626 // semantics, reducing the number of sign-extension instructions required.
1627 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1628   SDLoc DL(N);
1629   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1630   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1631   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1632   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1633                                DAG.getValueType(MVT::i32));
1634   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1635 }
1636
1637 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1638                                              SmallVectorImpl<SDValue> &Results,
1639                                              SelectionDAG &DAG) const {
1640   SDLoc DL(N);
1641   switch (N->getOpcode()) {
1642   default:
1643     llvm_unreachable("Don't know how to custom type legalize this operation!");
1644   case ISD::STRICT_FP_TO_SINT:
1645   case ISD::STRICT_FP_TO_UINT:
1646   case ISD::FP_TO_SINT:
1647   case ISD::FP_TO_UINT: {
1648     bool IsStrict = N->isStrictFPOpcode();
1649     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1650            "Unexpected custom legalisation");
1651     SDValue Op0 = IsStrict ?
N->getOperand(1) : N->getOperand(0); 1652 // If the FP type needs to be softened, emit a library call using the 'si' 1653 // version. If we left it to default legalization we'd end up with 'di'. If 1654 // the FP type doesn't need to be softened just let generic type 1655 // legalization promote the result type. 1656 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 1657 TargetLowering::TypeSoftenFloat) 1658 return; 1659 RTLIB::Libcall LC; 1660 if (N->getOpcode() == ISD::FP_TO_SINT || 1661 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 1662 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 1663 else 1664 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 1665 MakeLibCallOptions CallOptions; 1666 EVT OpVT = Op0.getValueType(); 1667 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 1668 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 1669 SDValue Result; 1670 std::tie(Result, Chain) = 1671 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 1672 Results.push_back(Result); 1673 if (IsStrict) 1674 Results.push_back(Chain); 1675 break; 1676 } 1677 case ISD::READCYCLECOUNTER: { 1678 assert(!Subtarget.is64Bit() && 1679 "READCYCLECOUNTER only has custom type legalization on riscv32"); 1680 1681 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 1682 SDValue RCW = 1683 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 1684 1685 Results.push_back( 1686 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 1687 Results.push_back(RCW.getValue(2)); 1688 break; 1689 } 1690 case ISD::ADD: 1691 case ISD::SUB: 1692 case ISD::MUL: 1693 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1694 "Unexpected custom legalisation"); 1695 if (N->getOperand(1).getOpcode() == ISD::Constant) 1696 return; 1697 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 1698 break; 1699 case ISD::SHL: 1700 case ISD::SRA: 1701 case ISD::SRL: 1702 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1703 "Unexpected custom legalisation"); 1704 if (N->getOperand(1).getOpcode() == ISD::Constant) 1705 return; 1706 Results.push_back(customLegalizeToWOp(N, DAG)); 1707 break; 1708 case ISD::ROTL: 1709 case ISD::ROTR: 1710 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1711 "Unexpected custom legalisation"); 1712 Results.push_back(customLegalizeToWOp(N, DAG)); 1713 break; 1714 case ISD::SDIV: 1715 case ISD::UDIV: 1716 case ISD::UREM: { 1717 MVT VT = N->getSimpleValueType(0); 1718 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 1719 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 1720 "Unexpected custom legalisation"); 1721 if (N->getOperand(0).getOpcode() == ISD::Constant || 1722 N->getOperand(1).getOpcode() == ISD::Constant) 1723 return; 1724 1725 // If the input is i32, use ANY_EXTEND since the W instructions don't read 1726 // the upper 32 bits. For other types we need to sign or zero extend 1727 // based on the opcode. 1728 unsigned ExtOpc = ISD::ANY_EXTEND; 1729 if (VT != MVT::i32) 1730 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 1731 : ISD::ZERO_EXTEND; 1732 1733 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 1734 break; 1735 } 1736 case ISD::BITCAST: { 1737 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1738 Subtarget.hasStdExtF()) || 1739 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 1740 "Unexpected custom legalisation"); 1741 SDValue Op0 = N->getOperand(0); 1742 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 1743 if (Op0.getValueType() != MVT::f16) 1744 return; 1745 SDValue FPConv = 1746 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 1747 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 1748 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1749 Subtarget.hasStdExtF()) { 1750 if (Op0.getValueType() != MVT::f32) 1751 return; 1752 SDValue FPConv = 1753 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 1754 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 1755 } 1756 break; 1757 } 1758 case RISCVISD::GREVI: 1759 case RISCVISD::GORCI: { 1760 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1761 "Unexpected custom legalisation"); 1762 // This is similar to customLegalizeToWOp, except that we pass the second 1763 // operand (a TargetConstant) straight through: it is already of type 1764 // XLenVT. 1765 SDLoc DL(N); 1766 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 1767 SDValue NewOp0 = 1768 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1769 SDValue NewRes = 1770 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 1771 // ReplaceNodeResults requires we maintain the same type for the return 1772 // value. 1773 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 1774 break; 1775 } 1776 case ISD::BSWAP: 1777 case ISD::BITREVERSE: { 1778 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1779 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 1780 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 1781 N->getOperand(0)); 1782 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 1783 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 1784 DAG.getTargetConstant(Imm, DL, 1785 Subtarget.getXLenVT())); 1786 // ReplaceNodeResults requires we maintain the same type for the return 1787 // value. 1788 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 1789 break; 1790 } 1791 case ISD::FSHL: 1792 case ISD::FSHR: { 1793 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1794 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 1795 SDValue NewOp0 = 1796 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1797 SDValue NewOp1 = 1798 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 1799 SDValue NewOp2 = 1800 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 1801 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 1802 // Mask the shift amount to 5 bits. 1803 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 1804 DAG.getConstant(0x1f, DL, MVT::i64)); 1805 unsigned Opc = 1806 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 1807 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 1808 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 1809 break; 1810 } 1811 case ISD::EXTRACT_VECTOR_ELT: { 1812 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 1813 // type is illegal (currently only vXi64 RV32). 1814 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 1815 // transferred to the destination register. We issue two of these from the 1816 // upper- and lower- halves of the SEW-bit vector element, slid down to the 1817 // first element. 1818 SDLoc DL(N); 1819 SDValue Vec = N->getOperand(0); 1820 SDValue Idx = N->getOperand(1); 1821 EVT VecVT = Vec.getValueType(); 1822 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 1823 VecVT.getVectorElementType() == MVT::i64 && 1824 "Unexpected EXTRACT_VECTOR_ELT legalization"); 1825 1826 SDValue Slidedown = Vec; 1827 // Unless the index is known to be 0, we must slide the vector down to get 1828 // the desired element into index 0. 1829 if (!isNullConstant(Idx)) 1830 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1831 DAG.getUNDEF(VecVT), Vec, Idx); 1832 1833 MVT XLenVT = Subtarget.getXLenVT(); 1834 // Extract the lower XLEN bits of the correct vector element. 1835 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 1836 1837 // To extract the upper XLEN bits of the vector element, shift the first 1838 // element right by 32 bits and re-extract the lower XLEN bits. 1839 SDValue ThirtyTwoV = 1840 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1841 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 1842 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 1843 1844 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 1845 1846 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 1847 break; 1848 } 1849 case ISD::INTRINSIC_WO_CHAIN: { 1850 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 1851 switch (IntNo) { 1852 default: 1853 llvm_unreachable( 1854 "Don't know how to custom type legalize this intrinsic!"); 1855 case Intrinsic::riscv_vmv_x_s: { 1856 EVT VT = N->getValueType(0); 1857 assert((VT == MVT::i8 || VT == MVT::i16 || 1858 (Subtarget.is64Bit() && VT == MVT::i32)) && 1859 "Unexpected custom legalisation!"); 1860 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 1861 Subtarget.getXLenVT(), N->getOperand(1)); 1862 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 1863 break; 1864 } 1865 } 1866 break; 1867 } 1868 } 1869 } 1870 1871 // A structure to hold one of the bit-manipulation patterns below. 
Together, a
1872 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1873 //   (or (and (shl x, 1), 0xAAAAAAAA),
1874 //       (and (srl x, 1), 0x55555555))
1875 struct RISCVBitmanipPat {
1876   SDValue Op;
1877   unsigned ShAmt;
1878   bool IsSHL;
1879
1880   bool formsPairWith(const RISCVBitmanipPat &Other) const {
1881     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1882   }
1883 };
1884
1885 // Matches any of the following bit-manipulation patterns:
1886 //   (and (shl x, 1), (0x55555555 << 1))
1887 //   (and (srl x, 1), 0x55555555)
1888 //   (shl (and x, 0x55555555), 1)
1889 //   (srl (and x, (0x55555555 << 1)), 1)
1890 // where the shift amount and mask may vary thus:
1891 //   [1]  = 0x55555555 / 0xAAAAAAAA
1892 //   [2]  = 0x33333333 / 0xCCCCCCCC
1893 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
1894 //   [8]  = 0x00FF00FF / 0xFF00FF00
1895 //   [16] = 0x0000FFFF / 0xFFFF0000
1896 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
1897 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
1898   Optional<uint64_t> Mask;
1899   // Optionally consume a mask around the shift operation.
1900   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
1901     Mask = Op.getConstantOperandVal(1);
1902     Op = Op.getOperand(0);
1903   }
1904   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
1905     return None;
1906   bool IsSHL = Op.getOpcode() == ISD::SHL;
1907
1908   if (!isa<ConstantSDNode>(Op.getOperand(1)))
1909     return None;
1910   auto ShAmt = Op.getConstantOperandVal(1);
1911
1912   if (!isPowerOf2_64(ShAmt))
1913     return None;
1914
1915   // These are the unshifted masks which we use to match bit-manipulation
1916   // patterns. They may be shifted left in certain circumstances.
1917   static const uint64_t BitmanipMasks[] = {
1918       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
1919       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
1920   };
1921
1922   unsigned MaskIdx = Log2_64(ShAmt);
1923   if (MaskIdx >= array_lengthof(BitmanipMasks))
1924     return None;
1925
1926   auto Src = Op.getOperand(0);
1927
1928   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
1929   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
1930
1931   // The expected mask is shifted left when the AND is found around SHL
1932   // patterns.
1933   //   ((x >> 1) & 0x55555555)
1934   //   ((x << 1) & 0xAAAAAAAA)
1935   bool SHLExpMask = IsSHL;
1936
1937   if (!Mask) {
1938     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
1939     // the mask is all ones: consume that now.
1940     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
1941       Mask = Src.getConstantOperandVal(1);
1942       Src = Src.getOperand(0);
1943       // The expected mask is now in fact shifted left for SRL, so reverse the
1944       // decision.
1945       //   ((x & 0xAAAAAAAA) >> 1)
1946       //   ((x & 0x55555555) << 1)
1947       SHLExpMask = !SHLExpMask;
1948     } else {
1949       // Use a default shifted mask of all-ones if there's no AND, truncated
1950       // down to the expected width. This simplifies the logic later on.
1951       Mask = maskTrailingOnes<uint64_t>(Width);
1952       *Mask &= (IsSHL ?
*Mask << ShAmt : *Mask >> ShAmt);
1953     }
1954   }
1955
1956   if (SHLExpMask)
1957     ExpMask <<= ShAmt;
1958
1959   if (Mask != ExpMask)
1960     return None;
1961
1962   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
1963 }
1964
1965 // Match the following pattern as a GREVI(W) operation
1966 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
1967 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1968                                const RISCVSubtarget &Subtarget) {
1969   EVT VT = Op.getValueType();
1970
1971   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1972     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1973     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1974     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1975       SDLoc DL(Op);
1976       return DAG.getNode(
1977           RISCVISD::GREVI, DL, VT, LHS->Op,
1978           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1979     }
1980   }
1981   return SDValue();
1982 }
1983
1984 // Matches any of the following patterns as a GORCI(W) operation
1985 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
1986 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
1987 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
1988 // Note that with the variant of 3.,
1989 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
1990 // the inner pattern will first be matched as GREVI and then the outer
1991 // pattern will be matched to GORC via the first rule above.
1992 // 4.  (or (rotl/rotr x, bitwidth/2), x)
1993 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
1994                                const RISCVSubtarget &Subtarget) {
1995   EVT VT = Op.getValueType();
1996
1997   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1998     SDLoc DL(Op);
1999     SDValue Op0 = Op.getOperand(0);
2000     SDValue Op1 = Op.getOperand(1);
2001
2002     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2003       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2004           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2005         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2006       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
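      // For example, for i32 (or (rotl x, 16), x) computes
      // x | (x << 16) | (x >> 16), which is exactly GORCI x, 16.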
2007 if ((Reverse.getOpcode() == ISD::ROTL || 2008 Reverse.getOpcode() == ISD::ROTR) && 2009 Reverse.getOperand(0) == X && 2010 isa<ConstantSDNode>(Reverse.getOperand(1))) { 2011 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 2012 if (RotAmt == (VT.getSizeInBits() / 2)) 2013 return DAG.getNode( 2014 RISCVISD::GORCI, DL, VT, X, 2015 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2016 } 2017 return SDValue(); 2018 }; 2019 2020 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2021 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2022 return V; 2023 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2024 return V; 2025 2026 // OR is commutable so canonicalize its OR operand to the left 2027 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2028 std::swap(Op0, Op1); 2029 if (Op0.getOpcode() != ISD::OR) 2030 return SDValue(); 2031 SDValue OrOp0 = Op0.getOperand(0); 2032 SDValue OrOp1 = Op0.getOperand(1); 2033 auto LHS = matchRISCVBitmanipPat(OrOp0); 2034 // OR is commutable so swap the operands and try again: x might have been 2035 // on the left 2036 if (!LHS) { 2037 std::swap(OrOp0, OrOp1); 2038 LHS = matchRISCVBitmanipPat(OrOp0); 2039 } 2040 auto RHS = matchRISCVBitmanipPat(Op1); 2041 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2042 return DAG.getNode( 2043 RISCVISD::GORCI, DL, VT, LHS->Op, 2044 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2045 } 2046 } 2047 return SDValue(); 2048 } 2049 2050 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2051 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2052 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2053 // not undo itself, but they are redundant. 2054 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2055 unsigned ShAmt1 = N->getConstantOperandVal(1); 2056 SDValue Src = N->getOperand(0); 2057 2058 if (Src.getOpcode() != N->getOpcode()) 2059 return SDValue(); 2060 2061 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2062 Src = Src.getOperand(0); 2063 2064 unsigned CombinedShAmt; 2065 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2066 CombinedShAmt = ShAmt1 | ShAmt2; 2067 else 2068 CombinedShAmt = ShAmt1 ^ ShAmt2; 2069 2070 if (CombinedShAmt == 0) 2071 return Src; 2072 2073 SDLoc DL(N); 2074 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2075 DAG.getTargetConstant(CombinedShAmt, DL, 2076 N->getOperand(1).getValueType())); 2077 } 2078 2079 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2080 DAGCombinerInfo &DCI) const { 2081 SelectionDAG &DAG = DCI.DAG; 2082 2083 switch (N->getOpcode()) { 2084 default: 2085 break; 2086 case RISCVISD::SplitF64: { 2087 SDValue Op0 = N->getOperand(0); 2088 // If the input to SplitF64 is just BuildPairF64 then the operation is 2089 // redundant. Instead, use BuildPairF64's operands directly. 2090 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2091 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2092 2093 SDLoc DL(N); 2094 2095 // It's cheaper to materialise two 32-bit integers than to load a double 2096 // from the constant pool and transfer it to integer registers through the 2097 // stack. 
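    // For example, a SplitF64 of the f64 constant 1.0 (0x3FF0000000000000)
    // becomes the immediate pair Lo = 0x00000000, Hi = 0x3FF00000.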
2098 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 2099 APInt V = C->getValueAPF().bitcastToAPInt(); 2100 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 2101 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 2102 return DCI.CombineTo(N, Lo, Hi); 2103 } 2104 2105 // This is a target-specific version of a DAGCombine performed in 2106 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2107 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2108 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2109 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2110 !Op0.getNode()->hasOneUse()) 2111 break; 2112 SDValue NewSplitF64 = 2113 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 2114 Op0.getOperand(0)); 2115 SDValue Lo = NewSplitF64.getValue(0); 2116 SDValue Hi = NewSplitF64.getValue(1); 2117 APInt SignBit = APInt::getSignMask(32); 2118 if (Op0.getOpcode() == ISD::FNEG) { 2119 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 2120 DAG.getConstant(SignBit, DL, MVT::i32)); 2121 return DCI.CombineTo(N, Lo, NewHi); 2122 } 2123 assert(Op0.getOpcode() == ISD::FABS); 2124 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 2125 DAG.getConstant(~SignBit, DL, MVT::i32)); 2126 return DCI.CombineTo(N, Lo, NewHi); 2127 } 2128 case RISCVISD::SLLW: 2129 case RISCVISD::SRAW: 2130 case RISCVISD::SRLW: 2131 case RISCVISD::ROLW: 2132 case RISCVISD::RORW: { 2133 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 2134 SDValue LHS = N->getOperand(0); 2135 SDValue RHS = N->getOperand(1); 2136 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 2137 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 2138 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 2139 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 2140 if (N->getOpcode() != ISD::DELETED_NODE) 2141 DCI.AddToWorklist(N); 2142 return SDValue(N, 0); 2143 } 2144 break; 2145 } 2146 case RISCVISD::FSLW: 2147 case RISCVISD::FSRW: { 2148 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 2149 // read. 2150 SDValue Op0 = N->getOperand(0); 2151 SDValue Op1 = N->getOperand(1); 2152 SDValue ShAmt = N->getOperand(2); 2153 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2154 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2155 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2156 SimplifyDemandedBits(Op1, OpMask, DCI) || 2157 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2158 if (N->getOpcode() != ISD::DELETED_NODE) 2159 DCI.AddToWorklist(N); 2160 return SDValue(N, 0); 2161 } 2162 break; 2163 } 2164 case RISCVISD::GREVIW: 2165 case RISCVISD::GORCIW: { 2166 // Only the lower 32 bits of the first operand are read 2167 SDValue Op0 = N->getOperand(0); 2168 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2169 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2170 if (N->getOpcode() != ISD::DELETED_NODE) 2171 DCI.AddToWorklist(N); 2172 return SDValue(N, 0); 2173 } 2174 2175 return combineGREVI_GORCI(N, DCI.DAG); 2176 } 2177 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2178 SDLoc DL(N); 2179 SDValue Op0 = N->getOperand(0); 2180 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2181 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2182 // of the FMV_W_X_RV64 operand. 
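    // (The FMV_W_X_RV64 operand is already an i64 containing the required
    // bits, so it can simply be returned; no new node needs to be created.)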
2183 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2184 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2185 "Unexpected value type!"); 2186 return Op0.getOperand(0); 2187 } 2188 2189 // This is a target-specific version of a DAGCombine performed in 2190 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2191 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2192 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2193 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2194 !Op0.getNode()->hasOneUse()) 2195 break; 2196 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2197 Op0.getOperand(0)); 2198 APInt SignBit = APInt::getSignMask(32).sext(64); 2199 if (Op0.getOpcode() == ISD::FNEG) 2200 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2201 DAG.getConstant(SignBit, DL, MVT::i64)); 2202 2203 assert(Op0.getOpcode() == ISD::FABS); 2204 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2205 DAG.getConstant(~SignBit, DL, MVT::i64)); 2206 } 2207 case RISCVISD::GREVI: 2208 case RISCVISD::GORCI: 2209 return combineGREVI_GORCI(N, DCI.DAG); 2210 case ISD::OR: 2211 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2212 return GREV; 2213 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2214 return GORC; 2215 break; 2216 case RISCVISD::SELECT_CC: { 2217 // Transform 2218 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2219 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2220 // This can occur when legalizing some floating point comparisons. 2221 SDValue LHS = N->getOperand(0); 2222 SDValue RHS = N->getOperand(1); 2223 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2224 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2225 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2226 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2227 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2228 SDLoc DL(N); 2229 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2230 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2231 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2232 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2233 N->getOperand(4)}); 2234 } 2235 break; 2236 } 2237 case ISD::SETCC: { 2238 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2239 // Comparing with 0 may allow us to fold into bnez/beqz. 
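    // For example, (setcc (and X, 1), 1, setne) becomes
    // (setcc (and X, 1), 0, seteq), since the AND result is provably 0 or 1.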
2240 SDValue LHS = N->getOperand(0); 2241 SDValue RHS = N->getOperand(1); 2242 if (LHS.getValueType().isScalableVector()) 2243 break; 2244 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2245 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2246 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2247 DAG.MaskedValueIsZero(LHS, Mask)) { 2248 SDLoc DL(N); 2249 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2250 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2251 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2252 } 2253 break; 2254 } 2255 } 2256 2257 return SDValue(); 2258 } 2259 2260 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2261 const SDNode *N, CombineLevel Level) const { 2262 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2263 // materialised in fewer instructions than `(OP _, c1)`: 2264 // 2265 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2266 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2267 SDValue N0 = N->getOperand(0); 2268 EVT Ty = N0.getValueType(); 2269 if (Ty.isScalarInteger() && 2270 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2271 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2272 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2273 if (C1 && C2) { 2274 const APInt &C1Int = C1->getAPIntValue(); 2275 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2276 2277 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2278 // and the combine should happen, to potentially allow further combines 2279 // later. 2280 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2281 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2282 return true; 2283 2284 // We can materialise `c1` in an add immediate, so it's "free", and the 2285 // combine should be prevented. 2286 if (C1Int.getMinSignedBits() <= 64 && 2287 isLegalAddImmediate(C1Int.getSExtValue())) 2288 return false; 2289 2290 // Neither constant will fit into an immediate, so find materialisation 2291 // costs. 2292 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2293 Subtarget.is64Bit()); 2294 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2295 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2296 2297 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2298 // combine should be prevented. 2299 if (C1Cost < ShiftedC1Cost) 2300 return false; 2301 } 2302 } 2303 return true; 2304 } 2305 2306 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2307 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2308 TargetLoweringOpt &TLO) const { 2309 // Delay this optimization as late as possible. 2310 if (!TLO.LegalOps) 2311 return false; 2312 2313 EVT VT = Op.getValueType(); 2314 if (VT.isVector()) 2315 return false; 2316 2317 // Only handle AND for now. 2318 if (Op.getOpcode() != ISD::AND) 2319 return false; 2320 2321 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2322 if (!C) 2323 return false; 2324 2325 const APInt &Mask = C->getAPIntValue(); 2326 2327 // Clear all non-demanded bits initially. 2328 APInt ShrunkMask = Mask & DemandedBits; 2329 2330 // If the shrunk mask fits in sign extended 12 bits, let the target 2331 // independent code apply it. 2332 if (ShrunkMask.isSignedIntN(12)) 2333 return false; 2334 2335 // Try to make a smaller immediate by setting undemanded bits. 2336 2337 // We need to be able to make a negative number through a combination of mask 2338 // and undemanded bits. 
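  // For example, with Mask = 0xFF00 and DemandedBits = 0xFFFF the shrunk mask
  // 0xFF00 is too large for ANDI, but the expanded mask computed below
  // (Mask | ~DemandedBits) is -256, so the constant can be replaced with -256
  // and the AND selected as a single ANDI.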
2339 APInt ExpandedMask = Mask | ~DemandedBits; 2340 if (!ExpandedMask.isNegative()) 2341 return false; 2342 2343 // What is the fewest number of bits we need to represent the negative number. 2344 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2345 2346 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2347 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2348 APInt NewMask = ShrunkMask; 2349 if (MinSignedBits <= 12) 2350 NewMask.setBitsFrom(11); 2351 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2352 NewMask.setBitsFrom(31); 2353 else 2354 return false; 2355 2356 // Sanity check that our new mask is a subset of the demanded mask. 2357 assert(NewMask.isSubsetOf(ExpandedMask)); 2358 2359 // If we aren't changing the mask, just return true to keep it and prevent 2360 // the caller from optimizing. 2361 if (NewMask == Mask) 2362 return true; 2363 2364 // Replace the constant with the new mask. 2365 SDLoc DL(Op); 2366 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2367 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2368 return TLO.CombineTo(Op, NewOp); 2369 } 2370 2371 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2372 KnownBits &Known, 2373 const APInt &DemandedElts, 2374 const SelectionDAG &DAG, 2375 unsigned Depth) const { 2376 unsigned BitWidth = Known.getBitWidth(); 2377 unsigned Opc = Op.getOpcode(); 2378 assert((Opc >= ISD::BUILTIN_OP_END || 2379 Opc == ISD::INTRINSIC_WO_CHAIN || 2380 Opc == ISD::INTRINSIC_W_CHAIN || 2381 Opc == ISD::INTRINSIC_VOID) && 2382 "Should use MaskedValueIsZero if you don't know whether Op" 2383 " is a target node!"); 2384 2385 Known.resetAll(); 2386 switch (Opc) { 2387 default: break; 2388 case RISCVISD::REMUW: { 2389 KnownBits Known2; 2390 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2391 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2392 // We only care about the lower 32 bits. 2393 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2394 // Restore the original width by sign extending. 2395 Known = Known.sext(BitWidth); 2396 break; 2397 } 2398 case RISCVISD::DIVUW: { 2399 KnownBits Known2; 2400 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2401 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2402 // We only care about the lower 32 bits. 2403 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2404 // Restore the original width by sign extending. 2405 Known = Known.sext(BitWidth); 2406 break; 2407 } 2408 case RISCVISD::READ_VLENB: 2409 // We assume VLENB is at least 8 bytes. 2410 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2411 Known.Zero.setLowBits(3); 2412 break; 2413 } 2414 } 2415 2416 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2417 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2418 unsigned Depth) const { 2419 switch (Op.getOpcode()) { 2420 default: 2421 break; 2422 case RISCVISD::SLLW: 2423 case RISCVISD::SRAW: 2424 case RISCVISD::SRLW: 2425 case RISCVISD::DIVW: 2426 case RISCVISD::DIVUW: 2427 case RISCVISD::REMUW: 2428 case RISCVISD::ROLW: 2429 case RISCVISD::RORW: 2430 case RISCVISD::GREVIW: 2431 case RISCVISD::GORCIW: 2432 case RISCVISD::FSLW: 2433 case RISCVISD::FSRW: 2434 // TODO: As the result is sign-extended, this is conservatively correct. 
A 2435 // more precise answer could be calculated for SRAW depending on known 2436 // bits in the shift amount. 2437 return 33; 2438 case RISCVISD::VMV_X_S: 2439 // The number of sign bits of the scalar result is computed by obtaining the 2440 // element type of the input vector operand, subtracting its width from the 2441 // XLEN, and then adding one (sign bit within the element type). If the 2442 // element type is wider than XLen, the least-significant XLEN bits are 2443 // taken. 2444 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 2445 return 1; 2446 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 2447 } 2448 2449 return 1; 2450 } 2451 2452 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 2453 MachineBasicBlock *BB) { 2454 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 2455 2456 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 2457 // Should the count have wrapped while it was being read, we need to try 2458 // again. 2459 // ... 2460 // read: 2461 // rdcycleh x3 # load high word of cycle 2462 // rdcycle x2 # load low word of cycle 2463 // rdcycleh x4 # load high word of cycle 2464 // bne x3, x4, read # check if high word reads match, otherwise try again 2465 // ... 2466 2467 MachineFunction &MF = *BB->getParent(); 2468 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2469 MachineFunction::iterator It = ++BB->getIterator(); 2470 2471 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2472 MF.insert(It, LoopMBB); 2473 2474 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2475 MF.insert(It, DoneMBB); 2476 2477 // Transfer the remainder of BB and its successor edges to DoneMBB. 2478 DoneMBB->splice(DoneMBB->begin(), BB, 2479 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 2480 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 2481 2482 BB->addSuccessor(LoopMBB); 2483 2484 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2485 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 2486 Register LoReg = MI.getOperand(0).getReg(); 2487 Register HiReg = MI.getOperand(1).getReg(); 2488 DebugLoc DL = MI.getDebugLoc(); 2489 2490 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 2491 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 2492 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2493 .addReg(RISCV::X0); 2494 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 2495 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 2496 .addReg(RISCV::X0); 2497 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 2498 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2499 .addReg(RISCV::X0); 2500 2501 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 2502 .addReg(HiReg) 2503 .addReg(ReadAgainReg) 2504 .addMBB(LoopMBB); 2505 2506 LoopMBB->addSuccessor(LoopMBB); 2507 LoopMBB->addSuccessor(DoneMBB); 2508 2509 MI.eraseFromParent(); 2510 2511 return DoneMBB; 2512 } 2513 2514 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 2515 MachineBasicBlock *BB) { 2516 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 2517 2518 MachineFunction &MF = *BB->getParent(); 2519 DebugLoc DL = MI.getDebugLoc(); 2520 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2521 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2522 Register LoReg = MI.getOperand(0).getReg(); 2523 Register HiReg = MI.getOperand(1).getReg(); 2524 Register 
SrcReg = MI.getOperand(2).getReg(); 2525 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 2526 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2527 2528 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 2529 RI); 2530 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2531 MachineMemOperand *MMOLo = 2532 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 2533 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2534 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 2535 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 2536 .addFrameIndex(FI) 2537 .addImm(0) 2538 .addMemOperand(MMOLo); 2539 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 2540 .addFrameIndex(FI) 2541 .addImm(4) 2542 .addMemOperand(MMOHi); 2543 MI.eraseFromParent(); // The pseudo instruction is gone now. 2544 return BB; 2545 } 2546 2547 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 2548 MachineBasicBlock *BB) { 2549 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 2550 "Unexpected instruction"); 2551 2552 MachineFunction &MF = *BB->getParent(); 2553 DebugLoc DL = MI.getDebugLoc(); 2554 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2555 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2556 Register DstReg = MI.getOperand(0).getReg(); 2557 Register LoReg = MI.getOperand(1).getReg(); 2558 Register HiReg = MI.getOperand(2).getReg(); 2559 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 2560 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2561 2562 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2563 MachineMemOperand *MMOLo = 2564 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 2565 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2566 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 2567 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2568 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 2569 .addFrameIndex(FI) 2570 .addImm(0) 2571 .addMemOperand(MMOLo); 2572 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2573 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 2574 .addFrameIndex(FI) 2575 .addImm(4) 2576 .addMemOperand(MMOHi); 2577 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 2578 MI.eraseFromParent(); // The pseudo instruction is gone now. 2579 return BB; 2580 } 2581 2582 static bool isSelectPseudo(MachineInstr &MI) { 2583 switch (MI.getOpcode()) { 2584 default: 2585 return false; 2586 case RISCV::Select_GPR_Using_CC_GPR: 2587 case RISCV::Select_FPR16_Using_CC_GPR: 2588 case RISCV::Select_FPR32_Using_CC_GPR: 2589 case RISCV::Select_FPR64_Using_CC_GPR: 2590 return true; 2591 } 2592 } 2593 2594 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 2595 MachineBasicBlock *BB) { 2596 // To "insert" Select_* instructions, we actually have to insert the triangle 2597 // control-flow pattern. The incoming instructions know the destination vreg 2598 // to set, the condition code register to branch on, the true/false values to 2599 // select between, and the condcode to use to select the appropriate branch. 2600 // 2601 // We produce the following control flow: 2602 // HeadMBB 2603 // | \ 2604 // | IfFalseMBB 2605 // | / 2606 // TailMBB 2607 // 2608 // When we find a sequence of selects we attempt to optimize their emission 2609 // by sharing the control flow. 
Currently we only handle cases where we have 2610 // multiple selects with the exact same condition (same LHS, RHS and CC). 2611 // The selects may be interleaved with other instructions if the other 2612 // instructions meet some requirements we deem safe: 2613 // - They are debug instructions. Otherwise, 2614 // - They do not have side-effects, do not access memory and their inputs do 2615 // not depend on the results of the select pseudo-instructions. 2616 // The TrueV/FalseV operands of the selects cannot depend on the result of 2617 // previous selects in the sequence. 2618 // These conditions could be further relaxed. See the X86 target for a 2619 // related approach and more information. 2620 Register LHS = MI.getOperand(1).getReg(); 2621 Register RHS = MI.getOperand(2).getReg(); 2622 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 2623 2624 SmallVector<MachineInstr *, 4> SelectDebugValues; 2625 SmallSet<Register, 4> SelectDests; 2626 SelectDests.insert(MI.getOperand(0).getReg()); 2627 2628 MachineInstr *LastSelectPseudo = &MI; 2629 2630 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 2631 SequenceMBBI != E; ++SequenceMBBI) { 2632 if (SequenceMBBI->isDebugInstr()) 2633 continue; 2634 else if (isSelectPseudo(*SequenceMBBI)) { 2635 if (SequenceMBBI->getOperand(1).getReg() != LHS || 2636 SequenceMBBI->getOperand(2).getReg() != RHS || 2637 SequenceMBBI->getOperand(3).getImm() != CC || 2638 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 2639 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 2640 break; 2641 LastSelectPseudo = &*SequenceMBBI; 2642 SequenceMBBI->collectDebugValues(SelectDebugValues); 2643 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 2644 } else { 2645 if (SequenceMBBI->hasUnmodeledSideEffects() || 2646 SequenceMBBI->mayLoadOrStore()) 2647 break; 2648 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 2649 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 2650 })) 2651 break; 2652 } 2653 } 2654 2655 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 2656 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2657 DebugLoc DL = MI.getDebugLoc(); 2658 MachineFunction::iterator I = ++BB->getIterator(); 2659 2660 MachineBasicBlock *HeadMBB = BB; 2661 MachineFunction *F = BB->getParent(); 2662 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 2663 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 2664 2665 F->insert(I, IfFalseMBB); 2666 F->insert(I, TailMBB); 2667 2668 // Transfer debug instructions associated with the selects to TailMBB. 2669 for (MachineInstr *DebugInstr : SelectDebugValues) { 2670 TailMBB->push_back(DebugInstr->removeFromParent()); 2671 } 2672 2673 // Move all instructions after the sequence to TailMBB. 2674 TailMBB->splice(TailMBB->end(), HeadMBB, 2675 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 2676 // Update machine-CFG edges by transferring all successors of the current 2677 // block to the new block which will contain the Phi nodes for the selects. 2678 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 2679 // Set the successors for HeadMBB. 2680 HeadMBB->addSuccessor(IfFalseMBB); 2681 HeadMBB->addSuccessor(TailMBB); 2682 2683 // Insert appropriate branch. 2684 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 2685 2686 BuildMI(HeadMBB, DL, TII.get(Opcode)) 2687 .addReg(LHS) 2688 .addReg(RHS) 2689 .addMBB(TailMBB); 2690 2691 // IfFalseMBB just falls through to TailMBB. 
2692 IfFalseMBB->addSuccessor(TailMBB); 2693 2694 // Create PHIs for all of the select pseudo-instructions. 2695 auto SelectMBBI = MI.getIterator(); 2696 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 2697 auto InsertionPoint = TailMBB->begin(); 2698 while (SelectMBBI != SelectEnd) { 2699 auto Next = std::next(SelectMBBI); 2700 if (isSelectPseudo(*SelectMBBI)) { 2701 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 2702 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 2703 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 2704 .addReg(SelectMBBI->getOperand(4).getReg()) 2705 .addMBB(HeadMBB) 2706 .addReg(SelectMBBI->getOperand(5).getReg()) 2707 .addMBB(IfFalseMBB); 2708 SelectMBBI->eraseFromParent(); 2709 } 2710 SelectMBBI = Next; 2711 } 2712 2713 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 2714 return TailMBB; 2715 } 2716 2717 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 2718 int VLIndex, unsigned SEWIndex, 2719 RISCVVLMUL VLMul, bool WritesElement0) { 2720 MachineFunction &MF = *BB->getParent(); 2721 DebugLoc DL = MI.getDebugLoc(); 2722 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2723 2724 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 2725 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2726 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 2727 2728 MachineRegisterInfo &MRI = MF.getRegInfo(); 2729 2730 // VL and VTYPE are alive here. 2731 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 2732 2733 if (VLIndex >= 0) { 2734 // Set VL (rs1 != X0). 2735 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2736 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 2737 .addReg(MI.getOperand(VLIndex).getReg()); 2738 } else 2739 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 2740 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 2741 .addReg(RISCV::X0, RegState::Kill); 2742 2743 // Default to tail agnostic unless the destination is tied to a source. In 2744 // that case the user would have some control over the tail values. The tail 2745 // policy is also ignored on instructions that only update element 0 like 2746 // vmv.s.x or reductions so use agnostic there to match the common case. 2747 // FIXME: This is conservatively correct, but we might want to detect that 2748 // the input is undefined. 2749 bool TailAgnostic = true; 2750 unsigned UseOpIdx; 2751 if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) { 2752 TailAgnostic = false; 2753 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 2754 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 2755 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 2756 if (UseMI && UseMI->isImplicitDef()) 2757 TailAgnostic = true; 2758 } 2759 2760 // For simplicity we reuse the vtype representation here. 
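  // That is, the immediate operand given to PseudoVSETVLI below uses the same
  // (vlmul, sew, tail/mask agnostic) encoding that is ultimately written to
  // the vtype CSR.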
2761 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 2762 /*TailAgnostic*/ TailAgnostic, 2763 /*MaskAgnostic*/ false)); 2764 2765 // Remove (now) redundant operands from pseudo 2766 MI.getOperand(SEWIndex).setImm(-1); 2767 if (VLIndex >= 0) { 2768 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 2769 MI.getOperand(VLIndex).setIsKill(false); 2770 } 2771 2772 return BB; 2773 } 2774 2775 MachineBasicBlock * 2776 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 2777 MachineBasicBlock *BB) const { 2778 uint64_t TSFlags = MI.getDesc().TSFlags; 2779 2780 if (TSFlags & RISCVII::HasSEWOpMask) { 2781 unsigned NumOperands = MI.getNumExplicitOperands(); 2782 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 2783 unsigned SEWIndex = NumOperands - 1; 2784 bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask; 2785 2786 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 2787 RISCVII::VLMulShift); 2788 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0); 2789 } 2790 2791 switch (MI.getOpcode()) { 2792 default: 2793 llvm_unreachable("Unexpected instr type to insert"); 2794 case RISCV::ReadCycleWide: 2795 assert(!Subtarget.is64Bit() && 2796 "ReadCycleWrite is only to be used on riscv32"); 2797 return emitReadCycleWidePseudo(MI, BB); 2798 case RISCV::Select_GPR_Using_CC_GPR: 2799 case RISCV::Select_FPR16_Using_CC_GPR: 2800 case RISCV::Select_FPR32_Using_CC_GPR: 2801 case RISCV::Select_FPR64_Using_CC_GPR: 2802 return emitSelectPseudo(MI, BB); 2803 case RISCV::BuildPairF64Pseudo: 2804 return emitBuildPairF64Pseudo(MI, BB); 2805 case RISCV::SplitF64Pseudo: 2806 return emitSplitF64Pseudo(MI, BB); 2807 } 2808 } 2809 2810 // Calling Convention Implementation. 2811 // The expectations for frontend ABI lowering vary from target to target. 2812 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 2813 // details, but this is a longer term goal. For now, we simply try to keep the 2814 // role of the frontend as simple and well-defined as possible. The rules can 2815 // be summarised as: 2816 // * Never split up large scalar arguments. We handle them here. 2817 // * If a hardfloat calling convention is being used, and the struct may be 2818 // passed in a pair of registers (fp+fp, int+fp), and both registers are 2819 // available, then pass as two separate arguments. If either the GPRs or FPRs 2820 // are exhausted, then pass according to the rule below. 2821 // * If a struct could never be passed in registers or directly in a stack 2822 // slot (as it is larger than 2*XLEN and the floating point rules don't 2823 // apply), then pass it using a pointer with the byval attribute. 2824 // * If a struct is less than 2*XLEN, then coerce to either a two-element 2825 // word-sized array or a 2*XLEN scalar (depending on alignment). 2826 // * The frontend can determine whether a struct is returned by reference or 2827 // not based on its size and fields. If it will be returned by reference, the 2828 // frontend must modify the prototype so a pointer with the sret annotation is 2829 // passed as the first argument. This is not necessary for large scalar 2830 // returns. 2831 // * Struct return values and varargs should be coerced to structs containing 2832 // register-size fields in the same situations they would be for fixed 2833 // arguments. 
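// For example (illustrative): under the lp64d hard-float ABI, a struct
// { double d; int i; } is passed as two separate arguments (d in an FPR, i in
// a GPR) while registers of both kinds remain available, and falls back to
// the integer calling convention once either register file is exhausted.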
2834 2835 static const MCPhysReg ArgGPRs[] = { 2836 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 2837 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 2838 }; 2839 static const MCPhysReg ArgFPR16s[] = { 2840 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 2841 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 2842 }; 2843 static const MCPhysReg ArgFPR32s[] = { 2844 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 2845 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 2846 }; 2847 static const MCPhysReg ArgFPR64s[] = { 2848 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 2849 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 2850 }; 2851 // This is an interim calling convention and it may be changed in the future. 2852 static const MCPhysReg ArgVRs[] = { 2853 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 2854 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 2855 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 2856 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 2857 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 2858 RISCV::V20M2, RISCV::V22M2}; 2859 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 2860 RISCV::V20M4}; 2861 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 2862 2863 // Pass a 2*XLEN argument that has been split into two XLEN values through 2864 // registers or the stack as necessary. 2865 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 2866 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 2867 MVT ValVT2, MVT LocVT2, 2868 ISD::ArgFlagsTy ArgFlags2) { 2869 unsigned XLenInBytes = XLen / 8; 2870 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2871 // At least one half can be passed via register. 2872 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 2873 VA1.getLocVT(), CCValAssign::Full)); 2874 } else { 2875 // Both halves must be passed on the stack, with proper alignment. 2876 Align StackAlign = 2877 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 2878 State.addLoc( 2879 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 2880 State.AllocateStack(XLenInBytes, StackAlign), 2881 VA1.getLocVT(), CCValAssign::Full)); 2882 State.addLoc(CCValAssign::getMem( 2883 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2884 LocVT2, CCValAssign::Full)); 2885 return false; 2886 } 2887 2888 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2889 // The second half can also be passed via register. 2890 State.addLoc( 2891 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 2892 } else { 2893 // The second half is passed via the stack, without additional alignment. 2894 State.addLoc(CCValAssign::getMem( 2895 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2896 LocVT2, CCValAssign::Full)); 2897 } 2898 2899 return false; 2900 } 2901 2902 // Implements the RISC-V calling convention. Returns true upon failure. 2903 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 2904 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 2905 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 2906 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 2907 Optional<unsigned> FirstMaskArgument) { 2908 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 2909 assert(XLen == 32 || XLen == 64); 2910 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 2911 2912 // Any return value split in to more than two values can't be returned 2913 // directly. 2914 if (IsRet && ValNo > 1) 2915 return true; 2916 2917 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 2918 // variadic argument, or if no F16/F32 argument registers are available. 2919 bool UseGPRForF16_F32 = true; 2920 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 2921 // variadic argument, or if no F64 argument registers are available. 2922 bool UseGPRForF64 = true; 2923 2924 switch (ABI) { 2925 default: 2926 llvm_unreachable("Unexpected ABI"); 2927 case RISCVABI::ABI_ILP32: 2928 case RISCVABI::ABI_LP64: 2929 break; 2930 case RISCVABI::ABI_ILP32F: 2931 case RISCVABI::ABI_LP64F: 2932 UseGPRForF16_F32 = !IsFixed; 2933 break; 2934 case RISCVABI::ABI_ILP32D: 2935 case RISCVABI::ABI_LP64D: 2936 UseGPRForF16_F32 = !IsFixed; 2937 UseGPRForF64 = !IsFixed; 2938 break; 2939 } 2940 2941 // FPR16, FPR32, and FPR64 alias each other. 2942 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 2943 UseGPRForF16_F32 = true; 2944 UseGPRForF64 = true; 2945 } 2946 2947 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 2948 // similar local variables rather than directly checking against the target 2949 // ABI. 2950 2951 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 2952 LocVT = XLenVT; 2953 LocInfo = CCValAssign::BCvt; 2954 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 2955 LocVT = MVT::i64; 2956 LocInfo = CCValAssign::BCvt; 2957 } 2958 2959 // If this is a variadic argument, the RISC-V calling convention requires 2960 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 2961 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 2962 // be used regardless of whether the original argument was split during 2963 // legalisation or not. The argument will not be passed by registers if the 2964 // original type is larger than 2*XLEN, so the register alignment rule does 2965 // not apply. 2966 unsigned TwoXLenInBytes = (2 * XLen) / 8; 2967 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 2968 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 2969 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 2970 // Skip 'odd' register if necessary. 2971 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 2972 State.AllocateReg(ArgGPRs); 2973 } 2974 2975 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 2976 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 2977 State.getPendingArgFlags(); 2978 2979 assert(PendingLocs.size() == PendingArgFlags.size() && 2980 "PendingLocs and PendingArgFlags out of sync"); 2981 2982 // Handle passing f64 on RV32D with a soft float ABI or when floating point 2983 // registers are exhausted. 2984 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 2985 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 2986 "Can't lower f64 if it is split"); 2987 // Depending on available argument GPRS, f64 may be passed in a pair of 2988 // GPRs, split between a GPR and the stack, or passed completely on the 2989 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 2990 // cases. 
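// Sketch of the three outcomes produced below: both halves in GPRs (e.g.
// a0/a1), the low half in the last free GPR with the high half spilled to a
// 4-byte stack slot, or the whole f64 in an 8-byte aligned stack slot when no
// GPRs remain.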
2991 Register Reg = State.AllocateReg(ArgGPRs); 2992 LocVT = MVT::i32; 2993 if (!Reg) { 2994 unsigned StackOffset = State.AllocateStack(8, Align(8)); 2995 State.addLoc( 2996 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 2997 return false; 2998 } 2999 if (!State.AllocateReg(ArgGPRs)) 3000 State.AllocateStack(4, Align(4)); 3001 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3002 return false; 3003 } 3004 3005 // Split arguments might be passed indirectly, so keep track of the pending 3006 // values. 3007 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 3008 LocVT = XLenVT; 3009 LocInfo = CCValAssign::Indirect; 3010 PendingLocs.push_back( 3011 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3012 PendingArgFlags.push_back(ArgFlags); 3013 if (!ArgFlags.isSplitEnd()) { 3014 return false; 3015 } 3016 } 3017 3018 // If the split argument only had two elements, it should be passed directly 3019 // in registers or on the stack. 3020 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3021 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3022 // Apply the normal calling convention rules to the first half of the 3023 // split argument. 3024 CCValAssign VA = PendingLocs[0]; 3025 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3026 PendingLocs.clear(); 3027 PendingArgFlags.clear(); 3028 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3029 ArgFlags); 3030 } 3031 3032 // Allocate to a register if possible, or else a stack slot. 3033 Register Reg; 3034 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3035 Reg = State.AllocateReg(ArgFPR16s); 3036 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3037 Reg = State.AllocateReg(ArgFPR32s); 3038 else if (ValVT == MVT::f64 && !UseGPRForF64) 3039 Reg = State.AllocateReg(ArgFPR64s); 3040 else if (ValVT.isScalableVector()) { 3041 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3042 if (RC == &RISCV::VRRegClass) { 3043 // Assign the first mask argument to V0. 3044 // This is an interim calling convention and it may be changed in the 3045 // future. 3046 if (FirstMaskArgument.hasValue() && 3047 ValNo == FirstMaskArgument.getValue()) { 3048 Reg = State.AllocateReg(RISCV::V0); 3049 } else { 3050 Reg = State.AllocateReg(ArgVRs); 3051 } 3052 } else if (RC == &RISCV::VRM2RegClass) { 3053 Reg = State.AllocateReg(ArgVRM2s); 3054 } else if (RC == &RISCV::VRM4RegClass) { 3055 Reg = State.AllocateReg(ArgVRM4s); 3056 } else if (RC == &RISCV::VRM8RegClass) { 3057 Reg = State.AllocateReg(ArgVRM8s); 3058 } else { 3059 llvm_unreachable("Unhandled class register for ValueType"); 3060 } 3061 if (!Reg) { 3062 LocInfo = CCValAssign::Indirect; 3063 // Try using a GPR to pass the address 3064 Reg = State.AllocateReg(ArgGPRs); 3065 LocVT = XLenVT; 3066 } 3067 } else 3068 Reg = State.AllocateReg(ArgGPRs); 3069 unsigned StackOffset = 3070 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3071 3072 // If we reach this point and PendingLocs is non-empty, we must be at the 3073 // end of a split argument that must be passed indirectly. 
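// (For example, an i256 argument on RV64 is split into four XLEN-sized
// pieces; every pending piece below receives the same register or stack
// location, which holds the address of the in-memory value.)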
3074 if (!PendingLocs.empty()) { 3075 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3076 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3077 3078 for (auto &It : PendingLocs) { 3079 if (Reg) 3080 It.convertToReg(Reg); 3081 else 3082 It.convertToMem(StackOffset); 3083 State.addLoc(It); 3084 } 3085 PendingLocs.clear(); 3086 PendingArgFlags.clear(); 3087 return false; 3088 } 3089 3090 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3091 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3092 "Expected an XLenVT or scalable vector types at this stage"); 3093 3094 if (Reg) { 3095 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3096 return false; 3097 } 3098 3099 // When a floating-point value is passed on the stack, no bit-conversion is 3100 // needed. 3101 if (ValVT.isFloatingPoint()) { 3102 LocVT = ValVT; 3103 LocInfo = CCValAssign::Full; 3104 } 3105 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3106 return false; 3107 } 3108 3109 template <typename ArgTy> 3110 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3111 for (const auto &ArgIdx : enumerate(Args)) { 3112 MVT ArgVT = ArgIdx.value().VT; 3113 if (ArgVT.isScalableVector() && 3114 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3115 return ArgIdx.index(); 3116 } 3117 return None; 3118 } 3119 3120 void RISCVTargetLowering::analyzeInputArgs( 3121 MachineFunction &MF, CCState &CCInfo, 3122 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3123 unsigned NumArgs = Ins.size(); 3124 FunctionType *FType = MF.getFunction().getFunctionType(); 3125 3126 Optional<unsigned> FirstMaskArgument; 3127 if (Subtarget.hasStdExtV()) 3128 FirstMaskArgument = preAssignMask(Ins); 3129 3130 for (unsigned i = 0; i != NumArgs; ++i) { 3131 MVT ArgVT = Ins[i].VT; 3132 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3133 3134 Type *ArgTy = nullptr; 3135 if (IsRet) 3136 ArgTy = FType->getReturnType(); 3137 else if (Ins[i].isOrigArg()) 3138 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3139 3140 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3141 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3142 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3143 FirstMaskArgument)) { 3144 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3145 << EVT(ArgVT).getEVTString() << '\n'); 3146 llvm_unreachable(nullptr); 3147 } 3148 } 3149 } 3150 3151 void RISCVTargetLowering::analyzeOutputArgs( 3152 MachineFunction &MF, CCState &CCInfo, 3153 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3154 CallLoweringInfo *CLI) const { 3155 unsigned NumArgs = Outs.size(); 3156 3157 Optional<unsigned> FirstMaskArgument; 3158 if (Subtarget.hasStdExtV()) 3159 FirstMaskArgument = preAssignMask(Outs); 3160 3161 for (unsigned i = 0; i != NumArgs; i++) { 3162 MVT ArgVT = Outs[i].VT; 3163 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3164 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3165 3166 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3167 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3168 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3169 FirstMaskArgument)) { 3170 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3171 << EVT(ArgVT).getEVTString() << "\n"); 3172 llvm_unreachable(nullptr); 3173 } 3174 } 3175 } 3176 3177 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3178 // values. 3179 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3180 const CCValAssign &VA, const SDLoc &DL) { 3181 switch (VA.getLocInfo()) { 3182 default: 3183 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3184 case CCValAssign::Full: 3185 break; 3186 case CCValAssign::BCvt: 3187 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3188 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3189 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3190 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3191 else 3192 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3193 break; 3194 } 3195 return Val; 3196 } 3197 3198 // The caller is responsible for loading the full value if the argument is 3199 // passed with CCValAssign::Indirect. 3200 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3201 const CCValAssign &VA, const SDLoc &DL, 3202 const RISCVTargetLowering &TLI) { 3203 MachineFunction &MF = DAG.getMachineFunction(); 3204 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3205 EVT LocVT = VA.getLocVT(); 3206 SDValue Val; 3207 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3208 Register VReg = RegInfo.createVirtualRegister(RC); 3209 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3210 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3211 3212 if (VA.getLocInfo() == CCValAssign::Indirect) 3213 return Val; 3214 3215 return convertLocVTToValVT(DAG, Val, VA, DL); 3216 } 3217 3218 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3219 const CCValAssign &VA, const SDLoc &DL) { 3220 EVT LocVT = VA.getLocVT(); 3221 3222 switch (VA.getLocInfo()) { 3223 default: 3224 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3225 case CCValAssign::Full: 3226 break; 3227 case CCValAssign::BCvt: 3228 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3229 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3230 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3231 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3232 else 3233 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3234 break; 3235 } 3236 return Val; 3237 } 3238 3239 // The caller is responsible for loading the full value if the argument is 3240 // passed with CCValAssign::Indirect. 
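// (In that case unpackFromMemLoc, like unpackFromRegLoc above, only produces
// the pointer; LowerFormalArguments then loads the actual value through it.)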
3241 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3242 const CCValAssign &VA, const SDLoc &DL) {
3243 MachineFunction &MF = DAG.getMachineFunction();
3244 MachineFrameInfo &MFI = MF.getFrameInfo();
3245 EVT LocVT = VA.getLocVT();
3246 EVT ValVT = VA.getValVT();
3247 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3248 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3249 VA.getLocMemOffset(), /*Immutable=*/true);
3250 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3251 SDValue Val;
3252
3253 ISD::LoadExtType ExtType;
3254 switch (VA.getLocInfo()) {
3255 default:
3256 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3257 case CCValAssign::Full:
3258 case CCValAssign::Indirect:
3259 case CCValAssign::BCvt:
3260 ExtType = ISD::NON_EXTLOAD;
3261 break;
3262 }
3263 Val = DAG.getExtLoad(
3264 ExtType, DL, LocVT, Chain, FIN,
3265 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3266 return Val;
3267 }
3268
3269 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3270 const CCValAssign &VA, const SDLoc &DL) {
3271 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3272 "Unexpected VA");
3273 MachineFunction &MF = DAG.getMachineFunction();
3274 MachineFrameInfo &MFI = MF.getFrameInfo();
3275 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3276
3277 if (VA.isMemLoc()) {
3278 // f64 is passed on the stack.
3279 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3280 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3281 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3282 MachinePointerInfo::getFixedStack(MF, FI));
3283 }
3284
3285 assert(VA.isRegLoc() && "Expected register VA assignment");
3286
3287 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3288 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3289 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3290 SDValue Hi;
3291 if (VA.getLocReg() == RISCV::X17) {
3292 // Second half of f64 is passed on the stack.
3293 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3294 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3295 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3296 MachinePointerInfo::getFixedStack(MF, FI));
3297 } else {
3298 // Second half of f64 is passed in another GPR.
3299 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3300 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3301 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3302 }
3303 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3304 }
3305
3306 // FastCC yields less than a 1% performance improvement on some particular
3307 // benchmarks, but it may still be beneficial in other cases.
3308 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3309 CCValAssign::LocInfo LocInfo,
3310 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3311
3312 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3313 // X5 and X6 might be used for the save-restore libcalls.
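// FastCC therefore extends the usual a0-a7 set with the temporaries t2 and
// t3-t6 (x7, x28-x31), deliberately skipping x5/x6 (t0/t1).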
3314 static const MCPhysReg GPRList[] = { 3315 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3316 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3317 RISCV::X29, RISCV::X30, RISCV::X31}; 3318 if (unsigned Reg = State.AllocateReg(GPRList)) { 3319 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3320 return false; 3321 } 3322 } 3323 3324 if (LocVT == MVT::f16) { 3325 static const MCPhysReg FPR16List[] = { 3326 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3327 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3328 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3329 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3330 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3331 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3332 return false; 3333 } 3334 } 3335 3336 if (LocVT == MVT::f32) { 3337 static const MCPhysReg FPR32List[] = { 3338 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3339 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3340 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3341 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3342 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3343 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3344 return false; 3345 } 3346 } 3347 3348 if (LocVT == MVT::f64) { 3349 static const MCPhysReg FPR64List[] = { 3350 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3351 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3352 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3353 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3354 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3355 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3356 return false; 3357 } 3358 } 3359 3360 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3361 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3362 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3363 return false; 3364 } 3365 3366 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3367 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3368 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3369 return false; 3370 } 3371 3372 return true; // CC didn't match. 3373 } 3374 3375 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3376 CCValAssign::LocInfo LocInfo, 3377 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3378 3379 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3380 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3381 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3382 static const MCPhysReg GPRList[] = { 3383 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3384 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3385 if (unsigned Reg = State.AllocateReg(GPRList)) { 3386 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3387 return false; 3388 } 3389 } 3390 3391 if (LocVT == MVT::f32) { 3392 // Pass in STG registers: F1, ..., F6 3393 // fs0 ... 
fs5
3394 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3395 RISCV::F18_F, RISCV::F19_F,
3396 RISCV::F20_F, RISCV::F21_F};
3397 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3398 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3399 return false;
3400 }
3401 }
3402
3403 if (LocVT == MVT::f64) {
3404 // Pass in STG registers: D1, ..., D6
3405 // fs6 ... fs11
3406 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3407 RISCV::F24_D, RISCV::F25_D,
3408 RISCV::F26_D, RISCV::F27_D};
3409 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3410 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3411 return false;
3412 }
3413 }
3414
3415 report_fatal_error("No registers left in GHC calling convention");
3416 return true;
3417 }
3418
3419 // Transform physical registers into virtual registers.
3420 SDValue RISCVTargetLowering::LowerFormalArguments(
3421 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3422 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3423 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3424
3425 MachineFunction &MF = DAG.getMachineFunction();
3426
3427 switch (CallConv) {
3428 default:
3429 report_fatal_error("Unsupported calling convention");
3430 case CallingConv::C:
3431 case CallingConv::Fast:
3432 break;
3433 case CallingConv::GHC:
3434 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3435 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3436 report_fatal_error(
3437 "GHC calling convention requires the F and D instruction set extensions");
3438 }
3439
3440 const Function &Func = MF.getFunction();
3441 if (Func.hasFnAttribute("interrupt")) {
3442 if (!Func.arg_empty())
3443 report_fatal_error(
3444 "Functions with the interrupt attribute cannot have arguments!");
3445
3446 StringRef Kind =
3447 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3448
3449 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3450 report_fatal_error(
3451 "Function interrupt attribute argument not supported!");
3452 }
3453
3454 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3455 MVT XLenVT = Subtarget.getXLenVT();
3456 unsigned XLenInBytes = Subtarget.getXLen() / 8;
3457 // Used with varargs to accumulate store chains.
3458 std::vector<SDValue> OutChains;
3459
3460 // Assign locations to all of the incoming arguments.
3461 SmallVector<CCValAssign, 16> ArgLocs;
3462 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3463
3464 if (CallConv == CallingConv::Fast)
3465 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3466 else if (CallConv == CallingConv::GHC)
3467 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3468 else
3469 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3470
3471 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3472 CCValAssign &VA = ArgLocs[i];
3473 SDValue ArgValue;
3474 // Passing f64 on RV32D with a soft float ABI must be handled as a special
3475 // case.
3476 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3477 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3478 else if (VA.isRegLoc())
3479 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3480 else
3481 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3482
3483 if (VA.getLocInfo() == CCValAssign::Indirect) {
3484 // If the original argument was split and passed by reference (e.g. i128
3485 // on RV32), we need to load all parts of it here (using the same
3486 // address).
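// (All pieces are loaded through ArgValue as the common base pointer, each at
// its recorded PartOffset.)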
3487 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3488 MachinePointerInfo()));
3489 unsigned ArgIndex = Ins[i].OrigArgIndex;
3490 assert(Ins[i].PartOffset == 0);
3491 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3492 CCValAssign &PartVA = ArgLocs[i + 1];
3493 unsigned PartOffset = Ins[i + 1].PartOffset;
3494 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3495 DAG.getIntPtrConstant(PartOffset, DL));
3496 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3497 MachinePointerInfo()));
3498 ++i;
3499 }
3500 continue;
3501 }
3502 InVals.push_back(ArgValue);
3503 }
3504
3505 if (IsVarArg) {
3506 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3507 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3508 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3509 MachineFrameInfo &MFI = MF.getFrameInfo();
3510 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3511 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3512
3513 // Offset of the first variable argument from stack pointer, and size of
3514 // the vararg save area. For now, the varargs save area is either zero or
3515 // large enough to hold a0-a7.
3516 int VaArgOffset, VarArgsSaveSize;
3517
3518 // If all registers are allocated, then all varargs must be passed on the
3519 // stack and we don't need to save any argregs.
3520 if (ArgRegs.size() == Idx) {
3521 VaArgOffset = CCInfo.getNextStackOffset();
3522 VarArgsSaveSize = 0;
3523 } else {
3524 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
3525 VaArgOffset = -VarArgsSaveSize;
3526 }
3527
3528 // Record the frame index of the first variable argument,
3529 // which is a value needed by VASTART.
3530 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3531 RVFI->setVarArgsFrameIndex(FI);
3532
3533 // If saving an odd number of registers then create an extra stack slot to
3534 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
3535 // offsets to even-numbered registers remain 2*XLEN-aligned.
3536 if (Idx % 2) {
3537 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
3538 VarArgsSaveSize += XLenInBytes;
3539 }
3540
3541 // Copy the integer registers that may have been used for passing varargs
3542 // to the vararg save area.
3543 for (unsigned I = Idx; I < ArgRegs.size();
3544 ++I, VaArgOffset += XLenInBytes) {
3545 const Register Reg = RegInfo.createVirtualRegister(RC);
3546 RegInfo.addLiveIn(ArgRegs[I], Reg);
3547 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
3548 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3549 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3550 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3551 MachinePointerInfo::getFixedStack(MF, FI));
3552 cast<StoreSDNode>(Store.getNode())
3553 ->getMemOperand()
3554 ->setValue((Value *)nullptr);
3555 OutChains.push_back(Store);
3556 }
3557 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3558 }
3559
3560 // All stores are grouped in one node to allow the matching between
3561 // the size of Ins and InVals. This only happens for vararg functions.
3562 if (!OutChains.empty()) {
3563 OutChains.push_back(Chain);
3564 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3565 }
3566
3567 return Chain;
3568 }
3569
3570 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3571 /// for tail call optimization.
3572 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 3573 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 3574 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 3575 const SmallVector<CCValAssign, 16> &ArgLocs) const { 3576 3577 auto &Callee = CLI.Callee; 3578 auto CalleeCC = CLI.CallConv; 3579 auto &Outs = CLI.Outs; 3580 auto &Caller = MF.getFunction(); 3581 auto CallerCC = Caller.getCallingConv(); 3582 3583 // Exception-handling functions need a special set of instructions to 3584 // indicate a return to the hardware. Tail-calling another function would 3585 // probably break this. 3586 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 3587 // should be expanded as new function attributes are introduced. 3588 if (Caller.hasFnAttribute("interrupt")) 3589 return false; 3590 3591 // Do not tail call opt if the stack is used to pass parameters. 3592 if (CCInfo.getNextStackOffset() != 0) 3593 return false; 3594 3595 // Do not tail call opt if any parameters need to be passed indirectly. 3596 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 3597 // passed indirectly. So the address of the value will be passed in a 3598 // register, or if not available, then the address is put on the stack. In 3599 // order to pass indirectly, space on the stack often needs to be allocated 3600 // in order to store the value. In this case the CCInfo.getNextStackOffset() 3601 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 3602 // are passed CCValAssign::Indirect. 3603 for (auto &VA : ArgLocs) 3604 if (VA.getLocInfo() == CCValAssign::Indirect) 3605 return false; 3606 3607 // Do not tail call opt if either caller or callee uses struct return 3608 // semantics. 3609 auto IsCallerStructRet = Caller.hasStructRetAttr(); 3610 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 3611 if (IsCallerStructRet || IsCalleeStructRet) 3612 return false; 3613 3614 // Externally-defined functions with weak linkage should not be 3615 // tail-called. The behaviour of branch instructions in this situation (as 3616 // used for tail calls) is implementation-defined, so we cannot rely on the 3617 // linker replacing the tail call with a return. 3618 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3619 const GlobalValue *GV = G->getGlobal(); 3620 if (GV->hasExternalWeakLinkage()) 3621 return false; 3622 } 3623 3624 // The callee has to preserve all registers the caller needs to preserve. 3625 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3626 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 3627 if (CalleeCC != CallerCC) { 3628 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 3629 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 3630 return false; 3631 } 3632 3633 // Byval parameters hand the function a pointer directly into the stack area 3634 // we want to reuse during a tail call. Working around this *is* possible 3635 // but less efficient and uglier in LowerCall. 3636 for (auto &Arg : Outs) 3637 if (Arg.Flags.isByVal()) 3638 return false; 3639 3640 return true; 3641 } 3642 3643 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 3644 // and output parameter nodes. 
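// Rough outline of the lowering below: analyze the call operands with the
// selected calling convention, create local copies for byval arguments, emit
// callseq_start, copy register arguments and store stack-passed arguments,
// emit the CALL (or TAIL) node, emit callseq_end, and finally copy the
// returned values out of their assigned registers.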
3645 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 3646 SmallVectorImpl<SDValue> &InVals) const { 3647 SelectionDAG &DAG = CLI.DAG; 3648 SDLoc &DL = CLI.DL; 3649 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3650 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3651 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3652 SDValue Chain = CLI.Chain; 3653 SDValue Callee = CLI.Callee; 3654 bool &IsTailCall = CLI.IsTailCall; 3655 CallingConv::ID CallConv = CLI.CallConv; 3656 bool IsVarArg = CLI.IsVarArg; 3657 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3658 MVT XLenVT = Subtarget.getXLenVT(); 3659 3660 MachineFunction &MF = DAG.getMachineFunction(); 3661 3662 // Analyze the operands of the call, assigning locations to each operand. 3663 SmallVector<CCValAssign, 16> ArgLocs; 3664 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 3665 3666 if (CallConv == CallingConv::Fast) 3667 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 3668 else if (CallConv == CallingConv::GHC) 3669 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 3670 else 3671 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 3672 3673 // Check if it's really possible to do a tail call. 3674 if (IsTailCall) 3675 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 3676 3677 if (IsTailCall) 3678 ++NumTailCalls; 3679 else if (CLI.CB && CLI.CB->isMustTailCall()) 3680 report_fatal_error("failed to perform tail call elimination on a call " 3681 "site marked musttail"); 3682 3683 // Get a count of how many bytes are to be pushed on the stack. 3684 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 3685 3686 // Create local copies for byval args 3687 SmallVector<SDValue, 8> ByValArgs; 3688 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3689 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3690 if (!Flags.isByVal()) 3691 continue; 3692 3693 SDValue Arg = OutVals[i]; 3694 unsigned Size = Flags.getByValSize(); 3695 Align Alignment = Flags.getNonZeroByValAlign(); 3696 3697 int FI = 3698 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 3699 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 3700 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 3701 3702 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 3703 /*IsVolatile=*/false, 3704 /*AlwaysInline=*/false, IsTailCall, 3705 MachinePointerInfo(), MachinePointerInfo()); 3706 ByValArgs.push_back(FIPtr); 3707 } 3708 3709 if (!IsTailCall) 3710 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 3711 3712 // Copy argument values to their designated locations. 3713 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 3714 SmallVector<SDValue, 8> MemOpChains; 3715 SDValue StackPtr; 3716 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 3717 CCValAssign &VA = ArgLocs[i]; 3718 SDValue ArgValue = OutVals[i]; 3719 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3720 3721 // Handle passing f64 on RV32D with a soft float ABI as a special case. 3722 bool IsF64OnRV32DSoftABI = 3723 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 3724 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 3725 SDValue SplitF64 = DAG.getNode( 3726 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 3727 SDValue Lo = SplitF64.getValue(0); 3728 SDValue Hi = SplitF64.getValue(1); 3729 3730 Register RegLo = VA.getLocReg(); 3731 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 3732 3733 if (RegLo == RISCV::X17) { 3734 // Second half of f64 is passed on the stack. 
3735 // Work out the address of the stack slot. 3736 if (!StackPtr.getNode()) 3737 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3738 // Emit the store. 3739 MemOpChains.push_back( 3740 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 3741 } else { 3742 // Second half of f64 is passed in another GPR. 3743 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3744 Register RegHigh = RegLo + 1; 3745 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 3746 } 3747 continue; 3748 } 3749 3750 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 3751 // as any other MemLoc. 3752 3753 // Promote the value if needed. 3754 // For now, only handle fully promoted and indirect arguments. 3755 if (VA.getLocInfo() == CCValAssign::Indirect) { 3756 // Store the argument in a stack slot and pass its address. 3757 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 3758 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 3759 MemOpChains.push_back( 3760 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 3761 MachinePointerInfo::getFixedStack(MF, FI))); 3762 // If the original argument was split (e.g. i128), we need 3763 // to store all parts of it here (and pass just one address). 3764 unsigned ArgIndex = Outs[i].OrigArgIndex; 3765 assert(Outs[i].PartOffset == 0); 3766 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 3767 SDValue PartValue = OutVals[i + 1]; 3768 unsigned PartOffset = Outs[i + 1].PartOffset; 3769 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 3770 DAG.getIntPtrConstant(PartOffset, DL)); 3771 MemOpChains.push_back( 3772 DAG.getStore(Chain, DL, PartValue, Address, 3773 MachinePointerInfo::getFixedStack(MF, FI))); 3774 ++i; 3775 } 3776 ArgValue = SpillSlot; 3777 } else { 3778 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 3779 } 3780 3781 // Use local copy if it is a byval arg. 3782 if (Flags.isByVal()) 3783 ArgValue = ByValArgs[j++]; 3784 3785 if (VA.isRegLoc()) { 3786 // Queue up the argument copies and emit them at the end. 3787 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 3788 } else { 3789 assert(VA.isMemLoc() && "Argument not register or memory"); 3790 assert(!IsTailCall && "Tail call not allowed if stack is used " 3791 "for passing parameters"); 3792 3793 // Work out the address of the stack slot. 3794 if (!StackPtr.getNode()) 3795 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3796 SDValue Address = 3797 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 3798 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 3799 3800 // Emit the store. 3801 MemOpChains.push_back( 3802 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 3803 } 3804 } 3805 3806 // Join the stores, which are independent of one another. 3807 if (!MemOpChains.empty()) 3808 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 3809 3810 SDValue Glue; 3811 3812 // Build a sequence of copy-to-reg nodes, chained and glued together. 3813 for (auto &Reg : RegsToPass) { 3814 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 3815 Glue = Chain.getValue(1); 3816 } 3817 3818 // Validate that none of the argument registers have been marked as 3819 // reserved, if so report an error. Do the same for the return address if this 3820 // is not a tailcall. 
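// (A non-tail call clobbers x1/ra with the return address, which is why a
// user-reserved x1 is diagnosed here.)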
3821 validateCCReservedRegs(RegsToPass, MF); 3822 if (!IsTailCall && 3823 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 3824 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3825 MF.getFunction(), 3826 "Return address register required, but has been reserved."}); 3827 3828 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 3829 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 3830 // split it and then direct call can be matched by PseudoCALL. 3831 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 3832 const GlobalValue *GV = S->getGlobal(); 3833 3834 unsigned OpFlags = RISCVII::MO_CALL; 3835 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 3836 OpFlags = RISCVII::MO_PLT; 3837 3838 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 3839 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3840 unsigned OpFlags = RISCVII::MO_CALL; 3841 3842 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 3843 nullptr)) 3844 OpFlags = RISCVII::MO_PLT; 3845 3846 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 3847 } 3848 3849 // The first call operand is the chain and the second is the target address. 3850 SmallVector<SDValue, 8> Ops; 3851 Ops.push_back(Chain); 3852 Ops.push_back(Callee); 3853 3854 // Add argument registers to the end of the list so that they are 3855 // known live into the call. 3856 for (auto &Reg : RegsToPass) 3857 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 3858 3859 if (!IsTailCall) { 3860 // Add a register mask operand representing the call-preserved registers. 3861 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3862 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 3863 assert(Mask && "Missing call preserved mask for calling convention"); 3864 Ops.push_back(DAG.getRegisterMask(Mask)); 3865 } 3866 3867 // Glue the call to the argument copies, if any. 3868 if (Glue.getNode()) 3869 Ops.push_back(Glue); 3870 3871 // Emit the call. 3872 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3873 3874 if (IsTailCall) { 3875 MF.getFrameInfo().setHasTailCall(); 3876 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 3877 } 3878 3879 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 3880 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 3881 Glue = Chain.getValue(1); 3882 3883 // Mark the end of the call, which is glued to the call itself. 3884 Chain = DAG.getCALLSEQ_END(Chain, 3885 DAG.getConstant(NumBytes, DL, PtrVT, true), 3886 DAG.getConstant(0, DL, PtrVT, true), 3887 Glue, DL); 3888 Glue = Chain.getValue(1); 3889 3890 // Assign locations to each value returned by this call. 3891 SmallVector<CCValAssign, 16> RVLocs; 3892 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 3893 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 3894 3895 // Copy all of the result registers out of their specified physreg. 
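// An f64 returned under an RV32 soft-float ABI comes back as two i32 halves
// in a GPR pair; the loop below copies them out separately and recombines
// them with BuildPairF64.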
3896 for (auto &VA : RVLocs) { 3897 // Copy the value out 3898 SDValue RetValue = 3899 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 3900 // Glue the RetValue to the end of the call sequence 3901 Chain = RetValue.getValue(1); 3902 Glue = RetValue.getValue(2); 3903 3904 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3905 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 3906 SDValue RetValue2 = 3907 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 3908 Chain = RetValue2.getValue(1); 3909 Glue = RetValue2.getValue(2); 3910 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 3911 RetValue2); 3912 } 3913 3914 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 3915 3916 InVals.push_back(RetValue); 3917 } 3918 3919 return Chain; 3920 } 3921 3922 bool RISCVTargetLowering::CanLowerReturn( 3923 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 3924 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 3925 SmallVector<CCValAssign, 16> RVLocs; 3926 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 3927 3928 Optional<unsigned> FirstMaskArgument; 3929 if (Subtarget.hasStdExtV()) 3930 FirstMaskArgument = preAssignMask(Outs); 3931 3932 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3933 MVT VT = Outs[i].VT; 3934 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3935 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3936 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 3937 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 3938 *this, FirstMaskArgument)) 3939 return false; 3940 } 3941 return true; 3942 } 3943 3944 SDValue 3945 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 3946 bool IsVarArg, 3947 const SmallVectorImpl<ISD::OutputArg> &Outs, 3948 const SmallVectorImpl<SDValue> &OutVals, 3949 const SDLoc &DL, SelectionDAG &DAG) const { 3950 const MachineFunction &MF = DAG.getMachineFunction(); 3951 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 3952 3953 // Stores the assignment of the return value to a location. 3954 SmallVector<CCValAssign, 16> RVLocs; 3955 3956 // Info about the registers and stack slot. 3957 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 3958 *DAG.getContext()); 3959 3960 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 3961 nullptr); 3962 3963 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 3964 report_fatal_error("GHC functions return void only"); 3965 3966 SDValue Glue; 3967 SmallVector<SDValue, 4> RetOps(1, Chain); 3968 3969 // Copy the result values into the output registers. 3970 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 3971 SDValue Val = OutVals[i]; 3972 CCValAssign &VA = RVLocs[i]; 3973 assert(VA.isRegLoc() && "Can only return in registers!"); 3974 3975 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3976 // Handle returning f64 on RV32D with a soft float ABI. 
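// The value is split with SplitF64 and returned in an adjacent GPR pair
// (normally a0/a1), mirroring the argument-passing case above.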
3977 assert(VA.isRegLoc() && "Expected return via registers"); 3978 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 3979 DAG.getVTList(MVT::i32, MVT::i32), Val); 3980 SDValue Lo = SplitF64.getValue(0); 3981 SDValue Hi = SplitF64.getValue(1); 3982 Register RegLo = VA.getLocReg(); 3983 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3984 Register RegHi = RegLo + 1; 3985 3986 if (STI.isRegisterReservedByUser(RegLo) || 3987 STI.isRegisterReservedByUser(RegHi)) 3988 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3989 MF.getFunction(), 3990 "Return value register required, but has been reserved."}); 3991 3992 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 3993 Glue = Chain.getValue(1); 3994 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 3995 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 3996 Glue = Chain.getValue(1); 3997 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 3998 } else { 3999 // Handle a 'normal' return. 4000 Val = convertValVTToLocVT(DAG, Val, VA, DL); 4001 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 4002 4003 if (STI.isRegisterReservedByUser(VA.getLocReg())) 4004 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 4005 MF.getFunction(), 4006 "Return value register required, but has been reserved."}); 4007 4008 // Guarantee that all emitted copies are stuck together. 4009 Glue = Chain.getValue(1); 4010 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4011 } 4012 } 4013 4014 RetOps[0] = Chain; // Update chain. 4015 4016 // Add the glue node if we have it. 4017 if (Glue.getNode()) { 4018 RetOps.push_back(Glue); 4019 } 4020 4021 // Interrupt service routines use different return instructions. 4022 const Function &Func = DAG.getMachineFunction().getFunction(); 4023 if (Func.hasFnAttribute("interrupt")) { 4024 if (!Func.getReturnType()->isVoidTy()) 4025 report_fatal_error( 4026 "Functions with the interrupt attribute must have void return type!"); 4027 4028 MachineFunction &MF = DAG.getMachineFunction(); 4029 StringRef Kind = 4030 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 4031 4032 unsigned RetOpc; 4033 if (Kind == "user") 4034 RetOpc = RISCVISD::URET_FLAG; 4035 else if (Kind == "supervisor") 4036 RetOpc = RISCVISD::SRET_FLAG; 4037 else 4038 RetOpc = RISCVISD::MRET_FLAG; 4039 4040 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 4041 } 4042 4043 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 4044 } 4045 4046 void RISCVTargetLowering::validateCCReservedRegs( 4047 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 4048 MachineFunction &MF) const { 4049 const Function &F = MF.getFunction(); 4050 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 4051 4052 if (llvm::any_of(Regs, [&STI](auto Reg) { 4053 return STI.isRegisterReservedByUser(Reg.first); 4054 })) 4055 F.getContext().diagnose(DiagnosticInfoUnsupported{ 4056 F, "Argument register required, but has been reserved."}); 4057 } 4058 4059 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 4060 return CI->isTailCall(); 4061 } 4062 4063 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 4064 #define NODE_NAME_CASE(NODE) \ 4065 case RISCVISD::NODE: \ 4066 return "RISCVISD::" #NODE; 4067 // clang-format off 4068 switch ((RISCVISD::NodeType)Opcode) { 4069 case RISCVISD::FIRST_NUMBER: 4070 break; 4071 NODE_NAME_CASE(RET_FLAG) 4072 NODE_NAME_CASE(URET_FLAG) 4073 NODE_NAME_CASE(SRET_FLAG) 4074 
NODE_NAME_CASE(MRET_FLAG) 4075 NODE_NAME_CASE(CALL) 4076 NODE_NAME_CASE(SELECT_CC) 4077 NODE_NAME_CASE(BuildPairF64) 4078 NODE_NAME_CASE(SplitF64) 4079 NODE_NAME_CASE(TAIL) 4080 NODE_NAME_CASE(SLLW) 4081 NODE_NAME_CASE(SRAW) 4082 NODE_NAME_CASE(SRLW) 4083 NODE_NAME_CASE(DIVW) 4084 NODE_NAME_CASE(DIVUW) 4085 NODE_NAME_CASE(REMUW) 4086 NODE_NAME_CASE(ROLW) 4087 NODE_NAME_CASE(RORW) 4088 NODE_NAME_CASE(FSLW) 4089 NODE_NAME_CASE(FSRW) 4090 NODE_NAME_CASE(FMV_H_X) 4091 NODE_NAME_CASE(FMV_X_ANYEXTH) 4092 NODE_NAME_CASE(FMV_W_X_RV64) 4093 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 4094 NODE_NAME_CASE(READ_CYCLE_WIDE) 4095 NODE_NAME_CASE(GREVI) 4096 NODE_NAME_CASE(GREVIW) 4097 NODE_NAME_CASE(GORCI) 4098 NODE_NAME_CASE(GORCIW) 4099 NODE_NAME_CASE(VMV_X_S) 4100 NODE_NAME_CASE(SPLAT_VECTOR_I64) 4101 NODE_NAME_CASE(READ_VLENB) 4102 NODE_NAME_CASE(TRUNCATE_VECTOR) 4103 NODE_NAME_CASE(VLEFF) 4104 NODE_NAME_CASE(VLEFF_MASK) 4105 NODE_NAME_CASE(VLSEGFF) 4106 NODE_NAME_CASE(VLSEGFF_MASK) 4107 NODE_NAME_CASE(READ_VL) 4108 NODE_NAME_CASE(VSLIDEUP) 4109 NODE_NAME_CASE(VSLIDEDOWN) 4110 NODE_NAME_CASE(VID) 4111 } 4112 // clang-format on 4113 return nullptr; 4114 #undef NODE_NAME_CASE 4115 } 4116 4117 /// getConstraintType - Given a constraint letter, return the type of 4118 /// constraint it is for this target. 4119 RISCVTargetLowering::ConstraintType 4120 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 4121 if (Constraint.size() == 1) { 4122 switch (Constraint[0]) { 4123 default: 4124 break; 4125 case 'f': 4126 return C_RegisterClass; 4127 case 'I': 4128 case 'J': 4129 case 'K': 4130 return C_Immediate; 4131 case 'A': 4132 return C_Memory; 4133 } 4134 } 4135 return TargetLowering::getConstraintType(Constraint); 4136 } 4137 4138 std::pair<unsigned, const TargetRegisterClass *> 4139 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 4140 StringRef Constraint, 4141 MVT VT) const { 4142 // First, see if this is a constraint that directly corresponds to a 4143 // RISCV register class. 4144 if (Constraint.size() == 1) { 4145 switch (Constraint[0]) { 4146 case 'r': 4147 return std::make_pair(0U, &RISCV::GPRRegClass); 4148 case 'f': 4149 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 4150 return std::make_pair(0U, &RISCV::FPR16RegClass); 4151 if (Subtarget.hasStdExtF() && VT == MVT::f32) 4152 return std::make_pair(0U, &RISCV::FPR32RegClass); 4153 if (Subtarget.hasStdExtD() && VT == MVT::f64) 4154 return std::make_pair(0U, &RISCV::FPR64RegClass); 4155 break; 4156 default: 4157 break; 4158 } 4159 } 4160 4161 // Clang will correctly decode the usage of register name aliases into their 4162 // official names. However, other frontends like `rustc` do not. This allows 4163 // users of these frontends to use the ABI names for registers in LLVM-style 4164 // register constraints. 
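// For example, a constraint such as "{a0}" is mapped to RISCV::X10 here
// rather than being rejected.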
4165 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 4166 .Case("{zero}", RISCV::X0) 4167 .Case("{ra}", RISCV::X1) 4168 .Case("{sp}", RISCV::X2) 4169 .Case("{gp}", RISCV::X3) 4170 .Case("{tp}", RISCV::X4) 4171 .Case("{t0}", RISCV::X5) 4172 .Case("{t1}", RISCV::X6) 4173 .Case("{t2}", RISCV::X7) 4174 .Cases("{s0}", "{fp}", RISCV::X8) 4175 .Case("{s1}", RISCV::X9) 4176 .Case("{a0}", RISCV::X10) 4177 .Case("{a1}", RISCV::X11) 4178 .Case("{a2}", RISCV::X12) 4179 .Case("{a3}", RISCV::X13) 4180 .Case("{a4}", RISCV::X14) 4181 .Case("{a5}", RISCV::X15) 4182 .Case("{a6}", RISCV::X16) 4183 .Case("{a7}", RISCV::X17) 4184 .Case("{s2}", RISCV::X18) 4185 .Case("{s3}", RISCV::X19) 4186 .Case("{s4}", RISCV::X20) 4187 .Case("{s5}", RISCV::X21) 4188 .Case("{s6}", RISCV::X22) 4189 .Case("{s7}", RISCV::X23) 4190 .Case("{s8}", RISCV::X24) 4191 .Case("{s9}", RISCV::X25) 4192 .Case("{s10}", RISCV::X26) 4193 .Case("{s11}", RISCV::X27) 4194 .Case("{t3}", RISCV::X28) 4195 .Case("{t4}", RISCV::X29) 4196 .Case("{t5}", RISCV::X30) 4197 .Case("{t6}", RISCV::X31) 4198 .Default(RISCV::NoRegister); 4199 if (XRegFromAlias != RISCV::NoRegister) 4200 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 4201 4202 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 4203 // TableGen record rather than the AsmName to choose registers for InlineAsm 4204 // constraints, plus we want to match those names to the widest floating point 4205 // register type available, manually select floating point registers here. 4206 // 4207 // The second case is the ABI name of the register, so that frontends can also 4208 // use the ABI names in register constraint lists. 4209 if (Subtarget.hasStdExtF()) { 4210 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 4211 .Cases("{f0}", "{ft0}", RISCV::F0_F) 4212 .Cases("{f1}", "{ft1}", RISCV::F1_F) 4213 .Cases("{f2}", "{ft2}", RISCV::F2_F) 4214 .Cases("{f3}", "{ft3}", RISCV::F3_F) 4215 .Cases("{f4}", "{ft4}", RISCV::F4_F) 4216 .Cases("{f5}", "{ft5}", RISCV::F5_F) 4217 .Cases("{f6}", "{ft6}", RISCV::F6_F) 4218 .Cases("{f7}", "{ft7}", RISCV::F7_F) 4219 .Cases("{f8}", "{fs0}", RISCV::F8_F) 4220 .Cases("{f9}", "{fs1}", RISCV::F9_F) 4221 .Cases("{f10}", "{fa0}", RISCV::F10_F) 4222 .Cases("{f11}", "{fa1}", RISCV::F11_F) 4223 .Cases("{f12}", "{fa2}", RISCV::F12_F) 4224 .Cases("{f13}", "{fa3}", RISCV::F13_F) 4225 .Cases("{f14}", "{fa4}", RISCV::F14_F) 4226 .Cases("{f15}", "{fa5}", RISCV::F15_F) 4227 .Cases("{f16}", "{fa6}", RISCV::F16_F) 4228 .Cases("{f17}", "{fa7}", RISCV::F17_F) 4229 .Cases("{f18}", "{fs2}", RISCV::F18_F) 4230 .Cases("{f19}", "{fs3}", RISCV::F19_F) 4231 .Cases("{f20}", "{fs4}", RISCV::F20_F) 4232 .Cases("{f21}", "{fs5}", RISCV::F21_F) 4233 .Cases("{f22}", "{fs6}", RISCV::F22_F) 4234 .Cases("{f23}", "{fs7}", RISCV::F23_F) 4235 .Cases("{f24}", "{fs8}", RISCV::F24_F) 4236 .Cases("{f25}", "{fs9}", RISCV::F25_F) 4237 .Cases("{f26}", "{fs10}", RISCV::F26_F) 4238 .Cases("{f27}", "{fs11}", RISCV::F27_F) 4239 .Cases("{f28}", "{ft8}", RISCV::F28_F) 4240 .Cases("{f29}", "{ft9}", RISCV::F29_F) 4241 .Cases("{f30}", "{ft10}", RISCV::F30_F) 4242 .Cases("{f31}", "{ft11}", RISCV::F31_F) 4243 .Default(RISCV::NoRegister); 4244 if (FReg != RISCV::NoRegister) { 4245 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 4246 if (Subtarget.hasStdExtD()) { 4247 unsigned RegNo = FReg - RISCV::F0_F; 4248 unsigned DReg = RISCV::F0_D + RegNo; 4249 return std::make_pair(DReg, &RISCV::FPR64RegClass); 4250 } 4251 return std::make_pair(FReg, 
&RISCV::FPR32RegClass); 4252 } 4253 } 4254 4255 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 4256 } 4257 4258 unsigned 4259 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 4260 // Currently only support length 1 constraints. 4261 if (ConstraintCode.size() == 1) { 4262 switch (ConstraintCode[0]) { 4263 case 'A': 4264 return InlineAsm::Constraint_A; 4265 default: 4266 break; 4267 } 4268 } 4269 4270 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 4271 } 4272 4273 void RISCVTargetLowering::LowerAsmOperandForConstraint( 4274 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 4275 SelectionDAG &DAG) const { 4276 // Currently only support length 1 constraints. 4277 if (Constraint.length() == 1) { 4278 switch (Constraint[0]) { 4279 case 'I': 4280 // Validate & create a 12-bit signed immediate operand. 4281 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4282 uint64_t CVal = C->getSExtValue(); 4283 if (isInt<12>(CVal)) 4284 Ops.push_back( 4285 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 4286 } 4287 return; 4288 case 'J': 4289 // Validate & create an integer zero operand. 4290 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 4291 if (C->getZExtValue() == 0) 4292 Ops.push_back( 4293 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 4294 return; 4295 case 'K': 4296 // Validate & create a 5-bit unsigned immediate operand. 4297 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4298 uint64_t CVal = C->getZExtValue(); 4299 if (isUInt<5>(CVal)) 4300 Ops.push_back( 4301 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 4302 } 4303 return; 4304 default: 4305 break; 4306 } 4307 } 4308 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 4309 } 4310 4311 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 4312 Instruction *Inst, 4313 AtomicOrdering Ord) const { 4314 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 4315 return Builder.CreateFence(Ord); 4316 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 4317 return Builder.CreateFence(AtomicOrdering::Release); 4318 return nullptr; 4319 } 4320 4321 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 4322 Instruction *Inst, 4323 AtomicOrdering Ord) const { 4324 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 4325 return Builder.CreateFence(AtomicOrdering::Acquire); 4326 return nullptr; 4327 } 4328 4329 TargetLowering::AtomicExpansionKind 4330 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 4331 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 4332 // point operations can't be used in an lr/sc sequence without breaking the 4333 // forward-progress guarantee. 
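// (8- and 16-bit integer atomicrmw operations, handled just below, are
// instead expanded to a masked LR/SC loop on the containing aligned word via
// the riscv_masked_atomicrmw_* intrinsics.)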
4334 if (AI->isFloatingPointOperation()) 4335 return AtomicExpansionKind::CmpXChg; 4336 4337 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 4338 if (Size == 8 || Size == 16) 4339 return AtomicExpansionKind::MaskedIntrinsic; 4340 return AtomicExpansionKind::None; 4341 } 4342 4343 static Intrinsic::ID 4344 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 4345 if (XLen == 32) { 4346 switch (BinOp) { 4347 default: 4348 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4349 case AtomicRMWInst::Xchg: 4350 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 4351 case AtomicRMWInst::Add: 4352 return Intrinsic::riscv_masked_atomicrmw_add_i32; 4353 case AtomicRMWInst::Sub: 4354 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 4355 case AtomicRMWInst::Nand: 4356 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 4357 case AtomicRMWInst::Max: 4358 return Intrinsic::riscv_masked_atomicrmw_max_i32; 4359 case AtomicRMWInst::Min: 4360 return Intrinsic::riscv_masked_atomicrmw_min_i32; 4361 case AtomicRMWInst::UMax: 4362 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 4363 case AtomicRMWInst::UMin: 4364 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 4365 } 4366 } 4367 4368 if (XLen == 64) { 4369 switch (BinOp) { 4370 default: 4371 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4372 case AtomicRMWInst::Xchg: 4373 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 4374 case AtomicRMWInst::Add: 4375 return Intrinsic::riscv_masked_atomicrmw_add_i64; 4376 case AtomicRMWInst::Sub: 4377 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 4378 case AtomicRMWInst::Nand: 4379 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 4380 case AtomicRMWInst::Max: 4381 return Intrinsic::riscv_masked_atomicrmw_max_i64; 4382 case AtomicRMWInst::Min: 4383 return Intrinsic::riscv_masked_atomicrmw_min_i64; 4384 case AtomicRMWInst::UMax: 4385 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 4386 case AtomicRMWInst::UMin: 4387 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 4388 } 4389 } 4390 4391 llvm_unreachable("Unexpected XLen\n"); 4392 } 4393 4394 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 4395 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 4396 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 4397 unsigned XLen = Subtarget.getXLen(); 4398 Value *Ordering = 4399 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 4400 Type *Tys[] = {AlignedAddr->getType()}; 4401 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 4402 AI->getModule(), 4403 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 4404 4405 if (XLen == 64) { 4406 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 4407 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 4408 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 4409 } 4410 4411 Value *Result; 4412 4413 // Must pass the shift amount needed to sign extend the loaded value prior 4414 // to performing a signed comparison for min/max. ShiftAmt is the number of 4415 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 4416 // is the number of bits to left+right shift the value in order to 4417 // sign-extend. 
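// Worked example: an i8 field at bit offset 8 on RV32 gives
// SextShamt = 32 - 8 - 8 = 16, so (x << 16) >>s 16 sign-extends that byte in
// place before the signed comparison.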
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when the libcall argument
  // or return value is an f32 for the LP64 ABI.
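  // (With the soft-float LP64 ABI, an FP libcall such as __addsf3 takes its
  // f32 operands at their natural 32-bit width, so widening them to the
  // 64-bit XLen type would only insert redundant extension instructions.)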
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL into a SLLI and an ADD/SUB (e.g. X * 9 == (X << 3) + X,
      // X * 7 == (X << 3) - X).
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm