1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Subclass of MipsTargetLowering specialized for mips32/64. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MipsSEISelLowering.h" 14 #include "MipsMachineFunction.h" 15 #include "MipsRegisterInfo.h" 16 #include "MipsSubtarget.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/CodeGen/CallingConvLower.h" 21 #include "llvm/CodeGen/ISDOpcodes.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineMemOperand.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/SelectionDAG.h" 29 #include "llvm/CodeGen/SelectionDAGNodes.h" 30 #include "llvm/CodeGen/TargetInstrInfo.h" 31 #include "llvm/CodeGen/TargetSubtargetInfo.h" 32 #include "llvm/CodeGen/ValueTypes.h" 33 #include "llvm/CodeGenTypes/MachineValueType.h" 34 #include "llvm/IR/DebugLoc.h" 35 #include "llvm/IR/Intrinsics.h" 36 #include "llvm/IR/IntrinsicsMips.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/MathExtras.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/TargetParser/Triple.h" 44 #include <algorithm> 45 #include <cassert> 46 #include <cstdint> 47 #include <iterator> 48 #include <utility> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "mips-isel" 53 54 static cl::opt<bool> 55 UseMipsTailCalls("mips-tail-calls", cl::Hidden, 56 cl::desc("MIPS: permit tail calls."), cl::init(false)); 57 58 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 59 cl::desc("Expand double precision loads and " 60 "stores to their single precision " 61 "counterparts")); 62 63 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 64 const MipsSubtarget &STI) 65 : MipsTargetLowering(TM, STI) { 66 // Set up the register classes 67 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 68 69 if (Subtarget.isGP64bit()) 70 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 71 72 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 73 // Expand all truncating stores and extending loads. 74 for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { 75 for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { 76 setTruncStoreAction(VT0, VT1, Expand); 77 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 78 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 79 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 80 } 81 } 82 } 83 84 if (Subtarget.hasDSP()) { 85 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 86 87 for (const auto &VecTy : VecTys) { 88 addRegisterClass(VecTy, &Mips::DSPRRegClass); 89 90 // Expand all builtin opcodes. 91 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 92 setOperationAction(Opc, VecTy, Expand); 93 94 setOperationAction(ISD::ADD, VecTy, Legal); 95 setOperationAction(ISD::SUB, VecTy, Legal); 96 setOperationAction(ISD::LOAD, VecTy, Legal); 97 setOperationAction(ISD::STORE, VecTy, Legal); 98 setOperationAction(ISD::BITCAST, VecTy, Legal); 99 } 100 101 setTargetDAGCombine( 102 {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); 103 104 if (Subtarget.hasMips32r2()) { 105 setOperationAction(ISD::ADDC, MVT::i32, Legal); 106 setOperationAction(ISD::ADDE, MVT::i32, Legal); 107 } 108 } 109 110 if (Subtarget.hasDSPR2()) 111 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 112 113 if (Subtarget.hasMSA()) { 114 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 115 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 116 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 117 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 118 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 119 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 120 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 121 122 // f16 is a storage-only type, always promote it to f32. 123 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 124 setOperationAction(ISD::SETCC, MVT::f16, Promote); 125 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 126 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 127 setOperationAction(ISD::SELECT, MVT::f16, Promote); 128 setOperationAction(ISD::FADD, MVT::f16, Promote); 129 setOperationAction(ISD::FSUB, MVT::f16, Promote); 130 setOperationAction(ISD::FMUL, MVT::f16, Promote); 131 setOperationAction(ISD::FDIV, MVT::f16, Promote); 132 setOperationAction(ISD::FREM, MVT::f16, Promote); 133 setOperationAction(ISD::FMA, MVT::f16, Promote); 134 setOperationAction(ISD::FNEG, MVT::f16, Promote); 135 setOperationAction(ISD::FABS, MVT::f16, Promote); 136 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 137 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 138 setOperationAction(ISD::FCOS, MVT::f16, Promote); 139 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 140 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 141 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 142 setOperationAction(ISD::FPOW, MVT::f16, Promote); 143 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 144 setOperationAction(ISD::FRINT, MVT::f16, Promote); 145 setOperationAction(ISD::FSIN, MVT::f16, Promote); 146 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 147 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 148 setOperationAction(ISD::FEXP, MVT::f16, Promote); 149 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 150 setOperationAction(ISD::FLOG, MVT::f16, Promote); 151 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 152 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 153 setOperationAction(ISD::FROUND, MVT::f16, Promote); 154 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 155 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 156 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 157 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); 158 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); 159 160 setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); 161 } 162 163 if (!Subtarget.useSoftFloat()) { 164 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 165 166 // When dealing with single precision only, use libcalls 167 if (!Subtarget.isSingleFloat()) { 168 if (Subtarget.isFP64bit()) 169 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 170 else 171 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 172 } 173 } 174 175 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 176 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 177 setOperationAction(ISD::MULHS, MVT::i32, Custom); 178 setOperationAction(ISD::MULHU, MVT::i32, Custom); 179 180 if (Subtarget.hasCnMips()) 181 setOperationAction(ISD::MUL, MVT::i64, Legal); 182 else if (Subtarget.isGP64bit()) 183 setOperationAction(ISD::MUL, MVT::i64, Custom); 184 185 if (Subtarget.isGP64bit()) { 186 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 187 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 188 setOperationAction(ISD::MULHS, MVT::i64, Custom); 189 setOperationAction(ISD::MULHU, MVT::i64, Custom); 190 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 191 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 192 } 193 194 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 195 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 196 197 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 198 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 199 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 200 if (Subtarget.hasMips32r6()) { 201 setOperationAction(ISD::LOAD, MVT::i32, Legal); 202 setOperationAction(ISD::STORE, MVT::i32, Legal); 203 } else { 204 setOperationAction(ISD::LOAD, MVT::i32, Custom); 205 setOperationAction(ISD::STORE, MVT::i32, Custom); 206 } 207 208 setTargetDAGCombine(ISD::MUL); 209 210 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 211 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 212 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 213 214 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() && 215 !Subtarget.hasMips64()) { 216 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 217 } 218 219 if (NoDPLoadStore) { 220 setOperationAction(ISD::LOAD, MVT::f64, Custom); 221 setOperationAction(ISD::STORE, MVT::f64, Custom); 222 } 223 224 if (Subtarget.hasMips32r6()) { 225 // MIPS32r6 replaces the accumulator-based multiplies with a three register 226 // instruction 227 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 228 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 229 setOperationAction(ISD::MUL, MVT::i32, Legal); 230 setOperationAction(ISD::MULHS, MVT::i32, Legal); 231 setOperationAction(ISD::MULHU, MVT::i32, Legal); 232 233 // MIPS32r6 replaces the accumulator-based division/remainder with separate 234 // three register division and remainder instructions. 235 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 236 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 237 setOperationAction(ISD::SDIV, MVT::i32, Legal); 238 setOperationAction(ISD::UDIV, MVT::i32, Legal); 239 setOperationAction(ISD::SREM, MVT::i32, Legal); 240 setOperationAction(ISD::UREM, MVT::i32, Legal); 241 242 // MIPS32r6 replaces conditional moves with an equivalent that removes the 243 // need for three GPR read ports. 244 setOperationAction(ISD::SETCC, MVT::i32, Legal); 245 setOperationAction(ISD::SELECT, MVT::i32, Legal); 246 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 247 248 setOperationAction(ISD::SETCC, MVT::f32, Legal); 249 setOperationAction(ISD::SELECT, MVT::f32, Legal); 250 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 251 252 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 253 setOperationAction(ISD::SETCC, MVT::f64, Legal); 254 setOperationAction(ISD::SELECT, MVT::f64, Custom); 255 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 256 257 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 258 259 // Floating point > and >= are supported via < and <= 260 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 261 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 262 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 263 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 264 265 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 266 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 267 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 268 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 269 } 270 271 if (Subtarget.hasMips64r6()) { 272 // MIPS64r6 replaces the accumulator-based multiplies with a three register 273 // instruction 274 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 275 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 276 setOperationAction(ISD::MUL, MVT::i64, Legal); 277 setOperationAction(ISD::MULHS, MVT::i64, Legal); 278 setOperationAction(ISD::MULHU, MVT::i64, Legal); 279 280 // MIPS32r6 replaces the accumulator-based division/remainder with separate 281 // three register division and remainder instructions. 282 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 283 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 284 setOperationAction(ISD::SDIV, MVT::i64, Legal); 285 setOperationAction(ISD::UDIV, MVT::i64, Legal); 286 setOperationAction(ISD::SREM, MVT::i64, Legal); 287 setOperationAction(ISD::UREM, MVT::i64, Legal); 288 289 // MIPS64r6 replaces conditional moves with an equivalent that removes the 290 // need for three GPR read ports. 291 setOperationAction(ISD::SETCC, MVT::i64, Legal); 292 setOperationAction(ISD::SELECT, MVT::i64, Legal); 293 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 294 } 295 296 computeRegisterProperties(Subtarget.getRegisterInfo()); 297 } 298 299 const MipsTargetLowering * 300 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 301 const MipsSubtarget &STI) { 302 return new MipsSETargetLowering(TM, STI); 303 } 304 305 const TargetRegisterClass * 306 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 307 if (VT == MVT::Untyped) 308 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 309 310 return TargetLowering::getRepRegClassFor(VT); 311 } 312 313 // Enable MSA support for the given integer type and Register class. 314 void MipsSETargetLowering:: 315 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 316 addRegisterClass(Ty, RC); 317 318 // Expand all builtin opcodes. 319 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 320 setOperationAction(Opc, Ty, Expand); 321 322 setOperationAction(ISD::BITCAST, Ty, Legal); 323 setOperationAction(ISD::LOAD, Ty, Legal); 324 setOperationAction(ISD::STORE, Ty, Legal); 325 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 326 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 327 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 328 setOperationAction(ISD::UNDEF, Ty, Legal); 329 330 setOperationAction(ISD::ADD, Ty, Legal); 331 setOperationAction(ISD::AND, Ty, Legal); 332 setOperationAction(ISD::CTLZ, Ty, Legal); 333 setOperationAction(ISD::CTPOP, Ty, Legal); 334 setOperationAction(ISD::MUL, Ty, Legal); 335 setOperationAction(ISD::OR, Ty, Legal); 336 setOperationAction(ISD::SDIV, Ty, Legal); 337 setOperationAction(ISD::SREM, Ty, Legal); 338 setOperationAction(ISD::SHL, Ty, Legal); 339 setOperationAction(ISD::SRA, Ty, Legal); 340 setOperationAction(ISD::SRL, Ty, Legal); 341 setOperationAction(ISD::SUB, Ty, Legal); 342 setOperationAction(ISD::SMAX, Ty, Legal); 343 setOperationAction(ISD::SMIN, Ty, Legal); 344 setOperationAction(ISD::UDIV, Ty, Legal); 345 setOperationAction(ISD::UREM, Ty, Legal); 346 setOperationAction(ISD::UMAX, Ty, Legal); 347 setOperationAction(ISD::UMIN, Ty, Legal); 348 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 349 setOperationAction(ISD::VSELECT, Ty, Legal); 350 setOperationAction(ISD::XOR, Ty, Legal); 351 352 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 353 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 354 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 355 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 356 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 357 } 358 359 setOperationAction(ISD::SETCC, Ty, Legal); 360 setCondCodeAction(ISD::SETNE, Ty, Expand); 361 setCondCodeAction(ISD::SETGE, Ty, Expand); 362 setCondCodeAction(ISD::SETGT, Ty, Expand); 363 setCondCodeAction(ISD::SETUGE, Ty, Expand); 364 setCondCodeAction(ISD::SETUGT, Ty, Expand); 365 } 366 367 // Enable MSA support for the given floating-point type and Register class. 368 void MipsSETargetLowering:: 369 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 370 addRegisterClass(Ty, RC); 371 372 // Expand all builtin opcodes. 373 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 374 setOperationAction(Opc, Ty, Expand); 375 376 setOperationAction(ISD::LOAD, Ty, Legal); 377 setOperationAction(ISD::STORE, Ty, Legal); 378 setOperationAction(ISD::BITCAST, Ty, Legal); 379 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 380 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 381 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 382 383 if (Ty != MVT::v8f16) { 384 setOperationAction(ISD::FABS, Ty, Legal); 385 setOperationAction(ISD::FADD, Ty, Legal); 386 setOperationAction(ISD::FDIV, Ty, Legal); 387 setOperationAction(ISD::FEXP2, Ty, Legal); 388 setOperationAction(ISD::FLOG2, Ty, Legal); 389 setOperationAction(ISD::FMA, Ty, Legal); 390 setOperationAction(ISD::FMUL, Ty, Legal); 391 setOperationAction(ISD::FRINT, Ty, Legal); 392 setOperationAction(ISD::FSQRT, Ty, Legal); 393 setOperationAction(ISD::FSUB, Ty, Legal); 394 setOperationAction(ISD::VSELECT, Ty, Legal); 395 396 setOperationAction(ISD::SETCC, Ty, Legal); 397 setCondCodeAction(ISD::SETOGE, Ty, Expand); 398 setCondCodeAction(ISD::SETOGT, Ty, Expand); 399 setCondCodeAction(ISD::SETUGE, Ty, Expand); 400 setCondCodeAction(ISD::SETUGT, Ty, Expand); 401 setCondCodeAction(ISD::SETGE, Ty, Expand); 402 setCondCodeAction(ISD::SETGT, Ty, Expand); 403 } 404 } 405 406 SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 407 if(!Subtarget.hasMips32r6()) 408 return MipsTargetLowering::LowerOperation(Op, DAG); 409 410 EVT ResTy = Op->getValueType(0); 411 SDLoc DL(Op); 412 413 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the 414 // floating point register are undefined. Not really an issue as sel.d, which 415 // is produced from an FSELECT node, only looks at bit 0. 416 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0)); 417 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1), 418 Op->getOperand(2)); 419 } 420 421 bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( 422 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { 423 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 424 425 if (Subtarget.systemSupportsUnalignedAccess()) { 426 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's 427 // implementation defined whether this is handled by hardware, software, or 428 // a hybrid of the two but it's expected that most implementations will 429 // handle the majority of cases in hardware. 430 if (Fast) 431 *Fast = 1; 432 return true; 433 } else if (Subtarget.hasMips32r6()) { 434 return false; 435 } 436 437 switch (SVT) { 438 case MVT::i64: 439 case MVT::i32: 440 if (Fast) 441 *Fast = 1; 442 return true; 443 default: 444 return false; 445 } 446 } 447 448 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 449 SelectionDAG &DAG) const { 450 switch(Op.getOpcode()) { 451 case ISD::LOAD: return lowerLOAD(Op, DAG); 452 case ISD::STORE: return lowerSTORE(Op, DAG); 453 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 454 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 455 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 456 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 457 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 458 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 459 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 460 DAG); 461 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 462 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 463 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 464 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 465 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 466 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 467 case ISD::SELECT: return lowerSELECT(Op, DAG); 468 case ISD::BITCAST: return lowerBITCAST(Op, DAG); 469 } 470 471 return MipsTargetLowering::LowerOperation(Op, DAG); 472 } 473 474 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 475 // 476 // Performs the following transformations: 477 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 478 // sign/zero-extension is completely overwritten by the new one performed by 479 // the ISD::AND. 480 // - Removes redundant zero extensions performed by an ISD::AND. 481 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 482 TargetLowering::DAGCombinerInfo &DCI, 483 const MipsSubtarget &Subtarget) { 484 if (!Subtarget.hasMSA()) 485 return SDValue(); 486 487 SDValue Op0 = N->getOperand(0); 488 SDValue Op1 = N->getOperand(1); 489 unsigned Op0Opcode = Op0->getOpcode(); 490 491 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 492 // where $d + 1 == 2^n and n == 32 493 // or $d + 1 == 2^n and n <= 32 and ZExt 494 // -> (MipsVExtractZExt $a, $b, $c) 495 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 496 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 497 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 498 499 if (!Mask) 500 return SDValue(); 501 502 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 503 504 if (Log2IfPositive <= 0) 505 return SDValue(); // Mask+1 is not a power of 2 506 507 SDValue Op0Op2 = Op0->getOperand(2); 508 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 509 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 510 unsigned Log2 = Log2IfPositive; 511 512 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 513 Log2 == ExtendTySize) { 514 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 515 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 516 Op0->getVTList(), 517 ArrayRef(Ops, Op0->getNumOperands())); 518 } 519 } 520 521 return SDValue(); 522 } 523 524 // Determine if the specified node is a constant vector splat. 525 // 526 // Returns true and sets Imm if: 527 // * N is a ISD::BUILD_VECTOR representing a constant splat 528 // 529 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The 530 // differences are that it assumes the MSA has already been checked and the 531 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 532 // must not be in order for binsri.d to be selectable). 533 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 534 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 535 536 if (!Node) 537 return false; 538 539 APInt SplatValue, SplatUndef; 540 unsigned SplatBitSize; 541 bool HasAnyUndefs; 542 543 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 544 8, !IsLittleEndian)) 545 return false; 546 547 Imm = SplatValue; 548 549 return true; 550 } 551 552 // Test whether the given node is an all-ones build_vector. 553 static bool isVectorAllOnes(SDValue N) { 554 // Look through bitcasts. Endianness doesn't matter because we are looking 555 // for an all-ones value. 556 if (N->getOpcode() == ISD::BITCAST) 557 N = N->getOperand(0); 558 559 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 560 561 if (!BVN) 562 return false; 563 564 APInt SplatValue, SplatUndef; 565 unsigned SplatBitSize; 566 bool HasAnyUndefs; 567 568 // Endianness doesn't matter in this context because we are looking for 569 // an all-ones value. 570 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 571 return SplatValue.isAllOnes(); 572 573 return false; 574 } 575 576 // Test whether N is the bitwise inverse of OfNode. 577 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 578 if (N->getOpcode() != ISD::XOR) 579 return false; 580 581 if (isVectorAllOnes(N->getOperand(0))) 582 return N->getOperand(1) == OfNode; 583 584 if (isVectorAllOnes(N->getOperand(1))) 585 return N->getOperand(0) == OfNode; 586 587 return false; 588 } 589 590 // Perform combines where ISD::OR is the root node. 591 // 592 // Performs the following transformations: 593 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 594 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 595 // vector type. 596 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 597 TargetLowering::DAGCombinerInfo &DCI, 598 const MipsSubtarget &Subtarget) { 599 if (!Subtarget.hasMSA()) 600 return SDValue(); 601 602 EVT Ty = N->getValueType(0); 603 604 if (!Ty.is128BitVector()) 605 return SDValue(); 606 607 SDValue Op0 = N->getOperand(0); 608 SDValue Op1 = N->getOperand(1); 609 610 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 611 SDValue Op0Op0 = Op0->getOperand(0); 612 SDValue Op0Op1 = Op0->getOperand(1); 613 SDValue Op1Op0 = Op1->getOperand(0); 614 SDValue Op1Op1 = Op1->getOperand(1); 615 bool IsLittleEndian = !Subtarget.isLittle(); 616 617 SDValue IfSet, IfClr, Cond; 618 bool IsConstantMask = false; 619 APInt Mask, InvMask; 620 621 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 622 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 623 // looking. 624 // IfClr will be set if we find a valid match. 625 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 626 Cond = Op0Op0; 627 IfSet = Op0Op1; 628 629 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 630 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 631 IfClr = Op1Op1; 632 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 633 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 634 IfClr = Op1Op0; 635 636 IsConstantMask = true; 637 } 638 639 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 640 // thing again using this mask. 641 // IfClr will be set if we find a valid match. 642 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 643 Cond = Op0Op1; 644 IfSet = Op0Op0; 645 646 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 647 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 648 IfClr = Op1Op1; 649 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 650 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 651 IfClr = Op1Op0; 652 653 IsConstantMask = true; 654 } 655 656 // If IfClr is not yet set, try looking for a non-constant match. 657 // IfClr will be set if we find a valid match amongst the eight 658 // possibilities. 659 if (!IfClr.getNode()) { 660 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 661 Cond = Op1Op0; 662 IfSet = Op1Op1; 663 IfClr = Op0Op1; 664 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 665 Cond = Op1Op0; 666 IfSet = Op1Op1; 667 IfClr = Op0Op0; 668 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 669 Cond = Op1Op1; 670 IfSet = Op1Op0; 671 IfClr = Op0Op1; 672 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 673 Cond = Op1Op1; 674 IfSet = Op1Op0; 675 IfClr = Op0Op0; 676 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 677 Cond = Op0Op0; 678 IfSet = Op0Op1; 679 IfClr = Op1Op1; 680 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 681 Cond = Op0Op0; 682 IfSet = Op0Op1; 683 IfClr = Op1Op0; 684 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 685 Cond = Op0Op1; 686 IfSet = Op0Op0; 687 IfClr = Op1Op1; 688 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 689 Cond = Op0Op1; 690 IfSet = Op0Op0; 691 IfClr = Op1Op0; 692 } 693 } 694 695 // At this point, IfClr will be set if we have a valid match. 696 if (!IfClr.getNode()) 697 return SDValue(); 698 699 assert(Cond.getNode() && IfSet.getNode()); 700 701 // Fold degenerate cases. 702 if (IsConstantMask) { 703 if (Mask.isAllOnes()) 704 return IfSet; 705 else if (Mask == 0) 706 return IfClr; 707 } 708 709 // Transform the DAG into an equivalent VSELECT. 710 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 711 } 712 713 return SDValue(); 714 } 715 716 static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, 717 SelectionDAG &DAG, 718 const MipsSubtarget &Subtarget) { 719 // Estimate the number of operations the below transform will turn a 720 // constant multiply into. The number is approximately equal to the minimal 721 // number of powers of two that constant can be broken down to by adding 722 // or subtracting them. 723 // 724 // If we have taken more than 12[1] / 8[2] steps to attempt the 725 // optimization for a native sized value, it is more than likely that this 726 // optimization will make things worse. 727 // 728 // [1] MIPS64 requires 6 instructions at most to materialize any constant, 729 // multiplication requires at least 4 cycles, but another cycle (or two) 730 // to retrieve the result from the HI/LO registers. 731 // 732 // [2] For MIPS32, more than 8 steps is expensive as the constant could be 733 // materialized in 2 instructions, multiplication requires at least 4 734 // cycles, but another cycle (or two) to retrieve the result from the 735 // HI/LO registers. 736 // 737 // TODO: 738 // - MaxSteps needs to consider the `VT` of the constant for the current 739 // target. 740 // - Consider to perform this optimization after type legalization. 741 // That allows to remove a workaround for types not supported natively. 742 // - Take in account `-Os, -Oz` flags because this optimization 743 // increases code size. 744 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12; 745 746 SmallVector<APInt, 16> WorkStack(1, C); 747 unsigned Steps = 0; 748 unsigned BitWidth = C.getBitWidth(); 749 750 while (!WorkStack.empty()) { 751 APInt Val = WorkStack.pop_back_val(); 752 753 if (Val == 0 || Val == 1) 754 continue; 755 756 if (Steps >= MaxSteps) 757 return false; 758 759 if (Val.isPowerOf2()) { 760 ++Steps; 761 continue; 762 } 763 764 APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); 765 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) 766 : APInt(BitWidth, 1) << C.ceilLogBase2(); 767 if ((Val - Floor).ule(Ceil - Val)) { 768 WorkStack.push_back(Floor); 769 WorkStack.push_back(Val - Floor); 770 } else { 771 WorkStack.push_back(Ceil); 772 WorkStack.push_back(Ceil - Val); 773 } 774 775 ++Steps; 776 } 777 778 // If the value being multiplied is not supported natively, we have to pay 779 // an additional legalization cost, conservatively assume an increase in the 780 // cost of 3 instructions per step. This values for this heuristic were 781 // determined experimentally. 782 unsigned RegisterSize = DAG.getTargetLoweringInfo() 783 .getRegisterType(*DAG.getContext(), VT) 784 .getSizeInBits(); 785 Steps *= (VT.getSizeInBits() != RegisterSize) * 3; 786 if (Steps > 27) 787 return false; 788 789 return true; 790 } 791 792 static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, 793 EVT ShiftTy, SelectionDAG &DAG) { 794 // Return 0. 795 if (C == 0) 796 return DAG.getConstant(0, DL, VT); 797 798 // Return x. 799 if (C == 1) 800 return X; 801 802 // If c is power of 2, return (shl x, log2(c)). 803 if (C.isPowerOf2()) 804 return DAG.getNode(ISD::SHL, DL, VT, X, 805 DAG.getConstant(C.logBase2(), DL, ShiftTy)); 806 807 unsigned BitWidth = C.getBitWidth(); 808 APInt Floor = APInt(BitWidth, 1) << C.logBase2(); 809 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : 810 APInt(BitWidth, 1) << C.ceilLogBase2(); 811 812 // If |c - floor_c| <= |c - ceil_c|, 813 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 814 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 815 if ((C - Floor).ule(Ceil - C)) { 816 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 817 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 818 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 819 } 820 821 // If |c - floor_c| > |c - ceil_c|, 822 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 823 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 824 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 825 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 826 } 827 828 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 829 const TargetLowering::DAGCombinerInfo &DCI, 830 const MipsSETargetLowering *TL, 831 const MipsSubtarget &Subtarget) { 832 EVT VT = N->getValueType(0); 833 834 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 835 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( 836 C->getAPIntValue(), VT, DAG, Subtarget)) 837 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, 838 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 839 DAG); 840 841 return SDValue(N, 0); 842 } 843 844 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 845 SelectionDAG &DAG, 846 const MipsSubtarget &Subtarget) { 847 // See if this is a vector splat immediate node. 848 APInt SplatValue, SplatUndef; 849 unsigned SplatBitSize; 850 bool HasAnyUndefs; 851 unsigned EltSize = Ty.getScalarSizeInBits(); 852 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 853 854 if (!Subtarget.hasDSP()) 855 return SDValue(); 856 857 if (!BV || 858 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 859 EltSize, !Subtarget.isLittle()) || 860 (SplatBitSize != EltSize) || 861 (SplatValue.getZExtValue() >= EltSize)) 862 return SDValue(); 863 864 SDLoc DL(N); 865 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 866 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 867 } 868 869 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 870 TargetLowering::DAGCombinerInfo &DCI, 871 const MipsSubtarget &Subtarget) { 872 EVT Ty = N->getValueType(0); 873 874 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 875 return SDValue(); 876 877 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 878 } 879 880 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 881 // constant splats into MipsISD::SHRA_DSP for DSPr2. 882 // 883 // Performs the following transformations: 884 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 885 // sign/zero-extension is completely overwritten by the new one performed by 886 // the ISD::SRA and ISD::SHL nodes. 887 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 888 // sequence. 889 // 890 // See performDSPShiftCombine for more information about the transformation 891 // used for DSPr2. 892 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 893 TargetLowering::DAGCombinerInfo &DCI, 894 const MipsSubtarget &Subtarget) { 895 EVT Ty = N->getValueType(0); 896 897 if (Subtarget.hasMSA()) { 898 SDValue Op0 = N->getOperand(0); 899 SDValue Op1 = N->getOperand(1); 900 901 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 902 // where $d + sizeof($c) == 32 903 // or $d + sizeof($c) <= 32 and SExt 904 // -> (MipsVExtractSExt $a, $b, $c) 905 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 906 SDValue Op0Op0 = Op0->getOperand(0); 907 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 908 909 if (!ShAmount) 910 return SDValue(); 911 912 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 913 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 914 return SDValue(); 915 916 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 917 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 918 919 if (TotalBits == 32 || 920 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 921 TotalBits <= 32)) { 922 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 923 Op0Op0->getOperand(2) }; 924 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 925 Op0Op0->getVTList(), 926 ArrayRef(Ops, Op0Op0->getNumOperands())); 927 } 928 } 929 } 930 931 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 932 return SDValue(); 933 934 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 935 } 936 937 938 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 939 TargetLowering::DAGCombinerInfo &DCI, 940 const MipsSubtarget &Subtarget) { 941 EVT Ty = N->getValueType(0); 942 943 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 944 return SDValue(); 945 946 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 947 } 948 949 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 950 bool IsV216 = (Ty == MVT::v2i16); 951 952 switch (CC) { 953 case ISD::SETEQ: 954 case ISD::SETNE: return true; 955 case ISD::SETLT: 956 case ISD::SETLE: 957 case ISD::SETGT: 958 case ISD::SETGE: return IsV216; 959 case ISD::SETULT: 960 case ISD::SETULE: 961 case ISD::SETUGT: 962 case ISD::SETUGE: return !IsV216; 963 default: return false; 964 } 965 } 966 967 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 968 EVT Ty = N->getValueType(0); 969 970 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 971 return SDValue(); 972 973 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 974 return SDValue(); 975 976 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 977 N->getOperand(1), N->getOperand(2)); 978 } 979 980 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 981 EVT Ty = N->getValueType(0); 982 983 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { 984 SDValue SetCC = N->getOperand(0); 985 986 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 987 return SDValue(); 988 989 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 990 SetCC.getOperand(0), SetCC.getOperand(1), 991 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 992 } 993 994 return SDValue(); 995 } 996 997 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 998 const MipsSubtarget &Subtarget) { 999 EVT Ty = N->getValueType(0); 1000 1001 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 1002 // Try the following combines: 1003 // (xor (or $a, $b), (build_vector allones)) 1004 // (xor (or $a, $b), (bitcast (build_vector allones))) 1005 SDValue Op0 = N->getOperand(0); 1006 SDValue Op1 = N->getOperand(1); 1007 SDValue NotOp; 1008 1009 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1010 NotOp = Op1; 1011 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 1012 NotOp = Op0; 1013 else 1014 return SDValue(); 1015 1016 if (NotOp->getOpcode() == ISD::OR) 1017 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1018 NotOp->getOperand(1)); 1019 } 1020 1021 return SDValue(); 1022 } 1023 1024 SDValue 1025 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1026 SelectionDAG &DAG = DCI.DAG; 1027 SDValue Val; 1028 1029 switch (N->getOpcode()) { 1030 case ISD::AND: 1031 Val = performANDCombine(N, DAG, DCI, Subtarget); 1032 break; 1033 case ISD::OR: 1034 Val = performORCombine(N, DAG, DCI, Subtarget); 1035 break; 1036 case ISD::MUL: 1037 return performMULCombine(N, DAG, DCI, this, Subtarget); 1038 case ISD::SHL: 1039 Val = performSHLCombine(N, DAG, DCI, Subtarget); 1040 break; 1041 case ISD::SRA: 1042 return performSRACombine(N, DAG, DCI, Subtarget); 1043 case ISD::SRL: 1044 return performSRLCombine(N, DAG, DCI, Subtarget); 1045 case ISD::VSELECT: 1046 return performVSELECTCombine(N, DAG); 1047 case ISD::XOR: 1048 Val = performXORCombine(N, DAG, Subtarget); 1049 break; 1050 case ISD::SETCC: 1051 Val = performSETCCCombine(N, DAG); 1052 break; 1053 } 1054 1055 if (Val.getNode()) { 1056 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1057 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; 1058 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); 1059 return Val; 1060 } 1061 1062 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1063 } 1064 1065 MachineBasicBlock * 1066 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1067 MachineBasicBlock *BB) const { 1068 switch (MI.getOpcode()) { 1069 default: 1070 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1071 case Mips::BPOSGE32_PSEUDO: 1072 return emitBPOSGE32(MI, BB); 1073 case Mips::SNZ_B_PSEUDO: 1074 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1075 case Mips::SNZ_H_PSEUDO: 1076 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1077 case Mips::SNZ_W_PSEUDO: 1078 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1079 case Mips::SNZ_D_PSEUDO: 1080 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1081 case Mips::SNZ_V_PSEUDO: 1082 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1083 case Mips::SZ_B_PSEUDO: 1084 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1085 case Mips::SZ_H_PSEUDO: 1086 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1087 case Mips::SZ_W_PSEUDO: 1088 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1089 case Mips::SZ_D_PSEUDO: 1090 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1091 case Mips::SZ_V_PSEUDO: 1092 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1093 case Mips::COPY_FW_PSEUDO: 1094 return emitCOPY_FW(MI, BB); 1095 case Mips::COPY_FD_PSEUDO: 1096 return emitCOPY_FD(MI, BB); 1097 case Mips::INSERT_FW_PSEUDO: 1098 return emitINSERT_FW(MI, BB); 1099 case Mips::INSERT_FD_PSEUDO: 1100 return emitINSERT_FD(MI, BB); 1101 case Mips::INSERT_B_VIDX_PSEUDO: 1102 case Mips::INSERT_B_VIDX64_PSEUDO: 1103 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1104 case Mips::INSERT_H_VIDX_PSEUDO: 1105 case Mips::INSERT_H_VIDX64_PSEUDO: 1106 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1107 case Mips::INSERT_W_VIDX_PSEUDO: 1108 case Mips::INSERT_W_VIDX64_PSEUDO: 1109 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1110 case Mips::INSERT_D_VIDX_PSEUDO: 1111 case Mips::INSERT_D_VIDX64_PSEUDO: 1112 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1113 case Mips::INSERT_FW_VIDX_PSEUDO: 1114 case Mips::INSERT_FW_VIDX64_PSEUDO: 1115 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1116 case Mips::INSERT_FD_VIDX_PSEUDO: 1117 case Mips::INSERT_FD_VIDX64_PSEUDO: 1118 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1119 case Mips::FILL_FW_PSEUDO: 1120 return emitFILL_FW(MI, BB); 1121 case Mips::FILL_FD_PSEUDO: 1122 return emitFILL_FD(MI, BB); 1123 case Mips::FEXP2_W_1_PSEUDO: 1124 return emitFEXP2_W_1(MI, BB); 1125 case Mips::FEXP2_D_1_PSEUDO: 1126 return emitFEXP2_D_1(MI, BB); 1127 case Mips::ST_F16: 1128 return emitST_F16_PSEUDO(MI, BB); 1129 case Mips::LD_F16: 1130 return emitLD_F16_PSEUDO(MI, BB); 1131 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1132 return emitFPEXTEND_PSEUDO(MI, BB, false); 1133 case Mips::MSA_FP_ROUND_W_PSEUDO: 1134 return emitFPROUND_PSEUDO(MI, BB, false); 1135 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1136 return emitFPEXTEND_PSEUDO(MI, BB, true); 1137 case Mips::MSA_FP_ROUND_D_PSEUDO: 1138 return emitFPROUND_PSEUDO(MI, BB, true); 1139 } 1140 } 1141 1142 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1143 const CCState &CCInfo, unsigned NextStackOffset, 1144 const MipsFunctionInfo &FI) const { 1145 if (!UseMipsTailCalls) 1146 return false; 1147 1148 // Exception has to be cleared with eret. 1149 if (FI.isISR()) 1150 return false; 1151 1152 // Return false if either the callee or caller has a byval argument. 1153 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1154 return false; 1155 1156 // Return true if the callee's argument area is no larger than the 1157 // caller's. 1158 return NextStackOffset <= FI.getIncomingArgSize(); 1159 } 1160 1161 void MipsSETargetLowering:: 1162 getOpndList(SmallVectorImpl<SDValue> &Ops, 1163 std::deque<std::pair<unsigned, SDValue>> &RegsToPass, 1164 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1165 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1166 SDValue Chain) const { 1167 Ops.push_back(Callee); 1168 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1169 InternalLinkage, IsCallReloc, CLI, Callee, 1170 Chain); 1171 } 1172 1173 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1174 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1175 1176 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1177 return MipsTargetLowering::lowerLOAD(Op, DAG); 1178 1179 // Replace a double precision load with two i32 loads and a buildpair64. 1180 SDLoc DL(Op); 1181 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1182 EVT PtrVT = Ptr.getValueType(); 1183 1184 // i32 load from lower address. 1185 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1186 Nd.getAlign(), Nd.getMemOperand()->getFlags()); 1187 1188 // i32 load from higher address. 1189 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1190 SDValue Hi = DAG.getLoad( 1191 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1192 commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags()); 1193 1194 if (!Subtarget.isLittle()) 1195 std::swap(Lo, Hi); 1196 1197 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1198 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1199 return DAG.getMergeValues(Ops, DL); 1200 } 1201 1202 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1203 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1204 1205 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1206 return MipsTargetLowering::lowerSTORE(Op, DAG); 1207 1208 // Replace a double precision store with two extractelement64s and i32 stores. 1209 SDLoc DL(Op); 1210 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1211 EVT PtrVT = Ptr.getValueType(); 1212 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1213 Val, DAG.getConstant(0, DL, MVT::i32)); 1214 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1215 Val, DAG.getConstant(1, DL, MVT::i32)); 1216 1217 if (!Subtarget.isLittle()) 1218 std::swap(Lo, Hi); 1219 1220 // i32 store to lower address. 1221 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(), 1222 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1223 1224 // i32 store to higher address. 1225 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1226 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1227 commonAlignment(Nd.getAlign(), 4), 1228 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1229 } 1230 1231 SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, 1232 SelectionDAG &DAG) const { 1233 SDLoc DL(Op); 1234 MVT Src = Op.getOperand(0).getValueType().getSimpleVT(); 1235 MVT Dest = Op.getValueType().getSimpleVT(); 1236 1237 // Bitcast i64 to double. 1238 if (Src == MVT::i64 && Dest == MVT::f64) { 1239 SDValue Lo, Hi; 1240 std::tie(Lo, Hi) = 1241 DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32); 1242 return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1243 } 1244 1245 // Bitcast double to i64. 1246 if (Src == MVT::f64 && Dest == MVT::i64) { 1247 SDValue Lo = 1248 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1249 DAG.getConstant(0, DL, MVT::i32)); 1250 SDValue Hi = 1251 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1252 DAG.getConstant(1, DL, MVT::i32)); 1253 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1254 } 1255 1256 // Skip other cases of bitcast and use default lowering. 1257 return SDValue(); 1258 } 1259 1260 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1261 bool HasLo, bool HasHi, 1262 SelectionDAG &DAG) const { 1263 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 1264 assert(!Subtarget.hasMips32r6()); 1265 1266 EVT Ty = Op.getOperand(0).getValueType(); 1267 SDLoc DL(Op); 1268 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1269 Op.getOperand(0), Op.getOperand(1)); 1270 SDValue Lo, Hi; 1271 1272 if (HasLo) 1273 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1274 if (HasHi) 1275 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1276 1277 if (!HasLo || !HasHi) 1278 return HasLo ? Lo : Hi; 1279 1280 SDValue Vals[] = { Lo, Hi }; 1281 return DAG.getMergeValues(Vals, DL); 1282 } 1283 1284 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1285 SDValue InLo, InHi; 1286 std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32); 1287 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1288 } 1289 1290 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1291 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1292 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1293 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1294 } 1295 1296 // This function expands mips intrinsic nodes which have 64-bit input operands 1297 // or output values. 1298 // 1299 // out64 = intrinsic-node in64 1300 // => 1301 // lo = copy (extract-element (in64, 0)) 1302 // hi = copy (extract-element (in64, 1)) 1303 // mips-specific-node 1304 // v0 = copy lo 1305 // v1 = copy hi 1306 // out64 = merge-values (v0, v1) 1307 // 1308 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1309 SDLoc DL(Op); 1310 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1311 SmallVector<SDValue, 3> Ops; 1312 unsigned OpNo = 0; 1313 1314 // See if Op has a chain input. 1315 if (HasChainIn) 1316 Ops.push_back(Op->getOperand(OpNo++)); 1317 1318 // The next operand is the intrinsic opcode. 1319 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1320 1321 // See if the next operand has type i64. 1322 SDValue Opnd = Op->getOperand(++OpNo), In64; 1323 1324 if (Opnd.getValueType() == MVT::i64) 1325 In64 = initAccumulator(Opnd, DL, DAG); 1326 else 1327 Ops.push_back(Opnd); 1328 1329 // Push the remaining operands. 1330 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1331 Ops.push_back(Op->getOperand(OpNo)); 1332 1333 // Add In64 to the end of the list. 1334 if (In64.getNode()) 1335 Ops.push_back(In64); 1336 1337 // Scan output. 1338 SmallVector<EVT, 2> ResTys; 1339 1340 for (EVT Ty : Op->values()) 1341 ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty); 1342 1343 // Create node. 1344 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1345 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1346 1347 if (!HasChainIn) 1348 return Out; 1349 1350 assert(Val->getValueType(1) == MVT::Other); 1351 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1352 return DAG.getMergeValues(Vals, DL); 1353 } 1354 1355 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1356 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1357 SDLoc DL(Op); 1358 SDValue Vec = Op->getOperand(1); 1359 SDValue Idx = Op->getOperand(2); 1360 EVT ResTy = Op->getValueType(0); 1361 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1362 1363 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1364 DAG.getValueType(EltTy)); 1365 1366 return Result; 1367 } 1368 1369 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1370 EVT ResVecTy = Op->getValueType(0); 1371 EVT ViaVecTy = ResVecTy; 1372 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1373 SDLoc DL(Op); 1374 1375 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1376 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1377 // lanes. 1378 SDValue LaneA = Op->getOperand(OpNr); 1379 SDValue LaneB; 1380 1381 if (ResVecTy == MVT::v2i64) { 1382 // In case of the index being passed as an immediate value, set the upper 1383 // lane to 0 so that the splati.d instruction can be matched. 1384 if (isa<ConstantSDNode>(LaneA)) 1385 LaneB = DAG.getConstant(0, DL, MVT::i32); 1386 // Having the index passed in a register, set the upper lane to the same 1387 // value as the lower - this results in the BUILD_VECTOR node not being 1388 // expanded through stack. This way we are able to pattern match the set of 1389 // nodes created here to splat.d. 1390 else 1391 LaneB = LaneA; 1392 ViaVecTy = MVT::v4i32; 1393 if(BigEndian) 1394 std::swap(LaneA, LaneB); 1395 } else 1396 LaneB = LaneA; 1397 1398 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1399 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1400 1401 SDValue Result = DAG.getBuildVector( 1402 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1403 1404 if (ViaVecTy != ResVecTy) { 1405 SDValue One = DAG.getConstant(1, DL, ViaVecTy); 1406 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, 1407 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); 1408 } 1409 1410 return Result; 1411 } 1412 1413 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1414 bool IsSigned = false) { 1415 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 1416 return DAG.getConstant( 1417 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1418 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 1419 SDLoc(Op), Op->getValueType(0)); 1420 } 1421 1422 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1423 bool BigEndian, SelectionDAG &DAG) { 1424 EVT ViaVecTy = VecTy; 1425 SDValue SplatValueA = SplatValue; 1426 SDValue SplatValueB = SplatValue; 1427 SDLoc DL(SplatValue); 1428 1429 if (VecTy == MVT::v2i64) { 1430 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1431 ViaVecTy = MVT::v4i32; 1432 1433 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1434 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1435 DAG.getConstant(32, DL, MVT::i32)); 1436 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1437 } 1438 1439 // We currently hold the parts in little endian order. Swap them if 1440 // necessary. 1441 if (BigEndian) 1442 std::swap(SplatValueA, SplatValueB); 1443 1444 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1445 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1446 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1447 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1448 1449 SDValue Result = DAG.getBuildVector( 1450 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1451 1452 if (VecTy != ViaVecTy) 1453 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1454 1455 return Result; 1456 } 1457 1458 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1459 unsigned Opc, SDValue Imm, 1460 bool BigEndian) { 1461 EVT VecTy = Op->getValueType(0); 1462 SDValue Exp2Imm; 1463 SDLoc DL(Op); 1464 1465 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1466 // here for now. 1467 if (VecTy == MVT::v2i64) { 1468 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1469 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1470 1471 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1472 MVT::i32); 1473 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1474 1475 if (BigEndian) 1476 std::swap(BitImmLoOp, BitImmHiOp); 1477 1478 Exp2Imm = DAG.getNode( 1479 ISD::BITCAST, DL, MVT::v2i64, 1480 DAG.getBuildVector(MVT::v4i32, DL, 1481 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1482 } 1483 } 1484 1485 if (!Exp2Imm.getNode()) { 1486 // We couldnt constant fold, do a vector shift instead 1487 1488 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1489 // only values 0-63 are valid. 1490 if (VecTy == MVT::v2i64) 1491 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1492 1493 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1494 1495 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1496 Exp2Imm); 1497 } 1498 1499 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1500 } 1501 1502 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1503 SDLoc DL(Op); 1504 EVT ResTy = Op->getValueType(0); 1505 SDValue Vec = Op->getOperand(2); 1506 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1507 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1508 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1509 DL, ResEltTy); 1510 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1511 1512 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1513 } 1514 1515 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1516 EVT ResTy = Op->getValueType(0); 1517 SDLoc DL(Op); 1518 SDValue One = DAG.getConstant(1, DL, ResTy); 1519 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1520 1521 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1522 DAG.getNOT(DL, Bit, ResTy)); 1523 } 1524 1525 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1526 SDLoc DL(Op); 1527 EVT ResTy = Op->getValueType(0); 1528 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1529 << Op->getConstantOperandAPInt(2); 1530 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1531 1532 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1533 } 1534 1535 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1536 SelectionDAG &DAG) const { 1537 SDLoc DL(Op); 1538 unsigned Intrinsic = Op->getConstantOperandVal(0); 1539 switch (Intrinsic) { 1540 default: 1541 return SDValue(); 1542 case Intrinsic::mips_shilo: 1543 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1544 case Intrinsic::mips_dpau_h_qbl: 1545 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1546 case Intrinsic::mips_dpau_h_qbr: 1547 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1548 case Intrinsic::mips_dpsu_h_qbl: 1549 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1550 case Intrinsic::mips_dpsu_h_qbr: 1551 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1552 case Intrinsic::mips_dpa_w_ph: 1553 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1554 case Intrinsic::mips_dps_w_ph: 1555 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1556 case Intrinsic::mips_dpax_w_ph: 1557 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1558 case Intrinsic::mips_dpsx_w_ph: 1559 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1560 case Intrinsic::mips_mulsa_w_ph: 1561 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1562 case Intrinsic::mips_mult: 1563 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1564 case Intrinsic::mips_multu: 1565 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1566 case Intrinsic::mips_madd: 1567 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1568 case Intrinsic::mips_maddu: 1569 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1570 case Intrinsic::mips_msub: 1571 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1572 case Intrinsic::mips_msubu: 1573 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1574 case Intrinsic::mips_addv_b: 1575 case Intrinsic::mips_addv_h: 1576 case Intrinsic::mips_addv_w: 1577 case Intrinsic::mips_addv_d: 1578 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1579 Op->getOperand(2)); 1580 case Intrinsic::mips_addvi_b: 1581 case Intrinsic::mips_addvi_h: 1582 case Intrinsic::mips_addvi_w: 1583 case Intrinsic::mips_addvi_d: 1584 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1585 lowerMSASplatImm(Op, 2, DAG)); 1586 case Intrinsic::mips_and_v: 1587 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1588 Op->getOperand(2)); 1589 case Intrinsic::mips_andi_b: 1590 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1591 lowerMSASplatImm(Op, 2, DAG)); 1592 case Intrinsic::mips_bclr_b: 1593 case Intrinsic::mips_bclr_h: 1594 case Intrinsic::mips_bclr_w: 1595 case Intrinsic::mips_bclr_d: 1596 return lowerMSABitClear(Op, DAG); 1597 case Intrinsic::mips_bclri_b: 1598 case Intrinsic::mips_bclri_h: 1599 case Intrinsic::mips_bclri_w: 1600 case Intrinsic::mips_bclri_d: 1601 return lowerMSABitClearImm(Op, DAG); 1602 case Intrinsic::mips_binsli_b: 1603 case Intrinsic::mips_binsli_h: 1604 case Intrinsic::mips_binsli_w: 1605 case Intrinsic::mips_binsli_d: { 1606 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1607 EVT VecTy = Op->getValueType(0); 1608 EVT EltTy = VecTy.getVectorElementType(); 1609 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1610 report_fatal_error("Immediate out of range"); 1611 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1612 Op->getConstantOperandVal(3) + 1); 1613 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1614 DAG.getConstant(Mask, DL, VecTy, true), 1615 Op->getOperand(2), Op->getOperand(1)); 1616 } 1617 case Intrinsic::mips_binsri_b: 1618 case Intrinsic::mips_binsri_h: 1619 case Intrinsic::mips_binsri_w: 1620 case Intrinsic::mips_binsri_d: { 1621 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1622 EVT VecTy = Op->getValueType(0); 1623 EVT EltTy = VecTy.getVectorElementType(); 1624 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1625 report_fatal_error("Immediate out of range"); 1626 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1627 Op->getConstantOperandVal(3) + 1); 1628 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1629 DAG.getConstant(Mask, DL, VecTy, true), 1630 Op->getOperand(2), Op->getOperand(1)); 1631 } 1632 case Intrinsic::mips_bmnz_v: 1633 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1634 Op->getOperand(2), Op->getOperand(1)); 1635 case Intrinsic::mips_bmnzi_b: 1636 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1637 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1638 Op->getOperand(1)); 1639 case Intrinsic::mips_bmz_v: 1640 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1641 Op->getOperand(1), Op->getOperand(2)); 1642 case Intrinsic::mips_bmzi_b: 1643 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1644 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1645 Op->getOperand(2)); 1646 case Intrinsic::mips_bneg_b: 1647 case Intrinsic::mips_bneg_h: 1648 case Intrinsic::mips_bneg_w: 1649 case Intrinsic::mips_bneg_d: { 1650 EVT VecTy = Op->getValueType(0); 1651 SDValue One = DAG.getConstant(1, DL, VecTy); 1652 1653 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1654 DAG.getNode(ISD::SHL, DL, VecTy, One, 1655 truncateVecElts(Op, DAG))); 1656 } 1657 case Intrinsic::mips_bnegi_b: 1658 case Intrinsic::mips_bnegi_h: 1659 case Intrinsic::mips_bnegi_w: 1660 case Intrinsic::mips_bnegi_d: 1661 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1662 !Subtarget.isLittle()); 1663 case Intrinsic::mips_bnz_b: 1664 case Intrinsic::mips_bnz_h: 1665 case Intrinsic::mips_bnz_w: 1666 case Intrinsic::mips_bnz_d: 1667 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1668 Op->getOperand(1)); 1669 case Intrinsic::mips_bnz_v: 1670 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1671 Op->getOperand(1)); 1672 case Intrinsic::mips_bsel_v: 1673 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1674 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1675 Op->getOperand(1), Op->getOperand(3), 1676 Op->getOperand(2)); 1677 case Intrinsic::mips_bseli_b: 1678 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1679 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1680 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1681 Op->getOperand(2)); 1682 case Intrinsic::mips_bset_b: 1683 case Intrinsic::mips_bset_h: 1684 case Intrinsic::mips_bset_w: 1685 case Intrinsic::mips_bset_d: { 1686 EVT VecTy = Op->getValueType(0); 1687 SDValue One = DAG.getConstant(1, DL, VecTy); 1688 1689 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1690 DAG.getNode(ISD::SHL, DL, VecTy, One, 1691 truncateVecElts(Op, DAG))); 1692 } 1693 case Intrinsic::mips_bseti_b: 1694 case Intrinsic::mips_bseti_h: 1695 case Intrinsic::mips_bseti_w: 1696 case Intrinsic::mips_bseti_d: 1697 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1698 !Subtarget.isLittle()); 1699 case Intrinsic::mips_bz_b: 1700 case Intrinsic::mips_bz_h: 1701 case Intrinsic::mips_bz_w: 1702 case Intrinsic::mips_bz_d: 1703 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1704 Op->getOperand(1)); 1705 case Intrinsic::mips_bz_v: 1706 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1707 Op->getOperand(1)); 1708 case Intrinsic::mips_ceq_b: 1709 case Intrinsic::mips_ceq_h: 1710 case Intrinsic::mips_ceq_w: 1711 case Intrinsic::mips_ceq_d: 1712 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1713 Op->getOperand(2), ISD::SETEQ); 1714 case Intrinsic::mips_ceqi_b: 1715 case Intrinsic::mips_ceqi_h: 1716 case Intrinsic::mips_ceqi_w: 1717 case Intrinsic::mips_ceqi_d: 1718 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1719 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1720 case Intrinsic::mips_cle_s_b: 1721 case Intrinsic::mips_cle_s_h: 1722 case Intrinsic::mips_cle_s_w: 1723 case Intrinsic::mips_cle_s_d: 1724 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1725 Op->getOperand(2), ISD::SETLE); 1726 case Intrinsic::mips_clei_s_b: 1727 case Intrinsic::mips_clei_s_h: 1728 case Intrinsic::mips_clei_s_w: 1729 case Intrinsic::mips_clei_s_d: 1730 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1731 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1732 case Intrinsic::mips_cle_u_b: 1733 case Intrinsic::mips_cle_u_h: 1734 case Intrinsic::mips_cle_u_w: 1735 case Intrinsic::mips_cle_u_d: 1736 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1737 Op->getOperand(2), ISD::SETULE); 1738 case Intrinsic::mips_clei_u_b: 1739 case Intrinsic::mips_clei_u_h: 1740 case Intrinsic::mips_clei_u_w: 1741 case Intrinsic::mips_clei_u_d: 1742 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1743 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1744 case Intrinsic::mips_clt_s_b: 1745 case Intrinsic::mips_clt_s_h: 1746 case Intrinsic::mips_clt_s_w: 1747 case Intrinsic::mips_clt_s_d: 1748 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1749 Op->getOperand(2), ISD::SETLT); 1750 case Intrinsic::mips_clti_s_b: 1751 case Intrinsic::mips_clti_s_h: 1752 case Intrinsic::mips_clti_s_w: 1753 case Intrinsic::mips_clti_s_d: 1754 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1755 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1756 case Intrinsic::mips_clt_u_b: 1757 case Intrinsic::mips_clt_u_h: 1758 case Intrinsic::mips_clt_u_w: 1759 case Intrinsic::mips_clt_u_d: 1760 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1761 Op->getOperand(2), ISD::SETULT); 1762 case Intrinsic::mips_clti_u_b: 1763 case Intrinsic::mips_clti_u_h: 1764 case Intrinsic::mips_clti_u_w: 1765 case Intrinsic::mips_clti_u_d: 1766 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1767 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1768 case Intrinsic::mips_copy_s_b: 1769 case Intrinsic::mips_copy_s_h: 1770 case Intrinsic::mips_copy_s_w: 1771 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1772 case Intrinsic::mips_copy_s_d: 1773 if (Subtarget.hasMips64()) 1774 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1775 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1776 else { 1777 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1778 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1779 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1780 Op->getValueType(0), Op->getOperand(1), 1781 Op->getOperand(2)); 1782 } 1783 case Intrinsic::mips_copy_u_b: 1784 case Intrinsic::mips_copy_u_h: 1785 case Intrinsic::mips_copy_u_w: 1786 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1787 case Intrinsic::mips_copy_u_d: 1788 if (Subtarget.hasMips64()) 1789 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1790 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1791 else { 1792 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1793 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1794 // Note: When i64 is illegal, this results in copy_s.w instructions 1795 // instead of copy_u.w instructions. This makes no difference to the 1796 // behaviour since i64 is only illegal when the register file is 32-bit. 1797 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1798 Op->getValueType(0), Op->getOperand(1), 1799 Op->getOperand(2)); 1800 } 1801 case Intrinsic::mips_div_s_b: 1802 case Intrinsic::mips_div_s_h: 1803 case Intrinsic::mips_div_s_w: 1804 case Intrinsic::mips_div_s_d: 1805 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1806 Op->getOperand(2)); 1807 case Intrinsic::mips_div_u_b: 1808 case Intrinsic::mips_div_u_h: 1809 case Intrinsic::mips_div_u_w: 1810 case Intrinsic::mips_div_u_d: 1811 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1812 Op->getOperand(2)); 1813 case Intrinsic::mips_fadd_w: 1814 case Intrinsic::mips_fadd_d: 1815 // TODO: If intrinsics have fast-math-flags, propagate them. 1816 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1817 Op->getOperand(2)); 1818 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1819 case Intrinsic::mips_fceq_w: 1820 case Intrinsic::mips_fceq_d: 1821 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1822 Op->getOperand(2), ISD::SETOEQ); 1823 case Intrinsic::mips_fcle_w: 1824 case Intrinsic::mips_fcle_d: 1825 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1826 Op->getOperand(2), ISD::SETOLE); 1827 case Intrinsic::mips_fclt_w: 1828 case Intrinsic::mips_fclt_d: 1829 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1830 Op->getOperand(2), ISD::SETOLT); 1831 case Intrinsic::mips_fcne_w: 1832 case Intrinsic::mips_fcne_d: 1833 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1834 Op->getOperand(2), ISD::SETONE); 1835 case Intrinsic::mips_fcor_w: 1836 case Intrinsic::mips_fcor_d: 1837 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1838 Op->getOperand(2), ISD::SETO); 1839 case Intrinsic::mips_fcueq_w: 1840 case Intrinsic::mips_fcueq_d: 1841 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1842 Op->getOperand(2), ISD::SETUEQ); 1843 case Intrinsic::mips_fcule_w: 1844 case Intrinsic::mips_fcule_d: 1845 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1846 Op->getOperand(2), ISD::SETULE); 1847 case Intrinsic::mips_fcult_w: 1848 case Intrinsic::mips_fcult_d: 1849 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1850 Op->getOperand(2), ISD::SETULT); 1851 case Intrinsic::mips_fcun_w: 1852 case Intrinsic::mips_fcun_d: 1853 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1854 Op->getOperand(2), ISD::SETUO); 1855 case Intrinsic::mips_fcune_w: 1856 case Intrinsic::mips_fcune_d: 1857 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1858 Op->getOperand(2), ISD::SETUNE); 1859 case Intrinsic::mips_fdiv_w: 1860 case Intrinsic::mips_fdiv_d: 1861 // TODO: If intrinsics have fast-math-flags, propagate them. 1862 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1863 Op->getOperand(2)); 1864 case Intrinsic::mips_ffint_u_w: 1865 case Intrinsic::mips_ffint_u_d: 1866 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1867 Op->getOperand(1)); 1868 case Intrinsic::mips_ffint_s_w: 1869 case Intrinsic::mips_ffint_s_d: 1870 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1871 Op->getOperand(1)); 1872 case Intrinsic::mips_fill_b: 1873 case Intrinsic::mips_fill_h: 1874 case Intrinsic::mips_fill_w: 1875 case Intrinsic::mips_fill_d: { 1876 EVT ResTy = Op->getValueType(0); 1877 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1878 Op->getOperand(1)); 1879 1880 // If ResTy is v2i64 then the type legalizer will break this node down into 1881 // an equivalent v4i32. 1882 return DAG.getBuildVector(ResTy, DL, Ops); 1883 } 1884 case Intrinsic::mips_fexp2_w: 1885 case Intrinsic::mips_fexp2_d: { 1886 // TODO: If intrinsics have fast-math-flags, propagate them. 1887 EVT ResTy = Op->getValueType(0); 1888 return DAG.getNode( 1889 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1890 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1891 } 1892 case Intrinsic::mips_flog2_w: 1893 case Intrinsic::mips_flog2_d: 1894 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1895 case Intrinsic::mips_fmadd_w: 1896 case Intrinsic::mips_fmadd_d: 1897 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1898 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1899 case Intrinsic::mips_fmul_w: 1900 case Intrinsic::mips_fmul_d: 1901 // TODO: If intrinsics have fast-math-flags, propagate them. 1902 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1903 Op->getOperand(2)); 1904 case Intrinsic::mips_fmsub_w: 1905 case Intrinsic::mips_fmsub_d: { 1906 // TODO: If intrinsics have fast-math-flags, propagate them. 1907 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1908 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1909 } 1910 case Intrinsic::mips_frint_w: 1911 case Intrinsic::mips_frint_d: 1912 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1913 case Intrinsic::mips_fsqrt_w: 1914 case Intrinsic::mips_fsqrt_d: 1915 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1916 case Intrinsic::mips_fsub_w: 1917 case Intrinsic::mips_fsub_d: 1918 // TODO: If intrinsics have fast-math-flags, propagate them. 1919 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1920 Op->getOperand(2)); 1921 case Intrinsic::mips_ftrunc_u_w: 1922 case Intrinsic::mips_ftrunc_u_d: 1923 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1924 Op->getOperand(1)); 1925 case Intrinsic::mips_ftrunc_s_w: 1926 case Intrinsic::mips_ftrunc_s_d: 1927 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1928 Op->getOperand(1)); 1929 case Intrinsic::mips_ilvev_b: 1930 case Intrinsic::mips_ilvev_h: 1931 case Intrinsic::mips_ilvev_w: 1932 case Intrinsic::mips_ilvev_d: 1933 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1934 Op->getOperand(1), Op->getOperand(2)); 1935 case Intrinsic::mips_ilvl_b: 1936 case Intrinsic::mips_ilvl_h: 1937 case Intrinsic::mips_ilvl_w: 1938 case Intrinsic::mips_ilvl_d: 1939 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1940 Op->getOperand(1), Op->getOperand(2)); 1941 case Intrinsic::mips_ilvod_b: 1942 case Intrinsic::mips_ilvod_h: 1943 case Intrinsic::mips_ilvod_w: 1944 case Intrinsic::mips_ilvod_d: 1945 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1946 Op->getOperand(1), Op->getOperand(2)); 1947 case Intrinsic::mips_ilvr_b: 1948 case Intrinsic::mips_ilvr_h: 1949 case Intrinsic::mips_ilvr_w: 1950 case Intrinsic::mips_ilvr_d: 1951 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1952 Op->getOperand(1), Op->getOperand(2)); 1953 case Intrinsic::mips_insert_b: 1954 case Intrinsic::mips_insert_h: 1955 case Intrinsic::mips_insert_w: 1956 case Intrinsic::mips_insert_d: 1957 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1958 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1959 case Intrinsic::mips_insve_b: 1960 case Intrinsic::mips_insve_h: 1961 case Intrinsic::mips_insve_w: 1962 case Intrinsic::mips_insve_d: { 1963 // Report an error for out of range values. 1964 int64_t Max; 1965 switch (Intrinsic) { 1966 case Intrinsic::mips_insve_b: Max = 15; break; 1967 case Intrinsic::mips_insve_h: Max = 7; break; 1968 case Intrinsic::mips_insve_w: Max = 3; break; 1969 case Intrinsic::mips_insve_d: Max = 1; break; 1970 default: llvm_unreachable("Unmatched intrinsic"); 1971 } 1972 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1973 if (Value < 0 || Value > Max) 1974 report_fatal_error("Immediate out of range"); 1975 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1976 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1977 DAG.getConstant(0, DL, MVT::i32)); 1978 } 1979 case Intrinsic::mips_ldi_b: 1980 case Intrinsic::mips_ldi_h: 1981 case Intrinsic::mips_ldi_w: 1982 case Intrinsic::mips_ldi_d: 1983 return lowerMSASplatImm(Op, 1, DAG, true); 1984 case Intrinsic::mips_lsa: 1985 case Intrinsic::mips_dlsa: { 1986 EVT ResTy = Op->getValueType(0); 1987 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1988 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1989 Op->getOperand(2), Op->getOperand(3))); 1990 } 1991 case Intrinsic::mips_maddv_b: 1992 case Intrinsic::mips_maddv_h: 1993 case Intrinsic::mips_maddv_w: 1994 case Intrinsic::mips_maddv_d: { 1995 EVT ResTy = Op->getValueType(0); 1996 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1997 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1998 Op->getOperand(2), Op->getOperand(3))); 1999 } 2000 case Intrinsic::mips_max_s_b: 2001 case Intrinsic::mips_max_s_h: 2002 case Intrinsic::mips_max_s_w: 2003 case Intrinsic::mips_max_s_d: 2004 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2005 Op->getOperand(1), Op->getOperand(2)); 2006 case Intrinsic::mips_max_u_b: 2007 case Intrinsic::mips_max_u_h: 2008 case Intrinsic::mips_max_u_w: 2009 case Intrinsic::mips_max_u_d: 2010 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2011 Op->getOperand(1), Op->getOperand(2)); 2012 case Intrinsic::mips_maxi_s_b: 2013 case Intrinsic::mips_maxi_s_h: 2014 case Intrinsic::mips_maxi_s_w: 2015 case Intrinsic::mips_maxi_s_d: 2016 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2017 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2018 case Intrinsic::mips_maxi_u_b: 2019 case Intrinsic::mips_maxi_u_h: 2020 case Intrinsic::mips_maxi_u_w: 2021 case Intrinsic::mips_maxi_u_d: 2022 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2023 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2024 case Intrinsic::mips_min_s_b: 2025 case Intrinsic::mips_min_s_h: 2026 case Intrinsic::mips_min_s_w: 2027 case Intrinsic::mips_min_s_d: 2028 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2029 Op->getOperand(1), Op->getOperand(2)); 2030 case Intrinsic::mips_min_u_b: 2031 case Intrinsic::mips_min_u_h: 2032 case Intrinsic::mips_min_u_w: 2033 case Intrinsic::mips_min_u_d: 2034 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2035 Op->getOperand(1), Op->getOperand(2)); 2036 case Intrinsic::mips_mini_s_b: 2037 case Intrinsic::mips_mini_s_h: 2038 case Intrinsic::mips_mini_s_w: 2039 case Intrinsic::mips_mini_s_d: 2040 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2041 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2042 case Intrinsic::mips_mini_u_b: 2043 case Intrinsic::mips_mini_u_h: 2044 case Intrinsic::mips_mini_u_w: 2045 case Intrinsic::mips_mini_u_d: 2046 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2047 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2048 case Intrinsic::mips_mod_s_b: 2049 case Intrinsic::mips_mod_s_h: 2050 case Intrinsic::mips_mod_s_w: 2051 case Intrinsic::mips_mod_s_d: 2052 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2053 Op->getOperand(2)); 2054 case Intrinsic::mips_mod_u_b: 2055 case Intrinsic::mips_mod_u_h: 2056 case Intrinsic::mips_mod_u_w: 2057 case Intrinsic::mips_mod_u_d: 2058 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2059 Op->getOperand(2)); 2060 case Intrinsic::mips_mulv_b: 2061 case Intrinsic::mips_mulv_h: 2062 case Intrinsic::mips_mulv_w: 2063 case Intrinsic::mips_mulv_d: 2064 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2065 Op->getOperand(2)); 2066 case Intrinsic::mips_msubv_b: 2067 case Intrinsic::mips_msubv_h: 2068 case Intrinsic::mips_msubv_w: 2069 case Intrinsic::mips_msubv_d: { 2070 EVT ResTy = Op->getValueType(0); 2071 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2072 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2073 Op->getOperand(2), Op->getOperand(3))); 2074 } 2075 case Intrinsic::mips_nlzc_b: 2076 case Intrinsic::mips_nlzc_h: 2077 case Intrinsic::mips_nlzc_w: 2078 case Intrinsic::mips_nlzc_d: 2079 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2080 case Intrinsic::mips_nor_v: { 2081 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2082 Op->getOperand(1), Op->getOperand(2)); 2083 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2084 } 2085 case Intrinsic::mips_nori_b: { 2086 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2087 Op->getOperand(1), 2088 lowerMSASplatImm(Op, 2, DAG)); 2089 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2090 } 2091 case Intrinsic::mips_or_v: 2092 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2093 Op->getOperand(2)); 2094 case Intrinsic::mips_ori_b: 2095 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2096 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2097 case Intrinsic::mips_pckev_b: 2098 case Intrinsic::mips_pckev_h: 2099 case Intrinsic::mips_pckev_w: 2100 case Intrinsic::mips_pckev_d: 2101 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2102 Op->getOperand(1), Op->getOperand(2)); 2103 case Intrinsic::mips_pckod_b: 2104 case Intrinsic::mips_pckod_h: 2105 case Intrinsic::mips_pckod_w: 2106 case Intrinsic::mips_pckod_d: 2107 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2108 Op->getOperand(1), Op->getOperand(2)); 2109 case Intrinsic::mips_pcnt_b: 2110 case Intrinsic::mips_pcnt_h: 2111 case Intrinsic::mips_pcnt_w: 2112 case Intrinsic::mips_pcnt_d: 2113 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2114 case Intrinsic::mips_sat_s_b: 2115 case Intrinsic::mips_sat_s_h: 2116 case Intrinsic::mips_sat_s_w: 2117 case Intrinsic::mips_sat_s_d: 2118 case Intrinsic::mips_sat_u_b: 2119 case Intrinsic::mips_sat_u_h: 2120 case Intrinsic::mips_sat_u_w: 2121 case Intrinsic::mips_sat_u_d: { 2122 // Report an error for out of range values. 2123 int64_t Max; 2124 switch (Intrinsic) { 2125 case Intrinsic::mips_sat_s_b: 2126 case Intrinsic::mips_sat_u_b: Max = 7; break; 2127 case Intrinsic::mips_sat_s_h: 2128 case Intrinsic::mips_sat_u_h: Max = 15; break; 2129 case Intrinsic::mips_sat_s_w: 2130 case Intrinsic::mips_sat_u_w: Max = 31; break; 2131 case Intrinsic::mips_sat_s_d: 2132 case Intrinsic::mips_sat_u_d: Max = 63; break; 2133 default: llvm_unreachable("Unmatched intrinsic"); 2134 } 2135 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2136 if (Value < 0 || Value > Max) 2137 report_fatal_error("Immediate out of range"); 2138 return SDValue(); 2139 } 2140 case Intrinsic::mips_shf_b: 2141 case Intrinsic::mips_shf_h: 2142 case Intrinsic::mips_shf_w: { 2143 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2144 if (Value < 0 || Value > 255) 2145 report_fatal_error("Immediate out of range"); 2146 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2147 Op->getOperand(2), Op->getOperand(1)); 2148 } 2149 case Intrinsic::mips_sldi_b: 2150 case Intrinsic::mips_sldi_h: 2151 case Intrinsic::mips_sldi_w: 2152 case Intrinsic::mips_sldi_d: { 2153 // Report an error for out of range values. 2154 int64_t Max; 2155 switch (Intrinsic) { 2156 case Intrinsic::mips_sldi_b: Max = 15; break; 2157 case Intrinsic::mips_sldi_h: Max = 7; break; 2158 case Intrinsic::mips_sldi_w: Max = 3; break; 2159 case Intrinsic::mips_sldi_d: Max = 1; break; 2160 default: llvm_unreachable("Unmatched intrinsic"); 2161 } 2162 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2163 if (Value < 0 || Value > Max) 2164 report_fatal_error("Immediate out of range"); 2165 return SDValue(); 2166 } 2167 case Intrinsic::mips_sll_b: 2168 case Intrinsic::mips_sll_h: 2169 case Intrinsic::mips_sll_w: 2170 case Intrinsic::mips_sll_d: 2171 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2172 truncateVecElts(Op, DAG)); 2173 case Intrinsic::mips_slli_b: 2174 case Intrinsic::mips_slli_h: 2175 case Intrinsic::mips_slli_w: 2176 case Intrinsic::mips_slli_d: 2177 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2178 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2179 case Intrinsic::mips_splat_b: 2180 case Intrinsic::mips_splat_h: 2181 case Intrinsic::mips_splat_w: 2182 case Intrinsic::mips_splat_d: 2183 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2184 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2185 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2186 // Instead we lower to MipsISD::VSHF and match from there. 2187 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2188 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2189 Op->getOperand(1)); 2190 case Intrinsic::mips_splati_b: 2191 case Intrinsic::mips_splati_h: 2192 case Intrinsic::mips_splati_w: 2193 case Intrinsic::mips_splati_d: 2194 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2195 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2196 Op->getOperand(1)); 2197 case Intrinsic::mips_sra_b: 2198 case Intrinsic::mips_sra_h: 2199 case Intrinsic::mips_sra_w: 2200 case Intrinsic::mips_sra_d: 2201 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2202 truncateVecElts(Op, DAG)); 2203 case Intrinsic::mips_srai_b: 2204 case Intrinsic::mips_srai_h: 2205 case Intrinsic::mips_srai_w: 2206 case Intrinsic::mips_srai_d: 2207 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2208 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2209 case Intrinsic::mips_srari_b: 2210 case Intrinsic::mips_srari_h: 2211 case Intrinsic::mips_srari_w: 2212 case Intrinsic::mips_srari_d: { 2213 // Report an error for out of range values. 2214 int64_t Max; 2215 switch (Intrinsic) { 2216 case Intrinsic::mips_srari_b: Max = 7; break; 2217 case Intrinsic::mips_srari_h: Max = 15; break; 2218 case Intrinsic::mips_srari_w: Max = 31; break; 2219 case Intrinsic::mips_srari_d: Max = 63; break; 2220 default: llvm_unreachable("Unmatched intrinsic"); 2221 } 2222 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2223 if (Value < 0 || Value > Max) 2224 report_fatal_error("Immediate out of range"); 2225 return SDValue(); 2226 } 2227 case Intrinsic::mips_srl_b: 2228 case Intrinsic::mips_srl_h: 2229 case Intrinsic::mips_srl_w: 2230 case Intrinsic::mips_srl_d: 2231 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2232 truncateVecElts(Op, DAG)); 2233 case Intrinsic::mips_srli_b: 2234 case Intrinsic::mips_srli_h: 2235 case Intrinsic::mips_srli_w: 2236 case Intrinsic::mips_srli_d: 2237 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2238 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2239 case Intrinsic::mips_srlri_b: 2240 case Intrinsic::mips_srlri_h: 2241 case Intrinsic::mips_srlri_w: 2242 case Intrinsic::mips_srlri_d: { 2243 // Report an error for out of range values. 2244 int64_t Max; 2245 switch (Intrinsic) { 2246 case Intrinsic::mips_srlri_b: Max = 7; break; 2247 case Intrinsic::mips_srlri_h: Max = 15; break; 2248 case Intrinsic::mips_srlri_w: Max = 31; break; 2249 case Intrinsic::mips_srlri_d: Max = 63; break; 2250 default: llvm_unreachable("Unmatched intrinsic"); 2251 } 2252 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2253 if (Value < 0 || Value > Max) 2254 report_fatal_error("Immediate out of range"); 2255 return SDValue(); 2256 } 2257 case Intrinsic::mips_subv_b: 2258 case Intrinsic::mips_subv_h: 2259 case Intrinsic::mips_subv_w: 2260 case Intrinsic::mips_subv_d: 2261 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2262 Op->getOperand(2)); 2263 case Intrinsic::mips_subvi_b: 2264 case Intrinsic::mips_subvi_h: 2265 case Intrinsic::mips_subvi_w: 2266 case Intrinsic::mips_subvi_d: 2267 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2268 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2269 case Intrinsic::mips_vshf_b: 2270 case Intrinsic::mips_vshf_h: 2271 case Intrinsic::mips_vshf_w: 2272 case Intrinsic::mips_vshf_d: 2273 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2274 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2275 case Intrinsic::mips_xor_v: 2276 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2277 Op->getOperand(2)); 2278 case Intrinsic::mips_xori_b: 2279 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2280 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2281 case Intrinsic::thread_pointer: { 2282 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2283 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2284 } 2285 } 2286 } 2287 2288 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2289 const MipsSubtarget &Subtarget) { 2290 SDLoc DL(Op); 2291 SDValue ChainIn = Op->getOperand(0); 2292 SDValue Address = Op->getOperand(2); 2293 SDValue Offset = Op->getOperand(3); 2294 EVT ResTy = Op->getValueType(0); 2295 EVT PtrTy = Address->getValueType(0); 2296 2297 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2298 // however takes an i32 signed constant offset. The actual type of the 2299 // intrinsic is a scaled signed i10. 2300 if (Subtarget.isABI_N64()) 2301 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2302 2303 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2304 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2305 Align(16)); 2306 } 2307 2308 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2309 SelectionDAG &DAG) const { 2310 unsigned Intr = Op->getConstantOperandVal(1); 2311 switch (Intr) { 2312 default: 2313 return SDValue(); 2314 case Intrinsic::mips_extp: 2315 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2316 case Intrinsic::mips_extpdp: 2317 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2318 case Intrinsic::mips_extr_w: 2319 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2320 case Intrinsic::mips_extr_r_w: 2321 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2322 case Intrinsic::mips_extr_rs_w: 2323 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2324 case Intrinsic::mips_extr_s_h: 2325 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2326 case Intrinsic::mips_mthlip: 2327 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2328 case Intrinsic::mips_mulsaq_s_w_ph: 2329 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2330 case Intrinsic::mips_maq_s_w_phl: 2331 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2332 case Intrinsic::mips_maq_s_w_phr: 2333 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2334 case Intrinsic::mips_maq_sa_w_phl: 2335 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2336 case Intrinsic::mips_maq_sa_w_phr: 2337 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2338 case Intrinsic::mips_dpaq_s_w_ph: 2339 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2340 case Intrinsic::mips_dpsq_s_w_ph: 2341 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2342 case Intrinsic::mips_dpaq_sa_l_w: 2343 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2344 case Intrinsic::mips_dpsq_sa_l_w: 2345 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2346 case Intrinsic::mips_dpaqx_s_w_ph: 2347 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2348 case Intrinsic::mips_dpaqx_sa_w_ph: 2349 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2350 case Intrinsic::mips_dpsqx_s_w_ph: 2351 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2352 case Intrinsic::mips_dpsqx_sa_w_ph: 2353 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2354 case Intrinsic::mips_ld_b: 2355 case Intrinsic::mips_ld_h: 2356 case Intrinsic::mips_ld_w: 2357 case Intrinsic::mips_ld_d: 2358 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2359 } 2360 } 2361 2362 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2363 const MipsSubtarget &Subtarget) { 2364 SDLoc DL(Op); 2365 SDValue ChainIn = Op->getOperand(0); 2366 SDValue Value = Op->getOperand(2); 2367 SDValue Address = Op->getOperand(3); 2368 SDValue Offset = Op->getOperand(4); 2369 EVT PtrTy = Address->getValueType(0); 2370 2371 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2372 // however takes an i32 signed constant offset. The actual type of the 2373 // intrinsic is a scaled signed i10. 2374 if (Subtarget.isABI_N64()) 2375 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2376 2377 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2378 2379 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2380 Align(16)); 2381 } 2382 2383 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2384 SelectionDAG &DAG) const { 2385 unsigned Intr = Op->getConstantOperandVal(1); 2386 switch (Intr) { 2387 default: 2388 return SDValue(); 2389 case Intrinsic::mips_st_b: 2390 case Intrinsic::mips_st_h: 2391 case Intrinsic::mips_st_w: 2392 case Intrinsic::mips_st_d: 2393 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2394 } 2395 } 2396 2397 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2398 // 2399 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2400 // choose to sign-extend but we could have equally chosen zero-extend. The 2401 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2402 // result into this node later (possibly changing it to a zero-extend in the 2403 // process). 2404 SDValue MipsSETargetLowering:: 2405 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2406 SDLoc DL(Op); 2407 EVT ResTy = Op->getValueType(0); 2408 SDValue Op0 = Op->getOperand(0); 2409 EVT VecTy = Op0->getValueType(0); 2410 2411 if (!VecTy.is128BitVector()) 2412 return SDValue(); 2413 2414 if (ResTy.isInteger()) { 2415 SDValue Op1 = Op->getOperand(1); 2416 EVT EltTy = VecTy.getVectorElementType(); 2417 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2418 DAG.getValueType(EltTy)); 2419 } 2420 2421 return Op; 2422 } 2423 2424 static bool isConstantOrUndef(const SDValue Op) { 2425 if (Op->isUndef()) 2426 return true; 2427 if (isa<ConstantSDNode>(Op)) 2428 return true; 2429 if (isa<ConstantFPSDNode>(Op)) 2430 return true; 2431 return false; 2432 } 2433 2434 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2435 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2436 if (isConstantOrUndef(Op->getOperand(i))) 2437 return true; 2438 return false; 2439 } 2440 2441 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2442 // backend. 2443 // 2444 // Lowers according to the following rules: 2445 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2446 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2447 // immediate 2448 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2449 // is a power of 2 less than or equal to 64 and the value does not fit into a 2450 // signed 10-bit immediate 2451 // - Non-constant splats are legal as-is. 2452 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2453 // - All others are illegal and must be expanded. 2454 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2455 SelectionDAG &DAG) const { 2456 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2457 EVT ResTy = Op->getValueType(0); 2458 SDLoc DL(Op); 2459 APInt SplatValue, SplatUndef; 2460 unsigned SplatBitSize; 2461 bool HasAnyUndefs; 2462 2463 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2464 return SDValue(); 2465 2466 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2467 HasAnyUndefs, 8, 2468 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2469 // We can only cope with 8, 16, 32, or 64-bit elements 2470 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2471 SplatBitSize != 64) 2472 return SDValue(); 2473 2474 // If the value isn't an integer type we will have to bitcast 2475 // from an integer type first. Also, if there are any undefs, we must 2476 // lower them to defined values first. 2477 if (ResTy.isInteger() && !HasAnyUndefs) 2478 return Op; 2479 2480 EVT ViaVecTy; 2481 2482 switch (SplatBitSize) { 2483 default: 2484 return SDValue(); 2485 case 8: 2486 ViaVecTy = MVT::v16i8; 2487 break; 2488 case 16: 2489 ViaVecTy = MVT::v8i16; 2490 break; 2491 case 32: 2492 ViaVecTy = MVT::v4i32; 2493 break; 2494 case 64: 2495 // There's no fill.d to fall back on for 64-bit values 2496 return SDValue(); 2497 } 2498 2499 // SelectionDAG::getConstant will promote SplatValue appropriately. 2500 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2501 2502 // Bitcast to the type we originally wanted 2503 if (ViaVecTy != ResTy) 2504 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2505 2506 return Result; 2507 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) 2508 return Op; 2509 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2510 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 2511 // The resulting code is the same length as the expansion, but it doesn't 2512 // use memory operations 2513 EVT ResTy = Node->getValueType(0); 2514 2515 assert(ResTy.isVector()); 2516 2517 unsigned NumElts = ResTy.getVectorNumElements(); 2518 SDValue Vector = DAG.getUNDEF(ResTy); 2519 for (unsigned i = 0; i < NumElts; ++i) { 2520 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2521 Node->getOperand(i), 2522 DAG.getConstant(i, DL, MVT::i32)); 2523 } 2524 return Vector; 2525 } 2526 2527 return SDValue(); 2528 } 2529 2530 // Lower VECTOR_SHUFFLE into SHF (if possible). 2531 // 2532 // SHF splits the vector into blocks of four elements, then shuffles these 2533 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2534 // 2535 // It is therefore possible to lower into SHF when the mask takes the form: 2536 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2537 // When undef's appear they are treated as if they were whatever value is 2538 // necessary in order to fit the above forms. 2539 // 2540 // For example: 2541 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2542 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2543 // i32 7, i32 6, i32 5, i32 4> 2544 // is lowered to: 2545 // (SHF_H $w0, $w1, 27) 2546 // where the 27 comes from: 2547 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2548 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2549 SmallVector<int, 16> Indices, 2550 SelectionDAG &DAG) { 2551 int SHFIndices[4] = { -1, -1, -1, -1 }; 2552 2553 if (Indices.size() < 4) 2554 return SDValue(); 2555 2556 for (unsigned i = 0; i < 4; ++i) { 2557 for (unsigned j = i; j < Indices.size(); j += 4) { 2558 int Idx = Indices[j]; 2559 2560 // Convert from vector index to 4-element subvector index 2561 // If an index refers to an element outside of the subvector then give up 2562 if (Idx != -1) { 2563 Idx -= 4 * (j / 4); 2564 if (Idx < 0 || Idx >= 4) 2565 return SDValue(); 2566 } 2567 2568 // If the mask has an undef, replace it with the current index. 2569 // Note that it might still be undef if the current index is also undef 2570 if (SHFIndices[i] == -1) 2571 SHFIndices[i] = Idx; 2572 2573 // Check that non-undef values are the same as in the mask. If they 2574 // aren't then give up 2575 if (!(Idx == -1 || Idx == SHFIndices[i])) 2576 return SDValue(); 2577 } 2578 } 2579 2580 // Calculate the immediate. Replace any remaining undefs with zero 2581 APInt Imm(32, 0); 2582 for (int i = 3; i >= 0; --i) { 2583 int Idx = SHFIndices[i]; 2584 2585 if (Idx == -1) 2586 Idx = 0; 2587 2588 Imm <<= 2; 2589 Imm |= Idx & 0x3; 2590 } 2591 2592 SDLoc DL(Op); 2593 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2594 DAG.getTargetConstant(Imm, DL, MVT::i32), 2595 Op->getOperand(0)); 2596 } 2597 2598 /// Determine whether a range fits a regular pattern of values. 2599 /// This function accounts for the possibility of jumping over the End iterator. 2600 template <typename ValType> 2601 static bool 2602 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2603 unsigned CheckStride, 2604 typename SmallVectorImpl<ValType>::const_iterator End, 2605 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2606 auto &I = Begin; 2607 2608 while (I != End) { 2609 if (*I != -1 && *I != ExpectedIndex) 2610 return false; 2611 ExpectedIndex += ExpectedIndexStride; 2612 2613 // Incrementing past End is undefined behaviour so we must increment one 2614 // step at a time and check for End at each step. 2615 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2616 ; // Empty loop body. 2617 } 2618 return true; 2619 } 2620 2621 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2622 // 2623 // It is a SPLATI when the mask is: 2624 // <x, x, x, ...> 2625 // where x is any valid index. 2626 // 2627 // When undef's appear in the mask they are treated as if they were whatever 2628 // value is necessary in order to fit the above form. 2629 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2630 SmallVector<int, 16> Indices, 2631 SelectionDAG &DAG) { 2632 assert((Indices.size() % 2) == 0); 2633 2634 int SplatIndex = -1; 2635 for (const auto &V : Indices) { 2636 if (V != -1) { 2637 SplatIndex = V; 2638 break; 2639 } 2640 } 2641 2642 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2643 0); 2644 } 2645 2646 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2647 // 2648 // ILVEV interleaves the even elements from each vector. 2649 // 2650 // It is possible to lower into ILVEV when the mask consists of two of the 2651 // following forms interleaved: 2652 // <0, 2, 4, ...> 2653 // <n, n+2, n+4, ...> 2654 // where n is the number of elements in the vector. 2655 // For example: 2656 // <0, 0, 2, 2, 4, 4, ...> 2657 // <0, n, 2, n+2, 4, n+4, ...> 2658 // 2659 // When undef's appear in the mask they are treated as if they were whatever 2660 // value is necessary in order to fit the above forms. 2661 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2662 SmallVector<int, 16> Indices, 2663 SelectionDAG &DAG) { 2664 assert((Indices.size() % 2) == 0); 2665 2666 SDValue Wt; 2667 SDValue Ws; 2668 const auto &Begin = Indices.begin(); 2669 const auto &End = Indices.end(); 2670 2671 // Check even elements are taken from the even elements of one half or the 2672 // other and pick an operand accordingly. 2673 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2674 Wt = Op->getOperand(0); 2675 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2676 Wt = Op->getOperand(1); 2677 else 2678 return SDValue(); 2679 2680 // Check odd elements are taken from the even elements of one half or the 2681 // other and pick an operand accordingly. 2682 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2683 Ws = Op->getOperand(0); 2684 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2685 Ws = Op->getOperand(1); 2686 else 2687 return SDValue(); 2688 2689 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2690 } 2691 2692 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2693 // 2694 // ILVOD interleaves the odd elements from each vector. 2695 // 2696 // It is possible to lower into ILVOD when the mask consists of two of the 2697 // following forms interleaved: 2698 // <1, 3, 5, ...> 2699 // <n+1, n+3, n+5, ...> 2700 // where n is the number of elements in the vector. 2701 // For example: 2702 // <1, 1, 3, 3, 5, 5, ...> 2703 // <1, n+1, 3, n+3, 5, n+5, ...> 2704 // 2705 // When undef's appear in the mask they are treated as if they were whatever 2706 // value is necessary in order to fit the above forms. 2707 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2708 SmallVector<int, 16> Indices, 2709 SelectionDAG &DAG) { 2710 assert((Indices.size() % 2) == 0); 2711 2712 SDValue Wt; 2713 SDValue Ws; 2714 const auto &Begin = Indices.begin(); 2715 const auto &End = Indices.end(); 2716 2717 // Check even elements are taken from the odd elements of one half or the 2718 // other and pick an operand accordingly. 2719 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2720 Wt = Op->getOperand(0); 2721 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2722 Wt = Op->getOperand(1); 2723 else 2724 return SDValue(); 2725 2726 // Check odd elements are taken from the odd elements of one half or the 2727 // other and pick an operand accordingly. 2728 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2729 Ws = Op->getOperand(0); 2730 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2731 Ws = Op->getOperand(1); 2732 else 2733 return SDValue(); 2734 2735 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2736 } 2737 2738 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2739 // 2740 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2741 // each vector. 2742 // 2743 // It is possible to lower into ILVR when the mask consists of two of the 2744 // following forms interleaved: 2745 // <0, 1, 2, ...> 2746 // <n, n+1, n+2, ...> 2747 // where n is the number of elements in the vector. 2748 // For example: 2749 // <0, 0, 1, 1, 2, 2, ...> 2750 // <0, n, 1, n+1, 2, n+2, ...> 2751 // 2752 // When undef's appear in the mask they are treated as if they were whatever 2753 // value is necessary in order to fit the above forms. 2754 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2755 SmallVector<int, 16> Indices, 2756 SelectionDAG &DAG) { 2757 assert((Indices.size() % 2) == 0); 2758 2759 SDValue Wt; 2760 SDValue Ws; 2761 const auto &Begin = Indices.begin(); 2762 const auto &End = Indices.end(); 2763 2764 // Check even elements are taken from the right (lowest-indexed) elements of 2765 // one half or the other and pick an operand accordingly. 2766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2767 Wt = Op->getOperand(0); 2768 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2769 Wt = Op->getOperand(1); 2770 else 2771 return SDValue(); 2772 2773 // Check odd elements are taken from the right (lowest-indexed) elements of 2774 // one half or the other and pick an operand accordingly. 2775 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2776 Ws = Op->getOperand(0); 2777 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2778 Ws = Op->getOperand(1); 2779 else 2780 return SDValue(); 2781 2782 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2783 } 2784 2785 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2786 // 2787 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2788 // of each vector. 2789 // 2790 // It is possible to lower into ILVL when the mask consists of two of the 2791 // following forms interleaved: 2792 // <x, x+1, x+2, ...> 2793 // <n+x, n+x+1, n+x+2, ...> 2794 // where n is the number of elements in the vector and x is half n. 2795 // For example: 2796 // <x, x, x+1, x+1, x+2, x+2, ...> 2797 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2798 // 2799 // When undef's appear in the mask they are treated as if they were whatever 2800 // value is necessary in order to fit the above forms. 2801 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2802 SmallVector<int, 16> Indices, 2803 SelectionDAG &DAG) { 2804 assert((Indices.size() % 2) == 0); 2805 2806 unsigned HalfSize = Indices.size() / 2; 2807 SDValue Wt; 2808 SDValue Ws; 2809 const auto &Begin = Indices.begin(); 2810 const auto &End = Indices.end(); 2811 2812 // Check even elements are taken from the left (highest-indexed) elements of 2813 // one half or the other and pick an operand accordingly. 2814 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2815 Wt = Op->getOperand(0); 2816 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2817 Wt = Op->getOperand(1); 2818 else 2819 return SDValue(); 2820 2821 // Check odd elements are taken from the left (highest-indexed) elements of 2822 // one half or the other and pick an operand accordingly. 2823 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2824 Ws = Op->getOperand(0); 2825 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2826 1)) 2827 Ws = Op->getOperand(1); 2828 else 2829 return SDValue(); 2830 2831 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2832 } 2833 2834 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2835 // 2836 // PCKEV copies the even elements of each vector into the result vector. 2837 // 2838 // It is possible to lower into PCKEV when the mask consists of two of the 2839 // following forms concatenated: 2840 // <0, 2, 4, ...> 2841 // <n, n+2, n+4, ...> 2842 // where n is the number of elements in the vector. 2843 // For example: 2844 // <0, 2, 4, ..., 0, 2, 4, ...> 2845 // <0, 2, 4, ..., n, n+2, n+4, ...> 2846 // 2847 // When undef's appear in the mask they are treated as if they were whatever 2848 // value is necessary in order to fit the above forms. 2849 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2850 SmallVector<int, 16> Indices, 2851 SelectionDAG &DAG) { 2852 assert((Indices.size() % 2) == 0); 2853 2854 SDValue Wt; 2855 SDValue Ws; 2856 const auto &Begin = Indices.begin(); 2857 const auto &Mid = Indices.begin() + Indices.size() / 2; 2858 const auto &End = Indices.end(); 2859 2860 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2861 Wt = Op->getOperand(0); 2862 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2863 Wt = Op->getOperand(1); 2864 else 2865 return SDValue(); 2866 2867 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2868 Ws = Op->getOperand(0); 2869 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2870 Ws = Op->getOperand(1); 2871 else 2872 return SDValue(); 2873 2874 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2875 } 2876 2877 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2878 // 2879 // PCKOD copies the odd elements of each vector into the result vector. 2880 // 2881 // It is possible to lower into PCKOD when the mask consists of two of the 2882 // following forms concatenated: 2883 // <1, 3, 5, ...> 2884 // <n+1, n+3, n+5, ...> 2885 // where n is the number of elements in the vector. 2886 // For example: 2887 // <1, 3, 5, ..., 1, 3, 5, ...> 2888 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2889 // 2890 // When undef's appear in the mask they are treated as if they were whatever 2891 // value is necessary in order to fit the above forms. 2892 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2893 SmallVector<int, 16> Indices, 2894 SelectionDAG &DAG) { 2895 assert((Indices.size() % 2) == 0); 2896 2897 SDValue Wt; 2898 SDValue Ws; 2899 const auto &Begin = Indices.begin(); 2900 const auto &Mid = Indices.begin() + Indices.size() / 2; 2901 const auto &End = Indices.end(); 2902 2903 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2904 Wt = Op->getOperand(0); 2905 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2906 Wt = Op->getOperand(1); 2907 else 2908 return SDValue(); 2909 2910 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2911 Ws = Op->getOperand(0); 2912 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2913 Ws = Op->getOperand(1); 2914 else 2915 return SDValue(); 2916 2917 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2918 } 2919 2920 // Lower VECTOR_SHUFFLE into VSHF. 2921 // 2922 // This mostly consists of converting the shuffle indices in Indices into a 2923 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2924 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2925 // if the type is v8i16 and all the indices are less than 8 then the second 2926 // operand is unused and can be replaced with anything. We choose to replace it 2927 // with the used operand since this reduces the number of instructions overall. 2928 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2929 const SmallVector<int, 16> &Indices, 2930 SelectionDAG &DAG) { 2931 SmallVector<SDValue, 16> Ops; 2932 SDValue Op0; 2933 SDValue Op1; 2934 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2935 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2936 bool Using1stVec = false; 2937 bool Using2ndVec = false; 2938 SDLoc DL(Op); 2939 int ResTyNumElts = ResTy.getVectorNumElements(); 2940 2941 for (int i = 0; i < ResTyNumElts; ++i) { 2942 // Idx == -1 means UNDEF 2943 int Idx = Indices[i]; 2944 2945 if (0 <= Idx && Idx < ResTyNumElts) 2946 Using1stVec = true; 2947 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2948 Using2ndVec = true; 2949 } 2950 2951 for (int Idx : Indices) 2952 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy)); 2953 2954 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 2955 2956 if (Using1stVec && Using2ndVec) { 2957 Op0 = Op->getOperand(0); 2958 Op1 = Op->getOperand(1); 2959 } else if (Using1stVec) 2960 Op0 = Op1 = Op->getOperand(0); 2961 else if (Using2ndVec) 2962 Op0 = Op1 = Op->getOperand(1); 2963 else 2964 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2965 2966 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2967 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2968 // VSHF concatenates the vectors in a bitwise fashion: 2969 // <0b00, 0b01> + <0b10, 0b11> -> 2970 // 0b0100 + 0b1110 -> 0b01001110 2971 // <0b10, 0b11, 0b00, 0b01> 2972 // We must therefore swap the operands to get the correct result. 2973 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2974 } 2975 2976 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2977 // indices in the shuffle. 2978 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2979 SelectionDAG &DAG) const { 2980 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2981 EVT ResTy = Op->getValueType(0); 2982 2983 if (!ResTy.is128BitVector()) 2984 return SDValue(); 2985 2986 int ResTyNumElts = ResTy.getVectorNumElements(); 2987 SmallVector<int, 16> Indices; 2988 2989 for (int i = 0; i < ResTyNumElts; ++i) 2990 Indices.push_back(Node->getMaskElt(i)); 2991 2992 // splati.[bhwd] is preferable to the others but is matched from 2993 // MipsISD::VSHF. 2994 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 2995 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2996 SDValue Result; 2997 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 2998 return Result; 2999 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 3000 return Result; 3001 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 3002 return Result; 3003 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 3004 return Result; 3005 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 3006 return Result; 3007 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 3008 return Result; 3009 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 3010 return Result; 3011 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3012 } 3013 3014 MachineBasicBlock * 3015 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3016 MachineBasicBlock *BB) const { 3017 // $bb: 3018 // bposge32_pseudo $vr0 3019 // => 3020 // $bb: 3021 // bposge32 $tbb 3022 // $fbb: 3023 // li $vr2, 0 3024 // b $sink 3025 // $tbb: 3026 // li $vr1, 1 3027 // $sink: 3028 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3029 3030 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3031 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3032 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3033 DebugLoc DL = MI.getDebugLoc(); 3034 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3035 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3036 MachineFunction *F = BB->getParent(); 3037 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3038 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3039 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3040 F->insert(It, FBB); 3041 F->insert(It, TBB); 3042 F->insert(It, Sink); 3043 3044 // Transfer the remainder of BB and its successor edges to Sink. 3045 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3046 BB->end()); 3047 Sink->transferSuccessorsAndUpdatePHIs(BB); 3048 3049 // Add successors. 3050 BB->addSuccessor(FBB); 3051 BB->addSuccessor(TBB); 3052 FBB->addSuccessor(Sink); 3053 TBB->addSuccessor(Sink); 3054 3055 // Insert the real bposge32 instruction to $BB. 3056 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3057 // Insert the real bposge32c instruction to $BB. 3058 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3059 3060 // Fill $FBB. 3061 Register VR2 = RegInfo.createVirtualRegister(RC); 3062 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3063 .addReg(Mips::ZERO).addImm(0); 3064 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3065 3066 // Fill $TBB. 3067 Register VR1 = RegInfo.createVirtualRegister(RC); 3068 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3069 .addReg(Mips::ZERO).addImm(1); 3070 3071 // Insert phi function to $Sink. 3072 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3073 MI.getOperand(0).getReg()) 3074 .addReg(VR2) 3075 .addMBB(FBB) 3076 .addReg(VR1) 3077 .addMBB(TBB); 3078 3079 MI.eraseFromParent(); // The pseudo instruction is gone now. 3080 return Sink; 3081 } 3082 3083 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3084 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3085 // $bb: 3086 // vany_nonzero $rd, $ws 3087 // => 3088 // $bb: 3089 // bnz.b $ws, $tbb 3090 // b $fbb 3091 // $fbb: 3092 // li $rd1, 0 3093 // b $sink 3094 // $tbb: 3095 // li $rd2, 1 3096 // $sink: 3097 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3098 3099 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3100 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3101 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3102 DebugLoc DL = MI.getDebugLoc(); 3103 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3104 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3105 MachineFunction *F = BB->getParent(); 3106 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3107 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3108 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3109 F->insert(It, FBB); 3110 F->insert(It, TBB); 3111 F->insert(It, Sink); 3112 3113 // Transfer the remainder of BB and its successor edges to Sink. 3114 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3115 BB->end()); 3116 Sink->transferSuccessorsAndUpdatePHIs(BB); 3117 3118 // Add successors. 3119 BB->addSuccessor(FBB); 3120 BB->addSuccessor(TBB); 3121 FBB->addSuccessor(Sink); 3122 TBB->addSuccessor(Sink); 3123 3124 // Insert the real bnz.b instruction to $BB. 3125 BuildMI(BB, DL, TII->get(BranchOp)) 3126 .addReg(MI.getOperand(1).getReg()) 3127 .addMBB(TBB); 3128 3129 // Fill $FBB. 3130 Register RD1 = RegInfo.createVirtualRegister(RC); 3131 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3132 .addReg(Mips::ZERO).addImm(0); 3133 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3134 3135 // Fill $TBB. 3136 Register RD2 = RegInfo.createVirtualRegister(RC); 3137 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3138 .addReg(Mips::ZERO).addImm(1); 3139 3140 // Insert phi function to $Sink. 3141 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3142 MI.getOperand(0).getReg()) 3143 .addReg(RD1) 3144 .addMBB(FBB) 3145 .addReg(RD2) 3146 .addMBB(TBB); 3147 3148 MI.eraseFromParent(); // The pseudo instruction is gone now. 3149 return Sink; 3150 } 3151 3152 // Emit the COPY_FW pseudo instruction. 3153 // 3154 // copy_fw_pseudo $fd, $ws, n 3155 // => 3156 // copy_u_w $rt, $ws, $n 3157 // mtc1 $rt, $fd 3158 // 3159 // When n is zero, the equivalent operation can be performed with (potentially) 3160 // zero instructions due to register overlaps. This optimization is never valid 3161 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3162 MachineBasicBlock * 3163 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3164 MachineBasicBlock *BB) const { 3165 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3166 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3167 DebugLoc DL = MI.getDebugLoc(); 3168 Register Fd = MI.getOperand(0).getReg(); 3169 Register Ws = MI.getOperand(1).getReg(); 3170 unsigned Lane = MI.getOperand(2).getImm(); 3171 3172 if (Lane == 0) { 3173 unsigned Wt = Ws; 3174 if (!Subtarget.useOddSPReg()) { 3175 // We must copy to an even-numbered MSA register so that the 3176 // single-precision sub-register is also guaranteed to be even-numbered. 3177 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3178 3179 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3180 } 3181 3182 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3183 } else { 3184 Register Wt = RegInfo.createVirtualRegister( 3185 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3186 : &Mips::MSA128WEvensRegClass); 3187 3188 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3189 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3190 } 3191 3192 MI.eraseFromParent(); // The pseudo instruction is gone now. 3193 return BB; 3194 } 3195 3196 // Emit the COPY_FD pseudo instruction. 3197 // 3198 // copy_fd_pseudo $fd, $ws, n 3199 // => 3200 // splati.d $wt, $ws, $n 3201 // copy $fd, $wt:sub_64 3202 // 3203 // When n is zero, the equivalent operation can be performed with (potentially) 3204 // zero instructions due to register overlaps. This optimization is always 3205 // valid because FR=1 mode which is the only supported mode in MSA. 3206 MachineBasicBlock * 3207 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3208 MachineBasicBlock *BB) const { 3209 assert(Subtarget.isFP64bit()); 3210 3211 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3212 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3213 Register Fd = MI.getOperand(0).getReg(); 3214 Register Ws = MI.getOperand(1).getReg(); 3215 unsigned Lane = MI.getOperand(2).getImm() * 2; 3216 DebugLoc DL = MI.getDebugLoc(); 3217 3218 if (Lane == 0) 3219 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3220 else { 3221 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3222 3223 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3224 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3225 } 3226 3227 MI.eraseFromParent(); // The pseudo instruction is gone now. 3228 return BB; 3229 } 3230 3231 // Emit the INSERT_FW pseudo instruction. 3232 // 3233 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3234 // => 3235 // subreg_to_reg $wt:sub_lo, $fs 3236 // insve_w $wd[$n], $wd_in, $wt[0] 3237 MachineBasicBlock * 3238 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3239 MachineBasicBlock *BB) const { 3240 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3241 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3242 DebugLoc DL = MI.getDebugLoc(); 3243 Register Wd = MI.getOperand(0).getReg(); 3244 Register Wd_in = MI.getOperand(1).getReg(); 3245 unsigned Lane = MI.getOperand(2).getImm(); 3246 Register Fs = MI.getOperand(3).getReg(); 3247 Register Wt = RegInfo.createVirtualRegister( 3248 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3249 : &Mips::MSA128WEvensRegClass); 3250 3251 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3252 .addImm(0) 3253 .addReg(Fs) 3254 .addImm(Mips::sub_lo); 3255 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3256 .addReg(Wd_in) 3257 .addImm(Lane) 3258 .addReg(Wt) 3259 .addImm(0); 3260 3261 MI.eraseFromParent(); // The pseudo instruction is gone now. 3262 return BB; 3263 } 3264 3265 // Emit the INSERT_FD pseudo instruction. 3266 // 3267 // insert_fd_pseudo $wd, $fs, n 3268 // => 3269 // subreg_to_reg $wt:sub_64, $fs 3270 // insve_d $wd[$n], $wd_in, $wt[0] 3271 MachineBasicBlock * 3272 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3273 MachineBasicBlock *BB) const { 3274 assert(Subtarget.isFP64bit()); 3275 3276 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3277 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3278 DebugLoc DL = MI.getDebugLoc(); 3279 Register Wd = MI.getOperand(0).getReg(); 3280 Register Wd_in = MI.getOperand(1).getReg(); 3281 unsigned Lane = MI.getOperand(2).getImm(); 3282 Register Fs = MI.getOperand(3).getReg(); 3283 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3284 3285 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3286 .addImm(0) 3287 .addReg(Fs) 3288 .addImm(Mips::sub_64); 3289 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3290 .addReg(Wd_in) 3291 .addImm(Lane) 3292 .addReg(Wt) 3293 .addImm(0); 3294 3295 MI.eraseFromParent(); // The pseudo instruction is gone now. 3296 return BB; 3297 } 3298 3299 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 3300 // 3301 // For integer: 3302 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) 3303 // => 3304 // (SLL $lanetmp1, $lane, <log2size) 3305 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3306 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) 3307 // (NEG $lanetmp2, $lanetmp1) 3308 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3309 // 3310 // For floating point: 3311 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) 3312 // => 3313 // (SUBREG_TO_REG $wt, $fs, <subreg>) 3314 // (SLL $lanetmp1, $lane, <log2size) 3315 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3316 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) 3317 // (NEG $lanetmp2, $lanetmp1) 3318 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3319 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( 3320 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, 3321 bool IsFP) const { 3322 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3323 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3324 DebugLoc DL = MI.getDebugLoc(); 3325 Register Wd = MI.getOperand(0).getReg(); 3326 Register SrcVecReg = MI.getOperand(1).getReg(); 3327 Register LaneReg = MI.getOperand(2).getReg(); 3328 Register SrcValReg = MI.getOperand(3).getReg(); 3329 3330 const TargetRegisterClass *VecRC = nullptr; 3331 // FIXME: This should be true for N32 too. 3332 const TargetRegisterClass *GPRRC = 3333 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3334 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; 3335 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL; 3336 unsigned EltLog2Size; 3337 unsigned InsertOp = 0; 3338 unsigned InsveOp = 0; 3339 switch (EltSizeInBytes) { 3340 default: 3341 llvm_unreachable("Unexpected size"); 3342 case 1: 3343 EltLog2Size = 0; 3344 InsertOp = Mips::INSERT_B; 3345 InsveOp = Mips::INSVE_B; 3346 VecRC = &Mips::MSA128BRegClass; 3347 break; 3348 case 2: 3349 EltLog2Size = 1; 3350 InsertOp = Mips::INSERT_H; 3351 InsveOp = Mips::INSVE_H; 3352 VecRC = &Mips::MSA128HRegClass; 3353 break; 3354 case 4: 3355 EltLog2Size = 2; 3356 InsertOp = Mips::INSERT_W; 3357 InsveOp = Mips::INSVE_W; 3358 VecRC = &Mips::MSA128WRegClass; 3359 break; 3360 case 8: 3361 EltLog2Size = 3; 3362 InsertOp = Mips::INSERT_D; 3363 InsveOp = Mips::INSVE_D; 3364 VecRC = &Mips::MSA128DRegClass; 3365 break; 3366 } 3367 3368 if (IsFP) { 3369 Register Wt = RegInfo.createVirtualRegister(VecRC); 3370 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3371 .addImm(0) 3372 .addReg(SrcValReg) 3373 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); 3374 SrcValReg = Wt; 3375 } 3376 3377 // Convert the lane index into a byte index 3378 if (EltSizeInBytes != 1) { 3379 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); 3380 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) 3381 .addReg(LaneReg) 3382 .addImm(EltLog2Size); 3383 LaneReg = LaneTmp1; 3384 } 3385 3386 // Rotate bytes around so that the desired lane is element zero 3387 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC); 3388 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) 3389 .addReg(SrcVecReg) 3390 .addReg(SrcVecReg) 3391 .addReg(LaneReg, 0, SubRegIdx); 3392 3393 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC); 3394 if (IsFP) { 3395 // Use insve.df to insert to element zero 3396 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) 3397 .addReg(WdTmp1) 3398 .addImm(0) 3399 .addReg(SrcValReg) 3400 .addImm(0); 3401 } else { 3402 // Use insert.df to insert to element zero 3403 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) 3404 .addReg(WdTmp1) 3405 .addReg(SrcValReg) 3406 .addImm(0); 3407 } 3408 3409 // Rotate elements the rest of the way for a full rotation. 3410 // sld.df inteprets $rt modulo the number of columns so we only need to negate 3411 // the lane index to do this. 3412 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); 3413 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), 3414 LaneTmp2) 3415 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) 3416 .addReg(LaneReg); 3417 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) 3418 .addReg(WdTmp2) 3419 .addReg(WdTmp2) 3420 .addReg(LaneTmp2, 0, SubRegIdx); 3421 3422 MI.eraseFromParent(); // The pseudo instruction is gone now. 3423 return BB; 3424 } 3425 3426 // Emit the FILL_FW pseudo instruction. 3427 // 3428 // fill_fw_pseudo $wd, $fs 3429 // => 3430 // implicit_def $wt1 3431 // insert_subreg $wt2:subreg_lo, $wt1, $fs 3432 // splati.w $wd, $wt2[0] 3433 MachineBasicBlock * 3434 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, 3435 MachineBasicBlock *BB) const { 3436 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3437 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3438 DebugLoc DL = MI.getDebugLoc(); 3439 Register Wd = MI.getOperand(0).getReg(); 3440 Register Fs = MI.getOperand(1).getReg(); 3441 Register Wt1 = RegInfo.createVirtualRegister( 3442 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3443 : &Mips::MSA128WEvensRegClass); 3444 Register Wt2 = RegInfo.createVirtualRegister( 3445 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3446 : &Mips::MSA128WEvensRegClass); 3447 3448 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3449 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3450 .addReg(Wt1) 3451 .addReg(Fs) 3452 .addImm(Mips::sub_lo); 3453 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3454 3455 MI.eraseFromParent(); // The pseudo instruction is gone now. 3456 return BB; 3457 } 3458 3459 // Emit the FILL_FD pseudo instruction. 3460 // 3461 // fill_fd_pseudo $wd, $fs 3462 // => 3463 // implicit_def $wt1 3464 // insert_subreg $wt2:subreg_64, $wt1, $fs 3465 // splati.d $wd, $wt2[0] 3466 MachineBasicBlock * 3467 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, 3468 MachineBasicBlock *BB) const { 3469 assert(Subtarget.isFP64bit()); 3470 3471 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3472 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3473 DebugLoc DL = MI.getDebugLoc(); 3474 Register Wd = MI.getOperand(0).getReg(); 3475 Register Fs = MI.getOperand(1).getReg(); 3476 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3477 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3478 3479 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3480 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3481 .addReg(Wt1) 3482 .addReg(Fs) 3483 .addImm(Mips::sub_64); 3484 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3485 3486 MI.eraseFromParent(); // The pseudo instruction is gone now. 3487 return BB; 3488 } 3489 3490 // Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA 3491 // register. 3492 // 3493 // STF16 MSA128F16:$wd, mem_simm10:$addr 3494 // => 3495 // copy_u.h $rtemp,$wd[0] 3496 // sh $rtemp, $addr 3497 // 3498 // Safety: We can't use st.h & co as they would over write the memory after 3499 // the destination. It would require half floats be allocated 16 bytes(!) of 3500 // space. 3501 MachineBasicBlock * 3502 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3503 MachineBasicBlock *BB) const { 3504 3505 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3506 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3507 DebugLoc DL = MI.getDebugLoc(); 3508 Register Ws = MI.getOperand(0).getReg(); 3509 Register Rt = MI.getOperand(1).getReg(); 3510 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3511 unsigned Imm = MMO.getOffset(); 3512 3513 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3514 // spill and reload can expand as a GPR64 operand. Examine the 3515 // operand in detail and default to ABI. 3516 const TargetRegisterClass *RC = 3517 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3518 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3519 : &Mips::GPR64RegClass); 3520 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3521 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3522 3523 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3524 if(!UsingMips32) { 3525 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); 3526 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) 3527 .addImm(0) 3528 .addReg(Rs) 3529 .addImm(Mips::sub_32); 3530 Rs = Tmp; 3531 } 3532 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3533 .addReg(Rs) 3534 .addReg(Rt) 3535 .addImm(Imm) 3536 .addMemOperand(BB->getParent()->getMachineMemOperand( 3537 &MMO, MMO.getOffset(), MMO.getSize())); 3538 3539 MI.eraseFromParent(); 3540 return BB; 3541 } 3542 3543 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3544 // 3545 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3546 // => 3547 // lh $rtemp, $addr 3548 // fill.h $wd, $rtemp 3549 // 3550 // Safety: We can't use ld.h & co as they over-read from the source. 3551 // Additionally, if the address is not modulo 16, 2 cases can occur: 3552 // a) Segmentation fault as the load instruction reads from a memory page 3553 // memory it's not supposed to. 3554 // b) The load crosses an implementation specific boundary, requiring OS 3555 // intervention. 3556 MachineBasicBlock * 3557 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3558 MachineBasicBlock *BB) const { 3559 3560 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3561 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3562 DebugLoc DL = MI.getDebugLoc(); 3563 Register Wd = MI.getOperand(0).getReg(); 3564 3565 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3566 // spill and reload can expand as a GPR64 operand. Examine the 3567 // operand in detail and default to ABI. 3568 const TargetRegisterClass *RC = 3569 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3570 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3571 : &Mips::GPR64RegClass); 3572 3573 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3574 Register Rt = RegInfo.createVirtualRegister(RC); 3575 3576 MachineInstrBuilder MIB = 3577 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3578 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3579 MIB.add(MO); 3580 3581 if(!UsingMips32) { 3582 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3583 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); 3584 Rt = Tmp; 3585 } 3586 3587 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3588 3589 MI.eraseFromParent(); 3590 return BB; 3591 } 3592 3593 // Emit the FPROUND_PSEUDO instruction. 3594 // 3595 // Round an FGR64Opnd, FGR32Opnd to an f16. 3596 // 3597 // Safety: Cycle the operand through the GPRs so the result always ends up 3598 // the correct MSA register. 3599 // 3600 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs 3601 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3602 // (which they can be, as the MSA registers are defined to alias the 3603 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3604 // the correct register class. That requires operands be tie-able across 3605 // register classes which have a sub/super register class relationship. 3606 // 3607 // For FPG32Opnd: 3608 // 3609 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3610 // => 3611 // mfc1 $rtemp, $fs 3612 // fill.w $rtemp, $wtemp 3613 // fexdo.w $wd, $wtemp, $wtemp 3614 // 3615 // For FPG64Opnd on mips32r2+: 3616 // 3617 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3618 // => 3619 // mfc1 $rtemp, $fs 3620 // fill.w $rtemp, $wtemp 3621 // mfhc1 $rtemp2, $fs 3622 // insert.w $wtemp[1], $rtemp2 3623 // insert.w $wtemp[3], $rtemp2 3624 // fexdo.w $wtemp2, $wtemp, $wtemp 3625 // fexdo.h $wd, $temp2, $temp2 3626 // 3627 // For FGR64Opnd on mips64r2+: 3628 // 3629 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3630 // => 3631 // dmfc1 $rtemp, $fs 3632 // fill.d $rtemp, $wtemp 3633 // fexdo.w $wtemp2, $wtemp, $wtemp 3634 // fexdo.h $wd, $wtemp2, $wtemp2 3635 // 3636 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3637 // undef bits are "just right" and the exception enable bits are 3638 // set. By using fill.w to replicate $fs into all elements over 3639 // insert.w for one element, we avoid that potiential case. If 3640 // fexdo.[hw] causes an exception in, the exception is valid and it 3641 // occurs for all elements. 3642 MachineBasicBlock * 3643 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3644 MachineBasicBlock *BB, 3645 bool IsFGR64) const { 3646 3647 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3648 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3649 // it. 3650 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3651 3652 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3653 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3654 3655 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3656 DebugLoc DL = MI.getDebugLoc(); 3657 Register Wd = MI.getOperand(0).getReg(); 3658 Register Fs = MI.getOperand(1).getReg(); 3659 3660 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3661 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3662 const TargetRegisterClass *GPRRC = 3663 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3664 unsigned MFC1Opc = IsFGR64onMips64 3665 ? Mips::DMFC1 3666 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); 3667 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3668 3669 // Perform the register class copy as mentioned above. 3670 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3671 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3672 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3673 unsigned WPHI = Wtemp; 3674 3675 if (IsFGR64onMips32) { 3676 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3677 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3678 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3679 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3680 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3681 .addReg(Wtemp) 3682 .addReg(Rtemp2) 3683 .addImm(1); 3684 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3685 .addReg(Wtemp2) 3686 .addReg(Rtemp2) 3687 .addImm(3); 3688 WPHI = Wtemp3; 3689 } 3690 3691 if (IsFGR64) { 3692 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3693 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3694 .addReg(WPHI) 3695 .addReg(WPHI); 3696 WPHI = Wtemp2; 3697 } 3698 3699 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); 3700 3701 MI.eraseFromParent(); 3702 return BB; 3703 } 3704 3705 // Emit the FPEXTEND_PSEUDO instruction. 3706 // 3707 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3708 // 3709 // Safety: Cycle the result through the GPRs so the result always ends up 3710 // the correct floating point register. 3711 // 3712 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3713 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3714 // (which they can be, as the MSA registers are defined to alias the 3715 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3716 // the correct register class. That requires operands be tie-able across 3717 // register classes which have a sub/super register class relationship. I 3718 // haven't checked. 3719 // 3720 // For FGR32Opnd: 3721 // 3722 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3723 // => 3724 // fexupr.w $wtemp, $ws 3725 // copy_s.w $rtemp, $ws[0] 3726 // mtc1 $rtemp, $fd 3727 // 3728 // For FGR64Opnd on Mips64: 3729 // 3730 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3731 // => 3732 // fexupr.w $wtemp, $ws 3733 // fexupr.d $wtemp2, $wtemp 3734 // copy_s.d $rtemp, $wtemp2s[0] 3735 // dmtc1 $rtemp, $fd 3736 // 3737 // For FGR64Opnd on Mips32: 3738 // 3739 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3740 // => 3741 // fexupr.w $wtemp, $ws 3742 // fexupr.d $wtemp2, $wtemp 3743 // copy_s.w $rtemp, $wtemp2[0] 3744 // mtc1 $rtemp, $ftemp 3745 // copy_s.w $rtemp2, $wtemp2[1] 3746 // $fd = mthc1 $rtemp2, $ftemp 3747 MachineBasicBlock * 3748 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3749 MachineBasicBlock *BB, 3750 bool IsFGR64) const { 3751 3752 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3753 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3754 // it. 3755 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3756 3757 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3758 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3759 3760 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3761 DebugLoc DL = MI.getDebugLoc(); 3762 Register Fd = MI.getOperand(0).getReg(); 3763 Register Ws = MI.getOperand(1).getReg(); 3764 3765 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3766 const TargetRegisterClass *GPRRC = 3767 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3768 unsigned MTC1Opc = IsFGR64onMips64 3769 ? Mips::DMTC1 3770 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); 3771 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3772 3773 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3774 Register WPHI = Wtemp; 3775 3776 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3777 if (IsFGR64) { 3778 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3779 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3780 } 3781 3782 // Perform the safety regclass copy mentioned above. 3783 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3784 Register FPRPHI = IsFGR64onMips32 3785 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3786 : Fd; 3787 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3788 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3789 3790 if (IsFGR64onMips32) { 3791 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3792 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3793 .addReg(WPHI) 3794 .addImm(1); 3795 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3796 .addReg(FPRPHI) 3797 .addReg(Rtemp2); 3798 } 3799 3800 MI.eraseFromParent(); 3801 return BB; 3802 } 3803 3804 // Emit the FEXP2_W_1 pseudo instructions. 3805 // 3806 // fexp2_w_1_pseudo $wd, $wt 3807 // => 3808 // ldi.w $ws, 1 3809 // fexp2.w $wd, $ws, $wt 3810 MachineBasicBlock * 3811 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3812 MachineBasicBlock *BB) const { 3813 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3814 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3815 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3816 Register Ws1 = RegInfo.createVirtualRegister(RC); 3817 Register Ws2 = RegInfo.createVirtualRegister(RC); 3818 DebugLoc DL = MI.getDebugLoc(); 3819 3820 // Splat 1.0 into a vector 3821 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3822 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3823 3824 // Emit 1.0 * fexp2(Wt) 3825 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3826 .addReg(Ws2) 3827 .addReg(MI.getOperand(1).getReg()); 3828 3829 MI.eraseFromParent(); // The pseudo instruction is gone now. 3830 return BB; 3831 } 3832 3833 // Emit the FEXP2_D_1 pseudo instructions. 3834 // 3835 // fexp2_d_1_pseudo $wd, $wt 3836 // => 3837 // ldi.d $ws, 1 3838 // fexp2.d $wd, $ws, $wt 3839 MachineBasicBlock * 3840 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3841 MachineBasicBlock *BB) const { 3842 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3843 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3844 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3845 Register Ws1 = RegInfo.createVirtualRegister(RC); 3846 Register Ws2 = RegInfo.createVirtualRegister(RC); 3847 DebugLoc DL = MI.getDebugLoc(); 3848 3849 // Splat 1.0 into a vector 3850 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3851 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3852 3853 // Emit 1.0 * fexp2(Wt) 3854 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3855 .addReg(Ws2) 3856 .addReg(MI.getOperand(1).getReg()); 3857 3858 MI.eraseFromParent(); // The pseudo instruction is gone now. 3859 return BB; 3860 } 3861