//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
    UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VecTy, &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTy, Expand);

      setOperationAction(ISD::ADD, VecTy, Legal);
      setOperationAction(ISD::SUB, VecTy, Legal);
      setOperationAction(ISD::LOAD, VecTy, Legal);
      setOperationAction(ISD::STORE, VecTy, Legal);
      setOperationAction(ISD::BITCAST, VecTy, Legal);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
  }
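
  // A note on the f16 handling above: Promote means the legalizer performs
  // the arithmetic in f32. Roughly (a sketch, not the exact DAG the
  // legalizer builds):
  //   (fadd f16:$a, f16:$b)
  //     -> (fp_round (fadd (fp_extend $a), (fp_extend $b)))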

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls.
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a
    // three-register instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with
    // separate three-register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=.
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a
    // three-register instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with
    // separate three-register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
  setOperationAction(ISD::UNDEF, Ty, Legal);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}
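
// A consequence of the condition-code table above: MSA only provides equal,
// less-than and less-or-equal compares (ceq, clt_[su], cle_[su]), so the
// expanded predicates are legalized in terms of those. A sketch of typical
// rewrites (the exact expansion is chosen by the generic legalizer):
//   (setcc $a, $b, SETGT) -> (setcc $b, $a, SETLT)  ; swap the operands
//   (setcc $a, $b, SETNE) -> (xor (setcc $a, $b, SETEQ), splat(-1))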

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of
  // the floating point register are undefined. Not really an issue as sel.d,
  // which is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp =
      DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}
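
// A sketch of what lowerSELECT above produces on MIPS32r6 for an f64 select:
//   (select i32:$cond, f64:$t, f64:$f)
//     -> (MipsISD::FSELECT (MipsISD::MTC1_D64 $cond), $t, $f)
// which can then be matched to mtc1 followed by sel.d; sel.d only inspects
// bit 0 of the condition register, so the undefined upper bits mentioned
// above are harmless.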

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation-defined whether this is handled by hardware, software,
    // or a hybrid of the two, but it's expected that most implementations
    // will handle the majority of cases in hardware.
    if (Fast)
      *Fast = 1;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = 1;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:      return lowerLOAD(Op, DAG);
  case ISD::STORE:     return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask + 1 is not a power of 2.

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
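
// For example, performANDCombine above rewrites:
//   (and (MipsVExtractSExt $v, $idx, i16), 65535)
//     -> (MipsVExtractZExt $v, $idx, i16)
// since the mask satisfies 65535 + 1 == 2^16: the zero-extension performed by
// the AND completely overwrites the extract's i16 sign-extension.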

// Determine whether the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes MSA availability has already been checked
// and the arbitrary requirement for a maximum of 32-bit integers isn't
// applied (and must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnes();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a
//   128-bit vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes
    // while looking. IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the
    // same thing again using this mask. IfClr will be set if we find a valid
    // match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
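
// For example, with a constant v16i8 splat mask, performORCombine rewrites:
//   (or (and $a, splat(0x0f)), (and $b, splat(0xf0)))
//     -> (vselect splat(0x0f), $a, $b)
// since splat(0xf0) is the bitwise inverse of splat(0x0f); the VSELECT can
// then be matched to an MSA bitwise-select instruction such as bsel.v.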

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12 [1] / 8 [2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, plus another cycle (or
  //     two) to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, plus another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider performing this optimization after type legalization. That
  //   would allow removing a workaround for types not supported natively.
  // - Take into account the `-Os, -Oz` flags because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}
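
// Worked example for the heuristic above and for genConstMult below, with
// C = 73: the walk splits 73 into 64 + 9 and then 9 into 8 + 1, taking four
// steps (the two splits plus the powers of two 64 and 8; 0 and 1 are free).
// That is well under MaxSteps, so the multiply is rewritten to:
//   (mul $x, 73) -> (add (shl $x, 6), (add (shl $x, 3), $x))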

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0)
                              : APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
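
// For example, with DSP enabled, a left shift of a v2i16 by a constant splat:
//   (shl v2i16:$a, (build_vector 3, 3))
// is rewritten by performSHLCombine above to (MipsISD::SHLL_DSP $a, 3),
// which can then be matched to the shll.ph instruction.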

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2),
                       SetCC.getOperand(2));
  }

  return SDValue();
}
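
// Together, the two combines above turn a vector select on a DSP type into
// the paired DSP nodes. Roughly:
//   (vselect (setcc v2i16:$a, $b, SETLT), $t, $f)
//     -> (vselect (MipsISD::SETCC_DSP $a, $b, SETLT), $t, $f)
//     -> (MipsISD::SELECT_CC_DSP $a, $b, $t, $f, SETLT)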

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}
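
// For example, performXORCombine above rewrites:
//   (xor (or $a, $b), (build_vector splat(-1)))
//     -> (MipsISD::VNOR $a, $b)
// which matches MSA's nor.v instruction.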

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a BuildPairF64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlign(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two ExtractElementF64s and i32
  // stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
                       Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      commonAlignment(Nd.getAlign(), 4),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
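
// A sketch of the expansion the two functions above perform when
// -mno-ldc1-sdc1 is in effect (little endian shown; the halves are swapped
// on big endian):
//   (f64 (load $ptr))
//     -> (MipsISD::BuildPairF64 (i32 load $ptr), (i32 load $ptr+4))
//   (store f64:$val, $ptr)
//     -> (store (MipsISD::ExtractElementF64 $val, 0), $ptr),
//        (store (MipsISD::ExtractElementF64 $val, 1), $ptr+4)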

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) =
        DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator-based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo, InHi;
  std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
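
// For example, the maddu intrinsic (i64 $acc, i32 $a, i32 $b -> i64) is
// expanded by lowerDSPIntr above roughly as:
//   $ac  = (MipsISD::MTLOHI (lo $acc), (hi $acc))
//   $res = (MipsISD::MAddu $a, $b, $ac)          ; MVT::Untyped accumulator
//   out  = (build_pair (MipsISD::MFLO $res), (MipsISD::MFHI $res))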

// Lower an MSA copy intrinsic into the specified SelectionDAG node.
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // If the index is passed in a register, set the upper lane to the same
    // value as the lower one - this results in the BUILD_VECTOR node not
    // being expanded through the stack. This way we are able to pattern
    // match the set of nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // A v2i64 BUILD_VECTOR must be performed via v4i32, so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
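
// For example, splatting a non-constant i64 value into a v2i64 with
// getBuildVectorSplat above goes via v4i32 (a v2i64 BUILD_VECTOR would
// otherwise be expanded through the stack). Little endian sketch:
//   $lo = (trunc i32 $v),  $hi = (trunc i32 (srl $v, 32))
//   (bitcast v2i64 (build_vector $lo, $hi, $lo, $hi))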

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet, so we must do
  // it here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, so do a vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter
    // since only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << Op->getConstantOperandAPInt(2);
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
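
// For example, the bclr family is lowered to generic nodes; for bclr.w each
// element becomes:
//   bclr_w($a, $b) -> (and $a, (not (shl splat(1), (and $b, splat(31)))))
// i.e. clear bit ($b mod 32) in every element; truncateVecElts supplies the
// (and $b, splat(31)) masking.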

SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Intrinsic = Op->getConstantOperandVal(0);
  switch (Intrinsic) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bclr_b:
  case Intrinsic::mips_bclr_h:
  case Intrinsic::mips_bclr_w:
  case Intrinsic::mips_bclr_d:
    return lowerMSABitClear(Op, DAG);
  case Intrinsic::mips_bclri_b:
  case Intrinsic::mips_bclri_h:
  case Intrinsic::mips_bclri_w:
  case Intrinsic::mips_bclri_d:
    return lowerMSABitClearImm(Op, DAG);
  case Intrinsic::mips_binsli_b:
  case Intrinsic::mips_binsli_h:
  case Intrinsic::mips_binsli_w:
  case Intrinsic::mips_binsli_d: {
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(3), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(3), Op->getOperand(1),
Op->getOperand(2)); 1635 case Intrinsic::mips_bmzi_b: 1636 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1637 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1638 Op->getOperand(2)); 1639 case Intrinsic::mips_bneg_b: 1640 case Intrinsic::mips_bneg_h: 1641 case Intrinsic::mips_bneg_w: 1642 case Intrinsic::mips_bneg_d: { 1643 EVT VecTy = Op->getValueType(0); 1644 SDValue One = DAG.getConstant(1, DL, VecTy); 1645 1646 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1647 DAG.getNode(ISD::SHL, DL, VecTy, One, 1648 truncateVecElts(Op, DAG))); 1649 } 1650 case Intrinsic::mips_bnegi_b: 1651 case Intrinsic::mips_bnegi_h: 1652 case Intrinsic::mips_bnegi_w: 1653 case Intrinsic::mips_bnegi_d: 1654 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1655 !Subtarget.isLittle()); 1656 case Intrinsic::mips_bnz_b: 1657 case Intrinsic::mips_bnz_h: 1658 case Intrinsic::mips_bnz_w: 1659 case Intrinsic::mips_bnz_d: 1660 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1661 Op->getOperand(1)); 1662 case Intrinsic::mips_bnz_v: 1663 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1664 Op->getOperand(1)); 1665 case Intrinsic::mips_bsel_v: 1666 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1667 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1668 Op->getOperand(1), Op->getOperand(3), 1669 Op->getOperand(2)); 1670 case Intrinsic::mips_bseli_b: 1671 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1672 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1673 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1674 Op->getOperand(2)); 1675 case Intrinsic::mips_bset_b: 1676 case Intrinsic::mips_bset_h: 1677 case Intrinsic::mips_bset_w: 1678 case Intrinsic::mips_bset_d: { 1679 EVT VecTy = Op->getValueType(0); 1680 SDValue One = DAG.getConstant(1, DL, VecTy); 1681 1682 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1683 DAG.getNode(ISD::SHL, DL, VecTy, One, 1684 truncateVecElts(Op, DAG))); 1685 } 1686 case Intrinsic::mips_bseti_b: 1687 case Intrinsic::mips_bseti_h: 1688 case Intrinsic::mips_bseti_w: 1689 case Intrinsic::mips_bseti_d: 1690 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1691 !Subtarget.isLittle()); 1692 case Intrinsic::mips_bz_b: 1693 case Intrinsic::mips_bz_h: 1694 case Intrinsic::mips_bz_w: 1695 case Intrinsic::mips_bz_d: 1696 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1697 Op->getOperand(1)); 1698 case Intrinsic::mips_bz_v: 1699 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1700 Op->getOperand(1)); 1701 case Intrinsic::mips_ceq_b: 1702 case Intrinsic::mips_ceq_h: 1703 case Intrinsic::mips_ceq_w: 1704 case Intrinsic::mips_ceq_d: 1705 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1706 Op->getOperand(2), ISD::SETEQ); 1707 case Intrinsic::mips_ceqi_b: 1708 case Intrinsic::mips_ceqi_h: 1709 case Intrinsic::mips_ceqi_w: 1710 case Intrinsic::mips_ceqi_d: 1711 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1712 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1713 case Intrinsic::mips_cle_s_b: 1714 case Intrinsic::mips_cle_s_h: 1715 case Intrinsic::mips_cle_s_w: 1716 case Intrinsic::mips_cle_s_d: 1717 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1718 Op->getOperand(2), ISD::SETLE); 1719 case Intrinsic::mips_clei_s_b: 1720 case Intrinsic::mips_clei_s_h: 1721 case Intrinsic::mips_clei_s_w: 1722 case Intrinsic::mips_clei_s_d: 1723 return DAG.getSetCC(DL, 
Op->getValueType(0), Op->getOperand(1), 1724 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1725 case Intrinsic::mips_cle_u_b: 1726 case Intrinsic::mips_cle_u_h: 1727 case Intrinsic::mips_cle_u_w: 1728 case Intrinsic::mips_cle_u_d: 1729 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1730 Op->getOperand(2), ISD::SETULE); 1731 case Intrinsic::mips_clei_u_b: 1732 case Intrinsic::mips_clei_u_h: 1733 case Intrinsic::mips_clei_u_w: 1734 case Intrinsic::mips_clei_u_d: 1735 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1736 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1737 case Intrinsic::mips_clt_s_b: 1738 case Intrinsic::mips_clt_s_h: 1739 case Intrinsic::mips_clt_s_w: 1740 case Intrinsic::mips_clt_s_d: 1741 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1742 Op->getOperand(2), ISD::SETLT); 1743 case Intrinsic::mips_clti_s_b: 1744 case Intrinsic::mips_clti_s_h: 1745 case Intrinsic::mips_clti_s_w: 1746 case Intrinsic::mips_clti_s_d: 1747 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1748 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1749 case Intrinsic::mips_clt_u_b: 1750 case Intrinsic::mips_clt_u_h: 1751 case Intrinsic::mips_clt_u_w: 1752 case Intrinsic::mips_clt_u_d: 1753 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1754 Op->getOperand(2), ISD::SETULT); 1755 case Intrinsic::mips_clti_u_b: 1756 case Intrinsic::mips_clti_u_h: 1757 case Intrinsic::mips_clti_u_w: 1758 case Intrinsic::mips_clti_u_d: 1759 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1760 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1761 case Intrinsic::mips_copy_s_b: 1762 case Intrinsic::mips_copy_s_h: 1763 case Intrinsic::mips_copy_s_w: 1764 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1765 case Intrinsic::mips_copy_s_d: 1766 if (Subtarget.hasMips64()) 1767 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1768 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1769 else { 1770 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1771 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1772 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1773 Op->getValueType(0), Op->getOperand(1), 1774 Op->getOperand(2)); 1775 } 1776 case Intrinsic::mips_copy_u_b: 1777 case Intrinsic::mips_copy_u_h: 1778 case Intrinsic::mips_copy_u_w: 1779 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1780 case Intrinsic::mips_copy_u_d: 1781 if (Subtarget.hasMips64()) 1782 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1783 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1784 else { 1785 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1786 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1787 // Note: When i64 is illegal, this results in copy_s.w instructions 1788 // instead of copy_u.w instructions. This makes no difference to the 1789 // behaviour since i64 is only illegal when the register file is 32-bit. 
1790 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1791 Op->getValueType(0), Op->getOperand(1), 1792 Op->getOperand(2)); 1793 } 1794 case Intrinsic::mips_div_s_b: 1795 case Intrinsic::mips_div_s_h: 1796 case Intrinsic::mips_div_s_w: 1797 case Intrinsic::mips_div_s_d: 1798 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1799 Op->getOperand(2)); 1800 case Intrinsic::mips_div_u_b: 1801 case Intrinsic::mips_div_u_h: 1802 case Intrinsic::mips_div_u_w: 1803 case Intrinsic::mips_div_u_d: 1804 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1805 Op->getOperand(2)); 1806 case Intrinsic::mips_fadd_w: 1807 case Intrinsic::mips_fadd_d: 1808 // TODO: If intrinsics have fast-math-flags, propagate them. 1809 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1810 Op->getOperand(2)); 1811 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1812 case Intrinsic::mips_fceq_w: 1813 case Intrinsic::mips_fceq_d: 1814 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1815 Op->getOperand(2), ISD::SETOEQ); 1816 case Intrinsic::mips_fcle_w: 1817 case Intrinsic::mips_fcle_d: 1818 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1819 Op->getOperand(2), ISD::SETOLE); 1820 case Intrinsic::mips_fclt_w: 1821 case Intrinsic::mips_fclt_d: 1822 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1823 Op->getOperand(2), ISD::SETOLT); 1824 case Intrinsic::mips_fcne_w: 1825 case Intrinsic::mips_fcne_d: 1826 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1827 Op->getOperand(2), ISD::SETONE); 1828 case Intrinsic::mips_fcor_w: 1829 case Intrinsic::mips_fcor_d: 1830 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1831 Op->getOperand(2), ISD::SETO); 1832 case Intrinsic::mips_fcueq_w: 1833 case Intrinsic::mips_fcueq_d: 1834 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1835 Op->getOperand(2), ISD::SETUEQ); 1836 case Intrinsic::mips_fcule_w: 1837 case Intrinsic::mips_fcule_d: 1838 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1839 Op->getOperand(2), ISD::SETULE); 1840 case Intrinsic::mips_fcult_w: 1841 case Intrinsic::mips_fcult_d: 1842 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1843 Op->getOperand(2), ISD::SETULT); 1844 case Intrinsic::mips_fcun_w: 1845 case Intrinsic::mips_fcun_d: 1846 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1847 Op->getOperand(2), ISD::SETUO); 1848 case Intrinsic::mips_fcune_w: 1849 case Intrinsic::mips_fcune_d: 1850 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1851 Op->getOperand(2), ISD::SETUNE); 1852 case Intrinsic::mips_fdiv_w: 1853 case Intrinsic::mips_fdiv_d: 1854 // TODO: If intrinsics have fast-math-flags, propagate them. 
1855 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1856 Op->getOperand(2)); 1857 case Intrinsic::mips_ffint_u_w: 1858 case Intrinsic::mips_ffint_u_d: 1859 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1860 Op->getOperand(1)); 1861 case Intrinsic::mips_ffint_s_w: 1862 case Intrinsic::mips_ffint_s_d: 1863 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1864 Op->getOperand(1)); 1865 case Intrinsic::mips_fill_b: 1866 case Intrinsic::mips_fill_h: 1867 case Intrinsic::mips_fill_w: 1868 case Intrinsic::mips_fill_d: { 1869 EVT ResTy = Op->getValueType(0); 1870 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1871 Op->getOperand(1)); 1872 1873 // If ResTy is v2i64 then the type legalizer will break this node down into 1874 // an equivalent v4i32. 1875 return DAG.getBuildVector(ResTy, DL, Ops); 1876 } 1877 case Intrinsic::mips_fexp2_w: 1878 case Intrinsic::mips_fexp2_d: { 1879 // TODO: If intrinsics have fast-math-flags, propagate them. 1880 EVT ResTy = Op->getValueType(0); 1881 return DAG.getNode( 1882 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1883 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1884 } 1885 case Intrinsic::mips_flog2_w: 1886 case Intrinsic::mips_flog2_d: 1887 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1888 case Intrinsic::mips_fmadd_w: 1889 case Intrinsic::mips_fmadd_d: 1890 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1891 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1892 case Intrinsic::mips_fmul_w: 1893 case Intrinsic::mips_fmul_d: 1894 // TODO: If intrinsics have fast-math-flags, propagate them. 1895 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1896 Op->getOperand(2)); 1897 case Intrinsic::mips_fmsub_w: 1898 case Intrinsic::mips_fmsub_d: { 1899 // TODO: If intrinsics have fast-math-flags, propagate them. 1900 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1901 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1902 } 1903 case Intrinsic::mips_frint_w: 1904 case Intrinsic::mips_frint_d: 1905 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1906 case Intrinsic::mips_fsqrt_w: 1907 case Intrinsic::mips_fsqrt_d: 1908 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1909 case Intrinsic::mips_fsub_w: 1910 case Intrinsic::mips_fsub_d: 1911 // TODO: If intrinsics have fast-math-flags, propagate them. 
1912 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1913 Op->getOperand(2)); 1914 case Intrinsic::mips_ftrunc_u_w: 1915 case Intrinsic::mips_ftrunc_u_d: 1916 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1917 Op->getOperand(1)); 1918 case Intrinsic::mips_ftrunc_s_w: 1919 case Intrinsic::mips_ftrunc_s_d: 1920 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1921 Op->getOperand(1)); 1922 case Intrinsic::mips_ilvev_b: 1923 case Intrinsic::mips_ilvev_h: 1924 case Intrinsic::mips_ilvev_w: 1925 case Intrinsic::mips_ilvev_d: 1926 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1927 Op->getOperand(1), Op->getOperand(2)); 1928 case Intrinsic::mips_ilvl_b: 1929 case Intrinsic::mips_ilvl_h: 1930 case Intrinsic::mips_ilvl_w: 1931 case Intrinsic::mips_ilvl_d: 1932 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1933 Op->getOperand(1), Op->getOperand(2)); 1934 case Intrinsic::mips_ilvod_b: 1935 case Intrinsic::mips_ilvod_h: 1936 case Intrinsic::mips_ilvod_w: 1937 case Intrinsic::mips_ilvod_d: 1938 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1939 Op->getOperand(1), Op->getOperand(2)); 1940 case Intrinsic::mips_ilvr_b: 1941 case Intrinsic::mips_ilvr_h: 1942 case Intrinsic::mips_ilvr_w: 1943 case Intrinsic::mips_ilvr_d: 1944 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1945 Op->getOperand(1), Op->getOperand(2)); 1946 case Intrinsic::mips_insert_b: 1947 case Intrinsic::mips_insert_h: 1948 case Intrinsic::mips_insert_w: 1949 case Intrinsic::mips_insert_d: 1950 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1951 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1952 case Intrinsic::mips_insve_b: 1953 case Intrinsic::mips_insve_h: 1954 case Intrinsic::mips_insve_w: 1955 case Intrinsic::mips_insve_d: { 1956 // Report an error for out of range values. 
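    // (Illustrative note: the lane index must address an existing element of
    // the 128-bit vector, so the bound below is 128 / element-width - 1:
    // 15 for .b, 7 for .h, 3 for .w and 1 for .d.)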
1957 int64_t Max; 1958 switch (Intrinsic) { 1959 case Intrinsic::mips_insve_b: Max = 15; break; 1960 case Intrinsic::mips_insve_h: Max = 7; break; 1961 case Intrinsic::mips_insve_w: Max = 3; break; 1962 case Intrinsic::mips_insve_d: Max = 1; break; 1963 default: llvm_unreachable("Unmatched intrinsic"); 1964 } 1965 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1966 if (Value < 0 || Value > Max) 1967 report_fatal_error("Immediate out of range"); 1968 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1969 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1970 DAG.getConstant(0, DL, MVT::i32)); 1971 } 1972 case Intrinsic::mips_ldi_b: 1973 case Intrinsic::mips_ldi_h: 1974 case Intrinsic::mips_ldi_w: 1975 case Intrinsic::mips_ldi_d: 1976 return lowerMSASplatImm(Op, 1, DAG, true); 1977 case Intrinsic::mips_lsa: 1978 case Intrinsic::mips_dlsa: { 1979 EVT ResTy = Op->getValueType(0); 1980 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1981 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1982 Op->getOperand(2), Op->getOperand(3))); 1983 } 1984 case Intrinsic::mips_maddv_b: 1985 case Intrinsic::mips_maddv_h: 1986 case Intrinsic::mips_maddv_w: 1987 case Intrinsic::mips_maddv_d: { 1988 EVT ResTy = Op->getValueType(0); 1989 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1990 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1991 Op->getOperand(2), Op->getOperand(3))); 1992 } 1993 case Intrinsic::mips_max_s_b: 1994 case Intrinsic::mips_max_s_h: 1995 case Intrinsic::mips_max_s_w: 1996 case Intrinsic::mips_max_s_d: 1997 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 1998 Op->getOperand(1), Op->getOperand(2)); 1999 case Intrinsic::mips_max_u_b: 2000 case Intrinsic::mips_max_u_h: 2001 case Intrinsic::mips_max_u_w: 2002 case Intrinsic::mips_max_u_d: 2003 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2004 Op->getOperand(1), Op->getOperand(2)); 2005 case Intrinsic::mips_maxi_s_b: 2006 case Intrinsic::mips_maxi_s_h: 2007 case Intrinsic::mips_maxi_s_w: 2008 case Intrinsic::mips_maxi_s_d: 2009 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2010 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2011 case Intrinsic::mips_maxi_u_b: 2012 case Intrinsic::mips_maxi_u_h: 2013 case Intrinsic::mips_maxi_u_w: 2014 case Intrinsic::mips_maxi_u_d: 2015 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2016 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2017 case Intrinsic::mips_min_s_b: 2018 case Intrinsic::mips_min_s_h: 2019 case Intrinsic::mips_min_s_w: 2020 case Intrinsic::mips_min_s_d: 2021 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2022 Op->getOperand(1), Op->getOperand(2)); 2023 case Intrinsic::mips_min_u_b: 2024 case Intrinsic::mips_min_u_h: 2025 case Intrinsic::mips_min_u_w: 2026 case Intrinsic::mips_min_u_d: 2027 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2028 Op->getOperand(1), Op->getOperand(2)); 2029 case Intrinsic::mips_mini_s_b: 2030 case Intrinsic::mips_mini_s_h: 2031 case Intrinsic::mips_mini_s_w: 2032 case Intrinsic::mips_mini_s_d: 2033 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2034 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2035 case Intrinsic::mips_mini_u_b: 2036 case Intrinsic::mips_mini_u_h: 2037 case Intrinsic::mips_mini_u_w: 2038 case Intrinsic::mips_mini_u_d: 2039 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2040 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2041 case Intrinsic::mips_mod_s_b: 2042 case Intrinsic::mips_mod_s_h: 2043 case 
Intrinsic::mips_mod_s_w: 2044 case Intrinsic::mips_mod_s_d: 2045 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2046 Op->getOperand(2)); 2047 case Intrinsic::mips_mod_u_b: 2048 case Intrinsic::mips_mod_u_h: 2049 case Intrinsic::mips_mod_u_w: 2050 case Intrinsic::mips_mod_u_d: 2051 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2052 Op->getOperand(2)); 2053 case Intrinsic::mips_mulv_b: 2054 case Intrinsic::mips_mulv_h: 2055 case Intrinsic::mips_mulv_w: 2056 case Intrinsic::mips_mulv_d: 2057 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2058 Op->getOperand(2)); 2059 case Intrinsic::mips_msubv_b: 2060 case Intrinsic::mips_msubv_h: 2061 case Intrinsic::mips_msubv_w: 2062 case Intrinsic::mips_msubv_d: { 2063 EVT ResTy = Op->getValueType(0); 2064 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2065 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2066 Op->getOperand(2), Op->getOperand(3))); 2067 } 2068 case Intrinsic::mips_nlzc_b: 2069 case Intrinsic::mips_nlzc_h: 2070 case Intrinsic::mips_nlzc_w: 2071 case Intrinsic::mips_nlzc_d: 2072 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2073 case Intrinsic::mips_nor_v: { 2074 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2075 Op->getOperand(1), Op->getOperand(2)); 2076 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2077 } 2078 case Intrinsic::mips_nori_b: { 2079 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2080 Op->getOperand(1), 2081 lowerMSASplatImm(Op, 2, DAG)); 2082 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2083 } 2084 case Intrinsic::mips_or_v: 2085 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2086 Op->getOperand(2)); 2087 case Intrinsic::mips_ori_b: 2088 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2089 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2090 case Intrinsic::mips_pckev_b: 2091 case Intrinsic::mips_pckev_h: 2092 case Intrinsic::mips_pckev_w: 2093 case Intrinsic::mips_pckev_d: 2094 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2095 Op->getOperand(1), Op->getOperand(2)); 2096 case Intrinsic::mips_pckod_b: 2097 case Intrinsic::mips_pckod_h: 2098 case Intrinsic::mips_pckod_w: 2099 case Intrinsic::mips_pckod_d: 2100 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2101 Op->getOperand(1), Op->getOperand(2)); 2102 case Intrinsic::mips_pcnt_b: 2103 case Intrinsic::mips_pcnt_h: 2104 case Intrinsic::mips_pcnt_w: 2105 case Intrinsic::mips_pcnt_d: 2106 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2107 case Intrinsic::mips_sat_s_b: 2108 case Intrinsic::mips_sat_s_h: 2109 case Intrinsic::mips_sat_s_w: 2110 case Intrinsic::mips_sat_s_d: 2111 case Intrinsic::mips_sat_u_b: 2112 case Intrinsic::mips_sat_u_h: 2113 case Intrinsic::mips_sat_u_w: 2114 case Intrinsic::mips_sat_u_d: { 2115 // Report an error for out of range values. 
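    // (Illustrative note: the saturation immediate must be less than the
    // element width in bits, hence the per-format bounds in the table below:
    // 7 for .b, 15 for .h, 31 for .w and 63 for .d.)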
2116 int64_t Max; 2117 switch (Intrinsic) { 2118 case Intrinsic::mips_sat_s_b: 2119 case Intrinsic::mips_sat_u_b: Max = 7; break; 2120 case Intrinsic::mips_sat_s_h: 2121 case Intrinsic::mips_sat_u_h: Max = 15; break; 2122 case Intrinsic::mips_sat_s_w: 2123 case Intrinsic::mips_sat_u_w: Max = 31; break; 2124 case Intrinsic::mips_sat_s_d: 2125 case Intrinsic::mips_sat_u_d: Max = 63; break; 2126 default: llvm_unreachable("Unmatched intrinsic"); 2127 } 2128 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2129 if (Value < 0 || Value > Max) 2130 report_fatal_error("Immediate out of range"); 2131 return SDValue(); 2132 } 2133 case Intrinsic::mips_shf_b: 2134 case Intrinsic::mips_shf_h: 2135 case Intrinsic::mips_shf_w: { 2136 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2137 if (Value < 0 || Value > 255) 2138 report_fatal_error("Immediate out of range"); 2139 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2140 Op->getOperand(2), Op->getOperand(1)); 2141 } 2142 case Intrinsic::mips_sldi_b: 2143 case Intrinsic::mips_sldi_h: 2144 case Intrinsic::mips_sldi_w: 2145 case Intrinsic::mips_sldi_d: { 2146 // Report an error for out of range values. 2147 int64_t Max; 2148 switch (Intrinsic) { 2149 case Intrinsic::mips_sldi_b: Max = 15; break; 2150 case Intrinsic::mips_sldi_h: Max = 7; break; 2151 case Intrinsic::mips_sldi_w: Max = 3; break; 2152 case Intrinsic::mips_sldi_d: Max = 1; break; 2153 default: llvm_unreachable("Unmatched intrinsic"); 2154 } 2155 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2156 if (Value < 0 || Value > Max) 2157 report_fatal_error("Immediate out of range"); 2158 return SDValue(); 2159 } 2160 case Intrinsic::mips_sll_b: 2161 case Intrinsic::mips_sll_h: 2162 case Intrinsic::mips_sll_w: 2163 case Intrinsic::mips_sll_d: 2164 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2165 truncateVecElts(Op, DAG)); 2166 case Intrinsic::mips_slli_b: 2167 case Intrinsic::mips_slli_h: 2168 case Intrinsic::mips_slli_w: 2169 case Intrinsic::mips_slli_d: 2170 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2171 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2172 case Intrinsic::mips_splat_b: 2173 case Intrinsic::mips_splat_h: 2174 case Intrinsic::mips_splat_w: 2175 case Intrinsic::mips_splat_d: 2176 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2177 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2178 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2179 // Instead we lower to MipsISD::VSHF and match from there. 
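    // The VSHF control vector is built by splatting the zero-extended lane
    // index across every element, so each result element selects the same
    // source lane.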
2180 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2181 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2182 Op->getOperand(1)); 2183 case Intrinsic::mips_splati_b: 2184 case Intrinsic::mips_splati_h: 2185 case Intrinsic::mips_splati_w: 2186 case Intrinsic::mips_splati_d: 2187 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2188 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2189 Op->getOperand(1)); 2190 case Intrinsic::mips_sra_b: 2191 case Intrinsic::mips_sra_h: 2192 case Intrinsic::mips_sra_w: 2193 case Intrinsic::mips_sra_d: 2194 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2195 truncateVecElts(Op, DAG)); 2196 case Intrinsic::mips_srai_b: 2197 case Intrinsic::mips_srai_h: 2198 case Intrinsic::mips_srai_w: 2199 case Intrinsic::mips_srai_d: 2200 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2201 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2202 case Intrinsic::mips_srari_b: 2203 case Intrinsic::mips_srari_h: 2204 case Intrinsic::mips_srari_w: 2205 case Intrinsic::mips_srari_d: { 2206 // Report an error for out of range values. 2207 int64_t Max; 2208 switch (Intrinsic) { 2209 case Intrinsic::mips_srari_b: Max = 7; break; 2210 case Intrinsic::mips_srari_h: Max = 15; break; 2211 case Intrinsic::mips_srari_w: Max = 31; break; 2212 case Intrinsic::mips_srari_d: Max = 63; break; 2213 default: llvm_unreachable("Unmatched intrinsic"); 2214 } 2215 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2216 if (Value < 0 || Value > Max) 2217 report_fatal_error("Immediate out of range"); 2218 return SDValue(); 2219 } 2220 case Intrinsic::mips_srl_b: 2221 case Intrinsic::mips_srl_h: 2222 case Intrinsic::mips_srl_w: 2223 case Intrinsic::mips_srl_d: 2224 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2225 truncateVecElts(Op, DAG)); 2226 case Intrinsic::mips_srli_b: 2227 case Intrinsic::mips_srli_h: 2228 case Intrinsic::mips_srli_w: 2229 case Intrinsic::mips_srli_d: 2230 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2231 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2232 case Intrinsic::mips_srlri_b: 2233 case Intrinsic::mips_srlri_h: 2234 case Intrinsic::mips_srlri_w: 2235 case Intrinsic::mips_srlri_d: { 2236 // Report an error for out of range values. 
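    // (As with srari above, the rounding shift amount must be a valid bit
    // index for the element width.)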
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_srlri_b: Max = 7; break;
    case Intrinsic::mips_srlri_h: Max = 15; break;
    case Intrinsic::mips_srlri_w: Max = 31; break;
    case Intrinsic::mips_srlri_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_subvi_b:
  case Intrinsic::mips_subvi_h:
  case Intrinsic::mips_subvi_w:
  case Intrinsic::mips_subvi_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
  }
  }
}

static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // For N64, addresses have the underlying type MVT::i64. This intrinsic,
  // however, takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
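  // (Illustrative example, assuming the standard MSA encoding: for ld.w the
  // s10 field is scaled by the 4-byte element size, so byte offsets from
  // -2048 to +2044 in steps of 4 are encodable.)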
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
                     Align(16));
}

SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = Op->getConstantOperandVal(1);
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
  }
}

static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // For N64, addresses have the underlying type MVT::i64. This intrinsic,
  // however, takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
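  // The same sign-extension as in lowerMSALoadIntr is needed here so that the
  // pointer-width ADD below sees a correctly signed offset.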
2367 if (Subtarget.isABI_N64()) 2368 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2369 2370 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2371 2372 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2373 Align(16)); 2374 } 2375 2376 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2377 SelectionDAG &DAG) const { 2378 unsigned Intr = Op->getConstantOperandVal(1); 2379 switch (Intr) { 2380 default: 2381 return SDValue(); 2382 case Intrinsic::mips_st_b: 2383 case Intrinsic::mips_st_h: 2384 case Intrinsic::mips_st_w: 2385 case Intrinsic::mips_st_d: 2386 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2387 } 2388 } 2389 2390 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2391 // 2392 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2393 // choose to sign-extend but we could have equally chosen zero-extend. The 2394 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2395 // result into this node later (possibly changing it to a zero-extend in the 2396 // process). 2397 SDValue MipsSETargetLowering:: 2398 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2399 SDLoc DL(Op); 2400 EVT ResTy = Op->getValueType(0); 2401 SDValue Op0 = Op->getOperand(0); 2402 EVT VecTy = Op0->getValueType(0); 2403 2404 if (!VecTy.is128BitVector()) 2405 return SDValue(); 2406 2407 if (ResTy.isInteger()) { 2408 SDValue Op1 = Op->getOperand(1); 2409 EVT EltTy = VecTy.getVectorElementType(); 2410 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2411 DAG.getValueType(EltTy)); 2412 } 2413 2414 return Op; 2415 } 2416 2417 static bool isConstantOrUndef(const SDValue Op) { 2418 if (Op->isUndef()) 2419 return true; 2420 if (isa<ConstantSDNode>(Op)) 2421 return true; 2422 if (isa<ConstantFPSDNode>(Op)) 2423 return true; 2424 return false; 2425 } 2426 2427 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2428 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2429 if (isConstantOrUndef(Op->getOperand(i))) 2430 return true; 2431 return false; 2432 } 2433 2434 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2435 // backend. 2436 // 2437 // Lowers according to the following rules: 2438 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2439 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2440 // immediate 2441 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2442 // is a power of 2 less than or equal to 64 and the value does not fit into a 2443 // signed 10-bit immediate 2444 // - Non-constant splats are legal as-is. 2445 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2446 // - All others are illegal and must be expanded. 
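// (Illustrative note: a v4f32 constant splat is re-expressed through an
// equivalent v4i32 integer splat and bitcast back, since the splat-constant
// machinery below works on integer element types.)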
2447 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2448 SelectionDAG &DAG) const { 2449 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2450 EVT ResTy = Op->getValueType(0); 2451 SDLoc DL(Op); 2452 APInt SplatValue, SplatUndef; 2453 unsigned SplatBitSize; 2454 bool HasAnyUndefs; 2455 2456 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2457 return SDValue(); 2458 2459 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2460 HasAnyUndefs, 8, 2461 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2462 // We can only cope with 8, 16, 32, or 64-bit elements 2463 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2464 SplatBitSize != 64) 2465 return SDValue(); 2466 2467 // If the value isn't an integer type we will have to bitcast 2468 // from an integer type first. Also, if there are any undefs, we must 2469 // lower them to defined values first. 2470 if (ResTy.isInteger() && !HasAnyUndefs) 2471 return Op; 2472 2473 EVT ViaVecTy; 2474 2475 switch (SplatBitSize) { 2476 default: 2477 return SDValue(); 2478 case 8: 2479 ViaVecTy = MVT::v16i8; 2480 break; 2481 case 16: 2482 ViaVecTy = MVT::v8i16; 2483 break; 2484 case 32: 2485 ViaVecTy = MVT::v4i32; 2486 break; 2487 case 64: 2488 // There's no fill.d to fall back on for 64-bit values 2489 return SDValue(); 2490 } 2491 2492 // SelectionDAG::getConstant will promote SplatValue appropriately. 2493 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2494 2495 // Bitcast to the type we originally wanted 2496 if (ViaVecTy != ResTy) 2497 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2498 2499 return Result; 2500 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) 2501 return Op; 2502 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2503 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 2504 // The resulting code is the same length as the expansion, but it doesn't 2505 // use memory operations 2506 EVT ResTy = Node->getValueType(0); 2507 2508 assert(ResTy.isVector()); 2509 2510 unsigned NumElts = ResTy.getVectorNumElements(); 2511 SDValue Vector = DAG.getUNDEF(ResTy); 2512 for (unsigned i = 0; i < NumElts; ++i) { 2513 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2514 Node->getOperand(i), 2515 DAG.getConstant(i, DL, MVT::i32)); 2516 } 2517 return Vector; 2518 } 2519 2520 return SDValue(); 2521 } 2522 2523 // Lower VECTOR_SHUFFLE into SHF (if possible). 2524 // 2525 // SHF splits the vector into blocks of four elements, then shuffles these 2526 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2527 // 2528 // It is therefore possible to lower into SHF when the mask takes the form: 2529 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2530 // When undef's appear they are treated as if they were whatever value is 2531 // necessary in order to fit the above forms. 
2532 // 2533 // For example: 2534 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2535 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2536 // i32 7, i32 6, i32 5, i32 4> 2537 // is lowered to: 2538 // (SHF_H $w0, $w1, 27) 2539 // where the 27 comes from: 2540 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2541 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2542 SmallVector<int, 16> Indices, 2543 SelectionDAG &DAG) { 2544 int SHFIndices[4] = { -1, -1, -1, -1 }; 2545 2546 if (Indices.size() < 4) 2547 return SDValue(); 2548 2549 for (unsigned i = 0; i < 4; ++i) { 2550 for (unsigned j = i; j < Indices.size(); j += 4) { 2551 int Idx = Indices[j]; 2552 2553 // Convert from vector index to 4-element subvector index 2554 // If an index refers to an element outside of the subvector then give up 2555 if (Idx != -1) { 2556 Idx -= 4 * (j / 4); 2557 if (Idx < 0 || Idx >= 4) 2558 return SDValue(); 2559 } 2560 2561 // If the mask has an undef, replace it with the current index. 2562 // Note that it might still be undef if the current index is also undef 2563 if (SHFIndices[i] == -1) 2564 SHFIndices[i] = Idx; 2565 2566 // Check that non-undef values are the same as in the mask. If they 2567 // aren't then give up 2568 if (!(Idx == -1 || Idx == SHFIndices[i])) 2569 return SDValue(); 2570 } 2571 } 2572 2573 // Calculate the immediate. Replace any remaining undefs with zero 2574 APInt Imm(32, 0); 2575 for (int i = 3; i >= 0; --i) { 2576 int Idx = SHFIndices[i]; 2577 2578 if (Idx == -1) 2579 Idx = 0; 2580 2581 Imm <<= 2; 2582 Imm |= Idx & 0x3; 2583 } 2584 2585 SDLoc DL(Op); 2586 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2587 DAG.getTargetConstant(Imm, DL, MVT::i32), 2588 Op->getOperand(0)); 2589 } 2590 2591 /// Determine whether a range fits a regular pattern of values. 2592 /// This function accounts for the possibility of jumping over the End iterator. 2593 template <typename ValType> 2594 static bool 2595 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2596 unsigned CheckStride, 2597 typename SmallVectorImpl<ValType>::const_iterator End, 2598 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2599 auto &I = Begin; 2600 2601 while (I != End) { 2602 if (*I != -1 && *I != ExpectedIndex) 2603 return false; 2604 ExpectedIndex += ExpectedIndexStride; 2605 2606 // Incrementing past End is undefined behaviour so we must increment one 2607 // step at a time and check for End at each step. 2608 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2609 ; // Empty loop body. 2610 } 2611 return true; 2612 } 2613 2614 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2615 // 2616 // It is a SPLATI when the mask is: 2617 // <x, x, x, ...> 2618 // where x is any valid index. 2619 // 2620 // When undef's appear in the mask they are treated as if they were whatever 2621 // value is necessary in order to fit the above form. 2622 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2623 SmallVector<int, 16> Indices, 2624 SelectionDAG &DAG) { 2625 assert((Indices.size() % 2) == 0); 2626 2627 int SplatIndex = -1; 2628 for (const auto &V : Indices) { 2629 if (V != -1) { 2630 SplatIndex = V; 2631 break; 2632 } 2633 } 2634 2635 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2636 0); 2637 } 2638 2639 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2640 // 2641 // ILVEV interleaves the even elements from each vector. 
2642 // 2643 // It is possible to lower into ILVEV when the mask consists of two of the 2644 // following forms interleaved: 2645 // <0, 2, 4, ...> 2646 // <n, n+2, n+4, ...> 2647 // where n is the number of elements in the vector. 2648 // For example: 2649 // <0, 0, 2, 2, 4, 4, ...> 2650 // <0, n, 2, n+2, 4, n+4, ...> 2651 // 2652 // When undef's appear in the mask they are treated as if they were whatever 2653 // value is necessary in order to fit the above forms. 2654 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2655 SmallVector<int, 16> Indices, 2656 SelectionDAG &DAG) { 2657 assert((Indices.size() % 2) == 0); 2658 2659 SDValue Wt; 2660 SDValue Ws; 2661 const auto &Begin = Indices.begin(); 2662 const auto &End = Indices.end(); 2663 2664 // Check even elements are taken from the even elements of one half or the 2665 // other and pick an operand accordingly. 2666 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2667 Wt = Op->getOperand(0); 2668 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2669 Wt = Op->getOperand(1); 2670 else 2671 return SDValue(); 2672 2673 // Check odd elements are taken from the even elements of one half or the 2674 // other and pick an operand accordingly. 2675 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2676 Ws = Op->getOperand(0); 2677 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2678 Ws = Op->getOperand(1); 2679 else 2680 return SDValue(); 2681 2682 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2683 } 2684 2685 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2686 // 2687 // ILVOD interleaves the odd elements from each vector. 2688 // 2689 // It is possible to lower into ILVOD when the mask consists of two of the 2690 // following forms interleaved: 2691 // <1, 3, 5, ...> 2692 // <n+1, n+3, n+5, ...> 2693 // where n is the number of elements in the vector. 2694 // For example: 2695 // <1, 1, 3, 3, 5, 5, ...> 2696 // <1, n+1, 3, n+3, 5, n+5, ...> 2697 // 2698 // When undef's appear in the mask they are treated as if they were whatever 2699 // value is necessary in order to fit the above forms. 2700 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2701 SmallVector<int, 16> Indices, 2702 SelectionDAG &DAG) { 2703 assert((Indices.size() % 2) == 0); 2704 2705 SDValue Wt; 2706 SDValue Ws; 2707 const auto &Begin = Indices.begin(); 2708 const auto &End = Indices.end(); 2709 2710 // Check even elements are taken from the odd elements of one half or the 2711 // other and pick an operand accordingly. 2712 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2713 Wt = Op->getOperand(0); 2714 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2715 Wt = Op->getOperand(1); 2716 else 2717 return SDValue(); 2718 2719 // Check odd elements are taken from the odd elements of one half or the 2720 // other and pick an operand accordingly. 2721 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2722 Ws = Op->getOperand(0); 2723 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2724 Ws = Op->getOperand(1); 2725 else 2726 return SDValue(); 2727 2728 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2729 } 2730 2731 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2732 // 2733 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2734 // each vector. 
2735 // 2736 // It is possible to lower into ILVR when the mask consists of two of the 2737 // following forms interleaved: 2738 // <0, 1, 2, ...> 2739 // <n, n+1, n+2, ...> 2740 // where n is the number of elements in the vector. 2741 // For example: 2742 // <0, 0, 1, 1, 2, 2, ...> 2743 // <0, n, 1, n+1, 2, n+2, ...> 2744 // 2745 // When undef's appear in the mask they are treated as if they were whatever 2746 // value is necessary in order to fit the above forms. 2747 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2748 SmallVector<int, 16> Indices, 2749 SelectionDAG &DAG) { 2750 assert((Indices.size() % 2) == 0); 2751 2752 SDValue Wt; 2753 SDValue Ws; 2754 const auto &Begin = Indices.begin(); 2755 const auto &End = Indices.end(); 2756 2757 // Check even elements are taken from the right (lowest-indexed) elements of 2758 // one half or the other and pick an operand accordingly. 2759 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2760 Wt = Op->getOperand(0); 2761 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2762 Wt = Op->getOperand(1); 2763 else 2764 return SDValue(); 2765 2766 // Check odd elements are taken from the right (lowest-indexed) elements of 2767 // one half or the other and pick an operand accordingly. 2768 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2769 Ws = Op->getOperand(0); 2770 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2771 Ws = Op->getOperand(1); 2772 else 2773 return SDValue(); 2774 2775 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2776 } 2777 2778 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2779 // 2780 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2781 // of each vector. 2782 // 2783 // It is possible to lower into ILVL when the mask consists of two of the 2784 // following forms interleaved: 2785 // <x, x+1, x+2, ...> 2786 // <n+x, n+x+1, n+x+2, ...> 2787 // where n is the number of elements in the vector and x is half n. 2788 // For example: 2789 // <x, x, x+1, x+1, x+2, x+2, ...> 2790 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2791 // 2792 // When undef's appear in the mask they are treated as if they were whatever 2793 // value is necessary in order to fit the above forms. 2794 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2795 SmallVector<int, 16> Indices, 2796 SelectionDAG &DAG) { 2797 assert((Indices.size() % 2) == 0); 2798 2799 unsigned HalfSize = Indices.size() / 2; 2800 SDValue Wt; 2801 SDValue Ws; 2802 const auto &Begin = Indices.begin(); 2803 const auto &End = Indices.end(); 2804 2805 // Check even elements are taken from the left (highest-indexed) elements of 2806 // one half or the other and pick an operand accordingly. 2807 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2808 Wt = Op->getOperand(0); 2809 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2810 Wt = Op->getOperand(1); 2811 else 2812 return SDValue(); 2813 2814 // Check odd elements are taken from the left (highest-indexed) elements of 2815 // one half or the other and pick an operand accordingly. 2816 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2817 Ws = Op->getOperand(0); 2818 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2819 1)) 2820 Ws = Op->getOperand(1); 2821 else 2822 return SDValue(); 2823 2824 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2825 } 2826 2827 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 
2828 // 2829 // PCKEV copies the even elements of each vector into the result vector. 2830 // 2831 // It is possible to lower into PCKEV when the mask consists of two of the 2832 // following forms concatenated: 2833 // <0, 2, 4, ...> 2834 // <n, n+2, n+4, ...> 2835 // where n is the number of elements in the vector. 2836 // For example: 2837 // <0, 2, 4, ..., 0, 2, 4, ...> 2838 // <0, 2, 4, ..., n, n+2, n+4, ...> 2839 // 2840 // When undef's appear in the mask they are treated as if they were whatever 2841 // value is necessary in order to fit the above forms. 2842 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2843 SmallVector<int, 16> Indices, 2844 SelectionDAG &DAG) { 2845 assert((Indices.size() % 2) == 0); 2846 2847 SDValue Wt; 2848 SDValue Ws; 2849 const auto &Begin = Indices.begin(); 2850 const auto &Mid = Indices.begin() + Indices.size() / 2; 2851 const auto &End = Indices.end(); 2852 2853 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2854 Wt = Op->getOperand(0); 2855 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2856 Wt = Op->getOperand(1); 2857 else 2858 return SDValue(); 2859 2860 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2861 Ws = Op->getOperand(0); 2862 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2863 Ws = Op->getOperand(1); 2864 else 2865 return SDValue(); 2866 2867 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2868 } 2869 2870 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2871 // 2872 // PCKOD copies the odd elements of each vector into the result vector. 2873 // 2874 // It is possible to lower into PCKOD when the mask consists of two of the 2875 // following forms concatenated: 2876 // <1, 3, 5, ...> 2877 // <n+1, n+3, n+5, ...> 2878 // where n is the number of elements in the vector. 2879 // For example: 2880 // <1, 3, 5, ..., 1, 3, 5, ...> 2881 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2882 // 2883 // When undef's appear in the mask they are treated as if they were whatever 2884 // value is necessary in order to fit the above forms. 2885 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2886 SmallVector<int, 16> Indices, 2887 SelectionDAG &DAG) { 2888 assert((Indices.size() % 2) == 0); 2889 2890 SDValue Wt; 2891 SDValue Ws; 2892 const auto &Begin = Indices.begin(); 2893 const auto &Mid = Indices.begin() + Indices.size() / 2; 2894 const auto &End = Indices.end(); 2895 2896 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2897 Wt = Op->getOperand(0); 2898 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2899 Wt = Op->getOperand(1); 2900 else 2901 return SDValue(); 2902 2903 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2904 Ws = Op->getOperand(0); 2905 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2906 Ws = Op->getOperand(1); 2907 else 2908 return SDValue(); 2909 2910 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2911 } 2912 2913 // Lower VECTOR_SHUFFLE into VSHF. 2914 // 2915 // This mostly consists of converting the shuffle indices in Indices into a 2916 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2917 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2918 // if the type is v8i16 and all the indices are less than 8 then the second 2919 // operand is unused and can be replaced with anything. We choose to replace it 2920 // with the used operand since this reduces the number of instructions overall. 
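// (Illustrative example: for a v8i16 shuffle with mask <0, 0, 1, 1, 2, 2, 3, 3>
// every index is below 8, so only the first input is live and both VSHF
// operands are set to it.)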
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        const SmallVector<int, 16> &Indices,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }

  for (int Idx : Indices)
    Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}

// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
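  // The fixed-pattern lowerings below are attempted in order of preference;
  // VSHF is the general fallback when none of them fit.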
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
  SDValue Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}

MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Only one branch may terminate $BB, so pick the encoding that matches the
  // target: bposge32c on microMIPS, bposge32 otherwise.
  if (!Subtarget.inMicroMipsMode()) {
    // Insert the real bposge32 instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
  } else {
    // Insert the real bposge32c instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
  }

  // Fill $FBB.
  Register VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
      .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
      .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(VR2)
      .addMBB(FBB)
      .addReg(VR1)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
3073 return Sink; 3074 } 3075 3076 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3077 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3078 // $bb: 3079 // vany_nonzero $rd, $ws 3080 // => 3081 // $bb: 3082 // bnz.b $ws, $tbb 3083 // b $fbb 3084 // $fbb: 3085 // li $rd1, 0 3086 // b $sink 3087 // $tbb: 3088 // li $rd2, 1 3089 // $sink: 3090 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3091 3092 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3093 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3094 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3095 DebugLoc DL = MI.getDebugLoc(); 3096 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3097 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3098 MachineFunction *F = BB->getParent(); 3099 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3100 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3101 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3102 F->insert(It, FBB); 3103 F->insert(It, TBB); 3104 F->insert(It, Sink); 3105 3106 // Transfer the remainder of BB and its successor edges to Sink. 3107 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3108 BB->end()); 3109 Sink->transferSuccessorsAndUpdatePHIs(BB); 3110 3111 // Add successors. 3112 BB->addSuccessor(FBB); 3113 BB->addSuccessor(TBB); 3114 FBB->addSuccessor(Sink); 3115 TBB->addSuccessor(Sink); 3116 3117 // Insert the real bnz.b instruction to $BB. 3118 BuildMI(BB, DL, TII->get(BranchOp)) 3119 .addReg(MI.getOperand(1).getReg()) 3120 .addMBB(TBB); 3121 3122 // Fill $FBB. 3123 Register RD1 = RegInfo.createVirtualRegister(RC); 3124 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3125 .addReg(Mips::ZERO).addImm(0); 3126 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3127 3128 // Fill $TBB. 3129 Register RD2 = RegInfo.createVirtualRegister(RC); 3130 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3131 .addReg(Mips::ZERO).addImm(1); 3132 3133 // Insert phi function to $Sink. 3134 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3135 MI.getOperand(0).getReg()) 3136 .addReg(RD1) 3137 .addMBB(FBB) 3138 .addReg(RD2) 3139 .addMBB(TBB); 3140 3141 MI.eraseFromParent(); // The pseudo instruction is gone now. 3142 return Sink; 3143 } 3144 3145 // Emit the COPY_FW pseudo instruction. 3146 // 3147 // copy_fw_pseudo $fd, $ws, n 3148 // => 3149 // copy_u_w $rt, $ws, $n 3150 // mtc1 $rt, $fd 3151 // 3152 // When n is zero, the equivalent operation can be performed with (potentially) 3153 // zero instructions due to register overlaps. This optimization is never valid 3154 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3155 MachineBasicBlock * 3156 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3157 MachineBasicBlock *BB) const { 3158 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3159 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3160 DebugLoc DL = MI.getDebugLoc(); 3161 Register Fd = MI.getOperand(0).getReg(); 3162 Register Ws = MI.getOperand(1).getReg(); 3163 unsigned Lane = MI.getOperand(2).getImm(); 3164 3165 if (Lane == 0) { 3166 unsigned Wt = Ws; 3167 if (!Subtarget.useOddSPReg()) { 3168 // We must copy to an even-numbered MSA register so that the 3169 // single-precision sub-register is also guaranteed to be even-numbered. 
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    Register Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                                : &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 is the only FPU mode supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
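// Note: insve.w/insve.d copy element 0 of their source vector into the chosen
// destination element, which is why the pseudos above and below first
// materialize $fs as element 0 of a temporary via SUBREG_TO_REG.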
// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index.
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero.
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
// copy_u.h $rtemp,$wd[0]
// sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats to be allocated 16 bytes(!)
// of space.
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Ws = MI.getOperand(0).getReg();
  Register Rt = MI.getOperand(1).getReg();
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, while a load
  //          via spill and reload can expand to a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }
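  // sh64 takes a GPR64 value operand, which is why the 32-bit copy_u.h result
  // was widened above on 64-bit subtargets.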
  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));

  MI.eraseFromParent();
  return BB;
}

// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA
// register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
// lh $rtemp, $addr
// fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
//         Additionally, if the address is not modulo 16, 2 cases can occur:
//          a) Segmentation fault as the load instruction reads from a memory
//             page it's not supposed to.
//          b) The load crosses an implementation specific boundary, requiring
//             OS intervention.
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, while a load
  //          via spill and reload can expand to a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rt = RegInfo.createVirtualRegister(RC);

  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    MIB.add(MO);

  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
    Rt = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd or FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up in
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// fexdo.h $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $wtemp, $rtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
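  // mfc1/dmfc1 moves $fs into a GPR, and fill.[wd] then replicates it into
  // every element of $wtemp (see the safety note above).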
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either an FGR32Opnd or an FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up in
//         the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//        I haven't checked.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// copy_s.w $rtemp, $wtemp[0]
// mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.d $rtemp, $wtemp2[0]
// dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.w $rtemp, $wtemp2[0]
// mtc1 $rtemp, $ftemp
// copy_s.w $rtemp2, $wtemp2[1]
// $fd = mthc1 $rtemp2, $ftemp
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
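  // A 64-bit FPU can move the whole double at once with dmtc1; a 32-bit FPU
  // has to assemble it from two 32-bit halves with mtc1 and mthc1.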
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  Register WPHI = Wtemp;

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}

// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// fexp2.w $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}