//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                 cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

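// MVT::Untyped is used below to model values that live in the 64-bit HI/LO
// accumulator pair (see lowerMulDiv and lowerDSPIntr); pick the DSP
// accumulator register class when the DSP ASE is available.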
const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
  setOperationAction(ISD::UNDEF, Ty, Legal);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:      return lowerLOAD(Op, DAG);
  case ISD::STORE:     return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
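// For example, (and (MipsVExtractSExt $v, $idx, i16), 65535) sign-extends an
// i16 element and then masks the result back down to 16 bits, which is
// equivalent to (MipsVExtractZExt $v, $idx, i16).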
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         makeArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
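//
// For example, with v16i8 operands:
//   (or (and $a, splat(0x0f)), (and $b, splat(0xf0)))
//     => (vselect splat(0x0f), $a, $b)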
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnesValue())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider to perform this optimization after type legalization.
  //   That allows to remove a workaround for types not supported natively.
  // - Take in account `-Os, -Oz` flags because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << C.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
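  // For example, c == 30 has floor_c == 16 and ceil_c == 32; |30 - 32| is
  // smaller than |30 - 16|, so this emits (sub (shl x, 5), (shl x, 1)).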
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
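//
// For example, (sra (shl (MipsVExtractZExt $v, $idx, i8), 24), 24) re-extends
// bits 0-7 by sign, so it can be rewritten as (MipsVExtractSExt $v, $idx, i8).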
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlignment(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain =
      DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
                   Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      std::min(Nd.getAlignment(), 4U),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

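// For example, ISD::MULHS reaches here with HasLo == false and HasHi == true,
// and is emitted as (MipsISD::MFHI (MipsISD::Mult $a, $b)), reading only the
// HI half of the accumulator.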
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, DL, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}

// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do
  // it here for now.
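  // For example, on a little-endian target an immediate of 33 produces the
  // 64-bit mask 1 << 33, which is built as the v4i32 constant
  // <0, 2, 0, 2> (low word, high word per element) and bitcast to v2i64.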
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, do a vector shift instead

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

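// For example, bclri.w with an immediate of 3 becomes an ISD::AND of each
// lane with ~(1 << 3) == 0xfffffff7.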
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
  switch (Intrinsic) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bclr_b:
  case Intrinsic::mips_bclr_h:
  case Intrinsic::mips_bclr_w:
  case Intrinsic::mips_bclr_d:
    return lowerMSABitClear(Op, DAG);
  case Intrinsic::mips_bclri_b:
  case Intrinsic::mips_bclri_h:
  case Intrinsic::mips_bclri_w:
  case Intrinsic::mips_bclri_d:
    return lowerMSABitClearImm(Op, DAG);
  case Intrinsic::mips_binsli_b:
  case Intrinsic::mips_binsli_h:
  case Intrinsic::mips_binsli_w:
  case Intrinsic::mips_binsli_d: {
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
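  // For example (illustrative values), binsli.w with nbits = 7 selects the
  // top eight bits of each element: Mask = getHighBitsSet(32, 8) =
  // 0xFF000000, so the VSELECT takes those bits from IfSet and the remaining
  // bits from IfClear.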
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_bmzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_bneg_b:
  case Intrinsic::mips_bneg_h:
  case Intrinsic::mips_bneg_w:
  case Intrinsic::mips_bneg_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bnegi_b:
  case Intrinsic::mips_bnegi_h:
  case Intrinsic::mips_bnegi_w:
  case Intrinsic::mips_bnegi_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3),
                       Op->getOperand(2));
  case Intrinsic::mips_bseli_b:
    // bseli_b(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
                       Op->getOperand(2));
  case Intrinsic::mips_bset_b:
  case Intrinsic::mips_bset_h:
  case Intrinsic::mips_bset_w:
  case Intrinsic::mips_bset_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bseti_b:
  case Intrinsic::mips_bseti_h:
  case Intrinsic::mips_bseti_w:
  case Intrinsic::mips_bseti_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
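  // Note the signed/unsigned split here: ceqi/clei_s splat a sign-extended
  // immediate (e.g. clei_s.w with -8 compares each lane against -8 using
  // SETLE), while the _u forms below splat a zero-extended immediate and use
  // the unsigned condition codes. (Illustrative values only.)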
  case Intrinsic::mips_cle_u_b:
  case Intrinsic::mips_cle_u_h:
  case Intrinsic::mips_cle_u_w:
  case Intrinsic::mips_cle_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_clei_u_b:
  case Intrinsic::mips_clei_u_h:
  case Intrinsic::mips_clei_u_w:
  case Intrinsic::mips_clei_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
  case Intrinsic::mips_clt_s_b:
  case Intrinsic::mips_clt_s_h:
  case Intrinsic::mips_clt_s_w:
  case Intrinsic::mips_clt_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLT);
  case Intrinsic::mips_clti_s_b:
  case Intrinsic::mips_clti_s_h:
  case Intrinsic::mips_clti_s_w:
  case Intrinsic::mips_clti_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
  case Intrinsic::mips_clt_u_b:
  case Intrinsic::mips_clt_u_h:
  case Intrinsic::mips_clt_u_w:
  case Intrinsic::mips_clt_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_clti_u_b:
  case Intrinsic::mips_clti_u_h:
  case Intrinsic::mips_clti_u_w:
  case Intrinsic::mips_clti_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
  case Intrinsic::mips_copy_s_b:
  case Intrinsic::mips_copy_s_h:
  case Intrinsic::mips_copy_s_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
  case Intrinsic::mips_copy_s_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_copy_u_b:
  case Intrinsic::mips_copy_u_h:
  case Intrinsic::mips_copy_u_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
  case Intrinsic::mips_copy_u_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      // Note: When i64 is illegal, this results in copy_s.w instructions
      // instead of copy_u.w instructions. This makes no difference to the
      // behaviour since i64 is only illegal when the register file is 32-bit.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
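  // Sketch of the expected expansion (assuming the usual legalization path):
  // on MIPS32 a copy_s.d of lane 0 becomes an EXTRACT_VECTOR_ELT of v2i64
  // that the type legalizer splits into two i32 extracts, i.e. two copy_s.w
  // instructions.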
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
  case Intrinsic::mips_fceq_w:
  case Intrinsic::mips_fceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOEQ);
  case Intrinsic::mips_fcle_w:
  case Intrinsic::mips_fcle_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLE);
  case Intrinsic::mips_fclt_w:
  case Intrinsic::mips_fclt_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLT);
  case Intrinsic::mips_fcne_w:
  case Intrinsic::mips_fcne_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETONE);
  case Intrinsic::mips_fcor_w:
  case Intrinsic::mips_fcor_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETO);
  case Intrinsic::mips_fcueq_w:
  case Intrinsic::mips_fcueq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUEQ);
  case Intrinsic::mips_fcule_w:
  case Intrinsic::mips_fcule_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_fcult_w:
  case Intrinsic::mips_fcult_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_fcun_w:
  case Intrinsic::mips_fcun_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUO);
  case Intrinsic::mips_fcune_w:
  case Intrinsic::mips_fcune_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUNE);
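  // The condition code mapping above follows the MSA naming scheme:
  // fceq/fcle/fclt/fcne/fcor use the ordered predicates (SETOEQ and friends,
  // which are false when either operand is NaN), while fcueq/fcule/fcult/
  // fcun/fcune use the unordered predicates (true when either operand is
  // NaN).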
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ffint_u_w:
  case Intrinsic::mips_ffint_u_d:
    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ffint_s_w:
  case Intrinsic::mips_ffint_s_d:
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_fill_b:
  case Intrinsic::mips_fill_h:
  case Intrinsic::mips_fill_w:
  case Intrinsic::mips_fill_d: {
    EVT ResTy = Op->getValueType(0);
    SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
                                 Op->getOperand(1));

    // If ResTy is v2i64 then the type legalizer will break this node down into
    // an equivalent v4i32.
    return DAG.getBuildVector(ResTy, DL, Ops);
  }
  case Intrinsic::mips_fexp2_w:
  case Intrinsic::mips_fexp2_d: {
    // TODO: If intrinsics have fast-math-flags, propagate them.
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(
        ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
        DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
  }
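  // fexp2.[wd] scales its first operand by a power of two, i.e.
  // fexp2(a, b) = a * 2^b, which is why it lowers to an FMUL of FEXP2 rather
  // than to FEXP2 alone.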
  case Intrinsic::mips_flog2_w:
  case Intrinsic::mips_flog2_d:
    return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fmadd_w:
  case Intrinsic::mips_fmadd_d:
    return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_fmul_w:
  case Intrinsic::mips_fmul_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fmsub_w:
  case Intrinsic::mips_fmsub_d: {
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  }
  case Intrinsic::mips_frint_w:
  case Intrinsic::mips_frint_d:
    return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsqrt_w:
  case Intrinsic::mips_fsqrt_d:
    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsub_w:
  case Intrinsic::mips_fsub_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ftrunc_u_w:
  case Intrinsic::mips_ftrunc_u_d:
    return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ftrunc_s_w:
  case Intrinsic::mips_ftrunc_s_d:
    return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ilvev_b:
  case Intrinsic::mips_ilvev_h:
  case Intrinsic::mips_ilvev_w:
  case Intrinsic::mips_ilvev_d:
    return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvl_b:
  case Intrinsic::mips_ilvl_h:
  case Intrinsic::mips_ilvl_w:
  case Intrinsic::mips_ilvl_d:
    return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvod_b:
  case Intrinsic::mips_ilvod_h:
  case Intrinsic::mips_ilvod_w:
  case Intrinsic::mips_ilvod_d:
    return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvr_b:
  case Intrinsic::mips_ilvr_h:
  case Intrinsic::mips_ilvr_w:
  case Intrinsic::mips_ilvr_d:
    return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_insert_b:
  case Intrinsic::mips_insert_h:
  case Intrinsic::mips_insert_w:
  case Intrinsic::mips_insert_d:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
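  // Note the operand swap above: the intrinsic is insert.df(vec, idx, val)
  // but INSERT_VECTOR_ELT takes (vec, val, idx), hence operand 3 is passed
  // before operand 2.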
  case Intrinsic::mips_insve_b:
  case Intrinsic::mips_insve_h:
  case Intrinsic::mips_insve_w:
  case Intrinsic::mips_insve_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_insve_b: Max = 15; break;
    case Intrinsic::mips_insve_h: Max = 7; break;
    case Intrinsic::mips_insve_w: Max = 3; break;
    case Intrinsic::mips_insve_d: Max = 1; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
                       DAG.getConstant(0, DL, MVT::i32));
  }
  case Intrinsic::mips_ldi_b:
  case Intrinsic::mips_ldi_h:
  case Intrinsic::mips_ldi_w:
  case Intrinsic::mips_ldi_d:
    return lowerMSASplatImm(Op, 1, DAG, true);
  case Intrinsic::mips_lsa:
  case Intrinsic::mips_dlsa: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_maddv_b:
  case Intrinsic::mips_maddv_h:
  case Intrinsic::mips_maddv_w:
  case Intrinsic::mips_maddv_d: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_max_s_b:
  case Intrinsic::mips_max_s_h:
  case Intrinsic::mips_max_s_w:
  case Intrinsic::mips_max_s_d:
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_max_u_b:
  case Intrinsic::mips_max_u_h:
  case Intrinsic::mips_max_u_w:
  case Intrinsic::mips_max_u_d:
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_maxi_s_b:
  case Intrinsic::mips_maxi_s_h:
  case Intrinsic::mips_maxi_s_w:
  case Intrinsic::mips_maxi_s_d:
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
  case Intrinsic::mips_maxi_u_b:
  case Intrinsic::mips_maxi_u_h:
  case Intrinsic::mips_maxi_u_w:
  case Intrinsic::mips_maxi_u_d:
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_min_s_b:
  case Intrinsic::mips_min_s_h:
  case Intrinsic::mips_min_s_w:
  case Intrinsic::mips_min_s_d:
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_min_u_b:
  case Intrinsic::mips_min_u_h:
  case Intrinsic::mips_min_u_w:
  case Intrinsic::mips_min_u_d:
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_mini_s_b:
  case Intrinsic::mips_mini_s_h:
  case Intrinsic::mips_mini_s_w:
  case Intrinsic::mips_mini_s_d:
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
  case Intrinsic::mips_mini_u_b:
  case Intrinsic::mips_mini_u_h:
  case Intrinsic::mips_mini_u_w:
  case Intrinsic::mips_mini_u_d:
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
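  // For example (illustrative values), the lsa/dlsa lowering above turns
  // lsa(a, b, 2) into a + (b << 2), the usual scaled-index address
  // computation, and ldi.b with -1 splats the sign-extended byte 0xFF into
  // every lane.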
  case Intrinsic::mips_mod_s_b:
  case Intrinsic::mips_mod_s_h:
  case Intrinsic::mips_mod_s_w:
  case Intrinsic::mips_mod_s_d:
    return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mod_u_b:
  case Intrinsic::mips_mod_u_h:
  case Intrinsic::mips_mod_u_w:
  case Intrinsic::mips_mod_u_d:
    return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mulv_b:
  case Intrinsic::mips_mulv_h:
  case Intrinsic::mips_mulv_w:
  case Intrinsic::mips_mulv_d:
    return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_msubv_b:
  case Intrinsic::mips_msubv_h:
  case Intrinsic::mips_msubv_w:
  case Intrinsic::mips_msubv_d: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_nlzc_b:
  case Intrinsic::mips_nlzc_h:
  case Intrinsic::mips_nlzc_w:
  case Intrinsic::mips_nlzc_d:
    return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_nor_v: {
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1), Op->getOperand(2));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::mips_nori_b: {
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1),
                              lowerMSASplatImm(Op, 2, DAG));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
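  // nor_v and nori.b have no generic ISD equivalent, so they are emitted as
  // an OR followed by getNOT; e.g. nori.b(x, 0) computes the bitwise NOT
  // of x.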
  case Intrinsic::mips_or_v:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ori_b:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_pckev_b:
  case Intrinsic::mips_pckev_h:
  case Intrinsic::mips_pckev_w:
  case Intrinsic::mips_pckev_d:
    return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pckod_b:
  case Intrinsic::mips_pckod_h:
  case Intrinsic::mips_pckod_w:
  case Intrinsic::mips_pckod_d:
    return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pcnt_b:
  case Intrinsic::mips_pcnt_h:
  case Intrinsic::mips_pcnt_w:
  case Intrinsic::mips_pcnt_d:
    return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_sat_s_b:
  case Intrinsic::mips_sat_s_h:
  case Intrinsic::mips_sat_s_w:
  case Intrinsic::mips_sat_s_d:
  case Intrinsic::mips_sat_u_b:
  case Intrinsic::mips_sat_u_h:
  case Intrinsic::mips_sat_u_w:
  case Intrinsic::mips_sat_u_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_sat_s_b:
    case Intrinsic::mips_sat_u_b: Max = 7; break;
    case Intrinsic::mips_sat_s_h:
    case Intrinsic::mips_sat_u_h: Max = 15; break;
    case Intrinsic::mips_sat_s_w:
    case Intrinsic::mips_sat_u_w: Max = 31; break;
    case Intrinsic::mips_sat_s_d:
    case Intrinsic::mips_sat_u_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_shf_b:
  case Intrinsic::mips_shf_h:
  case Intrinsic::mips_shf_w: {
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > 255)
      report_fatal_error("Immediate out of range");
    return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_sldi_b:
  case Intrinsic::mips_sldi_h:
  case Intrinsic::mips_sldi_w:
  case Intrinsic::mips_sldi_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_sldi_b: Max = 15; break;
    case Intrinsic::mips_sldi_h: Max = 7; break;
    case Intrinsic::mips_sldi_w: Max = 3; break;
    case Intrinsic::mips_sldi_d: Max = 1; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
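  // Returning SDValue() after a successful range check, as for sat_[su] and
  // sldi above, leaves the intrinsic call untouched so the tablegen patterns
  // select it directly; only out-of-range immediates are rejected here.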
  case Intrinsic::mips_sll_b:
  case Intrinsic::mips_sll_h:
  case Intrinsic::mips_sll_w:
  case Intrinsic::mips_sll_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_slli_b:
  case Intrinsic::mips_slli_h:
  case Intrinsic::mips_slli_w:
  case Intrinsic::mips_slli_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_splat_b:
  case Intrinsic::mips_splat_h:
  case Intrinsic::mips_splat_w:
  case Intrinsic::mips_splat_d:
    // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
    // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
    // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
    // Instead we lower to MipsISD::VSHF and match from there.
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
                       Op->getOperand(1));
  case Intrinsic::mips_splati_b:
  case Intrinsic::mips_splati_h:
  case Intrinsic::mips_splati_w:
  case Intrinsic::mips_splati_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
                       Op->getOperand(1));
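  // For example, splat.w with a lane register becomes a VSHF whose mask is a
  // splat of that register across all elements; splati.w does the same with
  // an immediate mask and is matched back to splati.w during selection.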
  case Intrinsic::mips_sra_b:
  case Intrinsic::mips_sra_h:
  case Intrinsic::mips_sra_w:
  case Intrinsic::mips_sra_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_srai_b:
  case Intrinsic::mips_srai_h:
  case Intrinsic::mips_srai_w:
  case Intrinsic::mips_srai_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_srari_b:
  case Intrinsic::mips_srari_h:
  case Intrinsic::mips_srari_w:
  case Intrinsic::mips_srari_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_srari_b: Max = 7; break;
    case Intrinsic::mips_srari_h: Max = 15; break;
    case Intrinsic::mips_srari_w: Max = 31; break;
    case Intrinsic::mips_srari_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_srl_b:
  case Intrinsic::mips_srl_h:
  case Intrinsic::mips_srl_w:
  case Intrinsic::mips_srl_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_srli_b:
  case Intrinsic::mips_srli_h:
  case Intrinsic::mips_srli_w:
  case Intrinsic::mips_srli_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_srlri_b:
  case Intrinsic::mips_srlri_h:
  case Intrinsic::mips_srlri_w:
  case Intrinsic::mips_srlri_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_srlri_b: Max = 7; break;
    case Intrinsic::mips_srlri_h: Max = 15; break;
    case Intrinsic::mips_srlri_w: Max = 31; break;
    case Intrinsic::mips_srlri_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_subvi_b:
  case Intrinsic::mips_subvi_h:
  case Intrinsic::mips_subvi_w:
  case Intrinsic::mips_subvi_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
  }
  }
}
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // For N64, addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
                     /* Alignment = */ 16);
}

SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
  }
}

static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // For N64, addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
                      /* Alignment = */ 16);
}
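// Worked example of the offset handling in lowerMSALoadIntr and
// lowerMSAStoreIntr above (illustrative values, assuming the usual ld.w/st.w
// encoding): the instruction encodes a signed 10-bit offset scaled by the
// element size, so a byte offset of 64 given to the intrinsic corresponds to
// s10 = 16 in an ld.w. The ADD node computes the effective address before
// the generic load/store is created.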
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
  }
}

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  return Op;
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget.isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value isn't an integer type we will have to bitcast
    // from an integer type first. Also, if there are any undefs, we must
    // lower them to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs)
      return Op;

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values.
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, MVT::i32));
    }
    return Vector;
  }

  return SDValue();
}
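// For example (illustrative values), a v4i32 splat of 5 fits a signed 10-bit
// immediate and is kept legal as-is (it can be selected to ldi.w), while a
// splat of 70000 does not fit and is re-emitted as a bitconverted integer
// BUILD_VECTOR as described above.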
// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above forms.
//
// For example:
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
//                                 i32 7, i32 6, i32 5, i32 4>
// is lowered to:
//   (SHF_H $w0, $w1, 27)
// where the 27 comes from:
//   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) {
  int SHFIndices[4] = { -1, -1, -1, -1 };

  if (Indices.size() < 4)
    return SDValue();

  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Indices.size(); j += 4) {
      int Idx = Indices[j];

      // Convert from vector index to 4-element subvector index.
      // If an index refers to an element outside of the subvector then give
      // up.
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef.
      if (SHFIndices[i] == -1)
        SHFIndices[i] = Idx;

      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up.
      if (!(Idx == -1 || Idx == SHFIndices[i]))
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero.
  APInt Imm(32, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SHFIndices[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  SDLoc DL(Op);
  return DAG.getNode(MipsISD::SHF, DL, ResTy,
                     DAG.getTargetConstant(Imm, DL, MVT::i32),
                     Op->getOperand(0));
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

// Determine whether VECTOR_SHUFFLE is a SPLATI.
//
// It is a SPLATI when the mask is:
//   <x, x, x, ...>
// where x is any valid index.
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above form.
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
                                    SmallVector<int, 16> Indices,
                                    SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  int SplatIndex = -1;
  for (const auto &V : Indices) {
    if (V != -1) {
      SplatIndex = V;
      break;
    }
  }

  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
                                 0);
}
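// Worked example (illustrative): for the v4i32 mask <3, -1, 3, 3> the first
// defined index is 3, and fitsRegularPattern(Begin, 1, End, 3, 0) accepts
// the mask because undef (-1) entries match any expected index, so the
// shuffle is treated as a splat of lane 3.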
// Lower VECTOR_SHUFFLE into ILVEV (if possible).
//
// ILVEV interleaves the even elements from each vector.
//
// It is possible to lower into ILVEV when the mask consists of two of the
// following forms interleaved:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 2, 2, 4, 4, ...>
//   <0, n, 2, n+2, 4, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
}

// Lower VECTOR_SHUFFLE into ILVOD (if possible).
//
// ILVOD interleaves the odd elements from each vector.
//
// It is possible to lower into ILVOD when the mask consists of two of the
// following forms interleaved:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 1, 3, 3, 5, 5, ...>
//   <1, n+1, 3, n+3, 5, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
}
// Lower VECTOR_SHUFFLE into ILVR (if possible).
//
// ILVR interleaves consecutive elements from the right (lowest-indexed) half
// of each vector.
//
// It is possible to lower into ILVR when the mask consists of two of the
// following forms interleaved:
//   <0, 1, 2, ...>
//   <n, n+1, n+2, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 1, 1, 2, 2, ...>
//   <0, n, 1, n+1, 2, n+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
}

// Lower VECTOR_SHUFFLE into ILVL (if possible).
//
// ILVL interleaves consecutive elements from the left (highest-indexed) half
// of each vector.
//
// It is possible to lower into ILVL when the mask consists of two of the
// following forms interleaved:
//   <x, x+1, x+2, ...>
//   <n+x, n+x+1, n+x+2, ...>
// where n is the number of elements in the vector and x is half n.
// For example:
//   <x, x, x+1, x+1, x+2, x+2, ...>
//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  unsigned HalfSize = Indices.size() / 2;
  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
                                   1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
}
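// Concrete instance of the ILVR forms above (illustrative): for v8i16, n = 8
// and the mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low (right) halves
// of both operands; ILVL does the same for lanes 4-7, starting at x = n/2.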
// Lower VECTOR_SHUFFLE into PCKEV (if possible).
//
// PCKEV copies the even elements of each vector into the result vector.
//
// It is possible to lower into PCKEV when the mask consists of two of the
// following forms concatenated:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 2, 4, ..., 0, 2, 4, ...>
//   <0, 2, 4, ..., n, n+2, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
}

// Lower VECTOR_SHUFFLE into PCKOD (if possible).
//
// PCKOD copies the odd elements of each vector into the result vector.
//
// It is possible to lower into PCKOD when the mask consists of two of the
// following forms concatenated:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 3, 5, ..., 1, 3, 5, ...>
//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
}
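// Concrete instance of the PCKEV forms above (illustrative): for v4i32 the
// mask <0, 2, 4, 6> takes the even elements of the first operand for the low
// half of the result and the even elements of the second operand for the
// high half.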
// Lower VECTOR_SHUFFLE into VSHF.
//
// This mostly consists of converting the shuffle indices in Indices into a
// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
// if the type is v8i16 and all the indices are less than 8 then the second
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }

  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
       ++I)
    Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}
// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
  SDValue Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}

MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  //  $bb:
  //   bposge32 $tbb
  //  $fbb:
  //   li $vr2, 0
  //   b $sink
  //  $tbb:
  //   li $vr1, 1
  //  $sink:
  //   $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bposge32 instruction to $BB; in microMIPS mode the
  // equivalent bposge32c instruction is used instead.
  if (Subtarget.inMicroMipsMode())
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
  else
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);

  // Fill $FBB.
  Register VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
      .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
      .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(VR2)
      .addMBB(FBB)
      .addReg(VR1)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
  return Sink;
}

MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
      .addReg(MI.getOperand(1).getReg())
      .addMBB(TBB);

  // Fill $FBB.
  Register RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
      .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
      .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FBB)
      .addReg(RD2)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}

// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// copy_u_w $rt, $ws, $n
// mtc1     $rt, $fd
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();

  if (Lane == 0) {
    unsigned Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
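      // For example (register numbers purely illustrative): reading lane 0
      // of $w7 as a single would use $f7, an odd register; copying $w7 to an
      // even register such as $w6 first exposes the even sub-register $f6
      // instead.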
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    Register Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                                : &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode is the only mode supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

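// Informal note on the insve.w semantics relied on above: insve.w $wd[$n],
// $ws[0] copies element 0 of $ws into element $n of $wd and leaves the other
// elements of $wd unchanged, which is why $wd_in must be forwarded through
// the INSVE_W operands.
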
// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
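  // Worked example (informal): inserting into lane 2 of a v4i32 yields the
  // byte index 2 << 2 = 8. The first sld.b below rotates the vector so the
  // target lane becomes element 0, the insert/insve writes element 0, and a
  // second sld.b by the negated amount (interpreted modulo 16 by the
  // hardware) rotates the vector back into place.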
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index.
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero.
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
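
  // The IMPLICIT_DEF/INSERT_SUBREG pair below materializes a 128-bit value
  // whose sub_lo sub-register is $fs without reading the (undefined) other
  // lanes; splati.w then replicates element 0 across the whole vector.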
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
// copy_u.h $rtemp, $wd[0]
// sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Ws = MI.getOperand(0).getReg();
  Register Rt = MI.getOperand(1).getReg();
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }
  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));
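  // Only the two bytes of the f16 reach memory: copy_u.h zero-extends lane 0
  // of the source into a GPR, and sh/sh64 stores just its low halfword.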
  MI.eraseFromParent();
  return BB;
}

// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA
// register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
// lh $rtemp, $addr
// fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
//         Additionally, if the address is not modulo 16, 2 cases can occur:
//          a) Segmentation fault as the load instruction reads from a memory
//             page it's not supposed to.
//          b) The load crosses an implementation specific boundary, requiring
//             OS intervention.
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rt = RegInfo.createVirtualRegister(RC);

  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    MIB.add(MI.getOperand(i));

  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
    Rt = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd or FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
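//
// Informal reminder of the MSA semantics relied on below: fexdo.w narrows the
// elements of two v2f64 operands into one v4f32 result and fexdo.h narrows
// two v4f32 operands into one v8f16 result. Passing the same register for
// both source operands merely duplicates the narrowed halves, which is
// harmless here since only element 0 of the result is meaningful.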
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// fexdo.h $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $wtemp, $rtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
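  // mfc1/dmfc1 moves the raw FPR bits into a GPR; fill.w/fill.d then
  // replicates that GPR into every vector element, in line with the safety
  // note above about preferring fill over a single-element insert.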
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either an FGR32Opnd or an FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up
//         the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//        I haven't checked.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// copy_s.w $rtemp, $wtemp[0]
// mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.d $rtemp, $wtemp2[0]
// dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.w $rtemp, $wtemp2[0]
// mtc1 $rtemp, $ftemp
// copy_s.w $rtemp2, $wtemp2[1]
// $fd = mthc1 $rtemp2, $ftemp
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
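  // A 32-bit mtc1 can only write the low word of a 64-bit FPR; on MIPS32 the
  // high word needs a separate mthc1, which is why the FGR64-on-MIPS32 path
  // selects MTC1_D64 here and pairs it with MTHC1_D64 further down.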
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  Register WPHI = Wtemp;

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}

// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// fexp2.w $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
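
// Informal note on the two expansions above: MSA's fexp2.df computes
// ws * 2^wt element-wise, so seeding $ws with the integer splat 1 (ldi) and
// converting it to the floating-point value 1.0 (ffint_u) reduces the
// operation to a plain exp2 of $wt.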