//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
    UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::vector_valuetypes()) {
      for (MVT VT1 : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation-defined whether this is handled by hardware, software, or
    // a hybrid of the two, but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD: return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                        DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT: return lowerSELECT(Op, DAG);
  case ISD::BITCAST: return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         makeArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
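
// A worked example of the combine above (illustrative only, not from the
// original comments): with Mask == 0xffff, Mask + 1 == 0x10000 == 2^16, so
// Log2 == 16. If the element was extracted with an i16 extension
// (ExtendTySize == 16), the AND overwrites the whole extension and the node
// can be rewritten as a zero-extending extract:
//
//   (and (VEXTRACT_SEXT_ELT $w0, $idx, i16), 0xffff)
//     -> (VEXTRACT_ZEXT_ELT $w0, $idx, i16)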

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnesValue())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
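
// A worked example of the constant-mask case above (illustrative only),
// using v16i8 splats:
//
//   (or (and $a, splat(0x0f)), (and $b, splat(0xf0)))
//     -> (vselect splat(0x0f), $a, $b)
//
// Here Mask == 0x0f and InvMask == 0xf0 satisfy Mask == ~InvMask, so lanes
// where the mask bits are set come from $a (IfSet) and the remaining bits
// come from $b (IfClr).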

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider performing this optimization after type legalization. That
  //   would allow removing a workaround for types not supported natively.
  // - Take into account the `-Os, -Oz` flags because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << C.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
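
// A worked example of the decomposition performed by genConstMult
// (illustrative only): for C == 13 (0b1101), Floor == 8 and Ceil == 16, and
// |13 - 8| > |16 - 13|, so the multiply is rewritten as a subtraction from
// the ceiling, then the residue 3 is decomposed recursively:
//
//   x * 13 = x * 16 - x * 3
//          = (shl x, 4) - ((shl x, 1) + x)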

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
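
// A worked example of the DSP shift combine (illustrative only): a splat
// shift amount that fits within the element width is folded into the DSP
// vector-shift node, e.g. via performSHLCombine:
//
//   (shl v2i16:$a, (build_vector 3, 3)) -> (SHLL_DSP $a, 3)
//
// Splat values >= the element size (here 16) are rejected.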

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
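
// A worked example of the MSA part of the combine above (illustrative
// only): a shl/sra pair that sign-extends from bit 15 makes the original
// extension irrelevant, so it is folded into a sign-extending extract:
//
//   (sra (shl (VEXTRACT_ZEXT_ELT $w0, $idx, i16), 16), 16)
//     -> (VEXTRACT_SEXT_ELT $w0, $idx, i16)
//
// Here TotalBits == 16 + 16 == 32, satisfying the first condition.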

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE: return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE: return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default: return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlignment(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32
  // stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain =
      DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
                   Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      std::min(Nd.getAlignment(), 4U),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator-based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
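
// A worked example of lowerMulDiv (illustrative only): for
// (i32 (mulhs $a, $b)), HasLo is false and HasHi is true, so the node is
// lowered to (MFHI (Mult $a, $b)). For (smul_lohi $a, $b) both halves are
// produced and merged:
//
//   (smul_lohi $a, $b) -> merge_values (MFLO (Mult $a, $b)),
//                                      (MFHI (Mult $a, $b))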

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, DL, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}

// Lower an MSA copy intrinsic into the specified SelectionDAG node.
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through the stack. This way we are able to pattern match the
    // set of nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32, so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
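
// A worked example of getBuildVectorSplat for v2i64 (illustrative only): a
// 64-bit splat value is split into its two i32 halves, splatted via v4i32,
// then bitcast back:
//
//   splat v2i64, $v64
//     -> (bitcast v2i64 (build_vector v4i32, $lo, $hi, $lo, $hi))
//
// where $lo = (trunc i32 $v64) and $hi = (trunc i32 (srl $v64, 32)), with
// the halves swapped on big-endian targets.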

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet, so we must do
  // it here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, so do a vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
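
// A worked example of the bit-clear lowering above (illustrative only): for
// a bclri.w with a constant bit index of 3, lowerMSABitClearImm produces an
// AND with the inverted one-hot mask:
//
//   bclri.w $w0, 3 -> (and $w0, splat(~(1 << 3)))
//                  == (and $w0, splat(0xfffffff7))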
MipsISD::DPSX_W_PH); 1564 case Intrinsic::mips_mulsa_w_ph: 1565 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1566 case Intrinsic::mips_mult: 1567 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1568 case Intrinsic::mips_multu: 1569 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1570 case Intrinsic::mips_madd: 1571 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1572 case Intrinsic::mips_maddu: 1573 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1574 case Intrinsic::mips_msub: 1575 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1576 case Intrinsic::mips_msubu: 1577 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1578 case Intrinsic::mips_addv_b: 1579 case Intrinsic::mips_addv_h: 1580 case Intrinsic::mips_addv_w: 1581 case Intrinsic::mips_addv_d: 1582 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1583 Op->getOperand(2)); 1584 case Intrinsic::mips_addvi_b: 1585 case Intrinsic::mips_addvi_h: 1586 case Intrinsic::mips_addvi_w: 1587 case Intrinsic::mips_addvi_d: 1588 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1589 lowerMSASplatImm(Op, 2, DAG)); 1590 case Intrinsic::mips_and_v: 1591 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1592 Op->getOperand(2)); 1593 case Intrinsic::mips_andi_b: 1594 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1595 lowerMSASplatImm(Op, 2, DAG)); 1596 case Intrinsic::mips_bclr_b: 1597 case Intrinsic::mips_bclr_h: 1598 case Intrinsic::mips_bclr_w: 1599 case Intrinsic::mips_bclr_d: 1600 return lowerMSABitClear(Op, DAG); 1601 case Intrinsic::mips_bclri_b: 1602 case Intrinsic::mips_bclri_h: 1603 case Intrinsic::mips_bclri_w: 1604 case Intrinsic::mips_bclri_d: 1605 return lowerMSABitClearImm(Op, DAG); 1606 case Intrinsic::mips_binsli_b: 1607 case Intrinsic::mips_binsli_h: 1608 case Intrinsic::mips_binsli_w: 1609 case Intrinsic::mips_binsli_d: { 1610 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1611 EVT VecTy = Op->getValueType(0); 1612 EVT EltTy = VecTy.getVectorElementType(); 1613 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1614 report_fatal_error("Immediate out of range"); 1615 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1616 Op->getConstantOperandVal(3) + 1); 1617 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1618 DAG.getConstant(Mask, DL, VecTy, true), 1619 Op->getOperand(2), Op->getOperand(1)); 1620 } 1621 case Intrinsic::mips_binsri_b: 1622 case Intrinsic::mips_binsri_h: 1623 case Intrinsic::mips_binsri_w: 1624 case Intrinsic::mips_binsri_d: { 1625 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1626 EVT VecTy = Op->getValueType(0); 1627 EVT EltTy = VecTy.getVectorElementType(); 1628 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1629 report_fatal_error("Immediate out of range"); 1630 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1631 Op->getConstantOperandVal(3) + 1); 1632 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1633 DAG.getConstant(Mask, DL, VecTy, true), 1634 Op->getOperand(2), Op->getOperand(1)); 1635 } 1636 case Intrinsic::mips_bmnz_v: 1637 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1638 Op->getOperand(2), Op->getOperand(1)); 1639 case Intrinsic::mips_bmnzi_b: 1640 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1641 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1642 Op->getOperand(1)); 1643 case Intrinsic::mips_bmz_v: 1644 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1645 
Op->getOperand(1), Op->getOperand(2)); 1646 case Intrinsic::mips_bmzi_b: 1647 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1648 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1649 Op->getOperand(2)); 1650 case Intrinsic::mips_bneg_b: 1651 case Intrinsic::mips_bneg_h: 1652 case Intrinsic::mips_bneg_w: 1653 case Intrinsic::mips_bneg_d: { 1654 EVT VecTy = Op->getValueType(0); 1655 SDValue One = DAG.getConstant(1, DL, VecTy); 1656 1657 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1658 DAG.getNode(ISD::SHL, DL, VecTy, One, 1659 truncateVecElts(Op, DAG))); 1660 } 1661 case Intrinsic::mips_bnegi_b: 1662 case Intrinsic::mips_bnegi_h: 1663 case Intrinsic::mips_bnegi_w: 1664 case Intrinsic::mips_bnegi_d: 1665 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1666 !Subtarget.isLittle()); 1667 case Intrinsic::mips_bnz_b: 1668 case Intrinsic::mips_bnz_h: 1669 case Intrinsic::mips_bnz_w: 1670 case Intrinsic::mips_bnz_d: 1671 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1672 Op->getOperand(1)); 1673 case Intrinsic::mips_bnz_v: 1674 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1675 Op->getOperand(1)); 1676 case Intrinsic::mips_bsel_v: 1677 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1678 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1679 Op->getOperand(1), Op->getOperand(3), 1680 Op->getOperand(2)); 1681 case Intrinsic::mips_bseli_b: 1682 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1683 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1684 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1685 Op->getOperand(2)); 1686 case Intrinsic::mips_bset_b: 1687 case Intrinsic::mips_bset_h: 1688 case Intrinsic::mips_bset_w: 1689 case Intrinsic::mips_bset_d: { 1690 EVT VecTy = Op->getValueType(0); 1691 SDValue One = DAG.getConstant(1, DL, VecTy); 1692 1693 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1694 DAG.getNode(ISD::SHL, DL, VecTy, One, 1695 truncateVecElts(Op, DAG))); 1696 } 1697 case Intrinsic::mips_bseti_b: 1698 case Intrinsic::mips_bseti_h: 1699 case Intrinsic::mips_bseti_w: 1700 case Intrinsic::mips_bseti_d: 1701 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1702 !Subtarget.isLittle()); 1703 case Intrinsic::mips_bz_b: 1704 case Intrinsic::mips_bz_h: 1705 case Intrinsic::mips_bz_w: 1706 case Intrinsic::mips_bz_d: 1707 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1708 Op->getOperand(1)); 1709 case Intrinsic::mips_bz_v: 1710 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1711 Op->getOperand(1)); 1712 case Intrinsic::mips_ceq_b: 1713 case Intrinsic::mips_ceq_h: 1714 case Intrinsic::mips_ceq_w: 1715 case Intrinsic::mips_ceq_d: 1716 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1717 Op->getOperand(2), ISD::SETEQ); 1718 case Intrinsic::mips_ceqi_b: 1719 case Intrinsic::mips_ceqi_h: 1720 case Intrinsic::mips_ceqi_w: 1721 case Intrinsic::mips_ceqi_d: 1722 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1723 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1724 case Intrinsic::mips_cle_s_b: 1725 case Intrinsic::mips_cle_s_h: 1726 case Intrinsic::mips_cle_s_w: 1727 case Intrinsic::mips_cle_s_d: 1728 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1729 Op->getOperand(2), ISD::SETLE); 1730 case Intrinsic::mips_clei_s_b: 1731 case Intrinsic::mips_clei_s_h: 1732 case Intrinsic::mips_clei_s_w: 1733 case Intrinsic::mips_clei_s_d: 1734 return 
DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1735 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1736 case Intrinsic::mips_cle_u_b: 1737 case Intrinsic::mips_cle_u_h: 1738 case Intrinsic::mips_cle_u_w: 1739 case Intrinsic::mips_cle_u_d: 1740 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1741 Op->getOperand(2), ISD::SETULE); 1742 case Intrinsic::mips_clei_u_b: 1743 case Intrinsic::mips_clei_u_h: 1744 case Intrinsic::mips_clei_u_w: 1745 case Intrinsic::mips_clei_u_d: 1746 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1747 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1748 case Intrinsic::mips_clt_s_b: 1749 case Intrinsic::mips_clt_s_h: 1750 case Intrinsic::mips_clt_s_w: 1751 case Intrinsic::mips_clt_s_d: 1752 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1753 Op->getOperand(2), ISD::SETLT); 1754 case Intrinsic::mips_clti_s_b: 1755 case Intrinsic::mips_clti_s_h: 1756 case Intrinsic::mips_clti_s_w: 1757 case Intrinsic::mips_clti_s_d: 1758 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1759 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1760 case Intrinsic::mips_clt_u_b: 1761 case Intrinsic::mips_clt_u_h: 1762 case Intrinsic::mips_clt_u_w: 1763 case Intrinsic::mips_clt_u_d: 1764 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1765 Op->getOperand(2), ISD::SETULT); 1766 case Intrinsic::mips_clti_u_b: 1767 case Intrinsic::mips_clti_u_h: 1768 case Intrinsic::mips_clti_u_w: 1769 case Intrinsic::mips_clti_u_d: 1770 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1771 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1772 case Intrinsic::mips_copy_s_b: 1773 case Intrinsic::mips_copy_s_h: 1774 case Intrinsic::mips_copy_s_w: 1775 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1776 case Intrinsic::mips_copy_s_d: 1777 if (Subtarget.hasMips64()) 1778 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1779 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1780 else { 1781 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1782 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1783 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1784 Op->getValueType(0), Op->getOperand(1), 1785 Op->getOperand(2)); 1786 } 1787 case Intrinsic::mips_copy_u_b: 1788 case Intrinsic::mips_copy_u_h: 1789 case Intrinsic::mips_copy_u_w: 1790 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1791 case Intrinsic::mips_copy_u_d: 1792 if (Subtarget.hasMips64()) 1793 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1794 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1795 else { 1796 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1797 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1798 // Note: When i64 is illegal, this results in copy_s.w instructions 1799 // instead of copy_u.w instructions. This makes no difference to the 1800 // behaviour since i64 is only illegal when the register file is 32-bit. 
1801 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1802 Op->getValueType(0), Op->getOperand(1), 1803 Op->getOperand(2)); 1804 } 1805 case Intrinsic::mips_div_s_b: 1806 case Intrinsic::mips_div_s_h: 1807 case Intrinsic::mips_div_s_w: 1808 case Intrinsic::mips_div_s_d: 1809 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1810 Op->getOperand(2)); 1811 case Intrinsic::mips_div_u_b: 1812 case Intrinsic::mips_div_u_h: 1813 case Intrinsic::mips_div_u_w: 1814 case Intrinsic::mips_div_u_d: 1815 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1816 Op->getOperand(2)); 1817 case Intrinsic::mips_fadd_w: 1818 case Intrinsic::mips_fadd_d: 1819 // TODO: If intrinsics have fast-math-flags, propagate them. 1820 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1821 Op->getOperand(2)); 1822 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1823 case Intrinsic::mips_fceq_w: 1824 case Intrinsic::mips_fceq_d: 1825 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1826 Op->getOperand(2), ISD::SETOEQ); 1827 case Intrinsic::mips_fcle_w: 1828 case Intrinsic::mips_fcle_d: 1829 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1830 Op->getOperand(2), ISD::SETOLE); 1831 case Intrinsic::mips_fclt_w: 1832 case Intrinsic::mips_fclt_d: 1833 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1834 Op->getOperand(2), ISD::SETOLT); 1835 case Intrinsic::mips_fcne_w: 1836 case Intrinsic::mips_fcne_d: 1837 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1838 Op->getOperand(2), ISD::SETONE); 1839 case Intrinsic::mips_fcor_w: 1840 case Intrinsic::mips_fcor_d: 1841 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1842 Op->getOperand(2), ISD::SETO); 1843 case Intrinsic::mips_fcueq_w: 1844 case Intrinsic::mips_fcueq_d: 1845 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1846 Op->getOperand(2), ISD::SETUEQ); 1847 case Intrinsic::mips_fcule_w: 1848 case Intrinsic::mips_fcule_d: 1849 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1850 Op->getOperand(2), ISD::SETULE); 1851 case Intrinsic::mips_fcult_w: 1852 case Intrinsic::mips_fcult_d: 1853 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1854 Op->getOperand(2), ISD::SETULT); 1855 case Intrinsic::mips_fcun_w: 1856 case Intrinsic::mips_fcun_d: 1857 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1858 Op->getOperand(2), ISD::SETUO); 1859 case Intrinsic::mips_fcune_w: 1860 case Intrinsic::mips_fcune_d: 1861 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1862 Op->getOperand(2), ISD::SETUNE); 1863 case Intrinsic::mips_fdiv_w: 1864 case Intrinsic::mips_fdiv_d: 1865 // TODO: If intrinsics have fast-math-flags, propagate them. 
1866 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1867 Op->getOperand(2)); 1868 case Intrinsic::mips_ffint_u_w: 1869 case Intrinsic::mips_ffint_u_d: 1870 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1871 Op->getOperand(1)); 1872 case Intrinsic::mips_ffint_s_w: 1873 case Intrinsic::mips_ffint_s_d: 1874 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1875 Op->getOperand(1)); 1876 case Intrinsic::mips_fill_b: 1877 case Intrinsic::mips_fill_h: 1878 case Intrinsic::mips_fill_w: 1879 case Intrinsic::mips_fill_d: { 1880 EVT ResTy = Op->getValueType(0); 1881 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1882 Op->getOperand(1)); 1883 1884 // If ResTy is v2i64 then the type legalizer will break this node down into 1885 // an equivalent v4i32. 1886 return DAG.getBuildVector(ResTy, DL, Ops); 1887 } 1888 case Intrinsic::mips_fexp2_w: 1889 case Intrinsic::mips_fexp2_d: { 1890 // TODO: If intrinsics have fast-math-flags, propagate them. 1891 EVT ResTy = Op->getValueType(0); 1892 return DAG.getNode( 1893 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1894 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1895 } 1896 case Intrinsic::mips_flog2_w: 1897 case Intrinsic::mips_flog2_d: 1898 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1899 case Intrinsic::mips_fmadd_w: 1900 case Intrinsic::mips_fmadd_d: 1901 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1902 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1903 case Intrinsic::mips_fmul_w: 1904 case Intrinsic::mips_fmul_d: 1905 // TODO: If intrinsics have fast-math-flags, propagate them. 1906 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1907 Op->getOperand(2)); 1908 case Intrinsic::mips_fmsub_w: 1909 case Intrinsic::mips_fmsub_d: { 1910 // TODO: If intrinsics have fast-math-flags, propagate them. 1911 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1912 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1913 } 1914 case Intrinsic::mips_frint_w: 1915 case Intrinsic::mips_frint_d: 1916 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1917 case Intrinsic::mips_fsqrt_w: 1918 case Intrinsic::mips_fsqrt_d: 1919 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1920 case Intrinsic::mips_fsub_w: 1921 case Intrinsic::mips_fsub_d: 1922 // TODO: If intrinsics have fast-math-flags, propagate them. 
1923 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1924 Op->getOperand(2)); 1925 case Intrinsic::mips_ftrunc_u_w: 1926 case Intrinsic::mips_ftrunc_u_d: 1927 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1928 Op->getOperand(1)); 1929 case Intrinsic::mips_ftrunc_s_w: 1930 case Intrinsic::mips_ftrunc_s_d: 1931 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1932 Op->getOperand(1)); 1933 case Intrinsic::mips_ilvev_b: 1934 case Intrinsic::mips_ilvev_h: 1935 case Intrinsic::mips_ilvev_w: 1936 case Intrinsic::mips_ilvev_d: 1937 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1938 Op->getOperand(1), Op->getOperand(2)); 1939 case Intrinsic::mips_ilvl_b: 1940 case Intrinsic::mips_ilvl_h: 1941 case Intrinsic::mips_ilvl_w: 1942 case Intrinsic::mips_ilvl_d: 1943 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1944 Op->getOperand(1), Op->getOperand(2)); 1945 case Intrinsic::mips_ilvod_b: 1946 case Intrinsic::mips_ilvod_h: 1947 case Intrinsic::mips_ilvod_w: 1948 case Intrinsic::mips_ilvod_d: 1949 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1950 Op->getOperand(1), Op->getOperand(2)); 1951 case Intrinsic::mips_ilvr_b: 1952 case Intrinsic::mips_ilvr_h: 1953 case Intrinsic::mips_ilvr_w: 1954 case Intrinsic::mips_ilvr_d: 1955 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1956 Op->getOperand(1), Op->getOperand(2)); 1957 case Intrinsic::mips_insert_b: 1958 case Intrinsic::mips_insert_h: 1959 case Intrinsic::mips_insert_w: 1960 case Intrinsic::mips_insert_d: 1961 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1962 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1963 case Intrinsic::mips_insve_b: 1964 case Intrinsic::mips_insve_h: 1965 case Intrinsic::mips_insve_w: 1966 case Intrinsic::mips_insve_d: { 1967 // Report an error for out of range values. 
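// The lane index must name an existing lane of the destination vector, so
// the bound depends on the element count of each format.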
1968 int64_t Max; 1969 switch (Intrinsic) { 1970 case Intrinsic::mips_insve_b: Max = 15; break; 1971 case Intrinsic::mips_insve_h: Max = 7; break; 1972 case Intrinsic::mips_insve_w: Max = 3; break; 1973 case Intrinsic::mips_insve_d: Max = 1; break; 1974 default: llvm_unreachable("Unmatched intrinsic"); 1975 } 1976 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1977 if (Value < 0 || Value > Max) 1978 report_fatal_error("Immediate out of range"); 1979 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1980 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1981 DAG.getConstant(0, DL, MVT::i32)); 1982 } 1983 case Intrinsic::mips_ldi_b: 1984 case Intrinsic::mips_ldi_h: 1985 case Intrinsic::mips_ldi_w: 1986 case Intrinsic::mips_ldi_d: 1987 return lowerMSASplatImm(Op, 1, DAG, true); 1988 case Intrinsic::mips_lsa: 1989 case Intrinsic::mips_dlsa: { 1990 EVT ResTy = Op->getValueType(0); 1991 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1992 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1993 Op->getOperand(2), Op->getOperand(3))); 1994 } 1995 case Intrinsic::mips_maddv_b: 1996 case Intrinsic::mips_maddv_h: 1997 case Intrinsic::mips_maddv_w: 1998 case Intrinsic::mips_maddv_d: { 1999 EVT ResTy = Op->getValueType(0); 2000 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2001 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2002 Op->getOperand(2), Op->getOperand(3))); 2003 } 2004 case Intrinsic::mips_max_s_b: 2005 case Intrinsic::mips_max_s_h: 2006 case Intrinsic::mips_max_s_w: 2007 case Intrinsic::mips_max_s_d: 2008 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2009 Op->getOperand(1), Op->getOperand(2)); 2010 case Intrinsic::mips_max_u_b: 2011 case Intrinsic::mips_max_u_h: 2012 case Intrinsic::mips_max_u_w: 2013 case Intrinsic::mips_max_u_d: 2014 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2015 Op->getOperand(1), Op->getOperand(2)); 2016 case Intrinsic::mips_maxi_s_b: 2017 case Intrinsic::mips_maxi_s_h: 2018 case Intrinsic::mips_maxi_s_w: 2019 case Intrinsic::mips_maxi_s_d: 2020 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2021 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2022 case Intrinsic::mips_maxi_u_b: 2023 case Intrinsic::mips_maxi_u_h: 2024 case Intrinsic::mips_maxi_u_w: 2025 case Intrinsic::mips_maxi_u_d: 2026 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2027 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2028 case Intrinsic::mips_min_s_b: 2029 case Intrinsic::mips_min_s_h: 2030 case Intrinsic::mips_min_s_w: 2031 case Intrinsic::mips_min_s_d: 2032 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2033 Op->getOperand(1), Op->getOperand(2)); 2034 case Intrinsic::mips_min_u_b: 2035 case Intrinsic::mips_min_u_h: 2036 case Intrinsic::mips_min_u_w: 2037 case Intrinsic::mips_min_u_d: 2038 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2039 Op->getOperand(1), Op->getOperand(2)); 2040 case Intrinsic::mips_mini_s_b: 2041 case Intrinsic::mips_mini_s_h: 2042 case Intrinsic::mips_mini_s_w: 2043 case Intrinsic::mips_mini_s_d: 2044 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2045 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2046 case Intrinsic::mips_mini_u_b: 2047 case Intrinsic::mips_mini_u_h: 2048 case Intrinsic::mips_mini_u_w: 2049 case Intrinsic::mips_mini_u_d: 2050 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2051 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2052 case Intrinsic::mips_mod_s_b: 2053 case Intrinsic::mips_mod_s_h: 2054 case 
Intrinsic::mips_mod_s_w: 2055 case Intrinsic::mips_mod_s_d: 2056 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2057 Op->getOperand(2)); 2058 case Intrinsic::mips_mod_u_b: 2059 case Intrinsic::mips_mod_u_h: 2060 case Intrinsic::mips_mod_u_w: 2061 case Intrinsic::mips_mod_u_d: 2062 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2063 Op->getOperand(2)); 2064 case Intrinsic::mips_mulv_b: 2065 case Intrinsic::mips_mulv_h: 2066 case Intrinsic::mips_mulv_w: 2067 case Intrinsic::mips_mulv_d: 2068 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2069 Op->getOperand(2)); 2070 case Intrinsic::mips_msubv_b: 2071 case Intrinsic::mips_msubv_h: 2072 case Intrinsic::mips_msubv_w: 2073 case Intrinsic::mips_msubv_d: { 2074 EVT ResTy = Op->getValueType(0); 2075 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2076 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2077 Op->getOperand(2), Op->getOperand(3))); 2078 } 2079 case Intrinsic::mips_nlzc_b: 2080 case Intrinsic::mips_nlzc_h: 2081 case Intrinsic::mips_nlzc_w: 2082 case Intrinsic::mips_nlzc_d: 2083 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2084 case Intrinsic::mips_nor_v: { 2085 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2086 Op->getOperand(1), Op->getOperand(2)); 2087 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2088 } 2089 case Intrinsic::mips_nori_b: { 2090 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2091 Op->getOperand(1), 2092 lowerMSASplatImm(Op, 2, DAG)); 2093 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2094 } 2095 case Intrinsic::mips_or_v: 2096 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2097 Op->getOperand(2)); 2098 case Intrinsic::mips_ori_b: 2099 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2100 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2101 case Intrinsic::mips_pckev_b: 2102 case Intrinsic::mips_pckev_h: 2103 case Intrinsic::mips_pckev_w: 2104 case Intrinsic::mips_pckev_d: 2105 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2106 Op->getOperand(1), Op->getOperand(2)); 2107 case Intrinsic::mips_pckod_b: 2108 case Intrinsic::mips_pckod_h: 2109 case Intrinsic::mips_pckod_w: 2110 case Intrinsic::mips_pckod_d: 2111 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2112 Op->getOperand(1), Op->getOperand(2)); 2113 case Intrinsic::mips_pcnt_b: 2114 case Intrinsic::mips_pcnt_h: 2115 case Intrinsic::mips_pcnt_w: 2116 case Intrinsic::mips_pcnt_d: 2117 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2118 case Intrinsic::mips_sat_s_b: 2119 case Intrinsic::mips_sat_s_h: 2120 case Intrinsic::mips_sat_s_w: 2121 case Intrinsic::mips_sat_s_d: 2122 case Intrinsic::mips_sat_u_b: 2123 case Intrinsic::mips_sat_u_h: 2124 case Intrinsic::mips_sat_u_w: 2125 case Intrinsic::mips_sat_u_d: { 2126 // Report an error for out of range values. 
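// The saturation position selects a bit within each element, so it must lie
// in [0, element width - 1]; the per-format maxima below encode exactly that.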
2127 int64_t Max; 2128 switch (Intrinsic) { 2129 case Intrinsic::mips_sat_s_b: 2130 case Intrinsic::mips_sat_u_b: Max = 7; break; 2131 case Intrinsic::mips_sat_s_h: 2132 case Intrinsic::mips_sat_u_h: Max = 15; break; 2133 case Intrinsic::mips_sat_s_w: 2134 case Intrinsic::mips_sat_u_w: Max = 31; break; 2135 case Intrinsic::mips_sat_s_d: 2136 case Intrinsic::mips_sat_u_d: Max = 63; break; 2137 default: llvm_unreachable("Unmatched intrinsic"); 2138 } 2139 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2140 if (Value < 0 || Value > Max) 2141 report_fatal_error("Immediate out of range"); 2142 return SDValue(); 2143 } 2144 case Intrinsic::mips_shf_b: 2145 case Intrinsic::mips_shf_h: 2146 case Intrinsic::mips_shf_w: { 2147 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2148 if (Value < 0 || Value > 255) 2149 report_fatal_error("Immediate out of range"); 2150 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2151 Op->getOperand(2), Op->getOperand(1)); 2152 } 2153 case Intrinsic::mips_sldi_b: 2154 case Intrinsic::mips_sldi_h: 2155 case Intrinsic::mips_sldi_w: 2156 case Intrinsic::mips_sldi_d: { 2157 // Report an error for out of range values. 2158 int64_t Max; 2159 switch (Intrinsic) { 2160 case Intrinsic::mips_sldi_b: Max = 15; break; 2161 case Intrinsic::mips_sldi_h: Max = 7; break; 2162 case Intrinsic::mips_sldi_w: Max = 3; break; 2163 case Intrinsic::mips_sldi_d: Max = 1; break; 2164 default: llvm_unreachable("Unmatched intrinsic"); 2165 } 2166 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2167 if (Value < 0 || Value > Max) 2168 report_fatal_error("Immediate out of range"); 2169 return SDValue(); 2170 } 2171 case Intrinsic::mips_sll_b: 2172 case Intrinsic::mips_sll_h: 2173 case Intrinsic::mips_sll_w: 2174 case Intrinsic::mips_sll_d: 2175 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2176 truncateVecElts(Op, DAG)); 2177 case Intrinsic::mips_slli_b: 2178 case Intrinsic::mips_slli_h: 2179 case Intrinsic::mips_slli_w: 2180 case Intrinsic::mips_slli_d: 2181 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2182 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2183 case Intrinsic::mips_splat_b: 2184 case Intrinsic::mips_splat_h: 2185 case Intrinsic::mips_splat_w: 2186 case Intrinsic::mips_splat_d: 2187 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2188 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2189 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2190 // Instead we lower to MipsISD::VSHF and match from there. 
2191 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2192 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2193 Op->getOperand(1)); 2194 case Intrinsic::mips_splati_b: 2195 case Intrinsic::mips_splati_h: 2196 case Intrinsic::mips_splati_w: 2197 case Intrinsic::mips_splati_d: 2198 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2199 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2200 Op->getOperand(1)); 2201 case Intrinsic::mips_sra_b: 2202 case Intrinsic::mips_sra_h: 2203 case Intrinsic::mips_sra_w: 2204 case Intrinsic::mips_sra_d: 2205 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2206 truncateVecElts(Op, DAG)); 2207 case Intrinsic::mips_srai_b: 2208 case Intrinsic::mips_srai_h: 2209 case Intrinsic::mips_srai_w: 2210 case Intrinsic::mips_srai_d: 2211 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2212 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2213 case Intrinsic::mips_srari_b: 2214 case Intrinsic::mips_srari_h: 2215 case Intrinsic::mips_srari_w: 2216 case Intrinsic::mips_srari_d: { 2217 // Report an error for out of range values. 2218 int64_t Max; 2219 switch (Intrinsic) { 2220 case Intrinsic::mips_srari_b: Max = 7; break; 2221 case Intrinsic::mips_srari_h: Max = 15; break; 2222 case Intrinsic::mips_srari_w: Max = 31; break; 2223 case Intrinsic::mips_srari_d: Max = 63; break; 2224 default: llvm_unreachable("Unmatched intrinsic"); 2225 } 2226 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2227 if (Value < 0 || Value > Max) 2228 report_fatal_error("Immediate out of range"); 2229 return SDValue(); 2230 } 2231 case Intrinsic::mips_srl_b: 2232 case Intrinsic::mips_srl_h: 2233 case Intrinsic::mips_srl_w: 2234 case Intrinsic::mips_srl_d: 2235 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2236 truncateVecElts(Op, DAG)); 2237 case Intrinsic::mips_srli_b: 2238 case Intrinsic::mips_srli_h: 2239 case Intrinsic::mips_srli_w: 2240 case Intrinsic::mips_srli_d: 2241 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2242 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2243 case Intrinsic::mips_srlri_b: 2244 case Intrinsic::mips_srlri_h: 2245 case Intrinsic::mips_srlri_w: 2246 case Intrinsic::mips_srlri_d: { 2247 // Report an error for out of range values. 
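// Rounding right shifts accept the same immediates as ordinary shifts: the
// shift amount must lie in [0, element width - 1].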
2248 int64_t Max; 2249 switch (Intrinsic) { 2250 case Intrinsic::mips_srlri_b: Max = 7; break; 2251 case Intrinsic::mips_srlri_h: Max = 15; break; 2252 case Intrinsic::mips_srlri_w: Max = 31; break; 2253 case Intrinsic::mips_srlri_d: Max = 63; break; 2254 default: llvm_unreachable("Unmatched intrinsic"); 2255 } 2256 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2257 if (Value < 0 || Value > Max) 2258 report_fatal_error("Immediate out of range"); 2259 return SDValue(); 2260 } 2261 case Intrinsic::mips_subv_b: 2262 case Intrinsic::mips_subv_h: 2263 case Intrinsic::mips_subv_w: 2264 case Intrinsic::mips_subv_d: 2265 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2266 Op->getOperand(2)); 2267 case Intrinsic::mips_subvi_b: 2268 case Intrinsic::mips_subvi_h: 2269 case Intrinsic::mips_subvi_w: 2270 case Intrinsic::mips_subvi_d: 2271 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2272 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2273 case Intrinsic::mips_vshf_b: 2274 case Intrinsic::mips_vshf_h: 2275 case Intrinsic::mips_vshf_w: 2276 case Intrinsic::mips_vshf_d: 2277 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2278 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2279 case Intrinsic::mips_xor_v: 2280 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2281 Op->getOperand(2)); 2282 case Intrinsic::mips_xori_b: 2283 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2284 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2285 case Intrinsic::thread_pointer: { 2286 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2287 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2288 } 2289 } 2290 } 2291 2292 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2293 const MipsSubtarget &Subtarget) { 2294 SDLoc DL(Op); 2295 SDValue ChainIn = Op->getOperand(0); 2296 SDValue Address = Op->getOperand(2); 2297 SDValue Offset = Op->getOperand(3); 2298 EVT ResTy = Op->getValueType(0); 2299 EVT PtrTy = Address->getValueType(0); 2300 2301 // For N64, addresses have the underlying type MVT::i64. This intrinsic 2302 // however takes an i32 signed constant offset. The actual type of the 2303 // intrinsic is a scaled signed i10.
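// For example, ld.w can encode only byte offsets that are multiples of 4 in
// [-2048, 2044]; the explicit ADD below keeps arbitrary offsets correct and
// lets instruction selection fold encodable ones back into the load.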
2304 if (Subtarget.isABI_N64()) 2305 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2306 2307 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2308 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2309 /* Alignment = */ 16); 2310 } 2311 2312 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2313 SelectionDAG &DAG) const { 2314 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2315 switch (Intr) { 2316 default: 2317 return SDValue(); 2318 case Intrinsic::mips_extp: 2319 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2320 case Intrinsic::mips_extpdp: 2321 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2322 case Intrinsic::mips_extr_w: 2323 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2324 case Intrinsic::mips_extr_r_w: 2325 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2326 case Intrinsic::mips_extr_rs_w: 2327 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2328 case Intrinsic::mips_extr_s_h: 2329 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2330 case Intrinsic::mips_mthlip: 2331 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2332 case Intrinsic::mips_mulsaq_s_w_ph: 2333 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2334 case Intrinsic::mips_maq_s_w_phl: 2335 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2336 case Intrinsic::mips_maq_s_w_phr: 2337 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2338 case Intrinsic::mips_maq_sa_w_phl: 2339 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2340 case Intrinsic::mips_maq_sa_w_phr: 2341 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2342 case Intrinsic::mips_dpaq_s_w_ph: 2343 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2344 case Intrinsic::mips_dpsq_s_w_ph: 2345 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2346 case Intrinsic::mips_dpaq_sa_l_w: 2347 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2348 case Intrinsic::mips_dpsq_sa_l_w: 2349 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2350 case Intrinsic::mips_dpaqx_s_w_ph: 2351 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2352 case Intrinsic::mips_dpaqx_sa_w_ph: 2353 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2354 case Intrinsic::mips_dpsqx_s_w_ph: 2355 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2356 case Intrinsic::mips_dpsqx_sa_w_ph: 2357 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2358 case Intrinsic::mips_ld_b: 2359 case Intrinsic::mips_ld_h: 2360 case Intrinsic::mips_ld_w: 2361 case Intrinsic::mips_ld_d: 2362 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2363 } 2364 } 2365 2366 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2367 const MipsSubtarget &Subtarget) { 2368 SDLoc DL(Op); 2369 SDValue ChainIn = Op->getOperand(0); 2370 SDValue Value = Op->getOperand(2); 2371 SDValue Address = Op->getOperand(3); 2372 SDValue Offset = Op->getOperand(4); 2373 EVT PtrTy = Address->getValueType(0); 2374 2375 // For N64, addresses have the underlying type MVT::i64. This intrinsic 2376 // however takes an i32 signed constant offset. The actual type of the 2377 // intrinsic is a scaled signed i10.
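// As in lowerMSALoadIntr, st.d for instance can encode only byte offsets
// that are multiples of 8 in [-4096, 4088]; the ADD below handles the rest.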
2378 if (Subtarget.isABI_N64()) 2379 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2380 2381 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2382 2383 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2384 /* Alignment = */ 16); 2385 } 2386 2387 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2388 SelectionDAG &DAG) const { 2389 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2390 switch (Intr) { 2391 default: 2392 return SDValue(); 2393 case Intrinsic::mips_st_b: 2394 case Intrinsic::mips_st_h: 2395 case Intrinsic::mips_st_w: 2396 case Intrinsic::mips_st_d: 2397 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2398 } 2399 } 2400 2401 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2402 // 2403 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2404 // choose to sign-extend but we could have equally chosen zero-extend. The 2405 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2406 // result into this node later (possibly changing it to a zero-extend in the 2407 // process). 2408 SDValue MipsSETargetLowering:: 2409 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2410 SDLoc DL(Op); 2411 EVT ResTy = Op->getValueType(0); 2412 SDValue Op0 = Op->getOperand(0); 2413 EVT VecTy = Op0->getValueType(0); 2414 2415 if (!VecTy.is128BitVector()) 2416 return SDValue(); 2417 2418 if (ResTy.isInteger()) { 2419 SDValue Op1 = Op->getOperand(1); 2420 EVT EltTy = VecTy.getVectorElementType(); 2421 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2422 DAG.getValueType(EltTy)); 2423 } 2424 2425 return Op; 2426 } 2427 2428 static bool isConstantOrUndef(const SDValue Op) { 2429 if (Op->isUndef()) 2430 return true; 2431 if (isa<ConstantSDNode>(Op)) 2432 return true; 2433 if (isa<ConstantFPSDNode>(Op)) 2434 return true; 2435 return false; 2436 } 2437 2438 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2439 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2440 if (isConstantOrUndef(Op->getOperand(i))) 2441 return true; 2442 return false; 2443 } 2444 2445 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2446 // backend. 2447 // 2448 // Lowers according to the following rules: 2449 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2450 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2451 // immediate 2452 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2453 // is a power of 2 less than or equal to 64 and the value does not fit into a 2454 // signed 10-bit immediate 2455 // - Non-constant splats are legal as-is. 2456 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2457 // - All others are illegal and must be expanded. 
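// For example, a constant v4f32 splat of 1.0f is not an integer splat, so it
// is rebuilt below as a v4i32 splat of 0x3f800000 and bitcast back to v4f32.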
2458 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2459 SelectionDAG &DAG) const { 2460 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2461 EVT ResTy = Op->getValueType(0); 2462 SDLoc DL(Op); 2463 APInt SplatValue, SplatUndef; 2464 unsigned SplatBitSize; 2465 bool HasAnyUndefs; 2466 2467 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2468 return SDValue(); 2469 2470 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2471 HasAnyUndefs, 8, 2472 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2473 // We can only cope with 8, 16, 32, or 64-bit elements 2474 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2475 SplatBitSize != 64) 2476 return SDValue(); 2477 2478 // If the value isn't an integer type we will have to bitcast 2479 // from an integer type first. Also, if there are any undefs, we must 2480 // lower them to defined values first. 2481 if (ResTy.isInteger() && !HasAnyUndefs) 2482 return Op; 2483 2484 EVT ViaVecTy; 2485 2486 switch (SplatBitSize) { 2487 default: 2488 return SDValue(); 2489 case 8: 2490 ViaVecTy = MVT::v16i8; 2491 break; 2492 case 16: 2493 ViaVecTy = MVT::v8i16; 2494 break; 2495 case 32: 2496 ViaVecTy = MVT::v4i32; 2497 break; 2498 case 64: 2499 // There's no fill.d to fall back on for 64-bit values 2500 return SDValue(); 2501 } 2502 2503 // SelectionDAG::getConstant will promote SplatValue appropriately. 2504 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2505 2506 // Bitcast to the type we originally wanted 2507 if (ViaVecTy != ResTy) 2508 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2509 2510 return Result; 2511 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) 2512 return Op; 2513 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2514 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 2515 // The resulting code is the same length as the expansion, but it doesn't 2516 // use memory operations 2517 EVT ResTy = Node->getValueType(0); 2518 2519 assert(ResTy.isVector()); 2520 2521 unsigned NumElts = ResTy.getVectorNumElements(); 2522 SDValue Vector = DAG.getUNDEF(ResTy); 2523 for (unsigned i = 0; i < NumElts; ++i) { 2524 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2525 Node->getOperand(i), 2526 DAG.getConstant(i, DL, MVT::i32)); 2527 } 2528 return Vector; 2529 } 2530 2531 return SDValue(); 2532 } 2533 2534 // Lower VECTOR_SHUFFLE into SHF (if possible). 2535 // 2536 // SHF splits the vector into blocks of four elements, then shuffles these 2537 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2538 // 2539 // It is therefore possible to lower into SHF when the mask takes the form: 2540 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2541 // When undef's appear they are treated as if they were whatever value is 2542 // necessary in order to fit the above forms. 
2543 // 2544 // For example: 2545 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2546 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2547 // i32 7, i32 6, i32 5, i32 4> 2548 // is lowered to: 2549 // (SHF_H $w0, $w1, 27) 2550 // where the 27 comes from: 2551 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2552 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2553 SmallVector<int, 16> Indices, 2554 SelectionDAG &DAG) { 2555 int SHFIndices[4] = { -1, -1, -1, -1 }; 2556 2557 if (Indices.size() < 4) 2558 return SDValue(); 2559 2560 for (unsigned i = 0; i < 4; ++i) { 2561 for (unsigned j = i; j < Indices.size(); j += 4) { 2562 int Idx = Indices[j]; 2563 2564 // Convert from vector index to 4-element subvector index 2565 // If an index refers to an element outside of the subvector then give up 2566 if (Idx != -1) { 2567 Idx -= 4 * (j / 4); 2568 if (Idx < 0 || Idx >= 4) 2569 return SDValue(); 2570 } 2571 2572 // If the mask has an undef, replace it with the current index. 2573 // Note that it might still be undef if the current index is also undef 2574 if (SHFIndices[i] == -1) 2575 SHFIndices[i] = Idx; 2576 2577 // Check that non-undef values are the same as in the mask. If they 2578 // aren't then give up 2579 if (!(Idx == -1 || Idx == SHFIndices[i])) 2580 return SDValue(); 2581 } 2582 } 2583 2584 // Calculate the immediate. Replace any remaining undefs with zero 2585 APInt Imm(32, 0); 2586 for (int i = 3; i >= 0; --i) { 2587 int Idx = SHFIndices[i]; 2588 2589 if (Idx == -1) 2590 Idx = 0; 2591 2592 Imm <<= 2; 2593 Imm |= Idx & 0x3; 2594 } 2595 2596 SDLoc DL(Op); 2597 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2598 DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); 2599 } 2600 2601 /// Determine whether a range fits a regular pattern of values. 2602 /// This function accounts for the possibility of jumping over the End iterator. 2603 template <typename ValType> 2604 static bool 2605 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2606 unsigned CheckStride, 2607 typename SmallVectorImpl<ValType>::const_iterator End, 2608 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2609 auto &I = Begin; 2610 2611 while (I != End) { 2612 if (*I != -1 && *I != ExpectedIndex) 2613 return false; 2614 ExpectedIndex += ExpectedIndexStride; 2615 2616 // Incrementing past End is undefined behaviour so we must increment one 2617 // step at a time and check for End at each step. 2618 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2619 ; // Empty loop body. 2620 } 2621 return true; 2622 } 2623 2624 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2625 // 2626 // It is a SPLATI when the mask is: 2627 // <x, x, x, ...> 2628 // where x is any valid index. 2629 // 2630 // When undef's appear in the mask they are treated as if they were whatever 2631 // value is necessary in order to fit the above form. 2632 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2633 SmallVector<int, 16> Indices, 2634 SelectionDAG &DAG) { 2635 assert((Indices.size() % 2) == 0); 2636 2637 int SplatIndex = -1; 2638 for (const auto &V : Indices) { 2639 if (V != -1) { 2640 SplatIndex = V; 2641 break; 2642 } 2643 } 2644 2645 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2646 0); 2647 } 2648 2649 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2650 // 2651 // ILVEV interleaves the even elements from each vector. 
2652 // 2653 // It is possible to lower into ILVEV when the mask consists of two of the 2654 // following forms interleaved: 2655 // <0, 2, 4, ...> 2656 // <n, n+2, n+4, ...> 2657 // where n is the number of elements in the vector. 2658 // For example: 2659 // <0, 0, 2, 2, 4, 4, ...> 2660 // <0, n, 2, n+2, 4, n+4, ...> 2661 // 2662 // When undef's appear in the mask they are treated as if they were whatever 2663 // value is necessary in order to fit the above forms. 2664 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2665 SmallVector<int, 16> Indices, 2666 SelectionDAG &DAG) { 2667 assert((Indices.size() % 2) == 0); 2668 2669 SDValue Wt; 2670 SDValue Ws; 2671 const auto &Begin = Indices.begin(); 2672 const auto &End = Indices.end(); 2673 2674 // Check even elements are taken from the even elements of one half or the 2675 // other and pick an operand accordingly. 2676 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2677 Wt = Op->getOperand(0); 2678 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2679 Wt = Op->getOperand(1); 2680 else 2681 return SDValue(); 2682 2683 // Check odd elements are taken from the even elements of one half or the 2684 // other and pick an operand accordingly. 2685 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2686 Ws = Op->getOperand(0); 2687 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2688 Ws = Op->getOperand(1); 2689 else 2690 return SDValue(); 2691 2692 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2693 } 2694 2695 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2696 // 2697 // ILVOD interleaves the odd elements from each vector. 2698 // 2699 // It is possible to lower into ILVOD when the mask consists of two of the 2700 // following forms interleaved: 2701 // <1, 3, 5, ...> 2702 // <n+1, n+3, n+5, ...> 2703 // where n is the number of elements in the vector. 2704 // For example: 2705 // <1, 1, 3, 3, 5, 5, ...> 2706 // <1, n+1, 3, n+3, 5, n+5, ...> 2707 // 2708 // When undef's appear in the mask they are treated as if they were whatever 2709 // value is necessary in order to fit the above forms. 2710 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2711 SmallVector<int, 16> Indices, 2712 SelectionDAG &DAG) { 2713 assert((Indices.size() % 2) == 0); 2714 2715 SDValue Wt; 2716 SDValue Ws; 2717 const auto &Begin = Indices.begin(); 2718 const auto &End = Indices.end(); 2719 2720 // Check even elements are taken from the odd elements of one half or the 2721 // other and pick an operand accordingly. 2722 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2723 Wt = Op->getOperand(0); 2724 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2725 Wt = Op->getOperand(1); 2726 else 2727 return SDValue(); 2728 2729 // Check odd elements are taken from the odd elements of one half or the 2730 // other and pick an operand accordingly. 2731 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2732 Ws = Op->getOperand(0); 2733 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2734 Ws = Op->getOperand(1); 2735 else 2736 return SDValue(); 2737 2738 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2739 } 2740 2741 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2742 // 2743 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2744 // each vector. 
2745 // 2746 // It is possible to lower into ILVR when the mask consists of two of the 2747 // following forms interleaved: 2748 // <0, 1, 2, ...> 2749 // <n, n+1, n+2, ...> 2750 // where n is the number of elements in the vector. 2751 // For example: 2752 // <0, 0, 1, 1, 2, 2, ...> 2753 // <0, n, 1, n+1, 2, n+2, ...> 2754 // 2755 // When undef's appear in the mask they are treated as if they were whatever 2756 // value is necessary in order to fit the above forms. 2757 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2758 SmallVector<int, 16> Indices, 2759 SelectionDAG &DAG) { 2760 assert((Indices.size() % 2) == 0); 2761 2762 SDValue Wt; 2763 SDValue Ws; 2764 const auto &Begin = Indices.begin(); 2765 const auto &End = Indices.end(); 2766 2767 // Check even elements are taken from the right (lowest-indexed) elements of 2768 // one half or the other and pick an operand accordingly. 2769 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2770 Wt = Op->getOperand(0); 2771 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2772 Wt = Op->getOperand(1); 2773 else 2774 return SDValue(); 2775 2776 // Check odd elements are taken from the right (lowest-indexed) elements of 2777 // one half or the other and pick an operand accordingly. 2778 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2779 Ws = Op->getOperand(0); 2780 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2781 Ws = Op->getOperand(1); 2782 else 2783 return SDValue(); 2784 2785 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2786 } 2787 2788 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2789 // 2790 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2791 // of each vector. 2792 // 2793 // It is possible to lower into ILVL when the mask consists of two of the 2794 // following forms interleaved: 2795 // <x, x+1, x+2, ...> 2796 // <n+x, n+x+1, n+x+2, ...> 2797 // where n is the number of elements in the vector and x is half n. 2798 // For example: 2799 // <x, x, x+1, x+1, x+2, x+2, ...> 2800 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2801 // 2802 // When undef's appear in the mask they are treated as if they were whatever 2803 // value is necessary in order to fit the above forms. 2804 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2805 SmallVector<int, 16> Indices, 2806 SelectionDAG &DAG) { 2807 assert((Indices.size() % 2) == 0); 2808 2809 unsigned HalfSize = Indices.size() / 2; 2810 SDValue Wt; 2811 SDValue Ws; 2812 const auto &Begin = Indices.begin(); 2813 const auto &End = Indices.end(); 2814 2815 // Check even elements are taken from the left (highest-indexed) elements of 2816 // one half or the other and pick an operand accordingly. 2817 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2818 Wt = Op->getOperand(0); 2819 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2820 Wt = Op->getOperand(1); 2821 else 2822 return SDValue(); 2823 2824 // Check odd elements are taken from the left (highest-indexed) elements of 2825 // one half or the other and pick an operand accordingly. 2826 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2827 Ws = Op->getOperand(0); 2828 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2829 1)) 2830 Ws = Op->getOperand(1); 2831 else 2832 return SDValue(); 2833 2834 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2835 } 2836 2837 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 
2838 // 2839 // PCKEV copies the even elements of each vector into the result vector. 2840 // 2841 // It is possible to lower into PCKEV when the mask consists of two of the 2842 // following forms concatenated: 2843 // <0, 2, 4, ...> 2844 // <n, n+2, n+4, ...> 2845 // where n is the number of elements in the vector. 2846 // For example: 2847 // <0, 2, 4, ..., 0, 2, 4, ...> 2848 // <0, 2, 4, ..., n, n+2, n+4, ...> 2849 // 2850 // When undef's appear in the mask they are treated as if they were whatever 2851 // value is necessary in order to fit the above forms. 2852 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2853 SmallVector<int, 16> Indices, 2854 SelectionDAG &DAG) { 2855 assert((Indices.size() % 2) == 0); 2856 2857 SDValue Wt; 2858 SDValue Ws; 2859 const auto &Begin = Indices.begin(); 2860 const auto &Mid = Indices.begin() + Indices.size() / 2; 2861 const auto &End = Indices.end(); 2862 2863 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2864 Wt = Op->getOperand(0); 2865 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2866 Wt = Op->getOperand(1); 2867 else 2868 return SDValue(); 2869 2870 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2871 Ws = Op->getOperand(0); 2872 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2873 Ws = Op->getOperand(1); 2874 else 2875 return SDValue(); 2876 2877 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2878 } 2879 2880 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2881 // 2882 // PCKOD copies the odd elements of each vector into the result vector. 2883 // 2884 // It is possible to lower into PCKOD when the mask consists of two of the 2885 // following forms concatenated: 2886 // <1, 3, 5, ...> 2887 // <n+1, n+3, n+5, ...> 2888 // where n is the number of elements in the vector. 2889 // For example: 2890 // <1, 3, 5, ..., 1, 3, 5, ...> 2891 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2892 // 2893 // When undef's appear in the mask they are treated as if they were whatever 2894 // value is necessary in order to fit the above forms. 2895 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2896 SmallVector<int, 16> Indices, 2897 SelectionDAG &DAG) { 2898 assert((Indices.size() % 2) == 0); 2899 2900 SDValue Wt; 2901 SDValue Ws; 2902 const auto &Begin = Indices.begin(); 2903 const auto &Mid = Indices.begin() + Indices.size() / 2; 2904 const auto &End = Indices.end(); 2905 2906 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2907 Wt = Op->getOperand(0); 2908 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2909 Wt = Op->getOperand(1); 2910 else 2911 return SDValue(); 2912 2913 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2914 Ws = Op->getOperand(0); 2915 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2916 Ws = Op->getOperand(1); 2917 else 2918 return SDValue(); 2919 2920 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2921 } 2922 2923 // Lower VECTOR_SHUFFLE into VSHF. 2924 // 2925 // This mostly consists of converting the shuffle indices in Indices into a 2926 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2927 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2928 // if the type is v8i16 and all the indices are less than 8 then the second 2929 // operand is unused and can be replaced with anything. We choose to replace it 2930 // with the used operand since this reduces the number of instructions overall. 
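// For example, a v8i16 shuffle whose mask uses only indices 0..7 never reads
// the second operand, so both VSHF operands below become operand 0.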
2931 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2932 SmallVector<int, 16> Indices, 2933 SelectionDAG &DAG) { 2934 SmallVector<SDValue, 16> Ops; 2935 SDValue Op0; 2936 SDValue Op1; 2937 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2938 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2939 bool Using1stVec = false; 2940 bool Using2ndVec = false; 2941 SDLoc DL(Op); 2942 int ResTyNumElts = ResTy.getVectorNumElements(); 2943 2944 for (int i = 0; i < ResTyNumElts; ++i) { 2945 // Idx == -1 means UNDEF 2946 int Idx = Indices[i]; 2947 2948 if (0 <= Idx && Idx < ResTyNumElts) 2949 Using1stVec = true; 2950 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2951 Using2ndVec = true; 2952 } 2953 2954 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2955 ++I) 2956 Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); 2957 2958 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 2959 2960 if (Using1stVec && Using2ndVec) { 2961 Op0 = Op->getOperand(0); 2962 Op1 = Op->getOperand(1); 2963 } else if (Using1stVec) 2964 Op0 = Op1 = Op->getOperand(0); 2965 else if (Using2ndVec) 2966 Op0 = Op1 = Op->getOperand(1); 2967 else 2968 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2969 2970 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion. 2971 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2972 // VSHF concatenates the vectors in a bitwise fashion: 2973 // <0b00, 0b01> + <0b10, 0b11> -> 2974 // 0b0100 + 0b1110 -> 0b01001110 2975 // <0b10, 0b11, 0b00, 0b01> 2976 // We must therefore swap the operands to get the correct result. 2977 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2978 } 2979 2980 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2981 // indices in the shuffle. 2982 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2983 SelectionDAG &DAG) const { 2984 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2985 EVT ResTy = Op->getValueType(0); 2986 2987 if (!ResTy.is128BitVector()) 2988 return SDValue(); 2989 2990 int ResTyNumElts = ResTy.getVectorNumElements(); 2991 SmallVector<int, 16> Indices; 2992 2993 for (int i = 0; i < ResTyNumElts; ++i) 2994 Indices.push_back(Node->getMaskElt(i)); 2995 2996 // splati.[bhwd] is preferable to the others but is matched from 2997 // MipsISD::VSHF.
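// A splat-shaped mask such as <3, 3, 3, 3> is therefore still routed through
// lowerVECTOR_SHUFFLE_VSHF and matched to splati.[bhwd] afterwards.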
2998 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 2999 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3000 SDValue Result; 3001 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 3002 return Result; 3003 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 3004 return Result; 3005 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 3006 return Result; 3007 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 3008 return Result; 3009 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 3010 return Result; 3011 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 3012 return Result; 3013 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 3014 return Result; 3015 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3016 } 3017 3018 MachineBasicBlock * 3019 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3020 MachineBasicBlock *BB) const { 3021 // $bb: 3022 // bposge32_pseudo $vr0 3023 // => 3024 // $bb: 3025 // bposge32 $tbb 3026 // $fbb: 3027 // li $vr2, 0 3028 // b $sink 3029 // $tbb: 3030 // li $vr1, 1 3031 // $sink: 3032 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3033 3034 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3035 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3036 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3037 DebugLoc DL = MI.getDebugLoc(); 3038 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3039 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3040 MachineFunction *F = BB->getParent(); 3041 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3042 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3043 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3044 F->insert(It, FBB); 3045 F->insert(It, TBB); 3046 F->insert(It, Sink); 3047 3048 // Transfer the remainder of BB and its successor edges to Sink. 3049 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3050 BB->end()); 3051 Sink->transferSuccessorsAndUpdatePHIs(BB); 3052 3053 // Add successors. 3054 BB->addSuccessor(FBB); 3055 BB->addSuccessor(TBB); 3056 FBB->addSuccessor(Sink); 3057 TBB->addSuccessor(Sink); 3058 3059 // Insert the real branch to $BB: bposge32, or the bposge32c encoding when 3060 // targeting microMIPS. Only one of the two may terminate this block. 3061 if (Subtarget.inMicroMipsMode()) BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3062 else BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3063 3064 // Fill $FBB. 3065 unsigned VR2 = RegInfo.createVirtualRegister(RC); 3066 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3067 .addReg(Mips::ZERO).addImm(0); 3068 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3069 3070 // Fill $TBB. 3071 unsigned VR1 = RegInfo.createVirtualRegister(RC); 3072 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3073 .addReg(Mips::ZERO).addImm(1); 3074 3075 // Insert phi function to $Sink. 3076 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3077 MI.getOperand(0).getReg()) 3078 .addReg(VR2) 3079 .addMBB(FBB) 3080 .addReg(VR1) 3081 .addMBB(TBB); 3082 3083 MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}

MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
      .addReg(MI.getOperand(1).getReg())
      .addMBB(TBB);

  // Fill $FBB.
  unsigned RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
      .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  unsigned RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
      .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FBB)
      .addReg(RD2)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}

// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// copy_u_w $rt, $ws, $n
// mtc1     $rt, $fd
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Fd = MI.getOperand(0).getReg();
  unsigned Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();

  if (Lane == 0) {
    unsigned Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
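      // For example (illustrative only): the f32 sub-register of $w7 is the
      // odd register $f7. When odd single-precision registers are unavailable
      // (e.g. -mno-odd-spreg), copying the vector into an MSA128WEvens
      // register first guarantees an even, and therefore usable, $f
      // sub-register.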
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    unsigned Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
                                  &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 is the only FPU mode supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd = MI.getOperand(0).getReg();
  unsigned Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  unsigned Fs = MI.getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
                                &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  unsigned Fs = MI.getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD] $wdtmp2, $wdtmp1, $rs, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD] $wdtmp2, $wdtmp1, 0, $wt, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned SrcVecReg = MI.getOperand(1).getReg();
  unsigned LaneReg = MI.getOperand(2).getReg();
  unsigned SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    unsigned Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index.
  if (EltSizeInBytes != 1) {
    unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero.
  unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Fs = MI.getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Fs = MI.getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
// copy_u.h $rtemp, $wd[0]
// sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats to be allocated 16 bytes(!)
// of space.
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Ws = MI.getOperand(0).getReg();
  unsigned Rt = MI.getOperand(1).getReg();
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if (!UsingMips32) {
    unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }
  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));

  MI.eraseFromParent();
  return BB;
}

// Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
// lh $rtemp, $addr
// fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
// Additionally, if the address is not modulo 16, 2 cases can occur:
//  a) Segmentation fault as the load instruction reads from a memory page
//     it's not supposed to.
//  b) The load crosses an implementation specific boundary, requiring OS
//     intervention.
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  unsigned Rt = RegInfo.createVirtualRegister(RC);

  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    MIB.add(MI.getOperand(i));

  if (!UsingMips32) {
    unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
    Rt = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd, FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
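//
//        As a rough illustration of that aliasing (informative only): with
//        FR=1, $f5 occupies the low 32 bits of $d5, and $d5 occupies the low
//        64 bits of $w5, so a value written through mtc1 into $f5 already
//        sits in lane 0 of $w5.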
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// fexdo.h $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $wtemp, $rtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {
  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Fs = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
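  // Restating the opcode selection above (no new behaviour): an f32 source
  // uses mfc1 + fill.w; an f64 source on a 32-bit FPU uses mfc1 + fill.w for
  // the low half, with the high half merged below via mfhc1 + insert.w; an
  // f64 source on a 64-bit FPU uses dmfc1 + fill.d.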
  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up
//         the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//        I haven't checked.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// copy_s.w $rtemp, $wtemp[0]
// mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.d $rtemp, $wtemp2[0]
// dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.w $rtemp, $wtemp2[0]
// mtc1 $rtemp, $ftemp
// copy_s.w $rtemp2, $wtemp2[1]
// $fd = mthc1 $rtemp2, $ftemp
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {
  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  Register WPHI = Wtemp;

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}

// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// fexp2.w $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}