//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//

#define AMDGPU_LOG2E_F 1.44269504088896340735992468100189214f
#define AMDGPU_LN2_F   0.693147180559945309417232121458176568f
#define AMDGPU_LN10_F  2.30258509299404568401799145468436421f

#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
                           CCValAssign::LocInfo LocInfo,
                           ISD::ArgFlagsTy ArgFlags, CCState &State,
                           const TargetRegisterClass *RC,
                           unsigned NumRegs) {
  ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
  unsigned RegResult = State.AllocateReg(RegList);
  if (RegResult == AMDGPU::NoRegister)
    return false;

  State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
  return true;
}
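
// Helpers used by the generated calling convention code below. The NumRegs
// value passed to allocateCCRegs selects a prefix of the given tuple register
// class: 53 SGPR_64 pairs cover SGPR0-SGPR105, and the 31/29/25/17 VGPR tuple
// counts all end at VGPR31 (i.e. 32 - dwords per tuple + 1).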

static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
                              CCValAssign::LocInfo LocInfo,
                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::i64:
  case MVT::f64:
  case MVT::v2i32:
  case MVT::v2f32:
  case MVT::v4i16:
  case MVT::v4f16: {
    // Up to SGPR0-SGPR105
    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                          &AMDGPU::SGPR_64RegClass, 53);
  }
  default:
    return false;
  }
}

// Allocate up to VGPR31.
//
// TODO: Since there are no VGPR alignment requirements, would it be better to
// split into individual scalar registers?
static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
                              CCValAssign::LocInfo LocInfo,
                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::i64:
  case MVT::f64:
  case MVT::v2i32:
  case MVT::v2f32:
  case MVT::v4i16:
  case MVT::v4f16: {
    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                          &AMDGPU::VReg_64RegClass, 31);
  }
  case MVT::v4i32:
  case MVT::v4f32:
  case MVT::v2i64:
  case MVT::v2f64: {
    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                          &AMDGPU::VReg_128RegClass, 29);
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                          &AMDGPU::VReg_256RegClass, 25);
  }
  case MVT::v16i32:
  case MVT::v16f32: {
    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                          &AMDGPU::VReg_512RegClass, 17);
  }
  default:
    return false;
  }
}

#include "AMDGPUGenCallingConv.inc"

// Find a larger type to do a load / store of a vector with.
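// For example, a v2i8 value (16 bits) is handled as i16, and a v4f16 value
// (64 bits) as v2i32.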
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
  unsigned StoreSize = VT.getStoreSizeInBits();
  if (StoreSize <= 32)
    return EVT::getIntegerVT(Ctx, StoreSize);

  assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
  return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}

unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  KnownBits Known = DAG.computeKnownBits(Op);
  return VT.getSizeInBits() - Known.countMinLeadingZeros();
}

unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // In order for this to be a signed 24-bit value, bit 23 must be a sign bit.
  return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
}

AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                                           const AMDGPUSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
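  // For example, an f32 load is selected as an i32 load whose result is
  // bitcast back to f32; the corresponding stores below are handled the same
  // way.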
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);

  setOperationAction(ISD::LOAD, MVT::i64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);

  // There are no 64-bit extloads. These should be done as a 32-bit extload and
  // an extension to 64-bit.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    if (VT == MVT::i64)
      continue;

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
  }

  for (MVT VT : MVT::integer_vector_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
  }

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);

  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v3f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v5f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::v32f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);

  setOperationAction(ISD::STORE, MVT::i64, Promote);
  AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v2i64, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v2f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);

  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);

  setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand);
  setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand);
  setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);

  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
  setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
  setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);

  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
  setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);

  setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
  setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);

  setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
  setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);

  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::Constant, MVT::i64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  // This is totally unsupported, just custom lower to produce an error.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  // Library functions. These default to Expand, but we have instructions
  // for them.
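  // (On GCN these generally map to single instructions, e.g. v_ceil_f32,
  // v_floor_f32, v_rndne_f32, v_exp_f32 and v_log_f32 for the f32 cases.)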
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FPOW, MVT::f32, Legal);
  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
  setOperationAction(ISD::FABS, MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);
  setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

  setOperationAction(ISD::FROUND, MVT::f32, Custom);
  setOperationAction(ISD::FROUND, MVT::f64, Custom);

  setOperationAction(ISD::FLOG, MVT::f32, Custom);
  setOperationAction(ISD::FLOG10, MVT::f32, Custom);
  setOperationAction(ISD::FEXP, MVT::f32, Custom);

  setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
  setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);

  setOperationAction(ISD::FREM, MVT::f32, Custom);
  setOperationAction(ISD::FREM, MVT::f64, Custom);

  // Expand to fneg + fadd.
  setOperationAction(ISD::FSUB, MVT::f64, Expand);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);

  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    // These should use [SU]DIVREM, so set them to expand
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // GPU does not have divrem function for signed or unsigned.
    setOperationAction(ISD::SDIVREM, VT, Custom);
    setOperationAction(ISD::UDIVREM, VT, Custom);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);

    // AMDGPU uses ADDC/SUBC/ADDE/SUBE
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  // The hardware supports 32-bit ROTR, but not ROTL.
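  // ROTL is therefore expanded by the generic legalizer (in terms of ROTR or
  // shifts), and the 64-bit rotates are expanded as well.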
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::ROTL, MVT::i64, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);

  setOperationAction(ISD::SMIN, MVT::i32, Legal);
  setOperationAction(ISD::UMIN, MVT::i32, Legal);
  setOperationAction(ISD::SMAX, MVT::i32, Legal);
  setOperationAction(ISD::UMAX, MVT::i32, Legal);

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  static const MVT::SimpleValueType VectorIntTypes[] = {
    MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
  };

  for (MVT VT : VectorIntTypes) {
    // Expand the following operations for the current type by default.
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::AND, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::OR, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Custom);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::XOR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
  }

  static const MVT::SimpleValueType FloatVectorTypes[] = {
    MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
  };

  for (MVT VT : FloatVectorTypes) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FMINNUM, VT, Expand);
    setOperationAction(ISD::FMAXNUM, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FCANONICALIZE, VT, Expand);
  }

  // This causes us to use an unrolled select operation rather than expansion
  // with bit operations. This is in general better, but the alternative using
  // BFI instructions may be better if the select sources are SGPRs.
  setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
  AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
  AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);

  setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
  AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
  AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);

  // There are no libcalls of any kind.
  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
    setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setSchedulingPreference(Sched::RegPressure);
  setJumpIsExpensive(true);

  // FIXME: This is only partially true. If we have to do vector compares, any
  // SGPR pair can be a condition register. If we have a uniform condition, we
  // are better off doing SALU operations, where there is only one SCC.
  // For now, we don't have a way of knowing during instruction selection if a
  // condition will be uniform and we always use vector compares. Assume we are
  // using vector compares until that is fixed.
  setHasMultipleConditionRegisters(true);

  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  PredictableSelectIsExpensive = false;

  // We want to find all load dependencies for long chains of stores to enable
  // merging into very wide vectors. The problem is with vectors with > 4
  // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
  // vectors are a legal type, even though we have to split the loads
  // usually. When we can more precisely specify load legality per address
  // space, we should be able to make FindBetterChain/MergeConsecutiveStores
  // smarter so that they can figure out what to do in 2 iterations without all
  // N > 4 stores on the same chain.
  GatherAllAliasesMaxDepth = 16;

  // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
  // about these during lowering.
  MaxStoresPerMemcpy = 0xffffffff;
  MaxStoresPerMemmove = 0xffffffff;
  MaxStoresPerMemset = 0xffffffff;

  setTargetDAGCombine(ISD::BITCAST);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::MULHU);
  setTargetDAGCombine(ISD::MULHS);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::FSUB);
  setTargetDAGCombine(ISD::FNEG);
  setTargetDAGCombine(ISD::FABS);
  setTargetDAGCombine(ISD::AssertZext);
  setTargetDAGCombine(ISD::AssertSext);
}

//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//

LLVM_READNONE
static bool fnegFoldsIntoOp(unsigned Opc) {
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FMAD:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FSIN:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FCANONICALIZE:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RCP_LEGACY:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMUL_LEGACY:
  case AMDGPUISD::FMIN_LEGACY:
  case AMDGPUISD::FMAX_LEGACY:
  case AMDGPUISD::FMED3:
    return true;
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
  return N->getNumOperands() > 2 || VT == MVT::f64;
}

// Most FP instructions support source modifiers, but this could be refined
// slightly.
LLVM_READONLY
static bool hasSourceMods(const SDNode *N) {
  if (isa<MemSDNode>(N))
    return false;

  switch (N->getOpcode()) {
  case ISD::CopyToReg:
  case ISD::SELECT:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
  case AMDGPUISD::INTERP_P1:
  case AMDGPUISD::INTERP_P2:
  case AMDGPUISD::DIV_SCALE:

  // TODO: Should really be looking at the users of the bitcast. These are
  // problematic because bitcasts are used to legalize all stores to integer
  // types.
  case ISD::BITCAST:
    return false;
  default:
    return true;
  }
}

bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
                                                 unsigned CostThreshold) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases.
  // If there are multiple users, and each one would require a VOP3 encoding,
  // there will be a code size increase. Try to avoid increasing code size
  // unless we know it will save on the instruction count.
  unsigned NumMayIncreaseSize = 0;
  MVT VT = N->getValueType(0).getScalarType().getSimpleVT();

  // XXX - Should this limit number of uses to check?
  for (const SDNode *U : N->uses()) {
    if (!hasSourceMods(U))
      return false;

    if (!opMustUseVOP3Encoding(U, VT)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }

  return true;
}

MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
  return MVT::i32;
}

bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
  return true;
}

// The backend supports 32 and 64 bit floating point immediates.
// FIXME: Why are we reporting vectors of FP immediates as legal?
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
  EVT ScalarVT = VT.getScalarType();
  return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
          (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
}

// We don't want to shrink f64 / f32 constants.
bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  EVT ScalarVT = VT.getScalarType();
  return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}

bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
                                                 ISD::LoadExtType ExtTy,
                                                 EVT NewVT) const {
  // TODO: This may be worth removing. Check regression tests for diffs.
  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
    return false;

  unsigned NewSize = NewVT.getStoreSizeInBits();

  // If we are reducing to a 32-bit load, this is always better.
  if (NewSize == 32)
    return true;

  EVT OldVT = N->getValueType(0);
  unsigned OldSize = OldVT.getStoreSizeInBits();

  MemSDNode *MN = cast<MemSDNode>(N);
  unsigned AS = MN->getAddressSpace();
  // Do not shrink an aligned scalar load to sub-dword.
  // Scalar engine cannot do sub-dword loads.
  if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
      (AS == AMDGPUAS::CONSTANT_ADDRESS ||
       AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
       (isa<LoadSDNode>(N) &&
        AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
      AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
    return false;

  // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
  // extloads, so doing one requires using a buffer_load. In cases where we
  // still couldn't use a scalar load, using the wider load shouldn't really
  // hurt anything.

  // If the old size already had to be an extload, there's no harm in
  // continuing to reduce the width.
  return (OldSize < 32);
}

bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
                                                   const SelectionDAG &DAG,
                                                   const MachineMemOperand &MMO) const {

  assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());

  if (LoadTy.getScalarType() == MVT::i32)
    return false;

  unsigned LScalarSize = LoadTy.getScalarSizeInBits();
  unsigned CastScalarSize = CastTy.getScalarSizeInBits();

  if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
    return false;

  bool Fast = false;
  return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
                            MMO, &Fast) && Fast;
}

// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably
// also profitable with the expansion for 64-bit since it's generally good to
// speculate things.
// FIXME: These should really have the size as a parameter.
bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
  return true;
}

bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
  return true;
}

bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
  switch (N->getOpcode()) {
  default:
    return false;
  case ISD::EntryToken:
  case ISD::TokenFactor:
    return true;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntrID) {
    default:
      return false;
    case Intrinsic::amdgcn_readfirstlane:
    case Intrinsic::amdgcn_readlane:
      return true;
    }
    break;
  }
  case ISD::LOAD: {
    const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
    if (L->getMemOperand()->getAddrSpace() ==
        AMDGPUAS::CONSTANT_ADDRESS_32BIT)
      return true;
    return false;
  }
  }
}

//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//

bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
  assert(VT.isFloatingPoint());

  // Packed operations do not have a fabs modifier.
  return VT == MVT::f32 || VT == MVT::f64 ||
         (Subtarget->has16BitInsts() && VT == MVT::f16);
}

bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return VT == MVT::f32 || VT == MVT::f64 ||
         (Subtarget->has16BitInsts() && VT == MVT::f16) ||
         (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16);
}

bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(EVT MemVT,
                                                        unsigned NumElem,
                                                        unsigned AS) const {
  return true;
}

bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
  // There are few operations which truly have vector input operands.
  // Any vector operation is going to involve operations on each component, and
  // a build_vector will be a copy per element, so it always makes sense to use
  // a build_vector input in place of the extracted element to avoid a copy
  // into a super register.
  //
  // We should probably only do this if all users are extracts only, but this
  // should be the common case.
  return true;
}

bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
  // Truncate is just accessing a subregister.

  unsigned SrcSize = Source.getSizeInBits();
  unsigned DestSize = Dest.getSizeInBits();

  return DestSize < SrcSize && DestSize % 32 == 0;
}

bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
  // Truncate is just accessing a subregister.

  unsigned SrcSize = Source->getScalarSizeInBits();
  unsigned DestSize = Dest->getScalarSizeInBits();

  if (DestSize == 16 && Subtarget->has16BitInsts())
    return SrcSize >= 32;

  return DestSize < SrcSize && DestSize % 32 == 0;
}

bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
  unsigned SrcSize = Src->getScalarSizeInBits();
  unsigned DestSize = Dest->getScalarSizeInBits();

  if (SrcSize == 16 && Subtarget->has16BitInsts())
    return DestSize >= 32;

  return SrcSize == 32 && DestSize == 64;
}

bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
  // Any register load of a 64-bit value really requires 2 32-bit moves. For
  // all practical purposes, the extra mov 0 to load a 64-bit is free. As used,
  // this will enable reducing 64-bit operations to 32-bit, which is always
  // good.
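  // (For example, a zext of i32 to i64 is materialized as the original 32-bit
  // value in the low half of a register pair plus a single mov of 0 into the
  // high half.)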

  if (Src == MVT::i16)
    return Dest == MVT::i32 || Dest == MVT::i64;

  return Src == MVT::i32 && Dest == MVT::i64;
}

bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  return isZExtFree(Val.getValueType(), VT2);
}

bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
  // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
  // limited number of native 64-bit operations. Shrinking an operation to fit
  // in a single 32-bit register should always be helpful. As currently used,
  // this is much less general than the name suggests, and is only used in
  // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits
  // is not profitable, and may actually be harmful.
  return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
}

//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//

CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_AMDGPU;
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return CC_AMDGPU_Func;
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  default:
    report_fatal_error("Unsupported calling convention for call");
  }
}

CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
                                                    bool IsVarArg) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
926*0b57cec5SDimitry Andric case CallingConv::AMDGPU_ES:
927*0b57cec5SDimitry Andric case CallingConv::AMDGPU_LS:
928*0b57cec5SDimitry Andric return RetCC_SI_Shader;
929*0b57cec5SDimitry Andric case CallingConv::C:
930*0b57cec5SDimitry Andric case CallingConv::Fast:
931*0b57cec5SDimitry Andric case CallingConv::Cold:
932*0b57cec5SDimitry Andric return RetCC_AMDGPU_Func;
933*0b57cec5SDimitry Andric default:
934*0b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention.");
935*0b57cec5SDimitry Andric }
936*0b57cec5SDimitry Andric }
937*0b57cec5SDimitry Andric
938*0b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments
939*0b57cec5SDimitry Andric /// with illegal types. However, this does not work for the AMDGPU targets
940*0b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types.
941*0b57cec5SDimitry Andric /// In order to handle this properly we need to get the original type sizes
942*0b57cec5SDimitry Andric /// from the LLVM IR Function and fixup the ISD::InputArg values before
943*0b57cec5SDimitry Andric /// passing them to AnalyzeFormalArguments().
944*0b57cec5SDimitry Andric
945*0b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
946*0b57cec5SDimitry Andric /// input values across multiple registers. Each item in the Ins array
947*0b57cec5SDimitry Andric /// represents a single value that will be stored in registers. Ins[x].VT is
948*0b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so
949*0b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type.
950*0b57cec5SDimitry Andric ///
951*0b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each
952*0b57cec5SDimitry Andric /// argument. Since Ins[x].VT gives us the size of the register that will
953*0b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
954*0b57cec5SDimitry Andric /// for the original function argument so that we can deduce the correct memory
955*0b57cec5SDimitry Andric /// type to use for Ins[x]. In most cases the correct memory type will be
956*0b57cec5SDimitry Andric /// Ins[x].ArgVT. However, this will not always be the case. If, for example,
957*0b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into
958*0b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array.
959*0b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be v8i8, which is the full type of
960*0b57cec5SDimitry Andric /// the argument before it was split. From this, we deduce that the memory type
961*0b57cec5SDimitry Andric /// for each individual part is i8. We pass the memory type as LocVT to the
962*0b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as
963*0b57cec5SDimitry Andric /// the ValVT.
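// Illustrative sketch of the bookkeeping described above (the offsets here are
// made up for the example): for a v8i8 kernel argument starting at byte
// ArgOffset, the loop below ends up recording eight CCValAssign::getCustomMem
// locations, all with the same register type Ins[x].VT, a memory type (LocVT)
// of i8, and offsets ArgOffset + 0, ArgOffset + 1, ..., ArgOffset + 7, i.e.
// one byte per part, stepped by MemVT.getStoreSize().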
964*0b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( 965*0b57cec5SDimitry Andric CCState &State, 966*0b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins) const { 967*0b57cec5SDimitry Andric const MachineFunction &MF = State.getMachineFunction(); 968*0b57cec5SDimitry Andric const Function &Fn = MF.getFunction(); 969*0b57cec5SDimitry Andric LLVMContext &Ctx = Fn.getParent()->getContext(); 970*0b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 971*0b57cec5SDimitry Andric const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn); 972*0b57cec5SDimitry Andric CallingConv::ID CC = Fn.getCallingConv(); 973*0b57cec5SDimitry Andric 974*0b57cec5SDimitry Andric unsigned MaxAlign = 1; 975*0b57cec5SDimitry Andric uint64_t ExplicitArgOffset = 0; 976*0b57cec5SDimitry Andric const DataLayout &DL = Fn.getParent()->getDataLayout(); 977*0b57cec5SDimitry Andric 978*0b57cec5SDimitry Andric unsigned InIndex = 0; 979*0b57cec5SDimitry Andric 980*0b57cec5SDimitry Andric for (const Argument &Arg : Fn.args()) { 981*0b57cec5SDimitry Andric Type *BaseArgTy = Arg.getType(); 982*0b57cec5SDimitry Andric unsigned Align = DL.getABITypeAlignment(BaseArgTy); 983*0b57cec5SDimitry Andric MaxAlign = std::max(Align, MaxAlign); 984*0b57cec5SDimitry Andric unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy); 985*0b57cec5SDimitry Andric 986*0b57cec5SDimitry Andric uint64_t ArgOffset = alignTo(ExplicitArgOffset, Align) + ExplicitOffset; 987*0b57cec5SDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize; 988*0b57cec5SDimitry Andric 989*0b57cec5SDimitry Andric // We're basically throwing away everything passed into us and starting over 990*0b57cec5SDimitry Andric // to get accurate in-memory offsets. The "PartOffset" is completely useless 991*0b57cec5SDimitry Andric // to us as computed in Ins. 992*0b57cec5SDimitry Andric // 993*0b57cec5SDimitry Andric // We also need to figure out what type legalization is trying to do to get 994*0b57cec5SDimitry Andric // the correct memory offsets. 995*0b57cec5SDimitry Andric 996*0b57cec5SDimitry Andric SmallVector<EVT, 16> ValueVTs; 997*0b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 998*0b57cec5SDimitry Andric ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); 999*0b57cec5SDimitry Andric 1000*0b57cec5SDimitry Andric for (unsigned Value = 0, NumValues = ValueVTs.size(); 1001*0b57cec5SDimitry Andric Value != NumValues; ++Value) { 1002*0b57cec5SDimitry Andric uint64_t BasePartOffset = Offsets[Value]; 1003*0b57cec5SDimitry Andric 1004*0b57cec5SDimitry Andric EVT ArgVT = ValueVTs[Value]; 1005*0b57cec5SDimitry Andric EVT MemVT = ArgVT; 1006*0b57cec5SDimitry Andric MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT); 1007*0b57cec5SDimitry Andric unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT); 1008*0b57cec5SDimitry Andric 1009*0b57cec5SDimitry Andric if (NumRegs == 1) { 1010*0b57cec5SDimitry Andric // This argument is not split, so the IR type is the memory type. 1011*0b57cec5SDimitry Andric if (ArgVT.isExtended()) { 1012*0b57cec5SDimitry Andric // We have an extended type, like i24, so we should just use the 1013*0b57cec5SDimitry Andric // register type. 
1014*0b57cec5SDimitry Andric MemVT = RegisterVT;
1015*0b57cec5SDimitry Andric } else {
1016*0b57cec5SDimitry Andric MemVT = ArgVT;
1017*0b57cec5SDimitry Andric }
1018*0b57cec5SDimitry Andric } else if (ArgVT.isVector() && RegisterVT.isVector() &&
1019*0b57cec5SDimitry Andric ArgVT.getScalarType() == RegisterVT.getScalarType()) {
1020*0b57cec5SDimitry Andric assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
1021*0b57cec5SDimitry Andric // We have a vector value which has been split into a vector with
1022*0b57cec5SDimitry Andric // the same scalar type, but fewer elements. This should handle
1023*0b57cec5SDimitry Andric // all the floating-point vector types.
1024*0b57cec5SDimitry Andric MemVT = RegisterVT;
1025*0b57cec5SDimitry Andric } else if (ArgVT.isVector() &&
1026*0b57cec5SDimitry Andric ArgVT.getVectorNumElements() == NumRegs) {
1027*0b57cec5SDimitry Andric // This arg has been split so that each element is stored in a separate
1028*0b57cec5SDimitry Andric // register.
1029*0b57cec5SDimitry Andric MemVT = ArgVT.getScalarType();
1030*0b57cec5SDimitry Andric } else if (ArgVT.isExtended()) {
1031*0b57cec5SDimitry Andric // We have an extended type, like i65.
1032*0b57cec5SDimitry Andric MemVT = RegisterVT;
1033*0b57cec5SDimitry Andric } else {
1034*0b57cec5SDimitry Andric unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
1035*0b57cec5SDimitry Andric assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
1036*0b57cec5SDimitry Andric if (RegisterVT.isInteger()) {
1037*0b57cec5SDimitry Andric MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
1038*0b57cec5SDimitry Andric } else if (RegisterVT.isVector()) {
1039*0b57cec5SDimitry Andric assert(!RegisterVT.getScalarType().isFloatingPoint());
1040*0b57cec5SDimitry Andric unsigned NumElements = RegisterVT.getVectorNumElements();
1041*0b57cec5SDimitry Andric assert(MemoryBits % NumElements == 0);
1042*0b57cec5SDimitry Andric // This vector type has been split into another vector type with
1043*0b57cec5SDimitry Andric // a different element size.
1044*0b57cec5SDimitry Andric EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
1045*0b57cec5SDimitry Andric MemoryBits / NumElements);
1046*0b57cec5SDimitry Andric MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
1047*0b57cec5SDimitry Andric } else {
1048*0b57cec5SDimitry Andric llvm_unreachable("cannot deduce memory type.");
1049*0b57cec5SDimitry Andric }
1050*0b57cec5SDimitry Andric }
1051*0b57cec5SDimitry Andric
1052*0b57cec5SDimitry Andric // Convert one-element vectors to scalars.
1053*0b57cec5SDimitry Andric if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
1054*0b57cec5SDimitry Andric MemVT = MemVT.getScalarType();
1055*0b57cec5SDimitry Andric
1056*0b57cec5SDimitry Andric // Round up vec3/vec5 argument.
1057*0b57cec5SDimitry Andric if (MemVT.isVector() && !MemVT.isPow2VectorType()) { 1058*0b57cec5SDimitry Andric assert(MemVT.getVectorNumElements() == 3 || 1059*0b57cec5SDimitry Andric MemVT.getVectorNumElements() == 5); 1060*0b57cec5SDimitry Andric MemVT = MemVT.getPow2VectorType(State.getContext()); 1061*0b57cec5SDimitry Andric } 1062*0b57cec5SDimitry Andric 1063*0b57cec5SDimitry Andric unsigned PartOffset = 0; 1064*0b57cec5SDimitry Andric for (unsigned i = 0; i != NumRegs; ++i) { 1065*0b57cec5SDimitry Andric State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT, 1066*0b57cec5SDimitry Andric BasePartOffset + PartOffset, 1067*0b57cec5SDimitry Andric MemVT.getSimpleVT(), 1068*0b57cec5SDimitry Andric CCValAssign::Full)); 1069*0b57cec5SDimitry Andric PartOffset += MemVT.getStoreSize(); 1070*0b57cec5SDimitry Andric } 1071*0b57cec5SDimitry Andric } 1072*0b57cec5SDimitry Andric } 1073*0b57cec5SDimitry Andric } 1074*0b57cec5SDimitry Andric 1075*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn( 1076*0b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, 1077*0b57cec5SDimitry Andric bool isVarArg, 1078*0b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, 1079*0b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals, 1080*0b57cec5SDimitry Andric const SDLoc &DL, SelectionDAG &DAG) const { 1081*0b57cec5SDimitry Andric // FIXME: Fails for r600 tests 1082*0b57cec5SDimitry Andric //assert(!isVarArg && Outs.empty() && OutVals.empty() && 1083*0b57cec5SDimitry Andric // "wave terminate should not have return values"); 1084*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); 1085*0b57cec5SDimitry Andric } 1086*0b57cec5SDimitry Andric 1087*0b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 1088*0b57cec5SDimitry Andric // Target specific lowering 1089*0b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 1090*0b57cec5SDimitry Andric 1091*0b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value. 1092*0b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, 1093*0b57cec5SDimitry Andric bool IsVarArg) { 1094*0b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); 1095*0b57cec5SDimitry Andric } 1096*0b57cec5SDimitry Andric 1097*0b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, 1098*0b57cec5SDimitry Andric bool IsVarArg) { 1099*0b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); 1100*0b57cec5SDimitry Andric } 1101*0b57cec5SDimitry Andric 1102*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain, 1103*0b57cec5SDimitry Andric SelectionDAG &DAG, 1104*0b57cec5SDimitry Andric MachineFrameInfo &MFI, 1105*0b57cec5SDimitry Andric int ClobberedFI) const { 1106*0b57cec5SDimitry Andric SmallVector<SDValue, 8> ArgChains; 1107*0b57cec5SDimitry Andric int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); 1108*0b57cec5SDimitry Andric int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; 1109*0b57cec5SDimitry Andric 1110*0b57cec5SDimitry Andric // Include the original chain at the beginning of the list. When this is 1111*0b57cec5SDimitry Andric // used by target LowerCall hooks, this helps legalize find the 1112*0b57cec5SDimitry Andric // CALLSEQ_BEGIN node. 
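// Illustrative summary of the loop below: the clobbered stack object occupies
// bytes [FirstByte, LastByte]; any load through a fixed (negative) frame index
// whose byte range overlaps that interval gets its chain added to ArgChains,
// so the store of the outgoing argument cannot be reordered before the load of
// the old value.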
1113*0b57cec5SDimitry Andric ArgChains.push_back(Chain); 1114*0b57cec5SDimitry Andric 1115*0b57cec5SDimitry Andric // Add a chain value for each stack argument corresponding 1116*0b57cec5SDimitry Andric for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), 1117*0b57cec5SDimitry Andric UE = DAG.getEntryNode().getNode()->use_end(); 1118*0b57cec5SDimitry Andric U != UE; ++U) { 1119*0b57cec5SDimitry Andric if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) { 1120*0b57cec5SDimitry Andric if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) { 1121*0b57cec5SDimitry Andric if (FI->getIndex() < 0) { 1122*0b57cec5SDimitry Andric int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); 1123*0b57cec5SDimitry Andric int64_t InLastByte = InFirstByte; 1124*0b57cec5SDimitry Andric InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; 1125*0b57cec5SDimitry Andric 1126*0b57cec5SDimitry Andric if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || 1127*0b57cec5SDimitry Andric (FirstByte <= InFirstByte && InFirstByte <= LastByte)) 1128*0b57cec5SDimitry Andric ArgChains.push_back(SDValue(L, 1)); 1129*0b57cec5SDimitry Andric } 1130*0b57cec5SDimitry Andric } 1131*0b57cec5SDimitry Andric } 1132*0b57cec5SDimitry Andric } 1133*0b57cec5SDimitry Andric 1134*0b57cec5SDimitry Andric // Build a tokenfactor for all the chains. 1135*0b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); 1136*0b57cec5SDimitry Andric } 1137*0b57cec5SDimitry Andric 1138*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI, 1139*0b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals, 1140*0b57cec5SDimitry Andric StringRef Reason) const { 1141*0b57cec5SDimitry Andric SDValue Callee = CLI.Callee; 1142*0b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG; 1143*0b57cec5SDimitry Andric 1144*0b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 1145*0b57cec5SDimitry Andric 1146*0b57cec5SDimitry Andric StringRef FuncName("<unknown>"); 1147*0b57cec5SDimitry Andric 1148*0b57cec5SDimitry Andric if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) 1149*0b57cec5SDimitry Andric FuncName = G->getSymbol(); 1150*0b57cec5SDimitry Andric else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1151*0b57cec5SDimitry Andric FuncName = G->getGlobal()->getName(); 1152*0b57cec5SDimitry Andric 1153*0b57cec5SDimitry Andric DiagnosticInfoUnsupported NoCalls( 1154*0b57cec5SDimitry Andric Fn, Reason + FuncName, CLI.DL.getDebugLoc()); 1155*0b57cec5SDimitry Andric DAG.getContext()->diagnose(NoCalls); 1156*0b57cec5SDimitry Andric 1157*0b57cec5SDimitry Andric if (!CLI.IsTailCall) { 1158*0b57cec5SDimitry Andric for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) 1159*0b57cec5SDimitry Andric InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); 1160*0b57cec5SDimitry Andric } 1161*0b57cec5SDimitry Andric 1162*0b57cec5SDimitry Andric return DAG.getEntryNode(); 1163*0b57cec5SDimitry Andric } 1164*0b57cec5SDimitry Andric 1165*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, 1166*0b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 1167*0b57cec5SDimitry Andric return lowerUnhandledCall(CLI, InVals, "unsupported call to function "); 1168*0b57cec5SDimitry Andric } 1169*0b57cec5SDimitry Andric 1170*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 1171*0b57cec5SDimitry Andric SelectionDAG &DAG) const 
{
1172*0b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction();
1173*0b57cec5SDimitry Andric
1174*0b57cec5SDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
1175*0b57cec5SDimitry Andric SDLoc(Op).getDebugLoc());
1176*0b57cec5SDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca);
1177*0b57cec5SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
1178*0b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc());
1179*0b57cec5SDimitry Andric }
1180*0b57cec5SDimitry Andric
1181*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
1182*0b57cec5SDimitry Andric SelectionDAG &DAG) const {
1183*0b57cec5SDimitry Andric switch (Op.getOpcode()) {
1184*0b57cec5SDimitry Andric default:
1185*0b57cec5SDimitry Andric Op->print(errs(), &DAG);
1186*0b57cec5SDimitry Andric llvm_unreachable("Custom lowering code for this "
1187*0b57cec5SDimitry Andric "instruction is not implemented yet!");
1188*0b57cec5SDimitry Andric break;
1189*0b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
1190*0b57cec5SDimitry Andric case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
1191*0b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
1192*0b57cec5SDimitry Andric case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
1193*0b57cec5SDimitry Andric case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
1194*0b57cec5SDimitry Andric case ISD::FREM: return LowerFREM(Op, DAG);
1195*0b57cec5SDimitry Andric case ISD::FCEIL: return LowerFCEIL(Op, DAG);
1196*0b57cec5SDimitry Andric case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
1197*0b57cec5SDimitry Andric case ISD::FRINT: return LowerFRINT(Op, DAG);
1198*0b57cec5SDimitry Andric case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
1199*0b57cec5SDimitry Andric case ISD::FROUND: return LowerFROUND(Op, DAG);
1200*0b57cec5SDimitry Andric case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
1201*0b57cec5SDimitry Andric case ISD::FLOG:
1202*0b57cec5SDimitry Andric return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
1203*0b57cec5SDimitry Andric case ISD::FLOG10:
1204*0b57cec5SDimitry Andric return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
1205*0b57cec5SDimitry Andric case ISD::FEXP:
1206*0b57cec5SDimitry Andric return lowerFEXP(Op, DAG);
1207*0b57cec5SDimitry Andric case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
1208*0b57cec5SDimitry Andric case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
1209*0b57cec5SDimitry Andric case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
1210*0b57cec5SDimitry Andric case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
1211*0b57cec5SDimitry Andric case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
1212*0b57cec5SDimitry Andric case ISD::CTTZ:
1213*0b57cec5SDimitry Andric case ISD::CTTZ_ZERO_UNDEF:
1214*0b57cec5SDimitry Andric case ISD::CTLZ:
1215*0b57cec5SDimitry Andric case ISD::CTLZ_ZERO_UNDEF:
1216*0b57cec5SDimitry Andric return LowerCTLZ_CTTZ(Op, DAG);
1217*0b57cec5SDimitry Andric case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
1218*0b57cec5SDimitry Andric }
1219*0b57cec5SDimitry Andric return Op;
1220*0b57cec5SDimitry Andric }
1221*0b57cec5SDimitry Andric
1222*0b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
1223*0b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results,
1224*0b57cec5SDimitry Andric SelectionDAG &DAG) const {
1225*0b57cec5SDimitry Andric switch
(N->getOpcode()) { 1226*0b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: 1227*0b57cec5SDimitry Andric // Different parts of legalization seem to interpret which type of 1228*0b57cec5SDimitry Andric // sign_extend_inreg is the one to check for custom lowering. The extended 1229*0b57cec5SDimitry Andric // from type is what really matters, but some places check for custom 1230*0b57cec5SDimitry Andric // lowering of the result type. This results in trying to use 1231*0b57cec5SDimitry Andric // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do 1232*0b57cec5SDimitry Andric // nothing here and let the illegal result integer be handled normally. 1233*0b57cec5SDimitry Andric return; 1234*0b57cec5SDimitry Andric default: 1235*0b57cec5SDimitry Andric return; 1236*0b57cec5SDimitry Andric } 1237*0b57cec5SDimitry Andric } 1238*0b57cec5SDimitry Andric 1239*0b57cec5SDimitry Andric static bool hasDefinedInitializer(const GlobalValue *GV) { 1240*0b57cec5SDimitry Andric const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); 1241*0b57cec5SDimitry Andric if (!GVar || !GVar->hasInitializer()) 1242*0b57cec5SDimitry Andric return false; 1243*0b57cec5SDimitry Andric 1244*0b57cec5SDimitry Andric return !isa<UndefValue>(GVar->getInitializer()); 1245*0b57cec5SDimitry Andric } 1246*0b57cec5SDimitry Andric 1247*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 1248*0b57cec5SDimitry Andric SDValue Op, 1249*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1250*0b57cec5SDimitry Andric 1251*0b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 1252*0b57cec5SDimitry Andric GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 1253*0b57cec5SDimitry Andric const GlobalValue *GV = G->getGlobal(); 1254*0b57cec5SDimitry Andric 1255*0b57cec5SDimitry Andric if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 1256*0b57cec5SDimitry Andric G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { 1257*0b57cec5SDimitry Andric if (!MFI->isEntryFunction()) { 1258*0b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 1259*0b57cec5SDimitry Andric DiagnosticInfoUnsupported BadLDSDecl( 1260*0b57cec5SDimitry Andric Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc()); 1261*0b57cec5SDimitry Andric DAG.getContext()->diagnose(BadLDSDecl); 1262*0b57cec5SDimitry Andric } 1263*0b57cec5SDimitry Andric 1264*0b57cec5SDimitry Andric // XXX: What does the value of G->getOffset() mean? 1265*0b57cec5SDimitry Andric assert(G->getOffset() == 0 && 1266*0b57cec5SDimitry Andric "Do not know what to do with an non-zero offset"); 1267*0b57cec5SDimitry Andric 1268*0b57cec5SDimitry Andric // TODO: We could emit code to handle the initialization somewhere. 
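// Hypothetical IR for the case handled below (example only, not from this
// file):
//   @lds.buf = internal addrspace(3) global [64 x float] undef, align 4
// Such a global has no defined initializer, so it is given a static offset
// within the kernel's LDS allocation and its address lowers to that constant.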
1269*0b57cec5SDimitry Andric if (!hasDefinedInitializer(GV)) { 1270*0b57cec5SDimitry Andric unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); 1271*0b57cec5SDimitry Andric return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); 1272*0b57cec5SDimitry Andric } 1273*0b57cec5SDimitry Andric } 1274*0b57cec5SDimitry Andric 1275*0b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 1276*0b57cec5SDimitry Andric DiagnosticInfoUnsupported BadInit( 1277*0b57cec5SDimitry Andric Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc()); 1278*0b57cec5SDimitry Andric DAG.getContext()->diagnose(BadInit); 1279*0b57cec5SDimitry Andric return SDValue(); 1280*0b57cec5SDimitry Andric } 1281*0b57cec5SDimitry Andric 1282*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 1283*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1284*0b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 1285*0b57cec5SDimitry Andric 1286*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1287*0b57cec5SDimitry Andric if (VT == MVT::v4i16 || VT == MVT::v4f16) { 1288*0b57cec5SDimitry Andric SDLoc SL(Op); 1289*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0)); 1290*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1)); 1291*0b57cec5SDimitry Andric 1292*0b57cec5SDimitry Andric SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi }); 1293*0b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, BV); 1294*0b57cec5SDimitry Andric } 1295*0b57cec5SDimitry Andric 1296*0b57cec5SDimitry Andric for (const SDUse &U : Op->ops()) 1297*0b57cec5SDimitry Andric DAG.ExtractVectorElements(U.get(), Args); 1298*0b57cec5SDimitry Andric 1299*0b57cec5SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); 1300*0b57cec5SDimitry Andric } 1301*0b57cec5SDimitry Andric 1302*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 1303*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1304*0b57cec5SDimitry Andric 1305*0b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 1306*0b57cec5SDimitry Andric unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1307*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1308*0b57cec5SDimitry Andric DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, 1309*0b57cec5SDimitry Andric VT.getVectorNumElements()); 1310*0b57cec5SDimitry Andric 1311*0b57cec5SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); 1312*0b57cec5SDimitry Andric } 1313*0b57cec5SDimitry Andric 1314*0b57cec5SDimitry Andric /// Generate Min/Max node 1315*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, 1316*0b57cec5SDimitry Andric SDValue LHS, SDValue RHS, 1317*0b57cec5SDimitry Andric SDValue True, SDValue False, 1318*0b57cec5SDimitry Andric SDValue CC, 1319*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 1320*0b57cec5SDimitry Andric if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 1321*0b57cec5SDimitry Andric return SDValue(); 1322*0b57cec5SDimitry Andric 1323*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 1324*0b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 1325*0b57cec5SDimitry Andric switch (CCOpcode) { 1326*0b57cec5SDimitry Andric case ISD::SETOEQ: 1327*0b57cec5SDimitry Andric case ISD::SETONE: 1328*0b57cec5SDimitry Andric case ISD::SETUNE: 
1329*0b57cec5SDimitry Andric case ISD::SETNE: 1330*0b57cec5SDimitry Andric case ISD::SETUEQ: 1331*0b57cec5SDimitry Andric case ISD::SETEQ: 1332*0b57cec5SDimitry Andric case ISD::SETFALSE: 1333*0b57cec5SDimitry Andric case ISD::SETFALSE2: 1334*0b57cec5SDimitry Andric case ISD::SETTRUE: 1335*0b57cec5SDimitry Andric case ISD::SETTRUE2: 1336*0b57cec5SDimitry Andric case ISD::SETUO: 1337*0b57cec5SDimitry Andric case ISD::SETO: 1338*0b57cec5SDimitry Andric break; 1339*0b57cec5SDimitry Andric case ISD::SETULE: 1340*0b57cec5SDimitry Andric case ISD::SETULT: { 1341*0b57cec5SDimitry Andric if (LHS == True) 1342*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 1343*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 1344*0b57cec5SDimitry Andric } 1345*0b57cec5SDimitry Andric case ISD::SETOLE: 1346*0b57cec5SDimitry Andric case ISD::SETOLT: 1347*0b57cec5SDimitry Andric case ISD::SETLE: 1348*0b57cec5SDimitry Andric case ISD::SETLT: { 1349*0b57cec5SDimitry Andric // Ordered. Assume ordered for undefined. 1350*0b57cec5SDimitry Andric 1351*0b57cec5SDimitry Andric // Only do this after legalization to avoid interfering with other combines 1352*0b57cec5SDimitry Andric // which might occur. 1353*0b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 1354*0b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 1355*0b57cec5SDimitry Andric return SDValue(); 1356*0b57cec5SDimitry Andric 1357*0b57cec5SDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 1358*0b57cec5SDimitry Andric // selected operand is the second one based on the failing compare with NaN, 1359*0b57cec5SDimitry Andric // so permute it based on the compare type the hardware uses. 1360*0b57cec5SDimitry Andric if (LHS == True) 1361*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 1362*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 1363*0b57cec5SDimitry Andric } 1364*0b57cec5SDimitry Andric case ISD::SETUGE: 1365*0b57cec5SDimitry Andric case ISD::SETUGT: { 1366*0b57cec5SDimitry Andric if (LHS == True) 1367*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 1368*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 1369*0b57cec5SDimitry Andric } 1370*0b57cec5SDimitry Andric case ISD::SETGT: 1371*0b57cec5SDimitry Andric case ISD::SETGE: 1372*0b57cec5SDimitry Andric case ISD::SETOGE: 1373*0b57cec5SDimitry Andric case ISD::SETOGT: { 1374*0b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 1375*0b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 1376*0b57cec5SDimitry Andric return SDValue(); 1377*0b57cec5SDimitry Andric 1378*0b57cec5SDimitry Andric if (LHS == True) 1379*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 1380*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 1381*0b57cec5SDimitry Andric } 1382*0b57cec5SDimitry Andric case ISD::SETCC_INVALID: 1383*0b57cec5SDimitry Andric llvm_unreachable("Invalid setcc condcode!"); 1384*0b57cec5SDimitry Andric } 1385*0b57cec5SDimitry Andric return SDValue(); 1386*0b57cec5SDimitry Andric } 1387*0b57cec5SDimitry Andric 1388*0b57cec5SDimitry Andric std::pair<SDValue, SDValue> 1389*0b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { 1390*0b57cec5SDimitry Andric SDLoc SL(Op); 1391*0b57cec5SDimitry Andric 
1392*0b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 1393*0b57cec5SDimitry Andric 1394*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 1395*0b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 1396*0b57cec5SDimitry Andric 1397*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 1398*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 1399*0b57cec5SDimitry Andric 1400*0b57cec5SDimitry Andric return std::make_pair(Lo, Hi); 1401*0b57cec5SDimitry Andric } 1402*0b57cec5SDimitry Andric 1403*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { 1404*0b57cec5SDimitry Andric SDLoc SL(Op); 1405*0b57cec5SDimitry Andric 1406*0b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 1407*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 1408*0b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 1409*0b57cec5SDimitry Andric } 1410*0b57cec5SDimitry Andric 1411*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { 1412*0b57cec5SDimitry Andric SDLoc SL(Op); 1413*0b57cec5SDimitry Andric 1414*0b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 1415*0b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 1416*0b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 1417*0b57cec5SDimitry Andric } 1418*0b57cec5SDimitry Andric 1419*0b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector. 1420*0b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would 1421*0b57cec5SDimitry Andric // otherwise be a 1-vector. 1422*0b57cec5SDimitry Andric std::pair<EVT, EVT> 1423*0b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { 1424*0b57cec5SDimitry Andric EVT LoVT, HiVT; 1425*0b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType(); 1426*0b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements(); 1427*0b57cec5SDimitry Andric unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2); 1428*0b57cec5SDimitry Andric LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts); 1429*0b57cec5SDimitry Andric HiVT = NumElts - LoNumElts == 1 1430*0b57cec5SDimitry Andric ? EltVT 1431*0b57cec5SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts); 1432*0b57cec5SDimitry Andric return std::make_pair(LoVT, HiVT); 1433*0b57cec5SDimitry Andric } 1434*0b57cec5SDimitry Andric 1435*0b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be 1436*0b57cec5SDimitry Andric // scalar. 1437*0b57cec5SDimitry Andric std::pair<SDValue, SDValue> 1438*0b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL, 1439*0b57cec5SDimitry Andric const EVT &LoVT, const EVT &HiVT, 1440*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1441*0b57cec5SDimitry Andric assert(LoVT.getVectorNumElements() + 1442*0b57cec5SDimitry Andric (HiVT.isVector() ? 
HiVT.getVectorNumElements() : 1) <= 1443*0b57cec5SDimitry Andric N.getValueType().getVectorNumElements() && 1444*0b57cec5SDimitry Andric "More vector elements requested than available!"); 1445*0b57cec5SDimitry Andric auto IdxTy = getVectorIdxTy(DAG.getDataLayout()); 1446*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, 1447*0b57cec5SDimitry Andric DAG.getConstant(0, DL, IdxTy)); 1448*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode( 1449*0b57cec5SDimitry Andric HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL, 1450*0b57cec5SDimitry Andric HiVT, N, DAG.getConstant(LoVT.getVectorNumElements(), DL, IdxTy)); 1451*0b57cec5SDimitry Andric return std::make_pair(Lo, Hi); 1452*0b57cec5SDimitry Andric } 1453*0b57cec5SDimitry Andric 1454*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, 1455*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1456*0b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 1457*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1458*0b57cec5SDimitry Andric 1459*0b57cec5SDimitry Andric 1460*0b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 1461*0b57cec5SDimitry Andric // weird 1 element vectors. 1462*0b57cec5SDimitry Andric if (VT.getVectorNumElements() == 2) 1463*0b57cec5SDimitry Andric return scalarizeVectorLoad(Load, DAG); 1464*0b57cec5SDimitry Andric 1465*0b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 1466*0b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 1467*0b57cec5SDimitry Andric SDLoc SL(Op); 1468*0b57cec5SDimitry Andric 1469*0b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 1470*0b57cec5SDimitry Andric 1471*0b57cec5SDimitry Andric EVT LoVT, HiVT; 1472*0b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 1473*0b57cec5SDimitry Andric SDValue Lo, Hi; 1474*0b57cec5SDimitry Andric 1475*0b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 1476*0b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 1477*0b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG); 1478*0b57cec5SDimitry Andric 1479*0b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 1480*0b57cec5SDimitry Andric unsigned BaseAlign = Load->getAlignment(); 1481*0b57cec5SDimitry Andric unsigned HiAlign = MinAlign(BaseAlign, Size); 1482*0b57cec5SDimitry Andric 1483*0b57cec5SDimitry Andric SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, 1484*0b57cec5SDimitry Andric Load->getChain(), BasePtr, SrcValue, LoMemVT, 1485*0b57cec5SDimitry Andric BaseAlign, Load->getMemOperand()->getFlags()); 1486*0b57cec5SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, Size); 1487*0b57cec5SDimitry Andric SDValue HiLoad = 1488*0b57cec5SDimitry Andric DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), 1489*0b57cec5SDimitry Andric HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), 1490*0b57cec5SDimitry Andric HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); 1491*0b57cec5SDimitry Andric 1492*0b57cec5SDimitry Andric auto IdxTy = getVectorIdxTy(DAG.getDataLayout()); 1493*0b57cec5SDimitry Andric SDValue Join; 1494*0b57cec5SDimitry Andric if (LoVT == HiVT) { 1495*0b57cec5SDimitry Andric // This is the case that the vector is power of two so was evenly split. 
1496*0b57cec5SDimitry Andric Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad); 1497*0b57cec5SDimitry Andric } else { 1498*0b57cec5SDimitry Andric Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad, 1499*0b57cec5SDimitry Andric DAG.getConstant(0, SL, IdxTy)); 1500*0b57cec5SDimitry Andric Join = DAG.getNode(HiVT.isVector() ? ISD::INSERT_SUBVECTOR 1501*0b57cec5SDimitry Andric : ISD::INSERT_VECTOR_ELT, 1502*0b57cec5SDimitry Andric SL, VT, Join, HiLoad, 1503*0b57cec5SDimitry Andric DAG.getConstant(LoVT.getVectorNumElements(), SL, IdxTy)); 1504*0b57cec5SDimitry Andric } 1505*0b57cec5SDimitry Andric 1506*0b57cec5SDimitry Andric SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other, 1507*0b57cec5SDimitry Andric LoLoad.getValue(1), HiLoad.getValue(1))}; 1508*0b57cec5SDimitry Andric 1509*0b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SL); 1510*0b57cec5SDimitry Andric } 1511*0b57cec5SDimitry Andric 1512*0b57cec5SDimitry Andric // Widen a vector load from vec3 to vec4. 1513*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op, 1514*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1515*0b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 1516*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1517*0b57cec5SDimitry Andric assert(VT.getVectorNumElements() == 3); 1518*0b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 1519*0b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 1520*0b57cec5SDimitry Andric SDLoc SL(Op); 1521*0b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 1522*0b57cec5SDimitry Andric unsigned BaseAlign = Load->getAlignment(); 1523*0b57cec5SDimitry Andric 1524*0b57cec5SDimitry Andric EVT WideVT = 1525*0b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4); 1526*0b57cec5SDimitry Andric EVT WideMemVT = 1527*0b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4); 1528*0b57cec5SDimitry Andric SDValue WideLoad = DAG.getExtLoad( 1529*0b57cec5SDimitry Andric Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue, 1530*0b57cec5SDimitry Andric WideMemVT, BaseAlign, Load->getMemOperand()->getFlags()); 1531*0b57cec5SDimitry Andric return DAG.getMergeValues( 1532*0b57cec5SDimitry Andric {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad, 1533*0b57cec5SDimitry Andric DAG.getConstant(0, SL, getVectorIdxTy(DAG.getDataLayout()))), 1534*0b57cec5SDimitry Andric WideLoad.getValue(1)}, 1535*0b57cec5SDimitry Andric SL); 1536*0b57cec5SDimitry Andric } 1537*0b57cec5SDimitry Andric 1538*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 1539*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1540*0b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op); 1541*0b57cec5SDimitry Andric SDValue Val = Store->getValue(); 1542*0b57cec5SDimitry Andric EVT VT = Val.getValueType(); 1543*0b57cec5SDimitry Andric 1544*0b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 1545*0b57cec5SDimitry Andric // weird 1 element vectors. 
1546*0b57cec5SDimitry Andric if (VT.getVectorNumElements() == 2) 1547*0b57cec5SDimitry Andric return scalarizeVectorStore(Store, DAG); 1548*0b57cec5SDimitry Andric 1549*0b57cec5SDimitry Andric EVT MemVT = Store->getMemoryVT(); 1550*0b57cec5SDimitry Andric SDValue Chain = Store->getChain(); 1551*0b57cec5SDimitry Andric SDValue BasePtr = Store->getBasePtr(); 1552*0b57cec5SDimitry Andric SDLoc SL(Op); 1553*0b57cec5SDimitry Andric 1554*0b57cec5SDimitry Andric EVT LoVT, HiVT; 1555*0b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 1556*0b57cec5SDimitry Andric SDValue Lo, Hi; 1557*0b57cec5SDimitry Andric 1558*0b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 1559*0b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 1560*0b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG); 1561*0b57cec5SDimitry Andric 1562*0b57cec5SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); 1563*0b57cec5SDimitry Andric 1564*0b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); 1565*0b57cec5SDimitry Andric unsigned BaseAlign = Store->getAlignment(); 1566*0b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 1567*0b57cec5SDimitry Andric unsigned HiAlign = MinAlign(BaseAlign, Size); 1568*0b57cec5SDimitry Andric 1569*0b57cec5SDimitry Andric SDValue LoStore = 1570*0b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, 1571*0b57cec5SDimitry Andric Store->getMemOperand()->getFlags()); 1572*0b57cec5SDimitry Andric SDValue HiStore = 1573*0b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), 1574*0b57cec5SDimitry Andric HiMemVT, HiAlign, Store->getMemOperand()->getFlags()); 1575*0b57cec5SDimitry Andric 1576*0b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); 1577*0b57cec5SDimitry Andric } 1578*0b57cec5SDimitry Andric 1579*0b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32 1580*0b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a 1581*0b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer. 
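// Worked example (illustrative, unsigned path): an f32 has a 24-bit
// significand, so operands that fit in 24 bits convert exactly. For
// 1000000 / 3 the sequence built below is roughly:
//   fa = (float)1000000;  fb = (float)3;
//   fq = trunc(fa * rcp(fb));                // ~333333, may be one too small
//   fr = fabs(mad(-fq, fb, fa));             // estimate of the remainder
//   q  = (int)fq + (fr >= fabs(fb) ? 1 : 0); // corrected quotient, 333333
// The sign-bit checks in the function require at least 9 sign bits on each
// 32-bit operand, which is what guarantees the values fit in this range.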
1582*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, 1583*0b57cec5SDimitry Andric bool Sign) const { 1584*0b57cec5SDimitry Andric SDLoc DL(Op); 1585*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1586*0b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 1587*0b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 1588*0b57cec5SDimitry Andric MVT IntVT = MVT::i32; 1589*0b57cec5SDimitry Andric MVT FltVT = MVT::f32; 1590*0b57cec5SDimitry Andric 1591*0b57cec5SDimitry Andric unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS); 1592*0b57cec5SDimitry Andric if (LHSSignBits < 9) 1593*0b57cec5SDimitry Andric return SDValue(); 1594*0b57cec5SDimitry Andric 1595*0b57cec5SDimitry Andric unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS); 1596*0b57cec5SDimitry Andric if (RHSSignBits < 9) 1597*0b57cec5SDimitry Andric return SDValue(); 1598*0b57cec5SDimitry Andric 1599*0b57cec5SDimitry Andric unsigned BitSize = VT.getSizeInBits(); 1600*0b57cec5SDimitry Andric unsigned SignBits = std::min(LHSSignBits, RHSSignBits); 1601*0b57cec5SDimitry Andric unsigned DivBits = BitSize - SignBits; 1602*0b57cec5SDimitry Andric if (Sign) 1603*0b57cec5SDimitry Andric ++DivBits; 1604*0b57cec5SDimitry Andric 1605*0b57cec5SDimitry Andric ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; 1606*0b57cec5SDimitry Andric ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; 1607*0b57cec5SDimitry Andric 1608*0b57cec5SDimitry Andric SDValue jq = DAG.getConstant(1, DL, IntVT); 1609*0b57cec5SDimitry Andric 1610*0b57cec5SDimitry Andric if (Sign) { 1611*0b57cec5SDimitry Andric // char|short jq = ia ^ ib; 1612*0b57cec5SDimitry Andric jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS); 1613*0b57cec5SDimitry Andric 1614*0b57cec5SDimitry Andric // jq = jq >> (bitsize - 2) 1615*0b57cec5SDimitry Andric jq = DAG.getNode(ISD::SRA, DL, VT, jq, 1616*0b57cec5SDimitry Andric DAG.getConstant(BitSize - 2, DL, VT)); 1617*0b57cec5SDimitry Andric 1618*0b57cec5SDimitry Andric // jq = jq | 0x1 1619*0b57cec5SDimitry Andric jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT)); 1620*0b57cec5SDimitry Andric } 1621*0b57cec5SDimitry Andric 1622*0b57cec5SDimitry Andric // int ia = (int)LHS; 1623*0b57cec5SDimitry Andric SDValue ia = LHS; 1624*0b57cec5SDimitry Andric 1625*0b57cec5SDimitry Andric // int ib, (int)RHS; 1626*0b57cec5SDimitry Andric SDValue ib = RHS; 1627*0b57cec5SDimitry Andric 1628*0b57cec5SDimitry Andric // float fa = (float)ia; 1629*0b57cec5SDimitry Andric SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia); 1630*0b57cec5SDimitry Andric 1631*0b57cec5SDimitry Andric // float fb = (float)ib; 1632*0b57cec5SDimitry Andric SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); 1633*0b57cec5SDimitry Andric 1634*0b57cec5SDimitry Andric SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, 1635*0b57cec5SDimitry Andric fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); 1636*0b57cec5SDimitry Andric 1637*0b57cec5SDimitry Andric // fq = trunc(fq); 1638*0b57cec5SDimitry Andric fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq); 1639*0b57cec5SDimitry Andric 1640*0b57cec5SDimitry Andric // float fqneg = -fq; 1641*0b57cec5SDimitry Andric SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); 1642*0b57cec5SDimitry Andric 1643*0b57cec5SDimitry Andric // float fr = mad(fqneg, fb, fa); 1644*0b57cec5SDimitry Andric unsigned OpCode = Subtarget->hasFP32Denormals() ? 
1645*0b57cec5SDimitry Andric (unsigned)AMDGPUISD::FMAD_FTZ : 1646*0b57cec5SDimitry Andric (unsigned)ISD::FMAD; 1647*0b57cec5SDimitry Andric SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); 1648*0b57cec5SDimitry Andric 1649*0b57cec5SDimitry Andric // int iq = (int)fq; 1650*0b57cec5SDimitry Andric SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); 1651*0b57cec5SDimitry Andric 1652*0b57cec5SDimitry Andric // fr = fabs(fr); 1653*0b57cec5SDimitry Andric fr = DAG.getNode(ISD::FABS, DL, FltVT, fr); 1654*0b57cec5SDimitry Andric 1655*0b57cec5SDimitry Andric // fb = fabs(fb); 1656*0b57cec5SDimitry Andric fb = DAG.getNode(ISD::FABS, DL, FltVT, fb); 1657*0b57cec5SDimitry Andric 1658*0b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 1659*0b57cec5SDimitry Andric 1660*0b57cec5SDimitry Andric // int cv = fr >= fb; 1661*0b57cec5SDimitry Andric SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE); 1662*0b57cec5SDimitry Andric 1663*0b57cec5SDimitry Andric // jq = (cv ? jq : 0); 1664*0b57cec5SDimitry Andric jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT)); 1665*0b57cec5SDimitry Andric 1666*0b57cec5SDimitry Andric // dst = iq + jq; 1667*0b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq); 1668*0b57cec5SDimitry Andric 1669*0b57cec5SDimitry Andric // Rem needs compensation, it's easier to recompute it 1670*0b57cec5SDimitry Andric SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS); 1671*0b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem); 1672*0b57cec5SDimitry Andric 1673*0b57cec5SDimitry Andric // Truncate to number of bits this divide really is. 1674*0b57cec5SDimitry Andric if (Sign) { 1675*0b57cec5SDimitry Andric SDValue InRegSize 1676*0b57cec5SDimitry Andric = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits)); 1677*0b57cec5SDimitry Andric Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize); 1678*0b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize); 1679*0b57cec5SDimitry Andric } else { 1680*0b57cec5SDimitry Andric SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT); 1681*0b57cec5SDimitry Andric Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask); 1682*0b57cec5SDimitry Andric Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask); 1683*0b57cec5SDimitry Andric } 1684*0b57cec5SDimitry Andric 1685*0b57cec5SDimitry Andric return DAG.getMergeValues({ Div, Rem }, DL); 1686*0b57cec5SDimitry Andric } 1687*0b57cec5SDimitry Andric 1688*0b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, 1689*0b57cec5SDimitry Andric SelectionDAG &DAG, 1690*0b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) const { 1691*0b57cec5SDimitry Andric SDLoc DL(Op); 1692*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1693*0b57cec5SDimitry Andric 1694*0b57cec5SDimitry Andric assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64"); 1695*0b57cec5SDimitry Andric 1696*0b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 1697*0b57cec5SDimitry Andric 1698*0b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, HalfVT); 1699*0b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, HalfVT); 1700*0b57cec5SDimitry Andric 1701*0b57cec5SDimitry Andric //HiLo split 1702*0b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 1703*0b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); 1704*0b57cec5SDimitry Andric SDValue 
LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One); 1705*0b57cec5SDimitry Andric 1706*0b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 1707*0b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); 1708*0b57cec5SDimitry Andric SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One); 1709*0b57cec5SDimitry Andric 1710*0b57cec5SDimitry Andric if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && 1711*0b57cec5SDimitry Andric DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { 1712*0b57cec5SDimitry Andric 1713*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 1714*0b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 1715*0b57cec5SDimitry Andric 1716*0b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero}); 1717*0b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero}); 1718*0b57cec5SDimitry Andric 1719*0b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV)); 1720*0b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM)); 1721*0b57cec5SDimitry Andric return; 1722*0b57cec5SDimitry Andric } 1723*0b57cec5SDimitry Andric 1724*0b57cec5SDimitry Andric if (isTypeLegal(MVT::i64)) { 1725*0b57cec5SDimitry Andric // Compute denominator reciprocal. 1726*0b57cec5SDimitry Andric unsigned FMAD = Subtarget->hasFP32Denormals() ? 1727*0b57cec5SDimitry Andric (unsigned)AMDGPUISD::FMAD_FTZ : 1728*0b57cec5SDimitry Andric (unsigned)ISD::FMAD; 1729*0b57cec5SDimitry Andric 1730*0b57cec5SDimitry Andric SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); 1731*0b57cec5SDimitry Andric SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); 1732*0b57cec5SDimitry Andric SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi, 1733*0b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32), 1734*0b57cec5SDimitry Andric Cvt_Lo); 1735*0b57cec5SDimitry Andric SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1); 1736*0b57cec5SDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp, 1737*0b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32)); 1738*0b57cec5SDimitry Andric SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1, 1739*0b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32)); 1740*0b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2); 1741*0b57cec5SDimitry Andric SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc, 1742*0b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32), 1743*0b57cec5SDimitry Andric Mul1); 1744*0b57cec5SDimitry Andric SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2); 1745*0b57cec5SDimitry Andric SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc); 1746*0b57cec5SDimitry Andric SDValue Rcp64 = DAG.getBitcast(VT, 1747*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi})); 1748*0b57cec5SDimitry Andric 1749*0b57cec5SDimitry Andric SDValue Zero64 = DAG.getConstant(0, DL, VT); 1750*0b57cec5SDimitry Andric SDValue One64 = DAG.getConstant(1, DL, VT); 1751*0b57cec5SDimitry Andric SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1); 1752*0b57cec5SDimitry Andric SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1); 1753*0b57cec5SDimitry Andric 
1754*0b57cec5SDimitry Andric SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS); 1755*0b57cec5SDimitry Andric SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64); 1756*0b57cec5SDimitry Andric SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1); 1757*0b57cec5SDimitry Andric SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, 1758*0b57cec5SDimitry Andric Zero); 1759*0b57cec5SDimitry Andric SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, 1760*0b57cec5SDimitry Andric One); 1761*0b57cec5SDimitry Andric 1762*0b57cec5SDimitry Andric SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo, 1763*0b57cec5SDimitry Andric Mulhi1_Lo, Zero1); 1764*0b57cec5SDimitry Andric SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi, 1765*0b57cec5SDimitry Andric Mulhi1_Hi, Add1_Lo.getValue(1)); 1766*0b57cec5SDimitry Andric SDValue Add1_HiNc = DAG.getNode(ISD::ADD, DL, HalfVT, Rcp_Hi, Mulhi1_Hi); 1767*0b57cec5SDimitry Andric SDValue Add1 = DAG.getBitcast(VT, 1768*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi})); 1769*0b57cec5SDimitry Andric 1770*0b57cec5SDimitry Andric SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1); 1771*0b57cec5SDimitry Andric SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2); 1772*0b57cec5SDimitry Andric SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, 1773*0b57cec5SDimitry Andric Zero); 1774*0b57cec5SDimitry Andric SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, 1775*0b57cec5SDimitry Andric One); 1776*0b57cec5SDimitry Andric 1777*0b57cec5SDimitry Andric SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo, 1778*0b57cec5SDimitry Andric Mulhi2_Lo, Zero1); 1779*0b57cec5SDimitry Andric SDValue Add2_HiC = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_HiNc, 1780*0b57cec5SDimitry Andric Mulhi2_Hi, Add1_Lo.getValue(1)); 1781*0b57cec5SDimitry Andric SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add2_HiC, 1782*0b57cec5SDimitry Andric Zero, Add2_Lo.getValue(1)); 1783*0b57cec5SDimitry Andric SDValue Add2 = DAG.getBitcast(VT, 1784*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi})); 1785*0b57cec5SDimitry Andric SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2); 1786*0b57cec5SDimitry Andric 1787*0b57cec5SDimitry Andric SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3); 1788*0b57cec5SDimitry Andric 1789*0b57cec5SDimitry Andric SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero); 1790*0b57cec5SDimitry Andric SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One); 1791*0b57cec5SDimitry Andric SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo, 1792*0b57cec5SDimitry Andric Mul3_Lo, Zero1); 1793*0b57cec5SDimitry Andric SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi, 1794*0b57cec5SDimitry Andric Mul3_Hi, Sub1_Lo.getValue(1)); 1795*0b57cec5SDimitry Andric SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi); 1796*0b57cec5SDimitry Andric SDValue Sub1 = DAG.getBitcast(VT, 1797*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi})); 1798*0b57cec5SDimitry Andric 1799*0b57cec5SDimitry Andric SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT); 1800*0b57cec5SDimitry Andric SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero, 1801*0b57cec5SDimitry Andric ISD::SETUGE); 
1802*0b57cec5SDimitry Andric SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
1803*0b57cec5SDimitry Andric ISD::SETUGE);
1804*0b57cec5SDimitry Andric SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
1805*0b57cec5SDimitry Andric
1806*0b57cec5SDimitry Andric // TODO: Here and below portions of the code can be enclosed in if/endif.
1807*0b57cec5SDimitry Andric // Currently control flow is unconditional and we have 4 selects after
1808*0b57cec5SDimitry Andric // potential endif to substitute PHIs.
1809*0b57cec5SDimitry Andric
1810*0b57cec5SDimitry Andric // if C3 != 0 ...
1811*0b57cec5SDimitry Andric SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo,
1812*0b57cec5SDimitry Andric RHS_Lo, Zero1);
1813*0b57cec5SDimitry Andric SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi,
1814*0b57cec5SDimitry Andric RHS_Hi, Sub1_Lo.getValue(1));
1815*0b57cec5SDimitry Andric SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
1816*0b57cec5SDimitry Andric Zero, Sub2_Lo.getValue(1));
1817*0b57cec5SDimitry Andric SDValue Sub2 = DAG.getBitcast(VT,
1818*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
1819*0b57cec5SDimitry Andric
1820*0b57cec5SDimitry Andric SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
1821*0b57cec5SDimitry Andric
1822*0b57cec5SDimitry Andric SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
1823*0b57cec5SDimitry Andric ISD::SETUGE);
1824*0b57cec5SDimitry Andric SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
1825*0b57cec5SDimitry Andric ISD::SETUGE);
1826*0b57cec5SDimitry Andric SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
1827*0b57cec5SDimitry Andric
1828*0b57cec5SDimitry Andric // if (C6 != 0)
1829*0b57cec5SDimitry Andric SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
1830*0b57cec5SDimitry Andric
1831*0b57cec5SDimitry Andric SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo,
1832*0b57cec5SDimitry Andric RHS_Lo, Zero1);
1833*0b57cec5SDimitry Andric SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
1834*0b57cec5SDimitry Andric RHS_Hi, Sub2_Lo.getValue(1));
1835*0b57cec5SDimitry Andric SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi,
1836*0b57cec5SDimitry Andric Zero, Sub3_Lo.getValue(1));
1837*0b57cec5SDimitry Andric SDValue Sub3 = DAG.getBitcast(VT,
1838*0b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
1839*0b57cec5SDimitry Andric
1840*0b57cec5SDimitry Andric // endif C6
1841*0b57cec5SDimitry Andric // endif C3
1842*0b57cec5SDimitry Andric
1843*0b57cec5SDimitry Andric SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
1844*0b57cec5SDimitry Andric SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
1845*0b57cec5SDimitry Andric
1846*0b57cec5SDimitry Andric SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
1847*0b57cec5SDimitry Andric SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
1848*0b57cec5SDimitry Andric
1849*0b57cec5SDimitry Andric Results.push_back(Div);
1850*0b57cec5SDimitry Andric Results.push_back(Rem);
1851*0b57cec5SDimitry Andric
1852*0b57cec5SDimitry Andric return;
1853*0b57cec5SDimitry Andric }
1854*0b57cec5SDimitry Andric
1855*0b57cec5SDimitry Andric // r600 expansion.
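// Rough outline of the fallback below (a classic restoring division): seed the
// result with a speculative 32-bit divide of the high half of the dividend,
// then walk the bits of LHS_Lo from MSB to LSB, shifting the remainder left,
// bringing in the next bit, and subtracting RHS (and setting the corresponding
// quotient bit) whenever REM >= RHS.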
1856*0b57cec5SDimitry Andric // Get Speculative values 1857*0b57cec5SDimitry Andric SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); 1858*0b57cec5SDimitry Andric SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); 1859*0b57cec5SDimitry Andric 1860*0b57cec5SDimitry Andric SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ); 1861*0b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero}); 1862*0b57cec5SDimitry Andric REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM); 1863*0b57cec5SDimitry Andric 1864*0b57cec5SDimitry Andric SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ); 1865*0b57cec5SDimitry Andric SDValue DIV_Lo = Zero; 1866*0b57cec5SDimitry Andric 1867*0b57cec5SDimitry Andric const unsigned halfBitWidth = HalfVT.getSizeInBits(); 1868*0b57cec5SDimitry Andric 1869*0b57cec5SDimitry Andric for (unsigned i = 0; i < halfBitWidth; ++i) { 1870*0b57cec5SDimitry Andric const unsigned bitPos = halfBitWidth - i - 1; 1871*0b57cec5SDimitry Andric SDValue POS = DAG.getConstant(bitPos, DL, HalfVT); 1872*0b57cec5SDimitry Andric // Get value of high bit 1873*0b57cec5SDimitry Andric SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); 1874*0b57cec5SDimitry Andric HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One); 1875*0b57cec5SDimitry Andric HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); 1876*0b57cec5SDimitry Andric 1877*0b57cec5SDimitry Andric // Shift 1878*0b57cec5SDimitry Andric REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT)); 1879*0b57cec5SDimitry Andric // Add LHS high bit 1880*0b57cec5SDimitry Andric REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit); 1881*0b57cec5SDimitry Andric 1882*0b57cec5SDimitry Andric SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT); 1883*0b57cec5SDimitry Andric SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE); 1884*0b57cec5SDimitry Andric 1885*0b57cec5SDimitry Andric DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); 1886*0b57cec5SDimitry Andric 1887*0b57cec5SDimitry Andric // Update REM 1888*0b57cec5SDimitry Andric SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); 1889*0b57cec5SDimitry Andric REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE); 1890*0b57cec5SDimitry Andric } 1891*0b57cec5SDimitry Andric 1892*0b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi}); 1893*0b57cec5SDimitry Andric DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV); 1894*0b57cec5SDimitry Andric Results.push_back(DIV); 1895*0b57cec5SDimitry Andric Results.push_back(REM); 1896*0b57cec5SDimitry Andric } 1897*0b57cec5SDimitry Andric 1898*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 1899*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 1900*0b57cec5SDimitry Andric SDLoc DL(Op); 1901*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1902*0b57cec5SDimitry Andric 1903*0b57cec5SDimitry Andric if (VT == MVT::i64) { 1904*0b57cec5SDimitry Andric SmallVector<SDValue, 2> Results; 1905*0b57cec5SDimitry Andric LowerUDIVREM64(Op, DAG, Results); 1906*0b57cec5SDimitry Andric return DAG.getMergeValues(Results, DL); 1907*0b57cec5SDimitry Andric } 1908*0b57cec5SDimitry Andric 1909*0b57cec5SDimitry Andric if (VT == MVT::i32) { 1910*0b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, false)) 1911*0b57cec5SDimitry Andric return Res; 1912*0b57cec5SDimitry Andric } 1913*0b57cec5SDimitry Andric 1914*0b57cec5SDimitry 
Andric SDValue Num = Op.getOperand(0);
1915*0b57cec5SDimitry Andric SDValue Den = Op.getOperand(1);
1916*0b57cec5SDimitry Andric
1917*0b57cec5SDimitry Andric // RCP = URECIP(Den) = 2^32 / Den + e
1918*0b57cec5SDimitry Andric // e is rounding error.
1919*0b57cec5SDimitry Andric SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1920*0b57cec5SDimitry Andric
1921*0b57cec5SDimitry Andric // RCP_LO = mul(RCP, Den)
1922*0b57cec5SDimitry Andric SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1923*0b57cec5SDimitry Andric
1924*0b57cec5SDimitry Andric // RCP_HI = mulhu(RCP, Den)
1925*0b57cec5SDimitry Andric SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1926*0b57cec5SDimitry Andric
1927*0b57cec5SDimitry Andric // NEG_RCP_LO = -RCP_LO
1928*0b57cec5SDimitry Andric SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
1929*0b57cec5SDimitry Andric RCP_LO);
1930*0b57cec5SDimitry Andric
1931*0b57cec5SDimitry Andric // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1932*0b57cec5SDimitry Andric SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1933*0b57cec5SDimitry Andric NEG_RCP_LO, RCP_LO,
1934*0b57cec5SDimitry Andric ISD::SETEQ);
1935*0b57cec5SDimitry Andric // Calculate the rounding error from the URECIP instruction
1936*0b57cec5SDimitry Andric // E = mulhu(ABS_RCP_LO, RCP)
1937*0b57cec5SDimitry Andric SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1938*0b57cec5SDimitry Andric
1939*0b57cec5SDimitry Andric // RCP_A_E = RCP + E
1940*0b57cec5SDimitry Andric SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1941*0b57cec5SDimitry Andric
1942*0b57cec5SDimitry Andric // RCP_S_E = RCP - E
1943*0b57cec5SDimitry Andric SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1944*0b57cec5SDimitry Andric
1945*0b57cec5SDimitry Andric // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
1946*0b57cec5SDimitry Andric SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1947*0b57cec5SDimitry Andric RCP_A_E, RCP_S_E,
1948*0b57cec5SDimitry Andric ISD::SETEQ);
1949*0b57cec5SDimitry Andric // Quotient = mulhu(Tmp0, Num)
1950*0b57cec5SDimitry Andric SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1951*0b57cec5SDimitry Andric
1952*0b57cec5SDimitry Andric // Num_S_Remainder = Quotient * Den
1953*0b57cec5SDimitry Andric SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1954*0b57cec5SDimitry Andric
1955*0b57cec5SDimitry Andric // Remainder = Num - Num_S_Remainder
1956*0b57cec5SDimitry Andric SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1957*0b57cec5SDimitry Andric
1958*0b57cec5SDimitry Andric // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1959*0b57cec5SDimitry Andric SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1960*0b57cec5SDimitry Andric DAG.getConstant(-1, DL, VT),
1961*0b57cec5SDimitry Andric DAG.getConstant(0, DL, VT),
1962*0b57cec5SDimitry Andric ISD::SETUGE);
1963*0b57cec5SDimitry Andric // Remainder_GE_Zero = (Num >= Num_S_Remainder ?
-1 : 0) 1964*0b57cec5SDimitry Andric SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, 1965*0b57cec5SDimitry Andric Num_S_Remainder, 1966*0b57cec5SDimitry Andric DAG.getConstant(-1, DL, VT), 1967*0b57cec5SDimitry Andric DAG.getConstant(0, DL, VT), 1968*0b57cec5SDimitry Andric ISD::SETUGE); 1969*0b57cec5SDimitry Andric // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 1970*0b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 1971*0b57cec5SDimitry Andric Remainder_GE_Zero); 1972*0b57cec5SDimitry Andric 1973*0b57cec5SDimitry Andric // Calculate Division result: 1974*0b57cec5SDimitry Andric 1975*0b57cec5SDimitry Andric // Quotient_A_One = Quotient + 1 1976*0b57cec5SDimitry Andric SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 1977*0b57cec5SDimitry Andric DAG.getConstant(1, DL, VT)); 1978*0b57cec5SDimitry Andric 1979*0b57cec5SDimitry Andric // Quotient_S_One = Quotient - 1 1980*0b57cec5SDimitry Andric SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 1981*0b57cec5SDimitry Andric DAG.getConstant(1, DL, VT)); 1982*0b57cec5SDimitry Andric 1983*0b57cec5SDimitry Andric // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 1984*0b57cec5SDimitry Andric SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT), 1985*0b57cec5SDimitry Andric Quotient, Quotient_A_One, ISD::SETEQ); 1986*0b57cec5SDimitry Andric 1987*0b57cec5SDimitry Andric // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 1988*0b57cec5SDimitry Andric Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT), 1989*0b57cec5SDimitry Andric Quotient_S_One, Div, ISD::SETEQ); 1990*0b57cec5SDimitry Andric 1991*0b57cec5SDimitry Andric // Calculate Rem result: 1992*0b57cec5SDimitry Andric 1993*0b57cec5SDimitry Andric // Remainder_S_Den = Remainder - Den 1994*0b57cec5SDimitry Andric SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 1995*0b57cec5SDimitry Andric 1996*0b57cec5SDimitry Andric // Remainder_A_Den = Remainder + Den 1997*0b57cec5SDimitry Andric SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 1998*0b57cec5SDimitry Andric 1999*0b57cec5SDimitry Andric // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 2000*0b57cec5SDimitry Andric SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT), 2001*0b57cec5SDimitry Andric Remainder, Remainder_S_Den, ISD::SETEQ); 2002*0b57cec5SDimitry Andric 2003*0b57cec5SDimitry Andric // Rem = (Remainder_GE_Zero == 0 ? 
Remainder_A_Den : Rem) 2004*0b57cec5SDimitry Andric Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT), 2005*0b57cec5SDimitry Andric Remainder_A_Den, Rem, ISD::SETEQ); 2006*0b57cec5SDimitry Andric SDValue Ops[2] = { 2007*0b57cec5SDimitry Andric Div, 2008*0b57cec5SDimitry Andric Rem 2009*0b57cec5SDimitry Andric }; 2010*0b57cec5SDimitry Andric return DAG.getMergeValues(Ops, DL); 2011*0b57cec5SDimitry Andric } 2012*0b57cec5SDimitry Andric 2013*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, 2014*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2015*0b57cec5SDimitry Andric SDLoc DL(Op); 2016*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2017*0b57cec5SDimitry Andric 2018*0b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 2019*0b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 2020*0b57cec5SDimitry Andric 2021*0b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, VT); 2022*0b57cec5SDimitry Andric SDValue NegOne = DAG.getConstant(-1, DL, VT); 2023*0b57cec5SDimitry Andric 2024*0b57cec5SDimitry Andric if (VT == MVT::i32) { 2025*0b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, true)) 2026*0b57cec5SDimitry Andric return Res; 2027*0b57cec5SDimitry Andric } 2028*0b57cec5SDimitry Andric 2029*0b57cec5SDimitry Andric if (VT == MVT::i64 && 2030*0b57cec5SDimitry Andric DAG.ComputeNumSignBits(LHS) > 32 && 2031*0b57cec5SDimitry Andric DAG.ComputeNumSignBits(RHS) > 32) { 2032*0b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 2033*0b57cec5SDimitry Andric 2034*0b57cec5SDimitry Andric //HiLo split 2035*0b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); 2036*0b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); 2037*0b57cec5SDimitry Andric SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 2038*0b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 2039*0b57cec5SDimitry Andric SDValue Res[2] = { 2040*0b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)), 2041*0b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1)) 2042*0b57cec5SDimitry Andric }; 2043*0b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 2044*0b57cec5SDimitry Andric } 2045*0b57cec5SDimitry Andric 2046*0b57cec5SDimitry Andric SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); 2047*0b57cec5SDimitry Andric SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); 2048*0b57cec5SDimitry Andric SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); 2049*0b57cec5SDimitry Andric SDValue RSign = LHSign; // Remainder sign is the same as LHS 2050*0b57cec5SDimitry Andric 2051*0b57cec5SDimitry Andric LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign); 2052*0b57cec5SDimitry Andric RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign); 2053*0b57cec5SDimitry Andric 2054*0b57cec5SDimitry Andric LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign); 2055*0b57cec5SDimitry Andric RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign); 2056*0b57cec5SDimitry Andric 2057*0b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS); 2058*0b57cec5SDimitry Andric SDValue Rem = Div.getValue(1); 2059*0b57cec5SDimitry Andric 2060*0b57cec5SDimitry Andric Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign); 2061*0b57cec5SDimitry Andric Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign); 2062*0b57cec5SDimitry Andric 
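  // (v ^ sign) - sign conditionally negates v: with sign == -1 this is
  // ~v + 1 == -v, and with sign == 0 it is a no-op. The add/xor pairs above
  // used the complementary form of the same identity to take |LHS| and |RHS|.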
2063*0b57cec5SDimitry Andric Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign); 2064*0b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign); 2065*0b57cec5SDimitry Andric 2066*0b57cec5SDimitry Andric SDValue Res[2] = { 2067*0b57cec5SDimitry Andric Div, 2068*0b57cec5SDimitry Andric Rem 2069*0b57cec5SDimitry Andric }; 2070*0b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 2071*0b57cec5SDimitry Andric } 2072*0b57cec5SDimitry Andric 2073*0b57cec5SDimitry Andric // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y)) 2074*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { 2075*0b57cec5SDimitry Andric SDLoc SL(Op); 2076*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2077*0b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 2078*0b57cec5SDimitry Andric SDValue Y = Op.getOperand(1); 2079*0b57cec5SDimitry Andric 2080*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 2081*0b57cec5SDimitry Andric 2082*0b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); 2083*0b57cec5SDimitry Andric SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); 2084*0b57cec5SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); 2085*0b57cec5SDimitry Andric 2086*0b57cec5SDimitry Andric return DAG.getNode(ISD::FSUB, SL, VT, X, Mul); 2087*0b57cec5SDimitry Andric } 2088*0b57cec5SDimitry Andric 2089*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { 2090*0b57cec5SDimitry Andric SDLoc SL(Op); 2091*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2092*0b57cec5SDimitry Andric 2093*0b57cec5SDimitry Andric // result = trunc(src) 2094*0b57cec5SDimitry Andric // if (src > 0.0 && src != result) 2095*0b57cec5SDimitry Andric // result += 1.0 2096*0b57cec5SDimitry Andric 2097*0b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 2098*0b57cec5SDimitry Andric 2099*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 2100*0b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); 2101*0b57cec5SDimitry Andric 2102*0b57cec5SDimitry Andric EVT SetCCVT = 2103*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 2104*0b57cec5SDimitry Andric 2105*0b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); 2106*0b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 2107*0b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 2108*0b57cec5SDimitry Andric 2109*0b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); 2110*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 
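  // For example: ceil(2.3) -> Trunc = 2.0, the condition holds, Add = 1.0 and
  // the result is 3.0; ceil(-2.3) -> Trunc = -2.0, Src > 0.0 fails, Add = 0.0
  // and the result stays -2.0.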
2111*0b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 2112*0b57cec5SDimitry Andric } 2113*0b57cec5SDimitry Andric 2114*0b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, 2115*0b57cec5SDimitry Andric SelectionDAG &DAG) { 2116*0b57cec5SDimitry Andric const unsigned FractBits = 52; 2117*0b57cec5SDimitry Andric const unsigned ExpBits = 11; 2118*0b57cec5SDimitry Andric 2119*0b57cec5SDimitry Andric SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, 2120*0b57cec5SDimitry Andric Hi, 2121*0b57cec5SDimitry Andric DAG.getConstant(FractBits - 32, SL, MVT::i32), 2122*0b57cec5SDimitry Andric DAG.getConstant(ExpBits, SL, MVT::i32)); 2123*0b57cec5SDimitry Andric SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, 2124*0b57cec5SDimitry Andric DAG.getConstant(1023, SL, MVT::i32)); 2125*0b57cec5SDimitry Andric 2126*0b57cec5SDimitry Andric return Exp; 2127*0b57cec5SDimitry Andric } 2128*0b57cec5SDimitry Andric 2129*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { 2130*0b57cec5SDimitry Andric SDLoc SL(Op); 2131*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2132*0b57cec5SDimitry Andric 2133*0b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 2134*0b57cec5SDimitry Andric 2135*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 2136*0b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 2137*0b57cec5SDimitry Andric 2138*0b57cec5SDimitry Andric SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); 2139*0b57cec5SDimitry Andric 2140*0b57cec5SDimitry Andric // Extract the upper half, since this is where we will find the sign and 2141*0b57cec5SDimitry Andric // exponent. 2142*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One); 2143*0b57cec5SDimitry Andric 2144*0b57cec5SDimitry Andric SDValue Exp = extractF64Exponent(Hi, SL, DAG); 2145*0b57cec5SDimitry Andric 2146*0b57cec5SDimitry Andric const unsigned FractBits = 52; 2147*0b57cec5SDimitry Andric 2148*0b57cec5SDimitry Andric // Extract the sign bit. 2149*0b57cec5SDimitry Andric const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32); 2150*0b57cec5SDimitry Andric SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); 2151*0b57cec5SDimitry Andric 2152*0b57cec5SDimitry Andric // Extend back to 64-bits. 
2153*0b57cec5SDimitry Andric SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit}); 2154*0b57cec5SDimitry Andric SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); 2155*0b57cec5SDimitry Andric 2156*0b57cec5SDimitry Andric SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); 2157*0b57cec5SDimitry Andric const SDValue FractMask 2158*0b57cec5SDimitry Andric = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64); 2159*0b57cec5SDimitry Andric 2160*0b57cec5SDimitry Andric SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); 2161*0b57cec5SDimitry Andric SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); 2162*0b57cec5SDimitry Andric SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); 2163*0b57cec5SDimitry Andric 2164*0b57cec5SDimitry Andric EVT SetCCVT = 2165*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); 2166*0b57cec5SDimitry Andric 2167*0b57cec5SDimitry Andric const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); 2168*0b57cec5SDimitry Andric 2169*0b57cec5SDimitry Andric SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); 2170*0b57cec5SDimitry Andric SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); 2171*0b57cec5SDimitry Andric 2172*0b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); 2173*0b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); 2174*0b57cec5SDimitry Andric 2175*0b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); 2176*0b57cec5SDimitry Andric } 2177*0b57cec5SDimitry Andric 2178*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { 2179*0b57cec5SDimitry Andric SDLoc SL(Op); 2180*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2181*0b57cec5SDimitry Andric 2182*0b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 2183*0b57cec5SDimitry Andric 2184*0b57cec5SDimitry Andric APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); 2185*0b57cec5SDimitry Andric SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); 2186*0b57cec5SDimitry Andric SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); 2187*0b57cec5SDimitry Andric 2188*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 
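  // CopySign is +/-2^52 with the sign of Src. Adding and then subtracting it
  // forces rounding to an integer in the default round-to-nearest-even mode,
  // since at magnitude 2^52 an f64 has no fraction bits left. Inputs whose
  // magnitude already exceeds C2 (just below 2^52) are integral and are
  // returned unchanged by the final select.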
2189*0b57cec5SDimitry Andric 2190*0b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); 2191*0b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); 2192*0b57cec5SDimitry Andric 2193*0b57cec5SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); 2194*0b57cec5SDimitry Andric 2195*0b57cec5SDimitry Andric APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); 2196*0b57cec5SDimitry Andric SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); 2197*0b57cec5SDimitry Andric 2198*0b57cec5SDimitry Andric EVT SetCCVT = 2199*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 2200*0b57cec5SDimitry Andric SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); 2201*0b57cec5SDimitry Andric 2202*0b57cec5SDimitry Andric return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); 2203*0b57cec5SDimitry Andric } 2204*0b57cec5SDimitry Andric 2205*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { 2206*0b57cec5SDimitry Andric // FNEARBYINT and FRINT are the same, except in their handling of FP 2207*0b57cec5SDimitry Andric // exceptions. Those aren't really meaningful for us, and OpenCL only has 2208*0b57cec5SDimitry Andric // rint, so just treat them as equivalent. 2209*0b57cec5SDimitry Andric return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); 2210*0b57cec5SDimitry Andric } 2211*0b57cec5SDimitry Andric 2212*0b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals? 2213*0b57cec5SDimitry Andric 2214*0b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the 2215*0b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole 2216*0b57cec5SDimitry Andric // operation. 2217*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const { 2218*0b57cec5SDimitry Andric SDLoc SL(Op); 2219*0b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 2220*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2221*0b57cec5SDimitry Andric 2222*0b57cec5SDimitry Andric SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X); 2223*0b57cec5SDimitry Andric 2224*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 
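  // round(x) is computed as trunc(x) plus a correction of copysign(1.0, x)
  // when |x - trunc(x)| >= 0.5, and 0.0 otherwise. For example round(-2.5f):
  // T = -2.0, AbsDiff = 0.5, so Sel = -1.0 and the result is -3.0 (halfway
  // cases round away from zero).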
2225*0b57cec5SDimitry Andric 2226*0b57cec5SDimitry Andric SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T); 2227*0b57cec5SDimitry Andric 2228*0b57cec5SDimitry Andric SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff); 2229*0b57cec5SDimitry Andric 2230*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 2231*0b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, VT); 2232*0b57cec5SDimitry Andric const SDValue Half = DAG.getConstantFP(0.5, SL, VT); 2233*0b57cec5SDimitry Andric 2234*0b57cec5SDimitry Andric SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X); 2235*0b57cec5SDimitry Andric 2236*0b57cec5SDimitry Andric EVT SetCCVT = 2237*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 2238*0b57cec5SDimitry Andric 2239*0b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); 2240*0b57cec5SDimitry Andric 2241*0b57cec5SDimitry Andric SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero); 2242*0b57cec5SDimitry Andric 2243*0b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, T, Sel); 2244*0b57cec5SDimitry Andric } 2245*0b57cec5SDimitry Andric 2246*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const { 2247*0b57cec5SDimitry Andric SDLoc SL(Op); 2248*0b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 2249*0b57cec5SDimitry Andric 2250*0b57cec5SDimitry Andric SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X); 2251*0b57cec5SDimitry Andric 2252*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 2253*0b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 2254*0b57cec5SDimitry Andric const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32); 2255*0b57cec5SDimitry Andric const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32); 2256*0b57cec5SDimitry Andric EVT SetCCVT = 2257*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); 2258*0b57cec5SDimitry Andric 2259*0b57cec5SDimitry Andric SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X); 2260*0b57cec5SDimitry Andric 2261*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One); 2262*0b57cec5SDimitry Andric 2263*0b57cec5SDimitry Andric SDValue Exp = extractF64Exponent(Hi, SL, DAG); 2264*0b57cec5SDimitry Andric 2265*0b57cec5SDimitry Andric const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL, 2266*0b57cec5SDimitry Andric MVT::i64); 2267*0b57cec5SDimitry Andric 2268*0b57cec5SDimitry Andric SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp); 2269*0b57cec5SDimitry Andric SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64, 2270*0b57cec5SDimitry Andric DAG.getConstant(INT64_C(0x0008000000000000), SL, 2271*0b57cec5SDimitry Andric MVT::i64), 2272*0b57cec5SDimitry Andric Exp); 2273*0b57cec5SDimitry Andric 2274*0b57cec5SDimitry Andric SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M); 2275*0b57cec5SDimitry Andric SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT, 2276*0b57cec5SDimitry Andric DAG.getConstant(0, SL, MVT::i64), Tmp0, 2277*0b57cec5SDimitry Andric ISD::SETNE); 2278*0b57cec5SDimitry Andric 2279*0b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1, 2280*0b57cec5SDimitry Andric D, DAG.getConstant(0, SL, MVT::i64)); 2281*0b57cec5SDimitry Andric SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2); 2282*0b57cec5SDimitry Andric 2283*0b57cec5SDimitry Andric K = 
DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64)); 2284*0b57cec5SDimitry Andric K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K); 2285*0b57cec5SDimitry Andric 2286*0b57cec5SDimitry Andric SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); 2287*0b57cec5SDimitry Andric SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); 2288*0b57cec5SDimitry Andric SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ); 2289*0b57cec5SDimitry Andric 2290*0b57cec5SDimitry Andric SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64, 2291*0b57cec5SDimitry Andric ExpEqNegOne, 2292*0b57cec5SDimitry Andric DAG.getConstantFP(1.0, SL, MVT::f64), 2293*0b57cec5SDimitry Andric DAG.getConstantFP(0.0, SL, MVT::f64)); 2294*0b57cec5SDimitry Andric 2295*0b57cec5SDimitry Andric SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X); 2296*0b57cec5SDimitry Andric 2297*0b57cec5SDimitry Andric K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K); 2298*0b57cec5SDimitry Andric K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K); 2299*0b57cec5SDimitry Andric 2300*0b57cec5SDimitry Andric return K; 2301*0b57cec5SDimitry Andric } 2302*0b57cec5SDimitry Andric 2303*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { 2304*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2305*0b57cec5SDimitry Andric 2306*0b57cec5SDimitry Andric if (VT == MVT::f32 || VT == MVT::f16) 2307*0b57cec5SDimitry Andric return LowerFROUND32_16(Op, DAG); 2308*0b57cec5SDimitry Andric 2309*0b57cec5SDimitry Andric if (VT == MVT::f64) 2310*0b57cec5SDimitry Andric return LowerFROUND64(Op, DAG); 2311*0b57cec5SDimitry Andric 2312*0b57cec5SDimitry Andric llvm_unreachable("unhandled type"); 2313*0b57cec5SDimitry Andric } 2314*0b57cec5SDimitry Andric 2315*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { 2316*0b57cec5SDimitry Andric SDLoc SL(Op); 2317*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2318*0b57cec5SDimitry Andric 2319*0b57cec5SDimitry Andric // result = trunc(src); 2320*0b57cec5SDimitry Andric // if (src < 0.0 && src != result) 2321*0b57cec5SDimitry Andric // result += -1.0. 2322*0b57cec5SDimitry Andric 2323*0b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 2324*0b57cec5SDimitry Andric 2325*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 2326*0b57cec5SDimitry Andric const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); 2327*0b57cec5SDimitry Andric 2328*0b57cec5SDimitry Andric EVT SetCCVT = 2329*0b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 2330*0b57cec5SDimitry Andric 2331*0b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); 2332*0b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 2333*0b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 2334*0b57cec5SDimitry Andric 2335*0b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); 2336*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 
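  // For example: floor(-2.3) -> Trunc = -2.0, the condition holds, Add = -1.0
  // and the result is -3.0; for non-negative inputs the condition fails and
  // floor(x) == trunc(x).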
2337*0b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 2338*0b57cec5SDimitry Andric } 2339*0b57cec5SDimitry Andric 2340*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, 2341*0b57cec5SDimitry Andric double Log2BaseInverted) const { 2342*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2343*0b57cec5SDimitry Andric 2344*0b57cec5SDimitry Andric SDLoc SL(Op); 2345*0b57cec5SDimitry Andric SDValue Operand = Op.getOperand(0); 2346*0b57cec5SDimitry Andric SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand); 2347*0b57cec5SDimitry Andric SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); 2348*0b57cec5SDimitry Andric 2349*0b57cec5SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); 2350*0b57cec5SDimitry Andric } 2351*0b57cec5SDimitry Andric 2352*0b57cec5SDimitry Andric // Return M_LOG2E of appropriate type 2353*0b57cec5SDimitry Andric static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) { 2354*0b57cec5SDimitry Andric switch (VT.getScalarType().getSimpleVT().SimpleTy) { 2355*0b57cec5SDimitry Andric case MVT::f32: 2356*0b57cec5SDimitry Andric return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT); 2357*0b57cec5SDimitry Andric case MVT::f16: 2358*0b57cec5SDimitry Andric return DAG.getConstantFP( 2359*0b57cec5SDimitry Andric APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"), 2360*0b57cec5SDimitry Andric SL, VT); 2361*0b57cec5SDimitry Andric case MVT::f64: 2362*0b57cec5SDimitry Andric return DAG.getConstantFP( 2363*0b57cec5SDimitry Andric APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT); 2364*0b57cec5SDimitry Andric default: 2365*0b57cec5SDimitry Andric llvm_unreachable("unsupported fp type"); 2366*0b57cec5SDimitry Andric } 2367*0b57cec5SDimitry Andric } 2368*0b57cec5SDimitry Andric 2369*0b57cec5SDimitry Andric // exp2(M_LOG2E_F * f); 2370*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { 2371*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2372*0b57cec5SDimitry Andric SDLoc SL(Op); 2373*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2374*0b57cec5SDimitry Andric 2375*0b57cec5SDimitry Andric const SDValue K = getLog2EVal(DAG, SL, VT); 2376*0b57cec5SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags()); 2377*0b57cec5SDimitry Andric return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); 2378*0b57cec5SDimitry Andric } 2379*0b57cec5SDimitry Andric 2380*0b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) { 2381*0b57cec5SDimitry Andric return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; 2382*0b57cec5SDimitry Andric } 2383*0b57cec5SDimitry Andric 2384*0b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) { 2385*0b57cec5SDimitry Andric return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF; 2386*0b57cec5SDimitry Andric } 2387*0b57cec5SDimitry Andric 2388*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { 2389*0b57cec5SDimitry Andric SDLoc SL(Op); 2390*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2391*0b57cec5SDimitry Andric bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF || 2392*0b57cec5SDimitry Andric Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF; 2393*0b57cec5SDimitry Andric 2394*0b57cec5SDimitry Andric unsigned ISDOpc, NewOpc; 2395*0b57cec5SDimitry Andric if (isCtlzOpc(Op.getOpcode())) { 
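  // FFBH_U32 ("find first bit high") is the AMDGPU count-leading-zeros node
  // and FFBL_B32 ("find first bit low") the count-trailing-zeros one; both
  // return -1 for a zero input (see the comment in the !ZeroUndef path below),
  // which is why the zero case is patched up with an extra select.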
2396*0b57cec5SDimitry Andric ISDOpc = ISD::CTLZ_ZERO_UNDEF; 2397*0b57cec5SDimitry Andric NewOpc = AMDGPUISD::FFBH_U32; 2398*0b57cec5SDimitry Andric } else if (isCttzOpc(Op.getOpcode())) { 2399*0b57cec5SDimitry Andric ISDOpc = ISD::CTTZ_ZERO_UNDEF; 2400*0b57cec5SDimitry Andric NewOpc = AMDGPUISD::FFBL_B32; 2401*0b57cec5SDimitry Andric } else 2402*0b57cec5SDimitry Andric llvm_unreachable("Unexpected OPCode!!!"); 2403*0b57cec5SDimitry Andric 2404*0b57cec5SDimitry Andric 2405*0b57cec5SDimitry Andric if (ZeroUndef && Src.getValueType() == MVT::i32) 2406*0b57cec5SDimitry Andric return DAG.getNode(NewOpc, SL, MVT::i32, Src); 2407*0b57cec5SDimitry Andric 2408*0b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); 2409*0b57cec5SDimitry Andric 2410*0b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 2411*0b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 2412*0b57cec5SDimitry Andric 2413*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 2414*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 2415*0b57cec5SDimitry Andric 2416*0b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), 2417*0b57cec5SDimitry Andric *DAG.getContext(), MVT::i32); 2418*0b57cec5SDimitry Andric 2419*0b57cec5SDimitry Andric SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo; 2420*0b57cec5SDimitry Andric SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ); 2421*0b57cec5SDimitry Andric 2422*0b57cec5SDimitry Andric SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo); 2423*0b57cec5SDimitry Andric SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi); 2424*0b57cec5SDimitry Andric 2425*0b57cec5SDimitry Andric const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32); 2426*0b57cec5SDimitry Andric SDValue Add, NewOpr; 2427*0b57cec5SDimitry Andric if (isCtlzOpc(Op.getOpcode())) { 2428*0b57cec5SDimitry Andric Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32); 2429*0b57cec5SDimitry Andric // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)) 2430*0b57cec5SDimitry Andric NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi); 2431*0b57cec5SDimitry Andric } else { 2432*0b57cec5SDimitry Andric Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32); 2433*0b57cec5SDimitry Andric // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x)) 2434*0b57cec5SDimitry Andric NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo); 2435*0b57cec5SDimitry Andric } 2436*0b57cec5SDimitry Andric 2437*0b57cec5SDimitry Andric if (!ZeroUndef) { 2438*0b57cec5SDimitry Andric // Test if the full 64-bit input is zero. 2439*0b57cec5SDimitry Andric 2440*0b57cec5SDimitry Andric // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32, 2441*0b57cec5SDimitry Andric // which we probably don't want. 2442*0b57cec5SDimitry Andric SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi; 2443*0b57cec5SDimitry Andric SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ); 2444*0b57cec5SDimitry Andric SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0); 2445*0b57cec5SDimitry Andric 2446*0b57cec5SDimitry Andric // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction 2447*0b57cec5SDimitry Andric // with the same cycles, otherwise it is slower. 
2448*0b57cec5SDimitry Andric // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src, 2449*0b57cec5SDimitry Andric // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ); 2450*0b57cec5SDimitry Andric 2451*0b57cec5SDimitry Andric const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32); 2452*0b57cec5SDimitry Andric 2453*0b57cec5SDimitry Andric // The instruction returns -1 for 0 input, but the defined intrinsic 2454*0b57cec5SDimitry Andric // behavior is to return the number of bits. 2455*0b57cec5SDimitry Andric NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, 2456*0b57cec5SDimitry Andric SrcIsZero, Bits32, NewOpr); 2457*0b57cec5SDimitry Andric } 2458*0b57cec5SDimitry Andric 2459*0b57cec5SDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr); 2460*0b57cec5SDimitry Andric } 2461*0b57cec5SDimitry Andric 2462*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, 2463*0b57cec5SDimitry Andric bool Signed) const { 2464*0b57cec5SDimitry Andric // Unsigned 2465*0b57cec5SDimitry Andric // cul2f(ulong u) 2466*0b57cec5SDimitry Andric //{ 2467*0b57cec5SDimitry Andric // uint lz = clz(u); 2468*0b57cec5SDimitry Andric // uint e = (u != 0) ? 127U + 63U - lz : 0; 2469*0b57cec5SDimitry Andric // u = (u << lz) & 0x7fffffffffffffffUL; 2470*0b57cec5SDimitry Andric // ulong t = u & 0xffffffffffUL; 2471*0b57cec5SDimitry Andric // uint v = (e << 23) | (uint)(u >> 40); 2472*0b57cec5SDimitry Andric // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U); 2473*0b57cec5SDimitry Andric // return as_float(v + r); 2474*0b57cec5SDimitry Andric //} 2475*0b57cec5SDimitry Andric // Signed 2476*0b57cec5SDimitry Andric // cl2f(long l) 2477*0b57cec5SDimitry Andric //{ 2478*0b57cec5SDimitry Andric // long s = l >> 63; 2479*0b57cec5SDimitry Andric // float r = cul2f((l + s) ^ s); 2480*0b57cec5SDimitry Andric // return s ? 
-r : r; 2481*0b57cec5SDimitry Andric //} 2482*0b57cec5SDimitry Andric 2483*0b57cec5SDimitry Andric SDLoc SL(Op); 2484*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2485*0b57cec5SDimitry Andric SDValue L = Src; 2486*0b57cec5SDimitry Andric 2487*0b57cec5SDimitry Andric SDValue S; 2488*0b57cec5SDimitry Andric if (Signed) { 2489*0b57cec5SDimitry Andric const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64); 2490*0b57cec5SDimitry Andric S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit); 2491*0b57cec5SDimitry Andric 2492*0b57cec5SDimitry Andric SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S); 2493*0b57cec5SDimitry Andric L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S); 2494*0b57cec5SDimitry Andric } 2495*0b57cec5SDimitry Andric 2496*0b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), 2497*0b57cec5SDimitry Andric *DAG.getContext(), MVT::f32); 2498*0b57cec5SDimitry Andric 2499*0b57cec5SDimitry Andric 2500*0b57cec5SDimitry Andric SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32); 2501*0b57cec5SDimitry Andric SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64); 2502*0b57cec5SDimitry Andric SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L); 2503*0b57cec5SDimitry Andric LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ); 2504*0b57cec5SDimitry Andric 2505*0b57cec5SDimitry Andric SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32); 2506*0b57cec5SDimitry Andric SDValue E = DAG.getSelect(SL, MVT::i32, 2507*0b57cec5SDimitry Andric DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE), 2508*0b57cec5SDimitry Andric DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ), 2509*0b57cec5SDimitry Andric ZeroI32); 2510*0b57cec5SDimitry Andric 2511*0b57cec5SDimitry Andric SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64, 2512*0b57cec5SDimitry Andric DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ), 2513*0b57cec5SDimitry Andric DAG.getConstant((-1ULL) >> 1, SL, MVT::i64)); 2514*0b57cec5SDimitry Andric 2515*0b57cec5SDimitry Andric SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U, 2516*0b57cec5SDimitry Andric DAG.getConstant(0xffffffffffULL, SL, MVT::i64)); 2517*0b57cec5SDimitry Andric 2518*0b57cec5SDimitry Andric SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64, 2519*0b57cec5SDimitry Andric U, DAG.getConstant(40, SL, MVT::i64)); 2520*0b57cec5SDimitry Andric 2521*0b57cec5SDimitry Andric SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32, 2522*0b57cec5SDimitry Andric DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)), 2523*0b57cec5SDimitry Andric DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl)); 2524*0b57cec5SDimitry Andric 2525*0b57cec5SDimitry Andric SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64); 2526*0b57cec5SDimitry Andric SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT); 2527*0b57cec5SDimitry Andric SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ); 2528*0b57cec5SDimitry Andric 2529*0b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, SL, MVT::i32); 2530*0b57cec5SDimitry Andric 2531*0b57cec5SDimitry Andric SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One); 2532*0b57cec5SDimitry Andric 2533*0b57cec5SDimitry Andric SDValue R = DAG.getSelect(SL, MVT::i32, 2534*0b57cec5SDimitry Andric RCmp, 2535*0b57cec5SDimitry Andric One, 2536*0b57cec5SDimitry Andric DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32)); 2537*0b57cec5SDimitry Andric R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R); 2538*0b57cec5SDimitry Andric R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R); 2539*0b57cec5SDimitry Andric 2540*0b57cec5SDimitry 
Andric if (!Signed) 2541*0b57cec5SDimitry Andric return R; 2542*0b57cec5SDimitry Andric 2543*0b57cec5SDimitry Andric SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R); 2544*0b57cec5SDimitry Andric return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R); 2545*0b57cec5SDimitry Andric } 2546*0b57cec5SDimitry Andric 2547*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, 2548*0b57cec5SDimitry Andric bool Signed) const { 2549*0b57cec5SDimitry Andric SDLoc SL(Op); 2550*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2551*0b57cec5SDimitry Andric 2552*0b57cec5SDimitry Andric SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); 2553*0b57cec5SDimitry Andric 2554*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, 2555*0b57cec5SDimitry Andric DAG.getConstant(0, SL, MVT::i32)); 2556*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, 2557*0b57cec5SDimitry Andric DAG.getConstant(1, SL, MVT::i32)); 2558*0b57cec5SDimitry Andric 2559*0b57cec5SDimitry Andric SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP, 2560*0b57cec5SDimitry Andric SL, MVT::f64, Hi); 2561*0b57cec5SDimitry Andric 2562*0b57cec5SDimitry Andric SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); 2563*0b57cec5SDimitry Andric 2564*0b57cec5SDimitry Andric SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, 2565*0b57cec5SDimitry Andric DAG.getConstant(32, SL, MVT::i32)); 2566*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 2567*0b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); 2568*0b57cec5SDimitry Andric } 2569*0b57cec5SDimitry Andric 2570*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 2571*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2572*0b57cec5SDimitry Andric assert(Op.getOperand(0).getValueType() == MVT::i64 && 2573*0b57cec5SDimitry Andric "operation should be legal"); 2574*0b57cec5SDimitry Andric 2575*0b57cec5SDimitry Andric // TODO: Factor out code common with LowerSINT_TO_FP. 
2576*0b57cec5SDimitry Andric 2577*0b57cec5SDimitry Andric EVT DestVT = Op.getValueType(); 2578*0b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 2579*0b57cec5SDimitry Andric SDLoc DL(Op); 2580*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2581*0b57cec5SDimitry Andric 2582*0b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 2583*0b57cec5SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); 2584*0b57cec5SDimitry Andric SDValue FPRound = 2585*0b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 2586*0b57cec5SDimitry Andric 2587*0b57cec5SDimitry Andric return FPRound; 2588*0b57cec5SDimitry Andric } 2589*0b57cec5SDimitry Andric 2590*0b57cec5SDimitry Andric if (DestVT == MVT::f32) 2591*0b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, false); 2592*0b57cec5SDimitry Andric 2593*0b57cec5SDimitry Andric assert(DestVT == MVT::f64); 2594*0b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, false); 2595*0b57cec5SDimitry Andric } 2596*0b57cec5SDimitry Andric 2597*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, 2598*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2599*0b57cec5SDimitry Andric assert(Op.getOperand(0).getValueType() == MVT::i64 && 2600*0b57cec5SDimitry Andric "operation should be legal"); 2601*0b57cec5SDimitry Andric 2602*0b57cec5SDimitry Andric // TODO: Factor out code common with LowerUINT_TO_FP. 2603*0b57cec5SDimitry Andric 2604*0b57cec5SDimitry Andric EVT DestVT = Op.getValueType(); 2605*0b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 2606*0b57cec5SDimitry Andric SDLoc DL(Op); 2607*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2608*0b57cec5SDimitry Andric 2609*0b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 2610*0b57cec5SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); 2611*0b57cec5SDimitry Andric SDValue FPRound = 2612*0b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 2613*0b57cec5SDimitry Andric 2614*0b57cec5SDimitry Andric return FPRound; 2615*0b57cec5SDimitry Andric } 2616*0b57cec5SDimitry Andric 2617*0b57cec5SDimitry Andric if (DestVT == MVT::f32) 2618*0b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, true); 2619*0b57cec5SDimitry Andric 2620*0b57cec5SDimitry Andric assert(DestVT == MVT::f64); 2621*0b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, true); 2622*0b57cec5SDimitry Andric } 2623*0b57cec5SDimitry Andric 2624*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, 2625*0b57cec5SDimitry Andric bool Signed) const { 2626*0b57cec5SDimitry Andric SDLoc SL(Op); 2627*0b57cec5SDimitry Andric 2628*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2629*0b57cec5SDimitry Andric 2630*0b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 2631*0b57cec5SDimitry Andric 2632*0b57cec5SDimitry Andric SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL, 2633*0b57cec5SDimitry Andric MVT::f64); 2634*0b57cec5SDimitry Andric SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL, 2635*0b57cec5SDimitry Andric MVT::f64); 2636*0b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 
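  // K0 is 2^-32 and K1 is -2^32, written as raw f64 bit patterns. The code
  // below computes Hi = fp_to_int(floor(Trunc * 2^-32)) and
  // Lo = fp_to_uint(fma(floor(Trunc * 2^-32), -2^32, Trunc)), i.e. it peels
  // off the high 32 bits first and converts the remaining low 32 bits
  // separately before packing them back into an i64.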
2637*0b57cec5SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0); 2638*0b57cec5SDimitry Andric 2639*0b57cec5SDimitry Andric SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul); 2640*0b57cec5SDimitry Andric 2641*0b57cec5SDimitry Andric 2642*0b57cec5SDimitry Andric SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc); 2643*0b57cec5SDimitry Andric 2644*0b57cec5SDimitry Andric SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL, 2645*0b57cec5SDimitry Andric MVT::i32, FloorMul); 2646*0b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma); 2647*0b57cec5SDimitry Andric 2648*0b57cec5SDimitry Andric SDValue Result = DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi}); 2649*0b57cec5SDimitry Andric 2650*0b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result); 2651*0b57cec5SDimitry Andric } 2652*0b57cec5SDimitry Andric 2653*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { 2654*0b57cec5SDimitry Andric SDLoc DL(Op); 2655*0b57cec5SDimitry Andric SDValue N0 = Op.getOperand(0); 2656*0b57cec5SDimitry Andric 2657*0b57cec5SDimitry Andric // Convert to target node to get known bits 2658*0b57cec5SDimitry Andric if (N0.getValueType() == MVT::f32) 2659*0b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0); 2660*0b57cec5SDimitry Andric 2661*0b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath) { 2662*0b57cec5SDimitry Andric // There is a generic expand for FP_TO_FP16 with unsafe fast math. 2663*0b57cec5SDimitry Andric return SDValue(); 2664*0b57cec5SDimitry Andric } 2665*0b57cec5SDimitry Andric 2666*0b57cec5SDimitry Andric assert(N0.getSimpleValueType() == MVT::f64); 2667*0b57cec5SDimitry Andric 2668*0b57cec5SDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode. 2669*0b57cec5SDimitry Andric const unsigned ExpMask = 0x7ff; 2670*0b57cec5SDimitry Andric const unsigned ExpBiasf64 = 1023; 2671*0b57cec5SDimitry Andric const unsigned ExpBiasf16 = 15; 2672*0b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 2673*0b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, MVT::i32); 2674*0b57cec5SDimitry Andric SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0); 2675*0b57cec5SDimitry Andric SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U, 2676*0b57cec5SDimitry Andric DAG.getConstant(32, DL, MVT::i64)); 2677*0b57cec5SDimitry Andric UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32); 2678*0b57cec5SDimitry Andric U = DAG.getZExtOrTrunc(U, DL, MVT::i32); 2679*0b57cec5SDimitry Andric SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 2680*0b57cec5SDimitry Andric DAG.getConstant(20, DL, MVT::i64)); 2681*0b57cec5SDimitry Andric E = DAG.getNode(ISD::AND, DL, MVT::i32, E, 2682*0b57cec5SDimitry Andric DAG.getConstant(ExpMask, DL, MVT::i32)); 2683*0b57cec5SDimitry Andric // Subtract the fp64 exponent bias (1023) to get the real exponent and 2684*0b57cec5SDimitry Andric // add the f16 bias (15) to get the biased exponent for the f16 format. 
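  // -ExpBiasf64 + ExpBiasf16 == -1023 + 15 == -1008.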
2685*0b57cec5SDimitry Andric E = DAG.getNode(ISD::ADD, DL, MVT::i32, E, 2686*0b57cec5SDimitry Andric DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32)); 2687*0b57cec5SDimitry Andric 2688*0b57cec5SDimitry Andric SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 2689*0b57cec5SDimitry Andric DAG.getConstant(8, DL, MVT::i32)); 2690*0b57cec5SDimitry Andric M = DAG.getNode(ISD::AND, DL, MVT::i32, M, 2691*0b57cec5SDimitry Andric DAG.getConstant(0xffe, DL, MVT::i32)); 2692*0b57cec5SDimitry Andric 2693*0b57cec5SDimitry Andric SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH, 2694*0b57cec5SDimitry Andric DAG.getConstant(0x1ff, DL, MVT::i32)); 2695*0b57cec5SDimitry Andric MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U); 2696*0b57cec5SDimitry Andric 2697*0b57cec5SDimitry Andric SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ); 2698*0b57cec5SDimitry Andric M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set); 2699*0b57cec5SDimitry Andric 2700*0b57cec5SDimitry Andric // (M != 0 ? 0x0200 : 0) | 0x7c00; 2701*0b57cec5SDimitry Andric SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32, 2702*0b57cec5SDimitry Andric DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32), 2703*0b57cec5SDimitry Andric Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32)); 2704*0b57cec5SDimitry Andric 2705*0b57cec5SDimitry Andric // N = M | (E << 12); 2706*0b57cec5SDimitry Andric SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M, 2707*0b57cec5SDimitry Andric DAG.getNode(ISD::SHL, DL, MVT::i32, E, 2708*0b57cec5SDimitry Andric DAG.getConstant(12, DL, MVT::i32))); 2709*0b57cec5SDimitry Andric 2710*0b57cec5SDimitry Andric // B = clamp(1-E, 0, 13); 2711*0b57cec5SDimitry Andric SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32, 2712*0b57cec5SDimitry Andric One, E); 2713*0b57cec5SDimitry Andric SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero); 2714*0b57cec5SDimitry Andric B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B, 2715*0b57cec5SDimitry Andric DAG.getConstant(13, DL, MVT::i32)); 2716*0b57cec5SDimitry Andric 2717*0b57cec5SDimitry Andric SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M, 2718*0b57cec5SDimitry Andric DAG.getConstant(0x1000, DL, MVT::i32)); 2719*0b57cec5SDimitry Andric 2720*0b57cec5SDimitry Andric SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B); 2721*0b57cec5SDimitry Andric SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B); 2722*0b57cec5SDimitry Andric SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE); 2723*0b57cec5SDimitry Andric D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1); 2724*0b57cec5SDimitry Andric 2725*0b57cec5SDimitry Andric SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT); 2726*0b57cec5SDimitry Andric SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V, 2727*0b57cec5SDimitry Andric DAG.getConstant(0x7, DL, MVT::i32)); 2728*0b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, DL, MVT::i32, V, 2729*0b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32)); 2730*0b57cec5SDimitry Andric SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32), 2731*0b57cec5SDimitry Andric One, Zero, ISD::SETEQ); 2732*0b57cec5SDimitry Andric SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32), 2733*0b57cec5SDimitry Andric One, Zero, ISD::SETGT); 2734*0b57cec5SDimitry Andric V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1); 2735*0b57cec5SDimitry Andric V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1); 2736*0b57cec5SDimitry Andric 2737*0b57cec5SDimitry Andric 
V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32), 2738*0b57cec5SDimitry Andric DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT); 2739*0b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32), 2740*0b57cec5SDimitry Andric I, V, ISD::SETEQ); 2741*0b57cec5SDimitry Andric 2742*0b57cec5SDimitry Andric // Extract the sign bit. 2743*0b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 2744*0b57cec5SDimitry Andric DAG.getConstant(16, DL, MVT::i32)); 2745*0b57cec5SDimitry Andric Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign, 2746*0b57cec5SDimitry Andric DAG.getConstant(0x8000, DL, MVT::i32)); 2747*0b57cec5SDimitry Andric 2748*0b57cec5SDimitry Andric V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V); 2749*0b57cec5SDimitry Andric return DAG.getZExtOrTrunc(V, DL, Op.getValueType()); 2750*0b57cec5SDimitry Andric } 2751*0b57cec5SDimitry Andric 2752*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op, 2753*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2754*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2755*0b57cec5SDimitry Andric 2756*0b57cec5SDimitry Andric // TODO: Factor out code common with LowerFP_TO_UINT. 2757*0b57cec5SDimitry Andric 2758*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 2759*0b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) { 2760*0b57cec5SDimitry Andric SDLoc DL(Op); 2761*0b57cec5SDimitry Andric 2762*0b57cec5SDimitry Andric SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src); 2763*0b57cec5SDimitry Andric SDValue FpToInt32 = 2764*0b57cec5SDimitry Andric DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend); 2765*0b57cec5SDimitry Andric 2766*0b57cec5SDimitry Andric return FpToInt32; 2767*0b57cec5SDimitry Andric } 2768*0b57cec5SDimitry Andric 2769*0b57cec5SDimitry Andric if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64) 2770*0b57cec5SDimitry Andric return LowerFP64_TO_INT(Op, DAG, true); 2771*0b57cec5SDimitry Andric 2772*0b57cec5SDimitry Andric return SDValue(); 2773*0b57cec5SDimitry Andric } 2774*0b57cec5SDimitry Andric 2775*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op, 2776*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2777*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2778*0b57cec5SDimitry Andric 2779*0b57cec5SDimitry Andric // TODO: Factor out code common with LowerFP_TO_SINT. 
2780*0b57cec5SDimitry Andric 2781*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 2782*0b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) { 2783*0b57cec5SDimitry Andric SDLoc DL(Op); 2784*0b57cec5SDimitry Andric 2785*0b57cec5SDimitry Andric SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src); 2786*0b57cec5SDimitry Andric SDValue FpToInt32 = 2787*0b57cec5SDimitry Andric DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend); 2788*0b57cec5SDimitry Andric 2789*0b57cec5SDimitry Andric return FpToInt32; 2790*0b57cec5SDimitry Andric } 2791*0b57cec5SDimitry Andric 2792*0b57cec5SDimitry Andric if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64) 2793*0b57cec5SDimitry Andric return LowerFP64_TO_INT(Op, DAG, false); 2794*0b57cec5SDimitry Andric 2795*0b57cec5SDimitry Andric return SDValue(); 2796*0b57cec5SDimitry Andric } 2797*0b57cec5SDimitry Andric 2798*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 2799*0b57cec5SDimitry Andric SelectionDAG &DAG) const { 2800*0b57cec5SDimitry Andric EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2801*0b57cec5SDimitry Andric MVT VT = Op.getSimpleValueType(); 2802*0b57cec5SDimitry Andric MVT ScalarVT = VT.getScalarType(); 2803*0b57cec5SDimitry Andric 2804*0b57cec5SDimitry Andric assert(VT.isVector()); 2805*0b57cec5SDimitry Andric 2806*0b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2807*0b57cec5SDimitry Andric SDLoc DL(Op); 2808*0b57cec5SDimitry Andric 2809*0b57cec5SDimitry Andric // TODO: Don't scalarize on Evergreen? 2810*0b57cec5SDimitry Andric unsigned NElts = VT.getVectorNumElements(); 2811*0b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 2812*0b57cec5SDimitry Andric DAG.ExtractVectorElements(Src, Args, 0, NElts); 2813*0b57cec5SDimitry Andric 2814*0b57cec5SDimitry Andric SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); 2815*0b57cec5SDimitry Andric for (unsigned I = 0; I < NElts; ++I) 2816*0b57cec5SDimitry Andric Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); 2817*0b57cec5SDimitry Andric 2818*0b57cec5SDimitry Andric return DAG.getBuildVector(VT, DL, Args); 2819*0b57cec5SDimitry Andric } 2820*0b57cec5SDimitry Andric 2821*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 2822*0b57cec5SDimitry Andric // Custom DAG optimizations 2823*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 2824*0b57cec5SDimitry Andric 2825*0b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) { 2826*0b57cec5SDimitry Andric return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; 2827*0b57cec5SDimitry Andric } 2828*0b57cec5SDimitry Andric 2829*0b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) { 2830*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2831*0b57cec5SDimitry Andric return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated 2832*0b57cec5SDimitry Andric // as unsigned 24-bit values. 
2833*0b57cec5SDimitry Andric AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24; 2834*0b57cec5SDimitry Andric } 2835*0b57cec5SDimitry Andric 2836*0b57cec5SDimitry Andric static SDValue simplifyI24(SDNode *Node24, 2837*0b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 2838*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 2839*0b57cec5SDimitry Andric SDValue LHS = Node24->getOperand(0); 2840*0b57cec5SDimitry Andric SDValue RHS = Node24->getOperand(1); 2841*0b57cec5SDimitry Andric 2842*0b57cec5SDimitry Andric APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); 2843*0b57cec5SDimitry Andric 2844*0b57cec5SDimitry Andric // First try to simplify using GetDemandedBits which allows the operands to 2845*0b57cec5SDimitry Andric // have other uses, but will only perform simplifications that involve 2846*0b57cec5SDimitry Andric // bypassing some nodes for this user. 2847*0b57cec5SDimitry Andric SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded); 2848*0b57cec5SDimitry Andric SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded); 2849*0b57cec5SDimitry Andric if (DemandedLHS || DemandedRHS) 2850*0b57cec5SDimitry Andric return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(), 2851*0b57cec5SDimitry Andric DemandedLHS ? DemandedLHS : LHS, 2852*0b57cec5SDimitry Andric DemandedRHS ? DemandedRHS : RHS); 2853*0b57cec5SDimitry Andric 2854*0b57cec5SDimitry Andric // Now try SimplifyDemandedBits which can simplify the nodes used by our 2855*0b57cec5SDimitry Andric // operands if this node is the only user. 2856*0b57cec5SDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2857*0b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) 2858*0b57cec5SDimitry Andric return SDValue(Node24, 0); 2859*0b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) 2860*0b57cec5SDimitry Andric return SDValue(Node24, 0); 2861*0b57cec5SDimitry Andric 2862*0b57cec5SDimitry Andric return SDValue(); 2863*0b57cec5SDimitry Andric } 2864*0b57cec5SDimitry Andric 2865*0b57cec5SDimitry Andric template <typename IntTy> 2866*0b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, 2867*0b57cec5SDimitry Andric uint32_t Width, const SDLoc &DL) { 2868*0b57cec5SDimitry Andric if (Width + Offset < 32) { 2869*0b57cec5SDimitry Andric uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width); 2870*0b57cec5SDimitry Andric IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width); 2871*0b57cec5SDimitry Andric return DAG.getConstant(Result, DL, MVT::i32); 2872*0b57cec5SDimitry Andric } 2873*0b57cec5SDimitry Andric 2874*0b57cec5SDimitry Andric return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); 2875*0b57cec5SDimitry Andric } 2876*0b57cec5SDimitry Andric 2877*0b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) { 2878*0b57cec5SDimitry Andric for (SDNode *U : Val->uses()) { 2879*0b57cec5SDimitry Andric if (MemSDNode *M = dyn_cast<MemSDNode>(U)) { 2880*0b57cec5SDimitry Andric if (M->isVolatile()) 2881*0b57cec5SDimitry Andric return true; 2882*0b57cec5SDimitry Andric } 2883*0b57cec5SDimitry Andric } 2884*0b57cec5SDimitry Andric 2885*0b57cec5SDimitry Andric return false; 2886*0b57cec5SDimitry Andric } 2887*0b57cec5SDimitry Andric 2888*0b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { 2889*0b57cec5SDimitry Andric // i32 vectors are the canonical memory type. 
2890*0b57cec5SDimitry Andric if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT)) 2891*0b57cec5SDimitry Andric return false; 2892*0b57cec5SDimitry Andric 2893*0b57cec5SDimitry Andric if (!VT.isByteSized()) 2894*0b57cec5SDimitry Andric return false; 2895*0b57cec5SDimitry Andric 2896*0b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 2897*0b57cec5SDimitry Andric 2898*0b57cec5SDimitry Andric if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector()) 2899*0b57cec5SDimitry Andric return false; 2900*0b57cec5SDimitry Andric 2901*0b57cec5SDimitry Andric if (Size == 3 || (Size > 4 && (Size % 4 != 0))) 2902*0b57cec5SDimitry Andric return false; 2903*0b57cec5SDimitry Andric 2904*0b57cec5SDimitry Andric return true; 2905*0b57cec5SDimitry Andric } 2906*0b57cec5SDimitry Andric 2907*0b57cec5SDimitry Andric // Find a load or store from corresponding pattern root. 2908*0b57cec5SDimitry Andric // Roots may be build_vector, bitconvert or their combinations. 2909*0b57cec5SDimitry Andric static MemSDNode* findMemSDNode(SDNode *N) { 2910*0b57cec5SDimitry Andric N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); 2911*0b57cec5SDimitry Andric if (MemSDNode *MN = dyn_cast<MemSDNode>(N)) 2912*0b57cec5SDimitry Andric return MN; 2913*0b57cec5SDimitry Andric assert(isa<BuildVectorSDNode>(N)); 2914*0b57cec5SDimitry Andric for (SDValue V : N->op_values()) 2915*0b57cec5SDimitry Andric if (MemSDNode *MN = 2916*0b57cec5SDimitry Andric dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V))) 2917*0b57cec5SDimitry Andric return MN; 2918*0b57cec5SDimitry Andric llvm_unreachable("cannot find MemSDNode in the pattern!"); 2919*0b57cec5SDimitry Andric } 2920*0b57cec5SDimitry Andric 2921*0b57cec5SDimitry Andric bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, 2922*0b57cec5SDimitry Andric SelectionDAG &DAG, 2923*0b57cec5SDimitry Andric SDNode *N, 2924*0b57cec5SDimitry Andric SDValue Addr, 2925*0b57cec5SDimitry Andric SDValue &VAddr, 2926*0b57cec5SDimitry Andric SDValue &Offset, 2927*0b57cec5SDimitry Andric SDValue &SLC) const { 2928*0b57cec5SDimitry Andric const GCNSubtarget &ST = 2929*0b57cec5SDimitry Andric DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); 2930*0b57cec5SDimitry Andric int64_t OffsetVal = 0; 2931*0b57cec5SDimitry Andric 2932*0b57cec5SDimitry Andric if (ST.hasFlatInstOffsets() && 2933*0b57cec5SDimitry Andric (!ST.hasFlatSegmentOffsetBug() || 2934*0b57cec5SDimitry Andric findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && 2935*0b57cec5SDimitry Andric DAG.isBaseWithConstantOffset(Addr)) { 2936*0b57cec5SDimitry Andric SDValue N0 = Addr.getOperand(0); 2937*0b57cec5SDimitry Andric SDValue N1 = Addr.getOperand(1); 2938*0b57cec5SDimitry Andric int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); 2939*0b57cec5SDimitry Andric 2940*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 2941*0b57cec5SDimitry Andric if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), 2942*0b57cec5SDimitry Andric IsSigned)) { 2943*0b57cec5SDimitry Andric Addr = N0; 2944*0b57cec5SDimitry Andric OffsetVal = COffsetVal; 2945*0b57cec5SDimitry Andric } 2946*0b57cec5SDimitry Andric } 2947*0b57cec5SDimitry Andric 2948*0b57cec5SDimitry Andric VAddr = Addr; 2949*0b57cec5SDimitry Andric Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16); 2950*0b57cec5SDimitry Andric SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1); 2951*0b57cec5SDimitry Andric 2952*0b57cec5SDimitry Andric return true; 2953*0b57cec5SDimitry Andric } 2954*0b57cec5SDimitry Andric 
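// The next two combines rewrite memory accesses of awkward types in terms of
// the canonical i32-based memory types. Illustrative examples (assuming
// getEquivalentMemType maps such types onto i32, or i32 vectors of the same
// width):
//   (i96 (load p))  -> (bitcast (v3i32 (load p)))
//   (store v4i8:x)  -> (store (i32 (bitcast x)))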
// Replace a load of an illegal type with a load of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  if (!DCI.isBeforeLegalize())
    return SDValue();

  LoadSDNode *LN = cast<LoadSDNode>(N);
  if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
    return SDValue();

  SDLoc SL(N);
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = LN->getMemoryVT();

  unsigned Size = VT.getStoreSize();
  unsigned Align = LN->getAlignment();
  if (Align < Size && isTypeLegal(VT)) {
    bool IsFast;
    unsigned AS = LN->getAddressSpace();

    // Expand unaligned loads earlier than legalization. Due to visitation order
    // problems during legalization, the emitted instructions to pack and unpack
    // the bytes again are not eliminated in the case of an unaligned copy.
    if (!allowsMisalignedMemoryAccesses(
            VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
      if (VT.isVector())
        return scalarizeVectorLoad(LN, DAG);

      SDValue Ops[2];
      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
      return DAG.getMergeValues(Ops, SDLoc(N));
    }

    if (!IsFast)
      return SDValue();
  }

  if (!shouldCombineMemoryType(VT))
    return SDValue();

  EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);

  SDValue NewLoad
    = DAG.getLoad(NewVT, SL, LN->getChain(),
                  LN->getBasePtr(), LN->getMemOperand());

  SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
  DCI.CombineTo(N, BC, NewLoad.getValue(1));
  return SDValue(N, 0);
}

// Replace a store of an illegal type with a store of a bitcast to a friendlier
// type.
3009*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, 3010*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3011*0b57cec5SDimitry Andric if (!DCI.isBeforeLegalize()) 3012*0b57cec5SDimitry Andric return SDValue(); 3013*0b57cec5SDimitry Andric 3014*0b57cec5SDimitry Andric StoreSDNode *SN = cast<StoreSDNode>(N); 3015*0b57cec5SDimitry Andric if (SN->isVolatile() || !ISD::isNormalStore(SN)) 3016*0b57cec5SDimitry Andric return SDValue(); 3017*0b57cec5SDimitry Andric 3018*0b57cec5SDimitry Andric EVT VT = SN->getMemoryVT(); 3019*0b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 3020*0b57cec5SDimitry Andric 3021*0b57cec5SDimitry Andric SDLoc SL(N); 3022*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3023*0b57cec5SDimitry Andric unsigned Align = SN->getAlignment(); 3024*0b57cec5SDimitry Andric if (Align < Size && isTypeLegal(VT)) { 3025*0b57cec5SDimitry Andric bool IsFast; 3026*0b57cec5SDimitry Andric unsigned AS = SN->getAddressSpace(); 3027*0b57cec5SDimitry Andric 3028*0b57cec5SDimitry Andric // Expand unaligned stores earlier than legalization. Due to visitation 3029*0b57cec5SDimitry Andric // order problems during legalization, the emitted instructions to pack and 3030*0b57cec5SDimitry Andric // unpack the bytes again are not eliminated in the case of an unaligned 3031*0b57cec5SDimitry Andric // copy. 3032*0b57cec5SDimitry Andric if (!allowsMisalignedMemoryAccesses( 3033*0b57cec5SDimitry Andric VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) { 3034*0b57cec5SDimitry Andric if (VT.isVector()) 3035*0b57cec5SDimitry Andric return scalarizeVectorStore(SN, DAG); 3036*0b57cec5SDimitry Andric 3037*0b57cec5SDimitry Andric return expandUnalignedStore(SN, DAG); 3038*0b57cec5SDimitry Andric } 3039*0b57cec5SDimitry Andric 3040*0b57cec5SDimitry Andric if (!IsFast) 3041*0b57cec5SDimitry Andric return SDValue(); 3042*0b57cec5SDimitry Andric } 3043*0b57cec5SDimitry Andric 3044*0b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT)) 3045*0b57cec5SDimitry Andric return SDValue(); 3046*0b57cec5SDimitry Andric 3047*0b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); 3048*0b57cec5SDimitry Andric SDValue Val = SN->getValue(); 3049*0b57cec5SDimitry Andric 3050*0b57cec5SDimitry Andric //DCI.AddToWorklist(Val.getNode()); 3051*0b57cec5SDimitry Andric 3052*0b57cec5SDimitry Andric bool OtherUses = !Val.hasOneUse(); 3053*0b57cec5SDimitry Andric SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val); 3054*0b57cec5SDimitry Andric if (OtherUses) { 3055*0b57cec5SDimitry Andric SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal); 3056*0b57cec5SDimitry Andric DAG.ReplaceAllUsesOfValueWith(Val, CastBack); 3057*0b57cec5SDimitry Andric } 3058*0b57cec5SDimitry Andric 3059*0b57cec5SDimitry Andric return DAG.getStore(SN->getChain(), SL, CastVal, 3060*0b57cec5SDimitry Andric SN->getBasePtr(), SN->getMemOperand()); 3061*0b57cec5SDimitry Andric } 3062*0b57cec5SDimitry Andric 3063*0b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check, 3064*0b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU 3065*0b57cec5SDimitry Andric // issues. 
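// For instance, the combine below rewrites
//   (i32 (assertzext (truncate i64:x), i8))
// as
//   (i32 (truncate (assertzext i64:x, i8)))
// so the known-bits annotation ends up on the wider value.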
3066*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, 3067*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3068*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3069*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3070*0b57cec5SDimitry Andric 3071*0b57cec5SDimitry Andric // (vt2 (assertzext (truncate vt0:x), vt1)) -> 3072*0b57cec5SDimitry Andric // (vt2 (truncate (assertzext vt0:x, vt1))) 3073*0b57cec5SDimitry Andric if (N0.getOpcode() == ISD::TRUNCATE) { 3074*0b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 3075*0b57cec5SDimitry Andric EVT ExtVT = cast<VTSDNode>(N1)->getVT(); 3076*0b57cec5SDimitry Andric SDLoc SL(N); 3077*0b57cec5SDimitry Andric 3078*0b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 3079*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3080*0b57cec5SDimitry Andric if (SrcVT.bitsGE(ExtVT)) { 3081*0b57cec5SDimitry Andric SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); 3082*0b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); 3083*0b57cec5SDimitry Andric } 3084*0b57cec5SDimitry Andric } 3085*0b57cec5SDimitry Andric 3086*0b57cec5SDimitry Andric return SDValue(); 3087*0b57cec5SDimitry Andric } 3088*0b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the 3089*0b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands. 3090*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( 3091*0b57cec5SDimitry Andric DAGCombinerInfo &DCI, const SDLoc &SL, 3092*0b57cec5SDimitry Andric unsigned Opc, SDValue LHS, 3093*0b57cec5SDimitry Andric uint32_t ValLo, uint32_t ValHi) const { 3094*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3095*0b57cec5SDimitry Andric SDValue Lo, Hi; 3096*0b57cec5SDimitry Andric std::tie(Lo, Hi) = split64BitValue(LHS, DAG); 3097*0b57cec5SDimitry Andric 3098*0b57cec5SDimitry Andric SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32); 3099*0b57cec5SDimitry Andric SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32); 3100*0b57cec5SDimitry Andric 3101*0b57cec5SDimitry Andric SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS); 3102*0b57cec5SDimitry Andric SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS); 3103*0b57cec5SDimitry Andric 3104*0b57cec5SDimitry Andric // Re-visit the ands. It's possible we eliminated one of them and it could 3105*0b57cec5SDimitry Andric // simplify the vector. 
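  // Net effect of this helper:
  //   (op i64:x, C) -> (bitcast (build_vector (op lo_32(x), ValLo),
  //                                           (op hi_32(x), ValHi)))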
  DCI.AddToWorklist(Lo.getNode());
  DCI.AddToWorklist(Hi.getNode());

  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}

SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(0);

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!RHS)
    return SDValue();

  SDValue LHS = N->getOperand(0);
  unsigned RHSVal = RHS->getZExtValue();
  if (!RHSVal)
    return LHS;

  SDLoc SL(N);
  SelectionDAG &DAG = DCI.DAG;

  switch (LHS->getOpcode()) {
  default:
    break;
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    SDValue X = LHS->getOperand(0);

    if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
        isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) {
      // Prefer build_vector as the canonical form if packed types are legal.
      // (shl ([asz]ext i16:x), 16) -> (build_vector 0, x)
      SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
        { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
      return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
    }

    // shl (ext x) => zext (shl x), if the shift does not overflow the int.
    if (VT != MVT::i64)
      break;
    KnownBits Known = DAG.computeKnownBits(X);
    unsigned LZ = Known.countMinLeadingZeros();
    if (LZ < RHSVal)
      break;
    EVT XVT = X.getValueType();
    SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
    return DAG.getZExtOrTrunc(Shl, SL, VT);
  }
  }

  if (VT != MVT::i64)
    return SDValue();

  // i64 (shl x, C) -> (build_pair 0, (shl x, C - 32))

  // On some subtargets, a 64-bit shift is a quarter-rate instruction. In the
  // common case, splitting it into a move and a 32-bit shift is faster and
  // the same code size.
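  // For example: (shl i64:x, 40) -> (build_pair 0, (shl lo_32(x), 8))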
  if (RHSVal < 32)
    return SDValue();

  SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);

  SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
  SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);

  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);

  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}

SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  if (N->getValueType(0) != MVT::i64)
    return SDValue();

  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!RHS)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc SL(N);
  unsigned RHSVal = RHS->getZExtValue();

  // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
  if (RHSVal == 32) {
    SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
    SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
                                   DAG.getConstant(31, SL, MVT::i32));

    SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
    return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
  }

  // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
  if (RHSVal == 63) {
    SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
    SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
                                   DAG.getConstant(31, SL, MVT::i32));
    SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
    return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
  }

  return SDValue();
}

SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!RHS)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  unsigned ShiftAmt = RHS->getZExtValue();
  SelectionDAG &DAG = DCI.DAG;
  SDLoc SL(N);

  // fold (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1)
  // This improves the ability to match BFE patterns in isel.
  if (LHS.getOpcode() == ISD::AND) {
    if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
      if (Mask->getAPIntValue().isShiftedMask() &&
          Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
        return DAG.getNode(
            ISD::AND, SL, VT,
            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
      }
    }
  }

  if (VT != MVT::i64)
    return SDValue();

  if (ShiftAmt < 32)
    return SDValue();

  // srl i64:x, C for C >= 32
  //   =>
  // build_pair (srl hi_32(x), C - 32), 0
  SDValue One = DAG.getConstant(1, SL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, SL, MVT::i32);

  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);

  SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
  SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);

  SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});

  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
}

SDValue AMDGPUTargetLowering::performTruncateCombine(
  SDNode *N, DAGCombinerInfo &DCI) const {
  SDLoc SL(N);
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);

  // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
  if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue Vec = Src.getOperand(0);
    if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue Elt0 = Vec.getOperand(0);
      EVT EltVT = Elt0.getValueType();
      if (VT.getSizeInBits() <= EltVT.getSizeInBits()) {
        if (EltVT.isFloatingPoint()) {
          Elt0 = DAG.getNode(ISD::BITCAST, SL,
                             EltVT.changeTypeToInteger(), Elt0);
        }

        return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
      }
3286*0b57cec5SDimitry Andric } 3287*0b57cec5SDimitry Andric } 3288*0b57cec5SDimitry Andric 3289*0b57cec5SDimitry Andric // Equivalent of above for accessing the high element of a vector as an 3290*0b57cec5SDimitry Andric // integer operation. 3291*0b57cec5SDimitry Andric // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y) 3292*0b57cec5SDimitry Andric if (Src.getOpcode() == ISD::SRL && !VT.isVector()) { 3293*0b57cec5SDimitry Andric if (auto K = isConstOrConstSplat(Src.getOperand(1))) { 3294*0b57cec5SDimitry Andric if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) { 3295*0b57cec5SDimitry Andric SDValue BV = stripBitcast(Src.getOperand(0)); 3296*0b57cec5SDimitry Andric if (BV.getOpcode() == ISD::BUILD_VECTOR && 3297*0b57cec5SDimitry Andric BV.getValueType().getVectorNumElements() == 2) { 3298*0b57cec5SDimitry Andric SDValue SrcElt = BV.getOperand(1); 3299*0b57cec5SDimitry Andric EVT SrcEltVT = SrcElt.getValueType(); 3300*0b57cec5SDimitry Andric if (SrcEltVT.isFloatingPoint()) { 3301*0b57cec5SDimitry Andric SrcElt = DAG.getNode(ISD::BITCAST, SL, 3302*0b57cec5SDimitry Andric SrcEltVT.changeTypeToInteger(), SrcElt); 3303*0b57cec5SDimitry Andric } 3304*0b57cec5SDimitry Andric 3305*0b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt); 3306*0b57cec5SDimitry Andric } 3307*0b57cec5SDimitry Andric } 3308*0b57cec5SDimitry Andric } 3309*0b57cec5SDimitry Andric } 3310*0b57cec5SDimitry Andric 3311*0b57cec5SDimitry Andric // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit. 3312*0b57cec5SDimitry Andric // 3313*0b57cec5SDimitry Andric // i16 (trunc (srl i64:x, K)), K <= 16 -> 3314*0b57cec5SDimitry Andric // i16 (trunc (srl (i32 (trunc x), K))) 3315*0b57cec5SDimitry Andric if (VT.getScalarSizeInBits() < 32) { 3316*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3317*0b57cec5SDimitry Andric if (SrcVT.getScalarSizeInBits() > 32 && 3318*0b57cec5SDimitry Andric (Src.getOpcode() == ISD::SRL || 3319*0b57cec5SDimitry Andric Src.getOpcode() == ISD::SRA || 3320*0b57cec5SDimitry Andric Src.getOpcode() == ISD::SHL)) { 3321*0b57cec5SDimitry Andric SDValue Amt = Src.getOperand(1); 3322*0b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(Amt); 3323*0b57cec5SDimitry Andric unsigned Size = VT.getScalarSizeInBits(); 3324*0b57cec5SDimitry Andric if ((Known.isConstant() && Known.getConstant().ule(Size)) || 3325*0b57cec5SDimitry Andric (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) { 3326*0b57cec5SDimitry Andric EVT MidVT = VT.isVector() ? 
3327*0b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MVT::i32, 3328*0b57cec5SDimitry Andric VT.getVectorNumElements()) : MVT::i32; 3329*0b57cec5SDimitry Andric 3330*0b57cec5SDimitry Andric EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout()); 3331*0b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT, 3332*0b57cec5SDimitry Andric Src.getOperand(0)); 3333*0b57cec5SDimitry Andric DCI.AddToWorklist(Trunc.getNode()); 3334*0b57cec5SDimitry Andric 3335*0b57cec5SDimitry Andric if (Amt.getValueType() != NewShiftVT) { 3336*0b57cec5SDimitry Andric Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT); 3337*0b57cec5SDimitry Andric DCI.AddToWorklist(Amt.getNode()); 3338*0b57cec5SDimitry Andric } 3339*0b57cec5SDimitry Andric 3340*0b57cec5SDimitry Andric SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT, 3341*0b57cec5SDimitry Andric Trunc, Amt); 3342*0b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift); 3343*0b57cec5SDimitry Andric } 3344*0b57cec5SDimitry Andric } 3345*0b57cec5SDimitry Andric } 3346*0b57cec5SDimitry Andric 3347*0b57cec5SDimitry Andric return SDValue(); 3348*0b57cec5SDimitry Andric } 3349*0b57cec5SDimitry Andric 3350*0b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion 3351*0b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion, 3352*0b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be 3353*0b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su]. 3354*0b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, 3355*0b57cec5SDimitry Andric SDValue N0, SDValue N1, unsigned Size, bool Signed) { 3356*0b57cec5SDimitry Andric if (Size <= 32) { 3357*0b57cec5SDimitry Andric unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 3358*0b57cec5SDimitry Andric return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1); 3359*0b57cec5SDimitry Andric } 3360*0b57cec5SDimitry Andric 3361*0b57cec5SDimitry Andric // Because we want to eliminate extension instructions before the 3362*0b57cec5SDimitry Andric // operation, we need to create a single user here (i.e. not the separate 3363*0b57cec5SDimitry Andric // mul_lo + mul_hi) so that SimplifyDemandedBits will deal with it. 3364*0b57cec5SDimitry Andric 3365*0b57cec5SDimitry Andric unsigned MulOpc = Signed ? AMDGPUISD::MUL_LOHI_I24 : AMDGPUISD::MUL_LOHI_U24; 3366*0b57cec5SDimitry Andric 3367*0b57cec5SDimitry Andric SDValue Mul = DAG.getNode(MulOpc, SL, 3368*0b57cec5SDimitry Andric DAG.getVTList(MVT::i32, MVT::i32), N0, N1); 3369*0b57cec5SDimitry Andric 3370*0b57cec5SDimitry Andric return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, 3371*0b57cec5SDimitry Andric Mul.getValue(0), Mul.getValue(1)); 3372*0b57cec5SDimitry Andric } 3373*0b57cec5SDimitry Andric 3374*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, 3375*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3376*0b57cec5SDimitry Andric EVT VT = N->getValueType(0); 3377*0b57cec5SDimitry Andric 3378*0b57cec5SDimitry Andric unsigned Size = VT.getSizeInBits(); 3379*0b57cec5SDimitry Andric if (VT.isVector() || Size > 64) 3380*0b57cec5SDimitry Andric return SDValue(); 3381*0b57cec5SDimitry Andric 3382*0b57cec5SDimitry Andric // There are i16 integer mul/mad. 
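  // The 24-bit combine below is therefore only applied to wider results, where
  // MUL_[IU]24 / MUL_LOHI_[IU]24 can stand in for a full 32- or 64-bit
  // multiply when both operands are known to fit in 24 bits.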
3383*0b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16)) 3384*0b57cec5SDimitry Andric return SDValue(); 3385*0b57cec5SDimitry Andric 3386*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3387*0b57cec5SDimitry Andric SDLoc DL(N); 3388*0b57cec5SDimitry Andric 3389*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3390*0b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 3391*0b57cec5SDimitry Andric 3392*0b57cec5SDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends 3393*0b57cec5SDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since 3394*0b57cec5SDimitry Andric // we can assume the high bits are whatever we want, use the underlying value 3395*0b57cec5SDimitry Andric // to avoid the unknown high bits from interfering. 3396*0b57cec5SDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND) 3397*0b57cec5SDimitry Andric N0 = N0.getOperand(0); 3398*0b57cec5SDimitry Andric 3399*0b57cec5SDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND) 3400*0b57cec5SDimitry Andric N1 = N1.getOperand(0); 3401*0b57cec5SDimitry Andric 3402*0b57cec5SDimitry Andric SDValue Mul; 3403*0b57cec5SDimitry Andric 3404*0b57cec5SDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { 3405*0b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 3406*0b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 3407*0b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, false); 3408*0b57cec5SDimitry Andric } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { 3409*0b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 3410*0b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 3411*0b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, true); 3412*0b57cec5SDimitry Andric } else { 3413*0b57cec5SDimitry Andric return SDValue(); 3414*0b57cec5SDimitry Andric } 3415*0b57cec5SDimitry Andric 3416*0b57cec5SDimitry Andric // We need to use sext even for MUL_U24, because MUL_U24 is used 3417*0b57cec5SDimitry Andric // for signed multiply of 8 and 16-bit types. 
3418*0b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mul, DL, VT); 3419*0b57cec5SDimitry Andric } 3420*0b57cec5SDimitry Andric 3421*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N, 3422*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3423*0b57cec5SDimitry Andric EVT VT = N->getValueType(0); 3424*0b57cec5SDimitry Andric 3425*0b57cec5SDimitry Andric if (!Subtarget->hasMulI24() || VT.isVector()) 3426*0b57cec5SDimitry Andric return SDValue(); 3427*0b57cec5SDimitry Andric 3428*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3429*0b57cec5SDimitry Andric SDLoc DL(N); 3430*0b57cec5SDimitry Andric 3431*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3432*0b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 3433*0b57cec5SDimitry Andric 3434*0b57cec5SDimitry Andric if (!isI24(N0, DAG) || !isI24(N1, DAG)) 3435*0b57cec5SDimitry Andric return SDValue(); 3436*0b57cec5SDimitry Andric 3437*0b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 3438*0b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 3439*0b57cec5SDimitry Andric 3440*0b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1); 3441*0b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 3442*0b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mulhi, DL, VT); 3443*0b57cec5SDimitry Andric } 3444*0b57cec5SDimitry Andric 3445*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N, 3446*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3447*0b57cec5SDimitry Andric EVT VT = N->getValueType(0); 3448*0b57cec5SDimitry Andric 3449*0b57cec5SDimitry Andric if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32) 3450*0b57cec5SDimitry Andric return SDValue(); 3451*0b57cec5SDimitry Andric 3452*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3453*0b57cec5SDimitry Andric SDLoc DL(N); 3454*0b57cec5SDimitry Andric 3455*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3456*0b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 3457*0b57cec5SDimitry Andric 3458*0b57cec5SDimitry Andric if (!isU24(N0, DAG) || !isU24(N1, DAG)) 3459*0b57cec5SDimitry Andric return SDValue(); 3460*0b57cec5SDimitry Andric 3461*0b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 3462*0b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 3463*0b57cec5SDimitry Andric 3464*0b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1); 3465*0b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 3466*0b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Mulhi, DL, VT); 3467*0b57cec5SDimitry Andric } 3468*0b57cec5SDimitry Andric 3469*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulLoHi24Combine( 3470*0b57cec5SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const { 3471*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3472*0b57cec5SDimitry Andric 3473*0b57cec5SDimitry Andric // Simplify demanded bits before splitting into multiple users. 3474*0b57cec5SDimitry Andric if (SDValue V = simplifyI24(N, DCI)) 3475*0b57cec5SDimitry Andric return V; 3476*0b57cec5SDimitry Andric 3477*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3478*0b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 3479*0b57cec5SDimitry Andric 3480*0b57cec5SDimitry Andric bool Signed = (N->getOpcode() == AMDGPUISD::MUL_LOHI_I24); 3481*0b57cec5SDimitry Andric 3482*0b57cec5SDimitry Andric unsigned MulLoOpc = Signed ? 
AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 3483*0b57cec5SDimitry Andric unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24; 3484*0b57cec5SDimitry Andric 3485*0b57cec5SDimitry Andric SDLoc SL(N); 3486*0b57cec5SDimitry Andric 3487*0b57cec5SDimitry Andric SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1); 3488*0b57cec5SDimitry Andric SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1); 3489*0b57cec5SDimitry Andric return DAG.getMergeValues({ MulLo, MulHi }, SL); 3490*0b57cec5SDimitry Andric } 3491*0b57cec5SDimitry Andric 3492*0b57cec5SDimitry Andric static bool isNegativeOne(SDValue Val) { 3493*0b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) 3494*0b57cec5SDimitry Andric return C->isAllOnesValue(); 3495*0b57cec5SDimitry Andric return false; 3496*0b57cec5SDimitry Andric } 3497*0b57cec5SDimitry Andric 3498*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG, 3499*0b57cec5SDimitry Andric SDValue Op, 3500*0b57cec5SDimitry Andric const SDLoc &DL, 3501*0b57cec5SDimitry Andric unsigned Opc) const { 3502*0b57cec5SDimitry Andric EVT VT = Op.getValueType(); 3503*0b57cec5SDimitry Andric EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT); 3504*0b57cec5SDimitry Andric if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() && 3505*0b57cec5SDimitry Andric LegalVT != MVT::i16)) 3506*0b57cec5SDimitry Andric return SDValue(); 3507*0b57cec5SDimitry Andric 3508*0b57cec5SDimitry Andric if (VT != MVT::i32) 3509*0b57cec5SDimitry Andric Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op); 3510*0b57cec5SDimitry Andric 3511*0b57cec5SDimitry Andric SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op); 3512*0b57cec5SDimitry Andric if (VT != MVT::i32) 3513*0b57cec5SDimitry Andric FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX); 3514*0b57cec5SDimitry Andric 3515*0b57cec5SDimitry Andric return FFBX; 3516*0b57cec5SDimitry Andric } 3517*0b57cec5SDimitry Andric 3518*0b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that 3519*0b57cec5SDimitry Andric // produces -1 on 0. 3520*0b57cec5SDimitry Andric // 3521*0b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared 3522*0b57cec5SDimitry Andric // against the bitwidth. 3523*0b57cec5SDimitry Andric // 3524*0b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 3525*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, 3526*0b57cec5SDimitry Andric SDValue LHS, SDValue RHS, 3527*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3528*0b57cec5SDimitry Andric ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 3529*0b57cec5SDimitry Andric if (!CmpRhs || !CmpRhs->isNullValue()) 3530*0b57cec5SDimitry Andric return SDValue(); 3531*0b57cec5SDimitry Andric 3532*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3533*0b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 3534*0b57cec5SDimitry Andric SDValue CmpLHS = Cond.getOperand(0); 3535*0b57cec5SDimitry Andric 3536*0b57cec5SDimitry Andric unsigned Opc = isCttzOpc(RHS.getOpcode()) ? 
AMDGPUISD::FFBL_B32 : 3537*0b57cec5SDimitry Andric AMDGPUISD::FFBH_U32; 3538*0b57cec5SDimitry Andric 3539*0b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x 3540*0b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x 3541*0b57cec5SDimitry Andric if (CCOpcode == ISD::SETEQ && 3542*0b57cec5SDimitry Andric (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && 3543*0b57cec5SDimitry Andric RHS.getOperand(0) == CmpLHS && 3544*0b57cec5SDimitry Andric isNegativeOne(LHS)) { 3545*0b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 3546*0b57cec5SDimitry Andric } 3547*0b57cec5SDimitry Andric 3548*0b57cec5SDimitry Andric // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x 3549*0b57cec5SDimitry Andric // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x 3550*0b57cec5SDimitry Andric if (CCOpcode == ISD::SETNE && 3551*0b57cec5SDimitry Andric (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && 3552*0b57cec5SDimitry Andric LHS.getOperand(0) == CmpLHS && 3553*0b57cec5SDimitry Andric isNegativeOne(RHS)) { 3554*0b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 3555*0b57cec5SDimitry Andric } 3556*0b57cec5SDimitry Andric 3557*0b57cec5SDimitry Andric return SDValue(); 3558*0b57cec5SDimitry Andric } 3559*0b57cec5SDimitry Andric 3560*0b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, 3561*0b57cec5SDimitry Andric unsigned Op, 3562*0b57cec5SDimitry Andric const SDLoc &SL, 3563*0b57cec5SDimitry Andric SDValue Cond, 3564*0b57cec5SDimitry Andric SDValue N1, 3565*0b57cec5SDimitry Andric SDValue N2) { 3566*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3567*0b57cec5SDimitry Andric EVT VT = N1.getValueType(); 3568*0b57cec5SDimitry Andric 3569*0b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, 3570*0b57cec5SDimitry Andric N1.getOperand(0), N2.getOperand(0)); 3571*0b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 3572*0b57cec5SDimitry Andric return DAG.getNode(Op, SL, VT, NewSelect); 3573*0b57cec5SDimitry Andric } 3574*0b57cec5SDimitry Andric 3575*0b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses. 
3576*0b57cec5SDimitry Andric // 3577*0b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y) 3578*0b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k)) 3579*0b57cec5SDimitry Andric // 3580*0b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y) 3581*0b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k) 3582*0b57cec5SDimitry Andric static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, 3583*0b57cec5SDimitry Andric SDValue N) { 3584*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3585*0b57cec5SDimitry Andric SDValue Cond = N.getOperand(0); 3586*0b57cec5SDimitry Andric SDValue LHS = N.getOperand(1); 3587*0b57cec5SDimitry Andric SDValue RHS = N.getOperand(2); 3588*0b57cec5SDimitry Andric 3589*0b57cec5SDimitry Andric EVT VT = N.getValueType(); 3590*0b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || 3591*0b57cec5SDimitry Andric (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { 3592*0b57cec5SDimitry Andric return distributeOpThroughSelect(DCI, LHS.getOpcode(), 3593*0b57cec5SDimitry Andric SDLoc(N), Cond, LHS, RHS); 3594*0b57cec5SDimitry Andric } 3595*0b57cec5SDimitry Andric 3596*0b57cec5SDimitry Andric bool Inv = false; 3597*0b57cec5SDimitry Andric if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) { 3598*0b57cec5SDimitry Andric std::swap(LHS, RHS); 3599*0b57cec5SDimitry Andric Inv = true; 3600*0b57cec5SDimitry Andric } 3601*0b57cec5SDimitry Andric 3602*0b57cec5SDimitry Andric // TODO: Support vector constants. 3603*0b57cec5SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); 3604*0b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { 3605*0b57cec5SDimitry Andric SDLoc SL(N); 3606*0b57cec5SDimitry Andric // If one side is an fneg/fabs and the other is a constant, we can push the 3607*0b57cec5SDimitry Andric // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 3608*0b57cec5SDimitry Andric SDValue NewLHS = LHS.getOperand(0); 3609*0b57cec5SDimitry Andric SDValue NewRHS = RHS; 3610*0b57cec5SDimitry Andric 3611*0b57cec5SDimitry Andric // Careful: if the neg can be folded up, don't try to pull it back down. 
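    // For example, (select c, (fneg (fmul a, b)), k) is left alone when the
    // fneg can instead be folded into the fmul as a source modifier.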
3612*0b57cec5SDimitry Andric bool ShouldFoldNeg = true; 3613*0b57cec5SDimitry Andric 3614*0b57cec5SDimitry Andric if (NewLHS.hasOneUse()) { 3615*0b57cec5SDimitry Andric unsigned Opc = NewLHS.getOpcode(); 3616*0b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc)) 3617*0b57cec5SDimitry Andric ShouldFoldNeg = false; 3618*0b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL) 3619*0b57cec5SDimitry Andric ShouldFoldNeg = false; 3620*0b57cec5SDimitry Andric } 3621*0b57cec5SDimitry Andric 3622*0b57cec5SDimitry Andric if (ShouldFoldNeg) { 3623*0b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 3624*0b57cec5SDimitry Andric NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 3625*0b57cec5SDimitry Andric else if (CRHS->isNegative()) 3626*0b57cec5SDimitry Andric return SDValue(); 3627*0b57cec5SDimitry Andric 3628*0b57cec5SDimitry Andric if (Inv) 3629*0b57cec5SDimitry Andric std::swap(NewLHS, NewRHS); 3630*0b57cec5SDimitry Andric 3631*0b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, 3632*0b57cec5SDimitry Andric Cond, NewLHS, NewRHS); 3633*0b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 3634*0b57cec5SDimitry Andric return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect); 3635*0b57cec5SDimitry Andric } 3636*0b57cec5SDimitry Andric } 3637*0b57cec5SDimitry Andric 3638*0b57cec5SDimitry Andric return SDValue(); 3639*0b57cec5SDimitry Andric } 3640*0b57cec5SDimitry Andric 3641*0b57cec5SDimitry Andric 3642*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, 3643*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3644*0b57cec5SDimitry Andric if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) 3645*0b57cec5SDimitry Andric return Folded; 3646*0b57cec5SDimitry Andric 3647*0b57cec5SDimitry Andric SDValue Cond = N->getOperand(0); 3648*0b57cec5SDimitry Andric if (Cond.getOpcode() != ISD::SETCC) 3649*0b57cec5SDimitry Andric return SDValue(); 3650*0b57cec5SDimitry Andric 3651*0b57cec5SDimitry Andric EVT VT = N->getValueType(0); 3652*0b57cec5SDimitry Andric SDValue LHS = Cond.getOperand(0); 3653*0b57cec5SDimitry Andric SDValue RHS = Cond.getOperand(1); 3654*0b57cec5SDimitry Andric SDValue CC = Cond.getOperand(2); 3655*0b57cec5SDimitry Andric 3656*0b57cec5SDimitry Andric SDValue True = N->getOperand(1); 3657*0b57cec5SDimitry Andric SDValue False = N->getOperand(2); 3658*0b57cec5SDimitry Andric 3659*0b57cec5SDimitry Andric if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. 3660*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3661*0b57cec5SDimitry Andric if (DAG.isConstantValueOfAnyType(True) && 3662*0b57cec5SDimitry Andric !DAG.isConstantValueOfAnyType(False)) { 3663*0b57cec5SDimitry Andric // Swap cmp + select pair to move constant to false input. 3664*0b57cec5SDimitry Andric // This will allow using VOPC cndmasks more often. 
      // select (setcc x, y), k, x -> select (setccinv x, y), x, k

      SDLoc SL(N);
      ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                            LHS.getValueType().isInteger());

      SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
      return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
    }

    if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
      SDValue MinMax
        = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
      // Revisit this node so we can catch min3/max3/med3 patterns.
      //DCI.AddToWorklist(MinMax.getNode());
      return MinMax;
    }
  }

  // There's no reason not to do this if the condition has other uses.
  return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}

static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) ||
         APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// 0 and 1.0 / (2.0 * pi) do not have inline immediates, so there is an
// additional cost to negate them.
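// (For example, +0.0 can be encoded as an inline operand, while -0.0 needs a
// 32-bit literal, so negating such a constant is strictly more expensive.)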
3700*0b57cec5SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { 3701*0b57cec5SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) { 3702*0b57cec5SDimitry Andric if (C->isZero() && !C->isNegative()) 3703*0b57cec5SDimitry Andric return true; 3704*0b57cec5SDimitry Andric 3705*0b57cec5SDimitry Andric if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF())) 3706*0b57cec5SDimitry Andric return true; 3707*0b57cec5SDimitry Andric } 3708*0b57cec5SDimitry Andric 3709*0b57cec5SDimitry Andric return false; 3710*0b57cec5SDimitry Andric } 3711*0b57cec5SDimitry Andric 3712*0b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) { 3713*0b57cec5SDimitry Andric switch (Opc) { 3714*0b57cec5SDimitry Andric case ISD::FMAXNUM: 3715*0b57cec5SDimitry Andric return ISD::FMINNUM; 3716*0b57cec5SDimitry Andric case ISD::FMINNUM: 3717*0b57cec5SDimitry Andric return ISD::FMAXNUM; 3718*0b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 3719*0b57cec5SDimitry Andric return ISD::FMINNUM_IEEE; 3720*0b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 3721*0b57cec5SDimitry Andric return ISD::FMAXNUM_IEEE; 3722*0b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 3723*0b57cec5SDimitry Andric return AMDGPUISD::FMIN_LEGACY; 3724*0b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 3725*0b57cec5SDimitry Andric return AMDGPUISD::FMAX_LEGACY; 3726*0b57cec5SDimitry Andric default: 3727*0b57cec5SDimitry Andric llvm_unreachable("invalid min/max opcode"); 3728*0b57cec5SDimitry Andric } 3729*0b57cec5SDimitry Andric } 3730*0b57cec5SDimitry Andric 3731*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, 3732*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3733*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3734*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3735*0b57cec5SDimitry Andric EVT VT = N->getValueType(0); 3736*0b57cec5SDimitry Andric 3737*0b57cec5SDimitry Andric unsigned Opc = N0.getOpcode(); 3738*0b57cec5SDimitry Andric 3739*0b57cec5SDimitry Andric // If the input has multiple uses and we can either fold the negate down, or 3740*0b57cec5SDimitry Andric // the other uses cannot, give up. This both prevents unprofitable 3741*0b57cec5SDimitry Andric // transformations and infinite loops: we won't repeatedly try to fold around 3742*0b57cec5SDimitry Andric // a negate that has no 'good' form. 3743*0b57cec5SDimitry Andric if (N0.hasOneUse()) { 3744*0b57cec5SDimitry Andric // This may be able to fold into the source, but at a code size cost. Don't 3745*0b57cec5SDimitry Andric // fold if the fold into the user is free. 
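    // That is, if every use of this fneg already accepts source modifiers, the
    // fneg is free where it is and nothing is gained by pushing it into N0.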
3746*0b57cec5SDimitry Andric if (allUsesHaveSourceMods(N, 0)) 3747*0b57cec5SDimitry Andric return SDValue(); 3748*0b57cec5SDimitry Andric } else { 3749*0b57cec5SDimitry Andric if (fnegFoldsIntoOp(Opc) && 3750*0b57cec5SDimitry Andric (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode()))) 3751*0b57cec5SDimitry Andric return SDValue(); 3752*0b57cec5SDimitry Andric } 3753*0b57cec5SDimitry Andric 3754*0b57cec5SDimitry Andric SDLoc SL(N); 3755*0b57cec5SDimitry Andric switch (Opc) { 3756*0b57cec5SDimitry Andric case ISD::FADD: { 3757*0b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 3758*0b57cec5SDimitry Andric return SDValue(); 3759*0b57cec5SDimitry Andric 3760*0b57cec5SDimitry Andric // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) 3761*0b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 3762*0b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 3763*0b57cec5SDimitry Andric 3764*0b57cec5SDimitry Andric if (LHS.getOpcode() != ISD::FNEG) 3765*0b57cec5SDimitry Andric LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 3766*0b57cec5SDimitry Andric else 3767*0b57cec5SDimitry Andric LHS = LHS.getOperand(0); 3768*0b57cec5SDimitry Andric 3769*0b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 3770*0b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 3771*0b57cec5SDimitry Andric else 3772*0b57cec5SDimitry Andric RHS = RHS.getOperand(0); 3773*0b57cec5SDimitry Andric 3774*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); 3775*0b57cec5SDimitry Andric if (Res.getOpcode() != ISD::FADD) 3776*0b57cec5SDimitry Andric return SDValue(); // Op got folded away. 3777*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3778*0b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 3779*0b57cec5SDimitry Andric return Res; 3780*0b57cec5SDimitry Andric } 3781*0b57cec5SDimitry Andric case ISD::FMUL: 3782*0b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: { 3783*0b57cec5SDimitry Andric // (fneg (fmul x, y)) -> (fmul x, (fneg y)) 3784*0b57cec5SDimitry Andric // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y)) 3785*0b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 3786*0b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 3787*0b57cec5SDimitry Andric 3788*0b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 3789*0b57cec5SDimitry Andric LHS = LHS.getOperand(0); 3790*0b57cec5SDimitry Andric else if (RHS.getOpcode() == ISD::FNEG) 3791*0b57cec5SDimitry Andric RHS = RHS.getOperand(0); 3792*0b57cec5SDimitry Andric else 3793*0b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 3794*0b57cec5SDimitry Andric 3795*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); 3796*0b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 3797*0b57cec5SDimitry Andric return SDValue(); // Op got folded away. 
3798*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3799*0b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 3800*0b57cec5SDimitry Andric return Res; 3801*0b57cec5SDimitry Andric } 3802*0b57cec5SDimitry Andric case ISD::FMA: 3803*0b57cec5SDimitry Andric case ISD::FMAD: { 3804*0b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 3805*0b57cec5SDimitry Andric return SDValue(); 3806*0b57cec5SDimitry Andric 3807*0b57cec5SDimitry Andric // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) 3808*0b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 3809*0b57cec5SDimitry Andric SDValue MHS = N0.getOperand(1); 3810*0b57cec5SDimitry Andric SDValue RHS = N0.getOperand(2); 3811*0b57cec5SDimitry Andric 3812*0b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 3813*0b57cec5SDimitry Andric LHS = LHS.getOperand(0); 3814*0b57cec5SDimitry Andric else if (MHS.getOpcode() == ISD::FNEG) 3815*0b57cec5SDimitry Andric MHS = MHS.getOperand(0); 3816*0b57cec5SDimitry Andric else 3817*0b57cec5SDimitry Andric MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS); 3818*0b57cec5SDimitry Andric 3819*0b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 3820*0b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 3821*0b57cec5SDimitry Andric else 3822*0b57cec5SDimitry Andric RHS = RHS.getOperand(0); 3823*0b57cec5SDimitry Andric 3824*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); 3825*0b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 3826*0b57cec5SDimitry Andric return SDValue(); // Op got folded away. 3827*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3828*0b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 3829*0b57cec5SDimitry Andric return Res; 3830*0b57cec5SDimitry Andric } 3831*0b57cec5SDimitry Andric case ISD::FMAXNUM: 3832*0b57cec5SDimitry Andric case ISD::FMINNUM: 3833*0b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 3834*0b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 3835*0b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 3836*0b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: { 3837*0b57cec5SDimitry Andric // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) 3838*0b57cec5SDimitry Andric // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y) 3839*0b57cec5SDimitry Andric // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y) 3840*0b57cec5SDimitry Andric // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y) 3841*0b57cec5SDimitry Andric 3842*0b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 3843*0b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 3844*0b57cec5SDimitry Andric 3845*0b57cec5SDimitry Andric // 0 doesn't have a negated inline immediate. 3846*0b57cec5SDimitry Andric // TODO: This constant check should be generalized to other operations. 3847*0b57cec5SDimitry Andric if (isConstantCostlierToNegate(RHS)) 3848*0b57cec5SDimitry Andric return SDValue(); 3849*0b57cec5SDimitry Andric 3850*0b57cec5SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 3851*0b57cec5SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 3852*0b57cec5SDimitry Andric unsigned Opposite = inverseMinMax(Opc); 3853*0b57cec5SDimitry Andric 3854*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); 3855*0b57cec5SDimitry Andric if (Res.getOpcode() != Opposite) 3856*0b57cec5SDimitry Andric return SDValue(); // Op got folded away. 
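    // As in the cases above, any other uses of the original node are updated
    // to use the re-negated result, which is equivalent to the old value.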
3857*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3858*0b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 3859*0b57cec5SDimitry Andric return Res; 3860*0b57cec5SDimitry Andric } 3861*0b57cec5SDimitry Andric case AMDGPUISD::FMED3: { 3862*0b57cec5SDimitry Andric SDValue Ops[3]; 3863*0b57cec5SDimitry Andric for (unsigned I = 0; I < 3; ++I) 3864*0b57cec5SDimitry Andric Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags()); 3865*0b57cec5SDimitry Andric 3866*0b57cec5SDimitry Andric SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); 3867*0b57cec5SDimitry Andric if (Res.getOpcode() != AMDGPUISD::FMED3) 3868*0b57cec5SDimitry Andric return SDValue(); // Op got folded away. 3869*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3870*0b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 3871*0b57cec5SDimitry Andric return Res; 3872*0b57cec5SDimitry Andric } 3873*0b57cec5SDimitry Andric case ISD::FP_EXTEND: 3874*0b57cec5SDimitry Andric case ISD::FTRUNC: 3875*0b57cec5SDimitry Andric case ISD::FRINT: 3876*0b57cec5SDimitry Andric case ISD::FNEARBYINT: // XXX - Should fround be handled? 3877*0b57cec5SDimitry Andric case ISD::FSIN: 3878*0b57cec5SDimitry Andric case ISD::FCANONICALIZE: 3879*0b57cec5SDimitry Andric case AMDGPUISD::RCP: 3880*0b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 3881*0b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 3882*0b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: { 3883*0b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 3884*0b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 3885*0b57cec5SDimitry Andric // (fneg (fp_extend (fneg x))) -> (fp_extend x) 3886*0b57cec5SDimitry Andric // (fneg (rcp (fneg x))) -> (rcp x) 3887*0b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0)); 3888*0b57cec5SDimitry Andric } 3889*0b57cec5SDimitry Andric 3890*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3891*0b57cec5SDimitry Andric return SDValue(); 3892*0b57cec5SDimitry Andric 3893*0b57cec5SDimitry Andric // (fneg (fp_extend x)) -> (fp_extend (fneg x)) 3894*0b57cec5SDimitry Andric // (fneg (rcp x)) -> (rcp (fneg x)) 3895*0b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 3896*0b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags()); 3897*0b57cec5SDimitry Andric } 3898*0b57cec5SDimitry Andric case ISD::FP_ROUND: { 3899*0b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 3900*0b57cec5SDimitry Andric 3901*0b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 3902*0b57cec5SDimitry Andric // (fneg (fp_round (fneg x))) -> (fp_round x) 3903*0b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, 3904*0b57cec5SDimitry Andric CvtSrc.getOperand(0), N0.getOperand(1)); 3905*0b57cec5SDimitry Andric } 3906*0b57cec5SDimitry Andric 3907*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3908*0b57cec5SDimitry Andric return SDValue(); 3909*0b57cec5SDimitry Andric 3910*0b57cec5SDimitry Andric // (fneg (fp_round x)) -> (fp_round (fneg x)) 3911*0b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 3912*0b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); 3913*0b57cec5SDimitry Andric } 3914*0b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 3915*0b57cec5SDimitry Andric // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal 3916*0b57cec5SDimitry Andric // f16, but legalization of f16 
fneg ends up pulling it out of the source. 3917*0b57cec5SDimitry Andric // Put the fneg back as a legal source operation that can be matched later. 3918*0b57cec5SDimitry Andric SDLoc SL(N); 3919*0b57cec5SDimitry Andric 3920*0b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 3921*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3922*0b57cec5SDimitry Andric 3923*0b57cec5SDimitry Andric // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) 3924*0b57cec5SDimitry Andric SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, 3925*0b57cec5SDimitry Andric DAG.getConstant(0x8000, SL, SrcVT)); 3926*0b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); 3927*0b57cec5SDimitry Andric } 3928*0b57cec5SDimitry Andric default: 3929*0b57cec5SDimitry Andric return SDValue(); 3930*0b57cec5SDimitry Andric } 3931*0b57cec5SDimitry Andric } 3932*0b57cec5SDimitry Andric 3933*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, 3934*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3935*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3936*0b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 3937*0b57cec5SDimitry Andric 3938*0b57cec5SDimitry Andric if (!N0.hasOneUse()) 3939*0b57cec5SDimitry Andric return SDValue(); 3940*0b57cec5SDimitry Andric 3941*0b57cec5SDimitry Andric switch (N0.getOpcode()) { 3942*0b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 3943*0b57cec5SDimitry Andric assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); 3944*0b57cec5SDimitry Andric SDLoc SL(N); 3945*0b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 3946*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3947*0b57cec5SDimitry Andric 3948*0b57cec5SDimitry Andric // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) 3949*0b57cec5SDimitry Andric SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, 3950*0b57cec5SDimitry Andric DAG.getConstant(0x7fff, SL, SrcVT)); 3951*0b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); 3952*0b57cec5SDimitry Andric } 3953*0b57cec5SDimitry Andric default: 3954*0b57cec5SDimitry Andric return SDValue(); 3955*0b57cec5SDimitry Andric } 3956*0b57cec5SDimitry Andric } 3957*0b57cec5SDimitry Andric 3958*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N, 3959*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3960*0b57cec5SDimitry Andric const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 3961*0b57cec5SDimitry Andric if (!CFP) 3962*0b57cec5SDimitry Andric return SDValue(); 3963*0b57cec5SDimitry Andric 3964*0b57cec5SDimitry Andric // XXX - Should this flush denormals? 
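  // Constant fold the reciprocal directly, e.g. (rcp 2.0) -> 0.5.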
3965*0b57cec5SDimitry Andric const APFloat &Val = CFP->getValueAPF(); 3966*0b57cec5SDimitry Andric APFloat One(Val.getSemantics(), "1.0"); 3967*0b57cec5SDimitry Andric return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0)); 3968*0b57cec5SDimitry Andric } 3969*0b57cec5SDimitry Andric 3970*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, 3971*0b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 3972*0b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 3973*0b57cec5SDimitry Andric SDLoc DL(N); 3974*0b57cec5SDimitry Andric 3975*0b57cec5SDimitry Andric switch(N->getOpcode()) { 3976*0b57cec5SDimitry Andric default: 3977*0b57cec5SDimitry Andric break; 3978*0b57cec5SDimitry Andric case ISD::BITCAST: { 3979*0b57cec5SDimitry Andric EVT DestVT = N->getValueType(0); 3980*0b57cec5SDimitry Andric 3981*0b57cec5SDimitry Andric // Push casts through vector builds. This helps avoid emitting a large 3982*0b57cec5SDimitry Andric // number of copies when materializing floating point vector constants. 3983*0b57cec5SDimitry Andric // 3984*0b57cec5SDimitry Andric // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) => 3985*0b57cec5SDimitry Andric // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y)) 3986*0b57cec5SDimitry Andric if (DestVT.isVector()) { 3987*0b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 3988*0b57cec5SDimitry Andric if (Src.getOpcode() == ISD::BUILD_VECTOR) { 3989*0b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3990*0b57cec5SDimitry Andric unsigned NElts = DestVT.getVectorNumElements(); 3991*0b57cec5SDimitry Andric 3992*0b57cec5SDimitry Andric if (SrcVT.getVectorNumElements() == NElts) { 3993*0b57cec5SDimitry Andric EVT DestEltVT = DestVT.getVectorElementType(); 3994*0b57cec5SDimitry Andric 3995*0b57cec5SDimitry Andric SmallVector<SDValue, 8> CastedElts; 3996*0b57cec5SDimitry Andric SDLoc SL(N); 3997*0b57cec5SDimitry Andric for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) { 3998*0b57cec5SDimitry Andric SDValue Elt = Src.getOperand(I); 3999*0b57cec5SDimitry Andric CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt)); 4000*0b57cec5SDimitry Andric } 4001*0b57cec5SDimitry Andric 4002*0b57cec5SDimitry Andric return DAG.getBuildVector(DestVT, SL, CastedElts); 4003*0b57cec5SDimitry Andric } 4004*0b57cec5SDimitry Andric } 4005*0b57cec5SDimitry Andric } 4006*0b57cec5SDimitry Andric 4007*0b57cec5SDimitry Andric if (DestVT.getSizeInBits() != 64 && !DestVT.isVector()) 4008*0b57cec5SDimitry Andric break; 4009*0b57cec5SDimitry Andric 4010*0b57cec5SDimitry Andric // Fold bitcasts of constants. 
4011*0b57cec5SDimitry Andric // 4012*0b57cec5SDimitry Andric // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k) 4013*0b57cec5SDimitry Andric // TODO: Generalize and move to DAGCombiner 4014*0b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 4015*0b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) { 4016*0b57cec5SDimitry Andric if (Src.getValueType() == MVT::i64) { 4017*0b57cec5SDimitry Andric SDLoc SL(N); 4018*0b57cec5SDimitry Andric uint64_t CVal = C->getZExtValue(); 4019*0b57cec5SDimitry Andric SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 4020*0b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 4021*0b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 4022*0b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, BV); 4023*0b57cec5SDimitry Andric } 4024*0b57cec5SDimitry Andric } 4025*0b57cec5SDimitry Andric 4026*0b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) { 4027*0b57cec5SDimitry Andric const APInt &Val = C->getValueAPF().bitcastToAPInt(); 4028*0b57cec5SDimitry Andric SDLoc SL(N); 4029*0b57cec5SDimitry Andric uint64_t CVal = Val.getZExtValue(); 4030*0b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 4031*0b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 4032*0b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 4033*0b57cec5SDimitry Andric 4034*0b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec); 4035*0b57cec5SDimitry Andric } 4036*0b57cec5SDimitry Andric 4037*0b57cec5SDimitry Andric break; 4038*0b57cec5SDimitry Andric } 4039*0b57cec5SDimitry Andric case ISD::SHL: { 4040*0b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 4041*0b57cec5SDimitry Andric break; 4042*0b57cec5SDimitry Andric 4043*0b57cec5SDimitry Andric return performShlCombine(N, DCI); 4044*0b57cec5SDimitry Andric } 4045*0b57cec5SDimitry Andric case ISD::SRL: { 4046*0b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 4047*0b57cec5SDimitry Andric break; 4048*0b57cec5SDimitry Andric 4049*0b57cec5SDimitry Andric return performSrlCombine(N, DCI); 4050*0b57cec5SDimitry Andric } 4051*0b57cec5SDimitry Andric case ISD::SRA: { 4052*0b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 4053*0b57cec5SDimitry Andric break; 4054*0b57cec5SDimitry Andric 4055*0b57cec5SDimitry Andric return performSraCombine(N, DCI); 4056*0b57cec5SDimitry Andric } 4057*0b57cec5SDimitry Andric case ISD::TRUNCATE: 4058*0b57cec5SDimitry Andric return performTruncateCombine(N, DCI); 4059*0b57cec5SDimitry Andric case ISD::MUL: 4060*0b57cec5SDimitry Andric return performMulCombine(N, DCI); 4061*0b57cec5SDimitry Andric case ISD::MULHS: 4062*0b57cec5SDimitry Andric return performMulhsCombine(N, DCI); 4063*0b57cec5SDimitry Andric case ISD::MULHU: 4064*0b57cec5SDimitry Andric return performMulhuCombine(N, DCI); 4065*0b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: 4066*0b57cec5SDimitry Andric case AMDGPUISD::MUL_U24: 4067*0b57cec5SDimitry Andric case AMDGPUISD::MULHI_I24: 4068*0b57cec5SDimitry Andric case AMDGPUISD::MULHI_U24: { 4069*0b57cec5SDimitry Andric if (SDValue V = simplifyI24(N, DCI)) 4070*0b57cec5SDimitry Andric return V; 4071*0b57cec5SDimitry Andric return SDValue(); 4072*0b57cec5SDimitry Andric } 4073*0b57cec5SDimitry Andric case AMDGPUISD::MUL_LOHI_I24: 4074*0b57cec5SDimitry Andric case AMDGPUISD::MUL_LOHI_U24: 4075*0b57cec5SDimitry Andric return performMulLoHi24Combine(N, 
                                      DCI);
  case ISD::SELECT:
    return performSelectCombine(N, DCI);
  case ISD::FNEG:
    return performFNegCombine(N, DCI);
  case ISD::FABS:
    return performFAbsCombine(N, DCI);
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    assert(!N->getValueType(0).isVector() &&
           "Vector handling of BFE not implemented");
    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    uint32_t WidthVal = Width->getZExtValue() & 0x1f;
    if (WidthVal == 0)
      return DAG.getConstant(0, DL, MVT::i32);

    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    SDValue BitsFrom = N->getOperand(0);
    uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;

    bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;

    if (OffsetVal == 0) {
      // This is already sign / zero extended, so try to fold away extra BFEs.
      unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);

      unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
      if (OpSignBits >= SignBits)
        return BitsFrom;

      EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
      if (Signed) {
        // This is a sign_extend_inreg. Replace it to take advantage of existing
        // DAG Combines. If not eliminated, we will match back to BFE during
        // selection.

        // TODO: The sext_inreg of extended types ends up getting expanded,
        // although we could handle them in a single BFE.
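        // For example, (BFE_I32 x, 0, 8) becomes (sign_extend_inreg x, i8)
        // here.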
4119*0b57cec5SDimitry Andric return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, 4120*0b57cec5SDimitry Andric DAG.getValueType(SmallVT)); 4121*0b57cec5SDimitry Andric } 4122*0b57cec5SDimitry Andric 4123*0b57cec5SDimitry Andric return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); 4124*0b57cec5SDimitry Andric } 4125*0b57cec5SDimitry Andric 4126*0b57cec5SDimitry Andric if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) { 4127*0b57cec5SDimitry Andric if (Signed) { 4128*0b57cec5SDimitry Andric return constantFoldBFE<int32_t>(DAG, 4129*0b57cec5SDimitry Andric CVal->getSExtValue(), 4130*0b57cec5SDimitry Andric OffsetVal, 4131*0b57cec5SDimitry Andric WidthVal, 4132*0b57cec5SDimitry Andric DL); 4133*0b57cec5SDimitry Andric } 4134*0b57cec5SDimitry Andric 4135*0b57cec5SDimitry Andric return constantFoldBFE<uint32_t>(DAG, 4136*0b57cec5SDimitry Andric CVal->getZExtValue(), 4137*0b57cec5SDimitry Andric OffsetVal, 4138*0b57cec5SDimitry Andric WidthVal, 4139*0b57cec5SDimitry Andric DL); 4140*0b57cec5SDimitry Andric } 4141*0b57cec5SDimitry Andric 4142*0b57cec5SDimitry Andric if ((OffsetVal + WidthVal) >= 32 && 4143*0b57cec5SDimitry Andric !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { 4144*0b57cec5SDimitry Andric SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); 4145*0b57cec5SDimitry Andric return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, 4146*0b57cec5SDimitry Andric BitsFrom, ShiftVal); 4147*0b57cec5SDimitry Andric } 4148*0b57cec5SDimitry Andric 4149*0b57cec5SDimitry Andric if (BitsFrom.hasOneUse()) { 4150*0b57cec5SDimitry Andric APInt Demanded = APInt::getBitsSet(32, 4151*0b57cec5SDimitry Andric OffsetVal, 4152*0b57cec5SDimitry Andric OffsetVal + WidthVal); 4153*0b57cec5SDimitry Andric 4154*0b57cec5SDimitry Andric KnownBits Known; 4155*0b57cec5SDimitry Andric TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), 4156*0b57cec5SDimitry Andric !DCI.isBeforeLegalizeOps()); 4157*0b57cec5SDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4158*0b57cec5SDimitry Andric if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || 4159*0b57cec5SDimitry Andric TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { 4160*0b57cec5SDimitry Andric DCI.CommitTargetLoweringOpt(TLO); 4161*0b57cec5SDimitry Andric } 4162*0b57cec5SDimitry Andric } 4163*0b57cec5SDimitry Andric 4164*0b57cec5SDimitry Andric break; 4165*0b57cec5SDimitry Andric } 4166*0b57cec5SDimitry Andric case ISD::LOAD: 4167*0b57cec5SDimitry Andric return performLoadCombine(N, DCI); 4168*0b57cec5SDimitry Andric case ISD::STORE: 4169*0b57cec5SDimitry Andric return performStoreCombine(N, DCI); 4170*0b57cec5SDimitry Andric case AMDGPUISD::RCP: 4171*0b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 4172*0b57cec5SDimitry Andric return performRcpCombine(N, DCI); 4173*0b57cec5SDimitry Andric case ISD::AssertZext: 4174*0b57cec5SDimitry Andric case ISD::AssertSext: 4175*0b57cec5SDimitry Andric return performAssertSZExtCombine(N, DCI); 4176*0b57cec5SDimitry Andric } 4177*0b57cec5SDimitry Andric return SDValue(); 4178*0b57cec5SDimitry Andric } 4179*0b57cec5SDimitry Andric 4180*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4181*0b57cec5SDimitry Andric // Helper functions 4182*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4183*0b57cec5SDimitry Andric 4184*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 
4185*0b57cec5SDimitry Andric const TargetRegisterClass *RC, 4186*0b57cec5SDimitry Andric unsigned Reg, EVT VT, 4187*0b57cec5SDimitry Andric const SDLoc &SL, 4188*0b57cec5SDimitry Andric bool RawReg) const { 4189*0b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 4190*0b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 4191*0b57cec5SDimitry Andric unsigned VReg; 4192*0b57cec5SDimitry Andric 4193*0b57cec5SDimitry Andric if (!MRI.isLiveIn(Reg)) { 4194*0b57cec5SDimitry Andric VReg = MRI.createVirtualRegister(RC); 4195*0b57cec5SDimitry Andric MRI.addLiveIn(Reg, VReg); 4196*0b57cec5SDimitry Andric } else { 4197*0b57cec5SDimitry Andric VReg = MRI.getLiveInVirtReg(Reg); 4198*0b57cec5SDimitry Andric } 4199*0b57cec5SDimitry Andric 4200*0b57cec5SDimitry Andric if (RawReg) 4201*0b57cec5SDimitry Andric return DAG.getRegister(VReg, VT); 4202*0b57cec5SDimitry Andric 4203*0b57cec5SDimitry Andric return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); 4204*0b57cec5SDimitry Andric } 4205*0b57cec5SDimitry Andric 4206*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, 4207*0b57cec5SDimitry Andric EVT VT, 4208*0b57cec5SDimitry Andric const SDLoc &SL, 4209*0b57cec5SDimitry Andric int64_t Offset) const { 4210*0b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 4211*0b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 4212*0b57cec5SDimitry Andric 4213*0b57cec5SDimitry Andric int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true); 4214*0b57cec5SDimitry Andric auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); 4215*0b57cec5SDimitry Andric SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); 4216*0b57cec5SDimitry Andric 4217*0b57cec5SDimitry Andric return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, 4, 4218*0b57cec5SDimitry Andric MachineMemOperand::MODereferenceable | 4219*0b57cec5SDimitry Andric MachineMemOperand::MOInvariant); 4220*0b57cec5SDimitry Andric } 4221*0b57cec5SDimitry Andric 4222*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, 4223*0b57cec5SDimitry Andric const SDLoc &SL, 4224*0b57cec5SDimitry Andric SDValue Chain, 4225*0b57cec5SDimitry Andric SDValue ArgVal, 4226*0b57cec5SDimitry Andric int64_t Offset) const { 4227*0b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 4228*0b57cec5SDimitry Andric MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); 4229*0b57cec5SDimitry Andric 4230*0b57cec5SDimitry Andric SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32); 4231*0b57cec5SDimitry Andric SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4, 4232*0b57cec5SDimitry Andric MachineMemOperand::MODereferenceable); 4233*0b57cec5SDimitry Andric return Store; 4234*0b57cec5SDimitry Andric } 4235*0b57cec5SDimitry Andric 4236*0b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG, 4237*0b57cec5SDimitry Andric const TargetRegisterClass *RC, 4238*0b57cec5SDimitry Andric EVT VT, const SDLoc &SL, 4239*0b57cec5SDimitry Andric const ArgDescriptor &Arg) const { 4240*0b57cec5SDimitry Andric assert(Arg && "Attempting to load missing argument"); 4241*0b57cec5SDimitry Andric 4242*0b57cec5SDimitry Andric SDValue V = Arg.isRegister() ? 
4243*0b57cec5SDimitry Andric CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) : 4244*0b57cec5SDimitry Andric loadStackInputValue(DAG, VT, SL, Arg.getStackOffset()); 4245*0b57cec5SDimitry Andric 4246*0b57cec5SDimitry Andric if (!Arg.isMasked()) 4247*0b57cec5SDimitry Andric return V; 4248*0b57cec5SDimitry Andric 4249*0b57cec5SDimitry Andric unsigned Mask = Arg.getMask(); 4250*0b57cec5SDimitry Andric unsigned Shift = countTrailingZeros<unsigned>(Mask); 4251*0b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, SL, VT, V, 4252*0b57cec5SDimitry Andric DAG.getShiftAmountConstant(Shift, VT, SL)); 4253*0b57cec5SDimitry Andric return DAG.getNode(ISD::AND, SL, VT, V, 4254*0b57cec5SDimitry Andric DAG.getConstant(Mask >> Shift, SL, VT)); 4255*0b57cec5SDimitry Andric } 4256*0b57cec5SDimitry Andric 4257*0b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( 4258*0b57cec5SDimitry Andric const MachineFunction &MF, const ImplicitParameter Param) const { 4259*0b57cec5SDimitry Andric const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); 4260*0b57cec5SDimitry Andric const AMDGPUSubtarget &ST = 4261*0b57cec5SDimitry Andric AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction()); 4262*0b57cec5SDimitry Andric unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); 4263*0b57cec5SDimitry Andric unsigned Alignment = ST.getAlignmentForImplicitArgPtr(); 4264*0b57cec5SDimitry Andric uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + 4265*0b57cec5SDimitry Andric ExplicitArgOffset; 4266*0b57cec5SDimitry Andric switch (Param) { 4267*0b57cec5SDimitry Andric case GRID_DIM: 4268*0b57cec5SDimitry Andric return ArgOffset; 4269*0b57cec5SDimitry Andric case GRID_OFFSET: 4270*0b57cec5SDimitry Andric return ArgOffset + 4; 4271*0b57cec5SDimitry Andric } 4272*0b57cec5SDimitry Andric llvm_unreachable("unexpected implicit parameter type"); 4273*0b57cec5SDimitry Andric } 4274*0b57cec5SDimitry Andric 4275*0b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 4276*0b57cec5SDimitry Andric 4277*0b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 4278*0b57cec5SDimitry Andric switch ((AMDGPUISD::NodeType)Opcode) { 4279*0b57cec5SDimitry Andric case AMDGPUISD::FIRST_NUMBER: break; 4280*0b57cec5SDimitry Andric // AMDIL DAG nodes 4281*0b57cec5SDimitry Andric NODE_NAME_CASE(UMUL); 4282*0b57cec5SDimitry Andric NODE_NAME_CASE(BRANCH_COND); 4283*0b57cec5SDimitry Andric 4284*0b57cec5SDimitry Andric // AMDGPU DAG nodes 4285*0b57cec5SDimitry Andric NODE_NAME_CASE(IF) 4286*0b57cec5SDimitry Andric NODE_NAME_CASE(ELSE) 4287*0b57cec5SDimitry Andric NODE_NAME_CASE(LOOP) 4288*0b57cec5SDimitry Andric NODE_NAME_CASE(CALL) 4289*0b57cec5SDimitry Andric NODE_NAME_CASE(TC_RETURN) 4290*0b57cec5SDimitry Andric NODE_NAME_CASE(TRAP) 4291*0b57cec5SDimitry Andric NODE_NAME_CASE(RET_FLAG) 4292*0b57cec5SDimitry Andric NODE_NAME_CASE(RETURN_TO_EPILOG) 4293*0b57cec5SDimitry Andric NODE_NAME_CASE(ENDPGM) 4294*0b57cec5SDimitry Andric NODE_NAME_CASE(DWORDADDR) 4295*0b57cec5SDimitry Andric NODE_NAME_CASE(FRACT) 4296*0b57cec5SDimitry Andric NODE_NAME_CASE(SETCC) 4297*0b57cec5SDimitry Andric NODE_NAME_CASE(SETREG) 4298*0b57cec5SDimitry Andric NODE_NAME_CASE(FMA_W_CHAIN) 4299*0b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_W_CHAIN) 4300*0b57cec5SDimitry Andric NODE_NAME_CASE(CLAMP) 4301*0b57cec5SDimitry Andric NODE_NAME_CASE(COS_HW) 4302*0b57cec5SDimitry Andric NODE_NAME_CASE(SIN_HW) 4303*0b57cec5SDimitry 
Andric NODE_NAME_CASE(FMAX_LEGACY) 4304*0b57cec5SDimitry Andric NODE_NAME_CASE(FMIN_LEGACY) 4305*0b57cec5SDimitry Andric NODE_NAME_CASE(FMAX3) 4306*0b57cec5SDimitry Andric NODE_NAME_CASE(SMAX3) 4307*0b57cec5SDimitry Andric NODE_NAME_CASE(UMAX3) 4308*0b57cec5SDimitry Andric NODE_NAME_CASE(FMIN3) 4309*0b57cec5SDimitry Andric NODE_NAME_CASE(SMIN3) 4310*0b57cec5SDimitry Andric NODE_NAME_CASE(UMIN3) 4311*0b57cec5SDimitry Andric NODE_NAME_CASE(FMED3) 4312*0b57cec5SDimitry Andric NODE_NAME_CASE(SMED3) 4313*0b57cec5SDimitry Andric NODE_NAME_CASE(UMED3) 4314*0b57cec5SDimitry Andric NODE_NAME_CASE(FDOT2) 4315*0b57cec5SDimitry Andric NODE_NAME_CASE(URECIP) 4316*0b57cec5SDimitry Andric NODE_NAME_CASE(DIV_SCALE) 4317*0b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FMAS) 4318*0b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FIXUP) 4319*0b57cec5SDimitry Andric NODE_NAME_CASE(FMAD_FTZ) 4320*0b57cec5SDimitry Andric NODE_NAME_CASE(TRIG_PREOP) 4321*0b57cec5SDimitry Andric NODE_NAME_CASE(RCP) 4322*0b57cec5SDimitry Andric NODE_NAME_CASE(RSQ) 4323*0b57cec5SDimitry Andric NODE_NAME_CASE(RCP_LEGACY) 4324*0b57cec5SDimitry Andric NODE_NAME_CASE(RSQ_LEGACY) 4325*0b57cec5SDimitry Andric NODE_NAME_CASE(RCP_IFLAG) 4326*0b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_LEGACY) 4327*0b57cec5SDimitry Andric NODE_NAME_CASE(RSQ_CLAMP) 4328*0b57cec5SDimitry Andric NODE_NAME_CASE(LDEXP) 4329*0b57cec5SDimitry Andric NODE_NAME_CASE(FP_CLASS) 4330*0b57cec5SDimitry Andric NODE_NAME_CASE(DOT4) 4331*0b57cec5SDimitry Andric NODE_NAME_CASE(CARRY) 4332*0b57cec5SDimitry Andric NODE_NAME_CASE(BORROW) 4333*0b57cec5SDimitry Andric NODE_NAME_CASE(BFE_U32) 4334*0b57cec5SDimitry Andric NODE_NAME_CASE(BFE_I32) 4335*0b57cec5SDimitry Andric NODE_NAME_CASE(BFI) 4336*0b57cec5SDimitry Andric NODE_NAME_CASE(BFM) 4337*0b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_U32) 4338*0b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_I32) 4339*0b57cec5SDimitry Andric NODE_NAME_CASE(FFBL_B32) 4340*0b57cec5SDimitry Andric NODE_NAME_CASE(MUL_U24) 4341*0b57cec5SDimitry Andric NODE_NAME_CASE(MUL_I24) 4342*0b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_U24) 4343*0b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_I24) 4344*0b57cec5SDimitry Andric NODE_NAME_CASE(MUL_LOHI_U24) 4345*0b57cec5SDimitry Andric NODE_NAME_CASE(MUL_LOHI_I24) 4346*0b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U24) 4347*0b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I24) 4348*0b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I64_I32) 4349*0b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U64_U32) 4350*0b57cec5SDimitry Andric NODE_NAME_CASE(PERM) 4351*0b57cec5SDimitry Andric NODE_NAME_CASE(TEXTURE_FETCH) 4352*0b57cec5SDimitry Andric NODE_NAME_CASE(EXPORT) 4353*0b57cec5SDimitry Andric NODE_NAME_CASE(EXPORT_DONE) 4354*0b57cec5SDimitry Andric NODE_NAME_CASE(R600_EXPORT) 4355*0b57cec5SDimitry Andric NODE_NAME_CASE(CONST_ADDRESS) 4356*0b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_LOAD) 4357*0b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_STORE) 4358*0b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLE) 4359*0b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEB) 4360*0b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLED) 4361*0b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEL) 4362*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE0) 4363*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE1) 4364*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE2) 4365*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE3) 4366*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKRTZ_F16_F32) 4367*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_I16_F32) 4368*0b57cec5SDimitry Andric 
NODE_NAME_CASE(CVT_PKNORM_U16_F32) 4369*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_I16_I32) 4370*0b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_U16_U32) 4371*0b57cec5SDimitry Andric NODE_NAME_CASE(FP_TO_FP16) 4372*0b57cec5SDimitry Andric NODE_NAME_CASE(FP16_ZEXT) 4373*0b57cec5SDimitry Andric NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) 4374*0b57cec5SDimitry Andric NODE_NAME_CASE(CONST_DATA_PTR) 4375*0b57cec5SDimitry Andric NODE_NAME_CASE(PC_ADD_REL_OFFSET) 4376*0b57cec5SDimitry Andric NODE_NAME_CASE(LDS) 4377*0b57cec5SDimitry Andric NODE_NAME_CASE(KILL) 4378*0b57cec5SDimitry Andric NODE_NAME_CASE(DUMMY_CHAIN) 4379*0b57cec5SDimitry Andric case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; 4380*0b57cec5SDimitry Andric NODE_NAME_CASE(INIT_EXEC) 4381*0b57cec5SDimitry Andric NODE_NAME_CASE(INIT_EXEC_FROM_INPUT) 4382*0b57cec5SDimitry Andric NODE_NAME_CASE(SENDMSG) 4383*0b57cec5SDimitry Andric NODE_NAME_CASE(SENDMSGHALT) 4384*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_MOV) 4385*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_P1) 4386*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_P2) 4387*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_P1LL_F16) 4388*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_P1LV_F16) 4389*0b57cec5SDimitry Andric NODE_NAME_CASE(INTERP_P2_F16) 4390*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI) 4391*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO) 4392*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_I8) 4393*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_U8) 4394*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_I8) 4395*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_U8) 4396*0b57cec5SDimitry Andric NODE_NAME_CASE(STORE_MSKOR) 4397*0b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_CONSTANT) 4398*0b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 4399*0b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) 4400*0b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) 4401*0b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) 4402*0b57cec5SDimitry Andric NODE_NAME_CASE(DS_ORDERED_COUNT) 4403*0b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_CMP_SWAP) 4404*0b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_INC) 4405*0b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_DEC) 4406*0b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_LOAD_FMIN) 4407*0b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_LOAD_FMAX) 4408*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD) 4409*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE) 4410*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT) 4411*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE) 4412*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT) 4413*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT) 4414*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) 4415*0b57cec5SDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD) 4416*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE) 4417*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_BYTE) 4418*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_SHORT) 4419*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT) 4420*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) 4421*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) 4422*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_ADD) 4423*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SUB) 4424*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) 4425*0b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) 4426*0b57cec5SDimitry Andric 
  NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
  NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
  NODE_NAME_CASE(BUFFER_ATOMIC_AND)
  NODE_NAME_CASE(BUFFER_ATOMIC_OR)
  NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
  NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
  NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
  NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
  NODE_NAME_CASE(ATOMIC_FADD)
  NODE_NAME_CASE(ATOMIC_PK_FADD)

  case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
  }
  return nullptr;
}

SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
                                              SelectionDAG &DAG, int Enabled,
                                              int &RefinementSteps,
                                              bool &UseOneConstNR,
                                              bool Reciprocal) const {
  EVT VT = Operand.getValueType();

  if (VT == MVT::f32) {
    RefinementSteps = 0;
    return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
  }

  // TODO: There is also an f64 rsq instruction, but the documentation is less
  // clear on its precision.

  return SDValue();
}

SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
                                               SelectionDAG &DAG, int Enabled,
                                               int &RefinementSteps) const {
  EVT VT = Operand.getValueType();

  if (VT == MVT::f32) {
    // Reciprocal, < 1 ulp error.
    //
    // This reciprocal approximation converges to < 0.5 ulp error with one
    // Newton-Raphson step performed with two fused multiply-adds (FMAs).

    RefinementSteps = 0;
    return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
  }

  // TODO: There is also an f64 rcp instruction, but the documentation is less
  // clear on its precision.

  return SDValue();
}

void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known,
    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {

  Known.resetAll(); // Don't know anything.
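  // Only the AMDGPU-specific opcodes handled below refine this; for any other
  // opcode the bits stay completely unknown.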
4486*0b57cec5SDimitry Andric 4487*0b57cec5SDimitry Andric unsigned Opc = Op.getOpcode(); 4488*0b57cec5SDimitry Andric 4489*0b57cec5SDimitry Andric switch (Opc) { 4490*0b57cec5SDimitry Andric default: 4491*0b57cec5SDimitry Andric break; 4492*0b57cec5SDimitry Andric case AMDGPUISD::CARRY: 4493*0b57cec5SDimitry Andric case AMDGPUISD::BORROW: { 4494*0b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 31); 4495*0b57cec5SDimitry Andric break; 4496*0b57cec5SDimitry Andric } 4497*0b57cec5SDimitry Andric 4498*0b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: 4499*0b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 4500*0b57cec5SDimitry Andric ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4501*0b57cec5SDimitry Andric if (!CWidth) 4502*0b57cec5SDimitry Andric return; 4503*0b57cec5SDimitry Andric 4504*0b57cec5SDimitry Andric uint32_t Width = CWidth->getZExtValue() & 0x1f; 4505*0b57cec5SDimitry Andric 4506*0b57cec5SDimitry Andric if (Opc == AMDGPUISD::BFE_U32) 4507*0b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 32 - Width); 4508*0b57cec5SDimitry Andric 4509*0b57cec5SDimitry Andric break; 4510*0b57cec5SDimitry Andric } 4511*0b57cec5SDimitry Andric case AMDGPUISD::FP_TO_FP16: 4512*0b57cec5SDimitry Andric case AMDGPUISD::FP16_ZEXT: { 4513*0b57cec5SDimitry Andric unsigned BitWidth = Known.getBitWidth(); 4514*0b57cec5SDimitry Andric 4515*0b57cec5SDimitry Andric // High bits are zero. 4516*0b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); 4517*0b57cec5SDimitry Andric break; 4518*0b57cec5SDimitry Andric } 4519*0b57cec5SDimitry Andric case AMDGPUISD::MUL_U24: 4520*0b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: { 4521*0b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 4522*0b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 4523*0b57cec5SDimitry Andric unsigned TrailZ = LHSKnown.countMinTrailingZeros() + 4524*0b57cec5SDimitry Andric RHSKnown.countMinTrailingZeros(); 4525*0b57cec5SDimitry Andric Known.Zero.setLowBits(std::min(TrailZ, 32u)); 4526*0b57cec5SDimitry Andric 4527*0b57cec5SDimitry Andric // Truncate to 24 bits. 
4528*0b57cec5SDimitry Andric LHSKnown = LHSKnown.trunc(24); 4529*0b57cec5SDimitry Andric RHSKnown = RHSKnown.trunc(24); 4530*0b57cec5SDimitry Andric 4531*0b57cec5SDimitry Andric bool Negative = false; 4532*0b57cec5SDimitry Andric if (Opc == AMDGPUISD::MUL_I24) { 4533*0b57cec5SDimitry Andric unsigned LHSValBits = 24 - LHSKnown.countMinSignBits(); 4534*0b57cec5SDimitry Andric unsigned RHSValBits = 24 - RHSKnown.countMinSignBits(); 4535*0b57cec5SDimitry Andric unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u); 4536*0b57cec5SDimitry Andric if (MaxValBits >= 32) 4537*0b57cec5SDimitry Andric break; 4538*0b57cec5SDimitry Andric bool LHSNegative = LHSKnown.isNegative(); 4539*0b57cec5SDimitry Andric bool LHSPositive = LHSKnown.isNonNegative(); 4540*0b57cec5SDimitry Andric bool RHSNegative = RHSKnown.isNegative(); 4541*0b57cec5SDimitry Andric bool RHSPositive = RHSKnown.isNonNegative(); 4542*0b57cec5SDimitry Andric if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive)) 4543*0b57cec5SDimitry Andric break; 4544*0b57cec5SDimitry Andric Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative); 4545*0b57cec5SDimitry Andric if (Negative) 4546*0b57cec5SDimitry Andric Known.One.setHighBits(32 - MaxValBits); 4547*0b57cec5SDimitry Andric else 4548*0b57cec5SDimitry Andric Known.Zero.setHighBits(32 - MaxValBits); 4549*0b57cec5SDimitry Andric } else { 4550*0b57cec5SDimitry Andric unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros(); 4551*0b57cec5SDimitry Andric unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros(); 4552*0b57cec5SDimitry Andric unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u); 4553*0b57cec5SDimitry Andric if (MaxValBits >= 32) 4554*0b57cec5SDimitry Andric break; 4555*0b57cec5SDimitry Andric Known.Zero.setHighBits(32 - MaxValBits); 4556*0b57cec5SDimitry Andric } 4557*0b57cec5SDimitry Andric break; 4558*0b57cec5SDimitry Andric } 4559*0b57cec5SDimitry Andric case AMDGPUISD::PERM: { 4560*0b57cec5SDimitry Andric ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4561*0b57cec5SDimitry Andric if (!CMask) 4562*0b57cec5SDimitry Andric return; 4563*0b57cec5SDimitry Andric 4564*0b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 4565*0b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 4566*0b57cec5SDimitry Andric unsigned Sel = CMask->getZExtValue(); 4567*0b57cec5SDimitry Andric 4568*0b57cec5SDimitry Andric for (unsigned I = 0; I < 32; I += 8) { 4569*0b57cec5SDimitry Andric unsigned SelBits = Sel & 0xff; 4570*0b57cec5SDimitry Andric if (SelBits < 4) { 4571*0b57cec5SDimitry Andric SelBits *= 8; 4572*0b57cec5SDimitry Andric Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 4573*0b57cec5SDimitry Andric Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 4574*0b57cec5SDimitry Andric } else if (SelBits < 7) { 4575*0b57cec5SDimitry Andric SelBits = (SelBits & 3) * 8; 4576*0b57cec5SDimitry Andric Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 4577*0b57cec5SDimitry Andric Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 4578*0b57cec5SDimitry Andric } else if (SelBits == 0x0c) { 4579*0b57cec5SDimitry Andric Known.Zero |= 0xff << I; 4580*0b57cec5SDimitry Andric } else if (SelBits > 0x0c) { 4581*0b57cec5SDimitry Andric Known.One |= 0xff << I; 4582*0b57cec5SDimitry Andric } 4583*0b57cec5SDimitry Andric Sel >>= 8; 4584*0b57cec5SDimitry Andric } 4585*0b57cec5SDimitry Andric 
break; 4586*0b57cec5SDimitry Andric } 4587*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: { 4588*0b57cec5SDimitry Andric Known.Zero.setHighBits(24); 4589*0b57cec5SDimitry Andric break; 4590*0b57cec5SDimitry Andric } 4591*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: { 4592*0b57cec5SDimitry Andric Known.Zero.setHighBits(16); 4593*0b57cec5SDimitry Andric break; 4594*0b57cec5SDimitry Andric } 4595*0b57cec5SDimitry Andric case AMDGPUISD::LDS: { 4596*0b57cec5SDimitry Andric auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode()); 4597*0b57cec5SDimitry Andric unsigned Align = GA->getGlobal()->getAlignment(); 4598*0b57cec5SDimitry Andric 4599*0b57cec5SDimitry Andric Known.Zero.setHighBits(16); 4600*0b57cec5SDimitry Andric if (Align) 4601*0b57cec5SDimitry Andric Known.Zero.setLowBits(Log2_32(Align)); 4602*0b57cec5SDimitry Andric break; 4603*0b57cec5SDimitry Andric } 4604*0b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 4605*0b57cec5SDimitry Andric unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4606*0b57cec5SDimitry Andric switch (IID) { 4607*0b57cec5SDimitry Andric case Intrinsic::amdgcn_mbcnt_lo: 4608*0b57cec5SDimitry Andric case Intrinsic::amdgcn_mbcnt_hi: { 4609*0b57cec5SDimitry Andric const GCNSubtarget &ST = 4610*0b57cec5SDimitry Andric DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); 4611*0b57cec5SDimitry Andric // These return at most the wavefront size - 1. 4612*0b57cec5SDimitry Andric unsigned Size = Op.getValueType().getSizeInBits(); 4613*0b57cec5SDimitry Andric Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); 4614*0b57cec5SDimitry Andric break; 4615*0b57cec5SDimitry Andric } 4616*0b57cec5SDimitry Andric default: 4617*0b57cec5SDimitry Andric break; 4618*0b57cec5SDimitry Andric } 4619*0b57cec5SDimitry Andric } 4620*0b57cec5SDimitry Andric } 4621*0b57cec5SDimitry Andric } 4622*0b57cec5SDimitry Andric 4623*0b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( 4624*0b57cec5SDimitry Andric SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 4625*0b57cec5SDimitry Andric unsigned Depth) const { 4626*0b57cec5SDimitry Andric switch (Op.getOpcode()) { 4627*0b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: { 4628*0b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4629*0b57cec5SDimitry Andric if (!Width) 4630*0b57cec5SDimitry Andric return 1; 4631*0b57cec5SDimitry Andric 4632*0b57cec5SDimitry Andric unsigned SignBits = 32 - Width->getZExtValue() + 1; 4633*0b57cec5SDimitry Andric if (!isNullConstant(Op.getOperand(1))) 4634*0b57cec5SDimitry Andric return SignBits; 4635*0b57cec5SDimitry Andric 4636*0b57cec5SDimitry Andric // TODO: Could probably figure something out with non-0 offsets. 4637*0b57cec5SDimitry Andric unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4638*0b57cec5SDimitry Andric return std::max(SignBits, Op0SignBits); 4639*0b57cec5SDimitry Andric } 4640*0b57cec5SDimitry Andric 4641*0b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 4642*0b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4643*0b57cec5SDimitry Andric return Width ? 
32 - (Width->getZExtValue() & 0x1f) : 1; 4644*0b57cec5SDimitry Andric } 4645*0b57cec5SDimitry Andric 4646*0b57cec5SDimitry Andric case AMDGPUISD::CARRY: 4647*0b57cec5SDimitry Andric case AMDGPUISD::BORROW: 4648*0b57cec5SDimitry Andric return 31; 4649*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_BYTE: 4650*0b57cec5SDimitry Andric return 25; 4651*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_SHORT: 4652*0b57cec5SDimitry Andric return 17; 4653*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: 4654*0b57cec5SDimitry Andric return 24; 4655*0b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: 4656*0b57cec5SDimitry Andric return 16; 4657*0b57cec5SDimitry Andric case AMDGPUISD::FP_TO_FP16: 4658*0b57cec5SDimitry Andric case AMDGPUISD::FP16_ZEXT: 4659*0b57cec5SDimitry Andric return 16; 4660*0b57cec5SDimitry Andric default: 4661*0b57cec5SDimitry Andric return 1; 4662*0b57cec5SDimitry Andric } 4663*0b57cec5SDimitry Andric } 4664*0b57cec5SDimitry Andric 4665*0b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, 4666*0b57cec5SDimitry Andric const SelectionDAG &DAG, 4667*0b57cec5SDimitry Andric bool SNaN, 4668*0b57cec5SDimitry Andric unsigned Depth) const { 4669*0b57cec5SDimitry Andric unsigned Opcode = Op.getOpcode(); 4670*0b57cec5SDimitry Andric switch (Opcode) { 4671*0b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 4672*0b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: { 4673*0b57cec5SDimitry Andric if (SNaN) 4674*0b57cec5SDimitry Andric return true; 4675*0b57cec5SDimitry Andric 4676*0b57cec5SDimitry Andric // TODO: Can check no nans on one of the operands for each one, but which 4677*0b57cec5SDimitry Andric // one? 4678*0b57cec5SDimitry Andric return false; 4679*0b57cec5SDimitry Andric } 4680*0b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: 4681*0b57cec5SDimitry Andric case AMDGPUISD::CVT_PKRTZ_F16_F32: { 4682*0b57cec5SDimitry Andric if (SNaN) 4683*0b57cec5SDimitry Andric return true; 4684*0b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 4685*0b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 4686*0b57cec5SDimitry Andric } 4687*0b57cec5SDimitry Andric case AMDGPUISD::FMED3: 4688*0b57cec5SDimitry Andric case AMDGPUISD::FMIN3: 4689*0b57cec5SDimitry Andric case AMDGPUISD::FMAX3: 4690*0b57cec5SDimitry Andric case AMDGPUISD::FMAD_FTZ: { 4691*0b57cec5SDimitry Andric if (SNaN) 4692*0b57cec5SDimitry Andric return true; 4693*0b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 4694*0b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 4695*0b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 4696*0b57cec5SDimitry Andric } 4697*0b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE0: 4698*0b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE1: 4699*0b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE2: 4700*0b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE3: 4701*0b57cec5SDimitry Andric return true; 4702*0b57cec5SDimitry Andric 4703*0b57cec5SDimitry Andric case AMDGPUISD::RCP: 4704*0b57cec5SDimitry Andric case AMDGPUISD::RSQ: 4705*0b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 4706*0b57cec5SDimitry Andric case AMDGPUISD::RSQ_LEGACY: 4707*0b57cec5SDimitry Andric case AMDGPUISD::RSQ_CLAMP: { 4708*0b57cec5SDimitry Andric if (SNaN) 4709*0b57cec5SDimitry Andric return true; 4710*0b57cec5SDimitry Andric 4711*0b57cec5SDimitry Andric // TODO: Need is known positive check. 
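    // These can produce a NaN for a NaN input, or for a negative input in the
    // rsq variants, so be conservative here.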
4712*0b57cec5SDimitry Andric return false; 4713*0b57cec5SDimitry Andric } 4714*0b57cec5SDimitry Andric case AMDGPUISD::LDEXP: 4715*0b57cec5SDimitry Andric case AMDGPUISD::FRACT: { 4716*0b57cec5SDimitry Andric if (SNaN) 4717*0b57cec5SDimitry Andric return true; 4718*0b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 4719*0b57cec5SDimitry Andric } 4720*0b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: 4721*0b57cec5SDimitry Andric case AMDGPUISD::DIV_FMAS: 4722*0b57cec5SDimitry Andric case AMDGPUISD::DIV_FIXUP: 4723*0b57cec5SDimitry Andric case AMDGPUISD::TRIG_PREOP: 4724*0b57cec5SDimitry Andric // TODO: Refine on operands. 4725*0b57cec5SDimitry Andric return SNaN; 4726*0b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: 4727*0b57cec5SDimitry Andric case AMDGPUISD::COS_HW: { 4728*0b57cec5SDimitry Andric // TODO: Need check for infinity 4729*0b57cec5SDimitry Andric return SNaN; 4730*0b57cec5SDimitry Andric } 4731*0b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 4732*0b57cec5SDimitry Andric unsigned IntrinsicID 4733*0b57cec5SDimitry Andric = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4734*0b57cec5SDimitry Andric // TODO: Handle more intrinsics 4735*0b57cec5SDimitry Andric switch (IntrinsicID) { 4736*0b57cec5SDimitry Andric case Intrinsic::amdgcn_cubeid: 4737*0b57cec5SDimitry Andric return true; 4738*0b57cec5SDimitry Andric 4739*0b57cec5SDimitry Andric case Intrinsic::amdgcn_frexp_mant: { 4740*0b57cec5SDimitry Andric if (SNaN) 4741*0b57cec5SDimitry Andric return true; 4742*0b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 4743*0b57cec5SDimitry Andric } 4744*0b57cec5SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: { 4745*0b57cec5SDimitry Andric if (SNaN) 4746*0b57cec5SDimitry Andric return true; 4747*0b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 4748*0b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 4749*0b57cec5SDimitry Andric } 4750*0b57cec5SDimitry Andric case Intrinsic::amdgcn_fdot2: 4751*0b57cec5SDimitry Andric // TODO: Refine on operand 4752*0b57cec5SDimitry Andric return SNaN; 4753*0b57cec5SDimitry Andric default: 4754*0b57cec5SDimitry Andric return false; 4755*0b57cec5SDimitry Andric } 4756*0b57cec5SDimitry Andric } 4757*0b57cec5SDimitry Andric default: 4758*0b57cec5SDimitry Andric return false; 4759*0b57cec5SDimitry Andric } 4760*0b57cec5SDimitry Andric } 4761*0b57cec5SDimitry Andric 4762*0b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind 4763*0b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 4764*0b57cec5SDimitry Andric switch (RMW->getOperation()) { 4765*0b57cec5SDimitry Andric case AtomicRMWInst::Nand: 4766*0b57cec5SDimitry Andric case AtomicRMWInst::FAdd: 4767*0b57cec5SDimitry Andric case AtomicRMWInst::FSub: 4768*0b57cec5SDimitry Andric return AtomicExpansionKind::CmpXChg; 4769*0b57cec5SDimitry Andric default: 4770*0b57cec5SDimitry Andric return AtomicExpansionKind::None; 4771*0b57cec5SDimitry Andric } 4772*0b57cec5SDimitry Andric } 4773