//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "NVPTXISelLowering.h"
150b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h"
160b57cec5SDimitry Andric #include "NVPTX.h"
170b57cec5SDimitry Andric #include "NVPTXSubtarget.h"
180b57cec5SDimitry Andric #include "NVPTXTargetMachine.h"
190b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h"
200b57cec5SDimitry Andric #include "NVPTXUtilities.h"
210b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
22e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
230b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
240b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
265f757f3fSDimitry Andric #include "llvm/CodeGen/ISDOpcodes.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetCallingConv.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
340fca6ea1SDimitry Andric #include "llvm/CodeGenTypes/MachineValueType.h"
350b57cec5SDimitry Andric #include "llvm/IR/Argument.h"
360b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
370b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
380b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
390b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
405f757f3fSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
4181ad6265SDimitry Andric #include "llvm/IR/FPEnv.h"
420b57cec5SDimitry Andric #include "llvm/IR/Function.h"
430b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
450b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
46480093f4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
470b57cec5SDimitry Andric #include "llvm/IR/Module.h"
480b57cec5SDimitry Andric #include "llvm/IR/Type.h"
490b57cec5SDimitry Andric #include "llvm/IR/Value.h"
500fca6ea1SDimitry Andric #include "llvm/Support/Alignment.h"
510b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
520b57cec5SDimitry Andric #include "llvm/Support/CodeGen.h"
530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
540b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
560b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
570b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
580b57cec5SDimitry Andric #include <algorithm>
590b57cec5SDimitry Andric #include <cassert>
60bdd1243dSDimitry Andric #include <cmath>
610b57cec5SDimitry Andric #include <cstdint>
620b57cec5SDimitry Andric #include <iterator>
630fca6ea1SDimitry Andric #include <optional>
640b57cec5SDimitry Andric #include <sstream>
650b57cec5SDimitry Andric #include <string>
660b57cec5SDimitry Andric #include <utility>
670b57cec5SDimitry Andric #include <vector>
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx-lower"
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric using namespace llvm;
720b57cec5SDimitry Andric
73e8d8bef9SDimitry Andric static std::atomic<unsigned> GlobalUniqueCallSite;
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric static cl::opt<bool> sched4reg(
760b57cec5SDimitry Andric "nvptx-sched4reg",
770b57cec5SDimitry Andric cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
780b57cec5SDimitry Andric
7981ad6265SDimitry Andric static cl::opt<unsigned> FMAContractLevelOpt(
8081ad6265SDimitry Andric "nvptx-fma-level", cl::Hidden,
810b57cec5SDimitry Andric cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
820b57cec5SDimitry Andric " 1: do it 2: do it aggressively"),
830b57cec5SDimitry Andric cl::init(2));
840b57cec5SDimitry Andric
850b57cec5SDimitry Andric static cl::opt<int> UsePrecDivF32(
8681ad6265SDimitry Andric "nvptx-prec-divf32", cl::Hidden,
870b57cec5SDimitry Andric cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
880b57cec5SDimitry Andric " IEEE Compliant F32 div.rnd if available."),
890b57cec5SDimitry Andric cl::init(2));
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric static cl::opt<bool> UsePrecSqrtF32(
920b57cec5SDimitry Andric "nvptx-prec-sqrtf32", cl::Hidden,
930b57cec5SDimitry Andric cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
940b57cec5SDimitry Andric cl::init(true));
950b57cec5SDimitry Andric
9606c3fb27SDimitry Andric static cl::opt<bool> ForceMinByValParamAlign(
9706c3fb27SDimitry Andric "nvptx-force-min-byval-param-align", cl::Hidden,
9806c3fb27SDimitry Andric cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
9906c3fb27SDimitry Andric " params of device functions."),
10006c3fb27SDimitry Andric cl::init(false));
10106c3fb27SDimitry Andric
getDivF32Level() const1020b57cec5SDimitry Andric int NVPTXTargetLowering::getDivF32Level() const {
1030b57cec5SDimitry Andric if (UsePrecDivF32.getNumOccurrences() > 0) {
1040b57cec5SDimitry Andric // If nvptx-prec-div32=N is used on the command-line, always honor it
1050b57cec5SDimitry Andric return UsePrecDivF32;
1060b57cec5SDimitry Andric } else {
1070b57cec5SDimitry Andric // Otherwise, use div.approx if fast math is enabled
1080b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath)
1090b57cec5SDimitry Andric return 0;
1100b57cec5SDimitry Andric else
1110b57cec5SDimitry Andric return 2;
1120b57cec5SDimitry Andric }
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric
usePrecSqrtF32() const1150b57cec5SDimitry Andric bool NVPTXTargetLowering::usePrecSqrtF32() const {
1160b57cec5SDimitry Andric if (UsePrecSqrtF32.getNumOccurrences() > 0) {
1170b57cec5SDimitry Andric // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
1180b57cec5SDimitry Andric return UsePrecSqrtF32;
1190b57cec5SDimitry Andric } else {
1200b57cec5SDimitry Andric // Otherwise, use sqrt.approx if fast math is enabled
1210b57cec5SDimitry Andric return !getTargetMachine().Options.UnsafeFPMath;
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric
useF32FTZ(const MachineFunction & MF) const1250b57cec5SDimitry Andric bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
1265ffd83dbSDimitry Andric return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
1275ffd83dbSDimitry Andric DenormalMode::PreserveSign;
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric
IsPTXVectorType(MVT VT)1300b57cec5SDimitry Andric static bool IsPTXVectorType(MVT VT) {
1310b57cec5SDimitry Andric switch (VT.SimpleTy) {
1320b57cec5SDimitry Andric default:
1330b57cec5SDimitry Andric return false;
1340b57cec5SDimitry Andric case MVT::v2i1:
1350b57cec5SDimitry Andric case MVT::v4i1:
1360b57cec5SDimitry Andric case MVT::v2i8:
1370b57cec5SDimitry Andric case MVT::v4i8:
1380b57cec5SDimitry Andric case MVT::v2i16:
1390b57cec5SDimitry Andric case MVT::v4i16:
1405f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2>
1410b57cec5SDimitry Andric case MVT::v2i32:
1420b57cec5SDimitry Andric case MVT::v4i32:
1430b57cec5SDimitry Andric case MVT::v2i64:
1440b57cec5SDimitry Andric case MVT::v2f16:
1450b57cec5SDimitry Andric case MVT::v4f16:
1460b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2>
147bdd1243dSDimitry Andric case MVT::v2bf16:
148bdd1243dSDimitry Andric case MVT::v4bf16:
149bdd1243dSDimitry Andric case MVT::v8bf16: // <4 x bf16x2>
1500b57cec5SDimitry Andric case MVT::v2f32:
1510b57cec5SDimitry Andric case MVT::v4f32:
1520b57cec5SDimitry Andric case MVT::v2f64:
1530b57cec5SDimitry Andric return true;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric }
1560b57cec5SDimitry Andric
Is16bitsType(MVT VT)1575f757f3fSDimitry Andric static bool Is16bitsType(MVT VT) {
1585f757f3fSDimitry Andric return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16 ||
1595f757f3fSDimitry Andric VT.SimpleTy == MVT::i16);
16006c3fb27SDimitry Andric }
16106c3fb27SDimitry Andric
1620b57cec5SDimitry Andric /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
1630b57cec5SDimitry Andric /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
1640b57cec5SDimitry Andric /// into their primitive components.
1650b57cec5SDimitry Andric /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
1660b57cec5SDimitry Andric /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
1670b57cec5SDimitry Andric /// LowerCall, and LowerReturn.
ComputePTXValueVTs(const TargetLowering & TLI,const DataLayout & DL,Type * Ty,SmallVectorImpl<EVT> & ValueVTs,SmallVectorImpl<uint64_t> * Offsets=nullptr,uint64_t StartingOffset=0)1680b57cec5SDimitry Andric static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
1690b57cec5SDimitry Andric Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
1700b57cec5SDimitry Andric SmallVectorImpl<uint64_t> *Offsets = nullptr,
1710b57cec5SDimitry Andric uint64_t StartingOffset = 0) {
1720b57cec5SDimitry Andric SmallVector<EVT, 16> TempVTs;
1730b57cec5SDimitry Andric SmallVector<uint64_t, 16> TempOffsets;
1740b57cec5SDimitry Andric
1750b57cec5SDimitry Andric // Special case for i128 - decompose to (i64, i64)
1760b57cec5SDimitry Andric if (Ty->isIntegerTy(128)) {
1770b57cec5SDimitry Andric ValueVTs.push_back(EVT(MVT::i64));
1780b57cec5SDimitry Andric ValueVTs.push_back(EVT(MVT::i64));
1790b57cec5SDimitry Andric
1800b57cec5SDimitry Andric if (Offsets) {
1810b57cec5SDimitry Andric Offsets->push_back(StartingOffset + 0);
1820b57cec5SDimitry Andric Offsets->push_back(StartingOffset + 8);
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric
1850b57cec5SDimitry Andric return;
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
1890b57cec5SDimitry Andric if (StructType *STy = dyn_cast<StructType>(Ty)) {
1900b57cec5SDimitry Andric auto const *SL = DL.getStructLayout(STy);
1910b57cec5SDimitry Andric auto ElementNum = 0;
1920b57cec5SDimitry Andric for(auto *EI : STy->elements()) {
1930b57cec5SDimitry Andric ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
1940b57cec5SDimitry Andric StartingOffset + SL->getElementOffset(ElementNum));
1950b57cec5SDimitry Andric ++ElementNum;
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric return;
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
2010b57cec5SDimitry Andric for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
2020b57cec5SDimitry Andric EVT VT = TempVTs[i];
2030b57cec5SDimitry Andric uint64_t Off = TempOffsets[i];
2040b57cec5SDimitry Andric // Split vectors into individual elements, except for v2f16, which
2050b57cec5SDimitry Andric // we will pass as a single scalar.
2060b57cec5SDimitry Andric if (VT.isVector()) {
2070b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements();
2080b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType();
2090b57cec5SDimitry Andric // Vectors with an even number of f16 elements will be passed to
210bdd1243dSDimitry Andric // us as an array of v2f16/v2bf16 elements. We must match this so we
2110b57cec5SDimitry Andric // stay in sync with Ins/Outs.
2125f757f3fSDimitry Andric if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
2135f757f3fSDimitry Andric switch (EltVT.getSimpleVT().SimpleTy) {
2145f757f3fSDimitry Andric case MVT::f16:
2155f757f3fSDimitry Andric EltVT = MVT::v2f16;
2165f757f3fSDimitry Andric break;
2175f757f3fSDimitry Andric case MVT::bf16:
2185f757f3fSDimitry Andric EltVT = MVT::v2bf16;
2195f757f3fSDimitry Andric break;
2205f757f3fSDimitry Andric case MVT::i16:
2215f757f3fSDimitry Andric EltVT = MVT::v2i16;
2225f757f3fSDimitry Andric break;
2235f757f3fSDimitry Andric default:
2245f757f3fSDimitry Andric llvm_unreachable("Unexpected type");
2255f757f3fSDimitry Andric }
2260b57cec5SDimitry Andric NumElts /= 2;
2275f757f3fSDimitry Andric } else if (EltVT.getSimpleVT() == MVT::i8 &&
2285f757f3fSDimitry Andric (NumElts % 4 == 0 || NumElts == 3)) {
2295f757f3fSDimitry Andric // v*i8 are formally lowered as v4i8
2305f757f3fSDimitry Andric EltVT = MVT::v4i8;
2315f757f3fSDimitry Andric NumElts = (NumElts + 3) / 4;
232*71ac745dSDimitry Andric } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
233*71ac745dSDimitry Andric // v2i8 is promoted to v2i16
234*71ac745dSDimitry Andric NumElts = 1;
235*71ac745dSDimitry Andric EltVT = MVT::v2i16;
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric for (unsigned j = 0; j != NumElts; ++j) {
2380b57cec5SDimitry Andric ValueVTs.push_back(EltVT);
2390b57cec5SDimitry Andric if (Offsets)
2400b57cec5SDimitry Andric Offsets->push_back(Off + j * EltVT.getStoreSize());
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric } else {
2430b57cec5SDimitry Andric ValueVTs.push_back(VT);
2440b57cec5SDimitry Andric if (Offsets)
2450b57cec5SDimitry Andric Offsets->push_back(Off);
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric }
2480b57cec5SDimitry Andric }
2490b57cec5SDimitry Andric
250fcaf7f86SDimitry Andric /// PromoteScalarIntegerPTX
251fcaf7f86SDimitry Andric /// Used to make sure the arguments/returns are suitable for passing
252fcaf7f86SDimitry Andric /// and promote them to a larger size if they're not.
253fcaf7f86SDimitry Andric ///
254fcaf7f86SDimitry Andric /// The promoted type is placed in \p PromoteVT if the function returns true.
PromoteScalarIntegerPTX(const EVT & VT,MVT * PromotedVT)255fcaf7f86SDimitry Andric static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
256fcaf7f86SDimitry Andric if (VT.isScalarInteger()) {
257fcaf7f86SDimitry Andric switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
258fcaf7f86SDimitry Andric default:
259fcaf7f86SDimitry Andric llvm_unreachable(
260fcaf7f86SDimitry Andric "Promotion is not suitable for scalars of size larger than 64-bits");
261fcaf7f86SDimitry Andric case 1:
262fcaf7f86SDimitry Andric *PromotedVT = MVT::i1;
263fcaf7f86SDimitry Andric break;
264fcaf7f86SDimitry Andric case 2:
265fcaf7f86SDimitry Andric case 4:
266fcaf7f86SDimitry Andric case 8:
267fcaf7f86SDimitry Andric *PromotedVT = MVT::i8;
268fcaf7f86SDimitry Andric break;
269fcaf7f86SDimitry Andric case 16:
270fcaf7f86SDimitry Andric *PromotedVT = MVT::i16;
271fcaf7f86SDimitry Andric break;
272fcaf7f86SDimitry Andric case 32:
273fcaf7f86SDimitry Andric *PromotedVT = MVT::i32;
274fcaf7f86SDimitry Andric break;
275fcaf7f86SDimitry Andric case 64:
276fcaf7f86SDimitry Andric *PromotedVT = MVT::i64;
277fcaf7f86SDimitry Andric break;
278fcaf7f86SDimitry Andric }
279fcaf7f86SDimitry Andric return EVT(*PromotedVT) != VT;
280fcaf7f86SDimitry Andric }
281fcaf7f86SDimitry Andric return false;
282fcaf7f86SDimitry Andric }
283fcaf7f86SDimitry Andric
2840b57cec5SDimitry Andric // Check whether we can merge loads/stores of some of the pieces of a
2850b57cec5SDimitry Andric // flattened function parameter or return value into a single vector
2860b57cec5SDimitry Andric // load/store.
2870b57cec5SDimitry Andric //
2880b57cec5SDimitry Andric // The flattened parameter is represented as a list of EVTs and
2890b57cec5SDimitry Andric // offsets, and the whole structure is aligned to ParamAlignment. This
2900b57cec5SDimitry Andric // function determines whether we can load/store pieces of the
2910b57cec5SDimitry Andric // parameter starting at index Idx using a single vectorized op of
2920b57cec5SDimitry Andric // size AccessSize. If so, it returns the number of param pieces
2930b57cec5SDimitry Andric // covered by the vector op. Otherwise, it returns 1.
CanMergeParamLoadStoresStartingAt(unsigned Idx,uint32_t AccessSize,const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment)2940b57cec5SDimitry Andric static unsigned CanMergeParamLoadStoresStartingAt(
2950b57cec5SDimitry Andric unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
2965ffd83dbSDimitry Andric const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
2970b57cec5SDimitry Andric
2980b57cec5SDimitry Andric // Can't vectorize if param alignment is not sufficient.
2995ffd83dbSDimitry Andric if (ParamAlignment < AccessSize)
3000b57cec5SDimitry Andric return 1;
3010b57cec5SDimitry Andric // Can't vectorize if offset is not aligned.
3020b57cec5SDimitry Andric if (Offsets[Idx] & (AccessSize - 1))
3030b57cec5SDimitry Andric return 1;
3040b57cec5SDimitry Andric
3050b57cec5SDimitry Andric EVT EltVT = ValueVTs[Idx];
3060b57cec5SDimitry Andric unsigned EltSize = EltVT.getStoreSize();
3070b57cec5SDimitry Andric
3080b57cec5SDimitry Andric // Element is too large to vectorize.
3090b57cec5SDimitry Andric if (EltSize >= AccessSize)
3100b57cec5SDimitry Andric return 1;
3110b57cec5SDimitry Andric
3120b57cec5SDimitry Andric unsigned NumElts = AccessSize / EltSize;
3130b57cec5SDimitry Andric // Can't vectorize if AccessBytes if not a multiple of EltSize.
3140b57cec5SDimitry Andric if (AccessSize != EltSize * NumElts)
3150b57cec5SDimitry Andric return 1;
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric // We don't have enough elements to vectorize.
3180b57cec5SDimitry Andric if (Idx + NumElts > ValueVTs.size())
3190b57cec5SDimitry Andric return 1;
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric // PTX ISA can only deal with 2- and 4-element vector ops.
3220b57cec5SDimitry Andric if (NumElts != 4 && NumElts != 2)
3230b57cec5SDimitry Andric return 1;
3240b57cec5SDimitry Andric
3250b57cec5SDimitry Andric for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
3260b57cec5SDimitry Andric // Types do not match.
3270b57cec5SDimitry Andric if (ValueVTs[j] != EltVT)
3280b57cec5SDimitry Andric return 1;
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric // Elements are not contiguous.
3310b57cec5SDimitry Andric if (Offsets[j] - Offsets[j - 1] != EltSize)
3320b57cec5SDimitry Andric return 1;
3330b57cec5SDimitry Andric }
3340b57cec5SDimitry Andric // OK. We can vectorize ValueVTs[i..i+NumElts)
3350b57cec5SDimitry Andric return NumElts;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric
// Per-piece markers describing where a piece of a flattened function
// parameter or return value sits within a vectorized load/store.
enum ParamVectorizationFlags {
  PVF_INNER = 0,      // Neither first nor last: a middle element of a vector.
  PVF_FIRST = 1 << 0, // Opens a vector.
  PVF_LAST = 1 << 1,  // Closes a vector.
  // A scalar both opens and closes its own 1-element vector.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
3470b57cec5SDimitry Andric
3480b57cec5SDimitry Andric // Computes whether and how we can vectorize the loads/stores of a
3490b57cec5SDimitry Andric // flattened function parameter or return value.
3500b57cec5SDimitry Andric //
3510b57cec5SDimitry Andric // The flattened parameter is represented as the list of ValueVTs and
3520b57cec5SDimitry Andric // Offsets, and is aligned to ParamAlignment bytes. We return a vector
3530b57cec5SDimitry Andric // of the same size as ValueVTs indicating how each piece should be
3540b57cec5SDimitry Andric // loaded/stored (i.e. as a scalar, or as part of a vector
3550b57cec5SDimitry Andric // load/store).
3560b57cec5SDimitry Andric static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment,bool IsVAArg=false)3570b57cec5SDimitry Andric VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
3580b57cec5SDimitry Andric const SmallVectorImpl<uint64_t> &Offsets,
359bdd1243dSDimitry Andric Align ParamAlignment, bool IsVAArg = false) {
3600b57cec5SDimitry Andric // Set vector size to match ValueVTs and mark all elements as
3610b57cec5SDimitry Andric // scalars by default.
3620b57cec5SDimitry Andric SmallVector<ParamVectorizationFlags, 16> VectorInfo;
3630b57cec5SDimitry Andric VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
3640b57cec5SDimitry Andric
365bdd1243dSDimitry Andric if (IsVAArg)
366bdd1243dSDimitry Andric return VectorInfo;
367bdd1243dSDimitry Andric
3680b57cec5SDimitry Andric // Check what we can vectorize using 128/64/32-bit accesses.
3690b57cec5SDimitry Andric for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
3700b57cec5SDimitry Andric // Skip elements we've already processed.
3710b57cec5SDimitry Andric assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
3720b57cec5SDimitry Andric for (unsigned AccessSize : {16, 8, 4, 2}) {
3730b57cec5SDimitry Andric unsigned NumElts = CanMergeParamLoadStoresStartingAt(
3740b57cec5SDimitry Andric I, AccessSize, ValueVTs, Offsets, ParamAlignment);
3750b57cec5SDimitry Andric // Mark vectorized elements.
3760b57cec5SDimitry Andric switch (NumElts) {
3770b57cec5SDimitry Andric default:
3780b57cec5SDimitry Andric llvm_unreachable("Unexpected return value");
3790b57cec5SDimitry Andric case 1:
3800b57cec5SDimitry Andric // Can't vectorize using this size, try next smaller size.
3810b57cec5SDimitry Andric continue;
3820b57cec5SDimitry Andric case 2:
3830b57cec5SDimitry Andric assert(I + 1 < E && "Not enough elements.");
3840b57cec5SDimitry Andric VectorInfo[I] = PVF_FIRST;
3850b57cec5SDimitry Andric VectorInfo[I + 1] = PVF_LAST;
3860b57cec5SDimitry Andric I += 1;
3870b57cec5SDimitry Andric break;
3880b57cec5SDimitry Andric case 4:
3890b57cec5SDimitry Andric assert(I + 3 < E && "Not enough elements.");
3900b57cec5SDimitry Andric VectorInfo[I] = PVF_FIRST;
3910b57cec5SDimitry Andric VectorInfo[I + 1] = PVF_INNER;
3920b57cec5SDimitry Andric VectorInfo[I + 2] = PVF_INNER;
3930b57cec5SDimitry Andric VectorInfo[I + 3] = PVF_LAST;
3940b57cec5SDimitry Andric I += 3;
3950b57cec5SDimitry Andric break;
3960b57cec5SDimitry Andric }
3970b57cec5SDimitry Andric // Break out of the inner loop because we've already succeeded
3980b57cec5SDimitry Andric // using largest possible AccessSize.
3990b57cec5SDimitry Andric break;
4000b57cec5SDimitry Andric }
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric return VectorInfo;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric // NVPTXTargetLowering Constructor.
NVPTXTargetLowering(const NVPTXTargetMachine & TM,const NVPTXSubtarget & STI)4060b57cec5SDimitry Andric NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
4070b57cec5SDimitry Andric const NVPTXSubtarget &STI)
4080b57cec5SDimitry Andric : TargetLowering(TM), nvTM(&TM), STI(STI) {
4090b57cec5SDimitry Andric // always lower memset, memcpy, and memmove intrinsics to load/store
4100b57cec5SDimitry Andric // instructions, rather
4110b57cec5SDimitry Andric // then generating calls to memset, mempcy or memmove.
4125f757f3fSDimitry Andric MaxStoresPerMemset = MaxStoresPerMemsetOptSize = (unsigned)0xFFFFFFFF;
4135f757f3fSDimitry Andric MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = (unsigned) 0xFFFFFFFF;
4145f757f3fSDimitry Andric MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = (unsigned) 0xFFFFFFFF;
4150b57cec5SDimitry Andric
4160b57cec5SDimitry Andric setBooleanContents(ZeroOrNegativeOneBooleanContent);
4170b57cec5SDimitry Andric setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
4180b57cec5SDimitry Andric
4190b57cec5SDimitry Andric // Jump is Expensive. Don't create extra control flow for 'and', 'or'
4200b57cec5SDimitry Andric // condition branches.
4210b57cec5SDimitry Andric setJumpIsExpensive(true);
4220b57cec5SDimitry Andric
4230b57cec5SDimitry Andric // Wide divides are _very_ slow. Try to reduce the width of the divide if
4240b57cec5SDimitry Andric // possible.
4250b57cec5SDimitry Andric addBypassSlowDiv(64, 32);
4260b57cec5SDimitry Andric
4270b57cec5SDimitry Andric // By default, use the Source scheduling
4280b57cec5SDimitry Andric if (sched4reg)
4290b57cec5SDimitry Andric setSchedulingPreference(Sched::RegPressure);
4300b57cec5SDimitry Andric else
4310b57cec5SDimitry Andric setSchedulingPreference(Sched::Source);
4320b57cec5SDimitry Andric
4330b57cec5SDimitry Andric auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4340b57cec5SDimitry Andric LegalizeAction NoF16Action) {
4350b57cec5SDimitry Andric setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
4360b57cec5SDimitry Andric };
4370b57cec5SDimitry Andric
43806c3fb27SDimitry Andric auto setBF16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
43906c3fb27SDimitry Andric LegalizeAction NoBF16Action) {
44006c3fb27SDimitry Andric bool IsOpSupported = STI.hasBF16Math();
44106c3fb27SDimitry Andric // Few instructions are available on sm_90 only
44206c3fb27SDimitry Andric switch(Op) {
44306c3fb27SDimitry Andric case ISD::FADD:
44406c3fb27SDimitry Andric case ISD::FMUL:
44506c3fb27SDimitry Andric case ISD::FSUB:
4465f757f3fSDimitry Andric case ISD::SELECT:
4475f757f3fSDimitry Andric case ISD::SELECT_CC:
4485f757f3fSDimitry Andric case ISD::SETCC:
4495f757f3fSDimitry Andric case ISD::FEXP2:
4505f757f3fSDimitry Andric case ISD::FCEIL:
4515f757f3fSDimitry Andric case ISD::FFLOOR:
4525f757f3fSDimitry Andric case ISD::FNEARBYINT:
4535f757f3fSDimitry Andric case ISD::FRINT:
4540fca6ea1SDimitry Andric case ISD::FROUNDEVEN:
4555f757f3fSDimitry Andric case ISD::FTRUNC:
45606c3fb27SDimitry Andric IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
45706c3fb27SDimitry Andric break;
45806c3fb27SDimitry Andric }
45906c3fb27SDimitry Andric setOperationAction(
46006c3fb27SDimitry Andric Op, VT, IsOpSupported ? Action : NoBF16Action);
46106c3fb27SDimitry Andric };
46206c3fb27SDimitry Andric
4635f757f3fSDimitry Andric auto setI16x2OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4645f757f3fSDimitry Andric LegalizeAction NoI16x2Action) {
4655f757f3fSDimitry Andric bool IsOpSupported = false;
4665f757f3fSDimitry Andric // instructions are available on sm_90 only
4675f757f3fSDimitry Andric switch (Op) {
4685f757f3fSDimitry Andric case ISD::ADD:
4695f757f3fSDimitry Andric case ISD::SMAX:
4705f757f3fSDimitry Andric case ISD::SMIN:
4715f757f3fSDimitry Andric case ISD::UMIN:
4725f757f3fSDimitry Andric case ISD::UMAX:
4735f757f3fSDimitry Andric IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 80;
4745f757f3fSDimitry Andric break;
4755f757f3fSDimitry Andric }
4765f757f3fSDimitry Andric setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action);
4775f757f3fSDimitry Andric };
4785f757f3fSDimitry Andric
4790b57cec5SDimitry Andric addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
4800b57cec5SDimitry Andric addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
4815f757f3fSDimitry Andric addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass);
4825f757f3fSDimitry Andric addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
4830b57cec5SDimitry Andric addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
4840b57cec5SDimitry Andric addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
4850b57cec5SDimitry Andric addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
4860b57cec5SDimitry Andric addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
48706c3fb27SDimitry Andric addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
48806c3fb27SDimitry Andric addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
48906c3fb27SDimitry Andric addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
49006c3fb27SDimitry Andric addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
4910b57cec5SDimitry Andric
4920b57cec5SDimitry Andric // Conversion to/from FP16/FP16x2 is always legal.
4930b57cec5SDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
4940b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
4950b57cec5SDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
4960b57cec5SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
4970b57cec5SDimitry Andric
4980fca6ea1SDimitry Andric setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
4990fca6ea1SDimitry Andric if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
5000fca6ea1SDimitry Andric setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
5010fca6ea1SDimitry Andric
5020b57cec5SDimitry Andric setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
5030b57cec5SDimitry Andric setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
5040b57cec5SDimitry Andric
50506c3fb27SDimitry Andric // Conversion to/from BFP16/BFP16x2 is always legal.
50606c3fb27SDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom);
50706c3fb27SDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom);
50806c3fb27SDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand);
50906c3fb27SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand);
51006c3fb27SDimitry Andric
51106c3fb27SDimitry Andric setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand);
5125f757f3fSDimitry Andric setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
5135f757f3fSDimitry Andric if (getOperationAction(ISD::SETCC, MVT::bf16) == Promote)
5145f757f3fSDimitry Andric AddPromotedToType(ISD::SETCC, MVT::bf16, MVT::f32);
5155f757f3fSDimitry Andric
5165f757f3fSDimitry Andric // Conversion to/from i16/i16x2 is always legal.
5175f757f3fSDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
5185f757f3fSDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
5195f757f3fSDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand);
5205f757f3fSDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand);
5215f757f3fSDimitry Andric
5225f757f3fSDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
5235f757f3fSDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
5245f757f3fSDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
5255f757f3fSDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
5265f757f3fSDimitry Andric // Only logical ops can be done on v4i8 directly, others must be done
5275f757f3fSDimitry Andric // elementwise.
5285f757f3fSDimitry Andric setOperationAction(
5295f757f3fSDimitry Andric {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE,
5305f757f3fSDimitry Andric ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ,
5315f757f3fSDimitry Andric ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR,
5325f757f3fSDimitry Andric ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY,
5335f757f3fSDimitry Andric ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY,
5345f757f3fSDimitry Andric ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC,
5355f757f3fSDimitry Andric ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX,
5365f757f3fSDimitry Andric ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA,
5375f757f3fSDimitry Andric ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO,
5385f757f3fSDimitry Andric ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC,
5395f757f3fSDimitry Andric ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT,
5405f757f3fSDimitry Andric ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX,
5415f757f3fSDimitry Andric ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM,
5425f757f3fSDimitry Andric ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT,
5435f757f3fSDimitry Andric ISD::USUBSAT},
5445f757f3fSDimitry Andric MVT::v4i8, Expand);
5455f757f3fSDimitry Andric
5460b57cec5SDimitry Andric // Operations not directly supported by NVPTX.
54706c3fb27SDimitry Andric for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
5485f757f3fSDimitry Andric MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8,
5495f757f3fSDimitry Andric MVT::i32, MVT::i64}) {
5500b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, VT, Expand);
5510b57cec5SDimitry Andric setOperationAction(ISD::BR_CC, VT, Expand);
5520b57cec5SDimitry Andric }
5530b57cec5SDimitry Andric
5540b57cec5SDimitry Andric // Some SIGN_EXTEND_INREG can be done using cvt instruction.
5550b57cec5SDimitry Andric // For others we will expand to a SHL/SRA pair.
5560b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
5570b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
5580b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
5590b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
5600b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
5615f757f3fSDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
5620b57cec5SDimitry Andric
5630b57cec5SDimitry Andric setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
5640b57cec5SDimitry Andric setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
5650b57cec5SDimitry Andric setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
5660b57cec5SDimitry Andric setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
5670b57cec5SDimitry Andric setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
5680b57cec5SDimitry Andric setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
5690b57cec5SDimitry Andric
5700b57cec5SDimitry Andric setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
5710b57cec5SDimitry Andric setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
5720b57cec5SDimitry Andric
5730b57cec5SDimitry Andric // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
5740b57cec5SDimitry Andric // that don't have h/w rotation we lower them to multi-instruction assembly.
5750b57cec5SDimitry Andric // See ROT*_sw in NVPTXIntrInfo.td
5760b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i64, Legal);
5770b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i64, Legal);
5780b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i32, Legal);
5790b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i32, Legal);
5800b57cec5SDimitry Andric
5810b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i16, Expand);
5825f757f3fSDimitry Andric setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
5830b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i16, Expand);
5845f757f3fSDimitry Andric setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
5850b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i8, Expand);
5860b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i8, Expand);
5870b57cec5SDimitry Andric setOperationAction(ISD::BSWAP, MVT::i16, Expand);
5880b57cec5SDimitry Andric
5890b57cec5SDimitry Andric // Indirect branch is not supported.
5900b57cec5SDimitry Andric // This also disables Jump Table creation.
5910b57cec5SDimitry Andric setOperationAction(ISD::BR_JT, MVT::Other, Expand);
5920b57cec5SDimitry Andric setOperationAction(ISD::BRIND, MVT::Other, Expand);
5930b57cec5SDimitry Andric
5940b57cec5SDimitry Andric setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
5950b57cec5SDimitry Andric setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
5960b57cec5SDimitry Andric
5970b57cec5SDimitry Andric // We want to legalize constant related memmove and memcopy
5980b57cec5SDimitry Andric // intrinsics.
5990b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
6000b57cec5SDimitry Andric
6010b57cec5SDimitry Andric // Turn FP extload into load/fpextend
6020b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
6030b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
60406c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
60506c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
6060b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
6070b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
6080b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
60906c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
61006c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
6110b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
6120b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
6130b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
61406c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
61506c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
6160b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
6175f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
6185f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
6195f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
6205f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
6210b57cec5SDimitry Andric // Turn FP truncstore into trunc + store.
6220b57cec5SDimitry Andric // FIXME: vector types should also be expanded
6230b57cec5SDimitry Andric setTruncStoreAction(MVT::f32, MVT::f16, Expand);
6240b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f16, Expand);
62506c3fb27SDimitry Andric setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
62606c3fb27SDimitry Andric setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
6270b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f32, Expand);
6280b57cec5SDimitry Andric
6290b57cec5SDimitry Andric // PTX does not support load / store predicate registers
6300b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::i1, Custom);
6310b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::i1, Custom);
6320b57cec5SDimitry Andric
6330b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) {
6340b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
6350b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
6360fca6ea1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
6370b57cec5SDimitry Andric setTruncStoreAction(VT, MVT::i1, Expand);
6380b57cec5SDimitry Andric }
6390b57cec5SDimitry Andric
6405f757f3fSDimitry Andric // expand extload of vector of integers.
6415f757f3fSDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
6425f757f3fSDimitry Andric MVT::v2i8, Expand);
6435f757f3fSDimitry Andric setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
6445f757f3fSDimitry Andric
6450b57cec5SDimitry Andric // This is legal in NVPTX
6460b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
6470b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
6480b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
649bdd1243dSDimitry Andric setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
6500b57cec5SDimitry Andric
6515f757f3fSDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
6525f757f3fSDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
6535f757f3fSDimitry Andric
6540b57cec5SDimitry Andric // TRAP can be lowered to PTX trap
6550b57cec5SDimitry Andric setOperationAction(ISD::TRAP, MVT::Other, Legal);
6560b57cec5SDimitry Andric
6570b57cec5SDimitry Andric // Register custom handling for vector loads/stores
6588bcb0991SDimitry Andric for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
6590b57cec5SDimitry Andric if (IsPTXVectorType(VT)) {
6600b57cec5SDimitry Andric setOperationAction(ISD::LOAD, VT, Custom);
6610b57cec5SDimitry Andric setOperationAction(ISD::STORE, VT, Custom);
6620b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
6630b57cec5SDimitry Andric }
6640b57cec5SDimitry Andric }
6650b57cec5SDimitry Andric
666bdd1243dSDimitry Andric // Support varargs.
667bdd1243dSDimitry Andric setOperationAction(ISD::VASTART, MVT::Other, Custom);
668bdd1243dSDimitry Andric setOperationAction(ISD::VAARG, MVT::Other, Custom);
669bdd1243dSDimitry Andric setOperationAction(ISD::VACOPY, MVT::Other, Expand);
670bdd1243dSDimitry Andric setOperationAction(ISD::VAEND, MVT::Other, Expand);
671bdd1243dSDimitry Andric
6720b57cec5SDimitry Andric // Custom handling for i8 intrinsics
6730b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
6740b57cec5SDimitry Andric
6750b57cec5SDimitry Andric for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
6760b57cec5SDimitry Andric setOperationAction(ISD::ABS, Ty, Legal);
6770b57cec5SDimitry Andric setOperationAction(ISD::SMIN, Ty, Legal);
6780b57cec5SDimitry Andric setOperationAction(ISD::SMAX, Ty, Legal);
6790b57cec5SDimitry Andric setOperationAction(ISD::UMIN, Ty, Legal);
6800b57cec5SDimitry Andric setOperationAction(ISD::UMAX, Ty, Legal);
6810b57cec5SDimitry Andric
6820b57cec5SDimitry Andric setOperationAction(ISD::CTPOP, Ty, Legal);
6830b57cec5SDimitry Andric setOperationAction(ISD::CTLZ, Ty, Legal);
6840b57cec5SDimitry Andric }
6850b57cec5SDimitry Andric
6865f757f3fSDimitry Andric setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom);
6875f757f3fSDimitry Andric setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom);
6885f757f3fSDimitry Andric setI16x2OperationAction(ISD::SMAX, MVT::v2i16, Legal, Custom);
6895f757f3fSDimitry Andric setI16x2OperationAction(ISD::UMIN, MVT::v2i16, Legal, Custom);
6905f757f3fSDimitry Andric setI16x2OperationAction(ISD::UMAX, MVT::v2i16, Legal, Custom);
6915f757f3fSDimitry Andric setI16x2OperationAction(ISD::CTPOP, MVT::v2i16, Legal, Expand);
6925f757f3fSDimitry Andric setI16x2OperationAction(ISD::CTLZ, MVT::v2i16, Legal, Expand);
6935f757f3fSDimitry Andric
6945f757f3fSDimitry Andric setI16x2OperationAction(ISD::ADD, MVT::v2i16, Legal, Custom);
6955f757f3fSDimitry Andric setI16x2OperationAction(ISD::SUB, MVT::v2i16, Legal, Custom);
6965f757f3fSDimitry Andric setI16x2OperationAction(ISD::MUL, MVT::v2i16, Legal, Custom);
6975f757f3fSDimitry Andric setI16x2OperationAction(ISD::SHL, MVT::v2i16, Legal, Custom);
6985f757f3fSDimitry Andric setI16x2OperationAction(ISD::SREM, MVT::v2i16, Legal, Custom);
6995f757f3fSDimitry Andric setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom);
7005f757f3fSDimitry Andric
7015f757f3fSDimitry Andric // Other arithmetic and logic ops are unsupported.
7025f757f3fSDimitry Andric setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
7035f757f3fSDimitry Andric ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
7045f757f3fSDimitry Andric ISD::SINT_TO_FP, ISD::UINT_TO_FP},
7055f757f3fSDimitry Andric MVT::v2i16, Expand);
7065f757f3fSDimitry Andric
70781ad6265SDimitry Andric setOperationAction(ISD::ADDC, MVT::i32, Legal);
70881ad6265SDimitry Andric setOperationAction(ISD::ADDE, MVT::i32, Legal);
70981ad6265SDimitry Andric setOperationAction(ISD::SUBC, MVT::i32, Legal);
71081ad6265SDimitry Andric setOperationAction(ISD::SUBE, MVT::i32, Legal);
71181ad6265SDimitry Andric if (STI.getPTXVersion() >= 43) {
71281ad6265SDimitry Andric setOperationAction(ISD::ADDC, MVT::i64, Legal);
71381ad6265SDimitry Andric setOperationAction(ISD::ADDE, MVT::i64, Legal);
71481ad6265SDimitry Andric setOperationAction(ISD::SUBC, MVT::i64, Legal);
71581ad6265SDimitry Andric setOperationAction(ISD::SUBE, MVT::i64, Legal);
71681ad6265SDimitry Andric }
71781ad6265SDimitry Andric
7180b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i16, Expand);
7195f757f3fSDimitry Andric setOperationAction(ISD::CTTZ, MVT::v2i16, Expand);
7200b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i32, Expand);
7210b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i64, Expand);
7220b57cec5SDimitry Andric
7230b57cec5SDimitry Andric // PTX does not directly support SELP of i1, so promote to i32 first
7240b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::i1, Custom);
7250b57cec5SDimitry Andric
7260b57cec5SDimitry Andric // PTX cannot multiply two i64s in a single instruction.
7270b57cec5SDimitry Andric setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
7280b57cec5SDimitry Andric setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
7290b57cec5SDimitry Andric
7300b57cec5SDimitry Andric // We have some custom DAG combine patterns for these nodes
7315f757f3fSDimitry Andric setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
7325f757f3fSDimitry Andric ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM,
7335f757f3fSDimitry Andric ISD::VSELECT});
7340b57cec5SDimitry Andric
73506c3fb27SDimitry Andric // setcc for f16x2 and bf16x2 needs special handling to prevent
73606c3fb27SDimitry Andric // legalizer's attempt to scalarize it due to v2i1 not being legal.
73706c3fb27SDimitry Andric if (STI.allowFP16Math() || STI.hasBF16Math())
7380b57cec5SDimitry Andric setTargetDAGCombine(ISD::SETCC);
7390b57cec5SDimitry Andric
7400b57cec5SDimitry Andric // Promote fp16 arithmetic if fp16 hardware isn't available or the
7410b57cec5SDimitry Andric // user passed --nvptx-no-fp16-math. The flag is useful because,
7420b57cec5SDimitry Andric // although sm_53+ GPUs have some sort of FP16 support in
7430b57cec5SDimitry Andric // hardware, only sm_53 and sm_60 have full implementation. Others
7440b57cec5SDimitry Andric // only have token amount of hardware and are likely to run faster
7450b57cec5SDimitry Andric // by using fp32 units instead.
7460b57cec5SDimitry Andric for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
7470b57cec5SDimitry Andric setFP16OperationAction(Op, MVT::f16, Legal, Promote);
7480b57cec5SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
74906c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
75006c3fb27SDimitry Andric // bf16 must be promoted to f32.
7515f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
75206c3fb27SDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote)
75306c3fb27SDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32);
7540b57cec5SDimitry Andric }
7550b57cec5SDimitry Andric
756bdd1243dSDimitry Andric // f16/f16x2 neg was introduced in PTX 60, SM_53.
757bdd1243dSDimitry Andric const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
758bdd1243dSDimitry Andric STI.getPTXVersion() >= 60 &&
759bdd1243dSDimitry Andric STI.allowFP16Math();
760bdd1243dSDimitry Andric for (const auto &VT : {MVT::f16, MVT::v2f16})
761bdd1243dSDimitry Andric setOperationAction(ISD::FNEG, VT,
762bdd1243dSDimitry Andric IsFP16FP16x2NegAvailable ? Legal : Expand);
7630b57cec5SDimitry Andric
76406c3fb27SDimitry Andric setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand);
76506c3fb27SDimitry Andric setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand);
7660b57cec5SDimitry Andric // (would be) Library functions.
7670b57cec5SDimitry Andric
7680b57cec5SDimitry Andric // These map to conversion instructions for scalar FP types.
7690b57cec5SDimitry Andric for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
770bdd1243dSDimitry Andric ISD::FROUNDEVEN, ISD::FTRUNC}) {
7710b57cec5SDimitry Andric setOperationAction(Op, MVT::f16, Legal);
7720b57cec5SDimitry Andric setOperationAction(Op, MVT::f32, Legal);
7730b57cec5SDimitry Andric setOperationAction(Op, MVT::f64, Legal);
7740b57cec5SDimitry Andric setOperationAction(Op, MVT::v2f16, Expand);
77506c3fb27SDimitry Andric setOperationAction(Op, MVT::v2bf16, Expand);
7765f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
7775f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote)
7785f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32);
7795f757f3fSDimitry Andric }
7805f757f3fSDimitry Andric
7810fca6ea1SDimitry Andric if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71) {
7820fca6ea1SDimitry Andric setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
7830fca6ea1SDimitry Andric }
7840fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
7850fca6ea1SDimitry Andric for (MVT VT : {MVT::bf16, MVT::f32, MVT::f64}) {
7860fca6ea1SDimitry Andric setOperationAction(ISD::FP_EXTEND, VT, Custom);
7870fca6ea1SDimitry Andric setOperationAction(ISD::FP_ROUND, VT, Custom);
7880fca6ea1SDimitry Andric }
7890fca6ea1SDimitry Andric }
7900fca6ea1SDimitry Andric
7915f757f3fSDimitry Andric // sm_80 only has conversions between f32 and bf16. Custom lower all other
7925f757f3fSDimitry Andric // bf16 conversions.
7930fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
7945f757f3fSDimitry Andric for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
7955f757f3fSDimitry Andric setOperationAction(
7965f757f3fSDimitry Andric {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
7975f757f3fSDimitry Andric VT, Custom);
7985f757f3fSDimitry Andric }
7990fca6ea1SDimitry Andric setOperationAction(
8000fca6ea1SDimitry Andric {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
8010fca6ea1SDimitry Andric MVT::bf16, Custom);
8020b57cec5SDimitry Andric }
8030b57cec5SDimitry Andric
8040b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f16, Promote);
8050b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
80606c3fb27SDimitry Andric setOperationAction(ISD::FROUND, MVT::v2bf16, Expand);
8070b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f32, Custom);
8080b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f64, Custom);
8095f757f3fSDimitry Andric setOperationAction(ISD::FROUND, MVT::bf16, Promote);
8105f757f3fSDimitry Andric AddPromotedToType(ISD::FROUND, MVT::bf16, MVT::f32);
8110b57cec5SDimitry Andric
8120b57cec5SDimitry Andric // 'Expand' implements FCOPYSIGN without calling an external library.
8130b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
8140b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
81506c3fb27SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
81606c3fb27SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
8170b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
8180b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
8190b57cec5SDimitry Andric
8200b57cec5SDimitry Andric // These map to corresponding instructions for f32/f64. f16 must be
8210b57cec5SDimitry Andric // promoted to f32. v2f16 is expanded to f16, which is then promoted
8220b57cec5SDimitry Andric // to f32.
82304eeddc0SDimitry Andric for (const auto &Op :
8245f757f3fSDimitry Andric {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
8250b57cec5SDimitry Andric setOperationAction(Op, MVT::f16, Promote);
8260b57cec5SDimitry Andric setOperationAction(Op, MVT::f32, Legal);
8270b57cec5SDimitry Andric setOperationAction(Op, MVT::f64, Legal);
8280b57cec5SDimitry Andric setOperationAction(Op, MVT::v2f16, Expand);
82906c3fb27SDimitry Andric setOperationAction(Op, MVT::v2bf16, Expand);
8305f757f3fSDimitry Andric setOperationAction(Op, MVT::bf16, Promote);
8315f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32);
8320b57cec5SDimitry Andric }
8335f757f3fSDimitry Andric for (const auto &Op : {ISD::FABS}) {
8345f757f3fSDimitry Andric setOperationAction(Op, MVT::f16, Promote);
8355f757f3fSDimitry Andric setOperationAction(Op, MVT::f32, Legal);
8365f757f3fSDimitry Andric setOperationAction(Op, MVT::f64, Legal);
8375f757f3fSDimitry Andric setOperationAction(Op, MVT::v2f16, Expand);
8385f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8395f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8405f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote)
8415f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32);
8425f757f3fSDimitry Andric }
8435f757f3fSDimitry Andric
84404eeddc0SDimitry Andric // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
84504eeddc0SDimitry Andric auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
84604eeddc0SDimitry Andric bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
84704eeddc0SDimitry Andric return IsAtLeastSm80 ? Legal : NotSm80Action;
84804eeddc0SDimitry Andric };
84904eeddc0SDimitry Andric for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
85004eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
85104eeddc0SDimitry Andric setOperationAction(Op, MVT::f32, Legal);
85204eeddc0SDimitry Andric setOperationAction(Op, MVT::f64, Legal);
85304eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
85406c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8555f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8565f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote)
8575f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32);
85804eeddc0SDimitry Andric }
85904eeddc0SDimitry Andric for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
86004eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
86106c3fb27SDimitry Andric setFP16OperationAction(Op, MVT::bf16, Legal, Expand);
86204eeddc0SDimitry Andric setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
86304eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
86406c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
86504eeddc0SDimitry Andric }
8660b57cec5SDimitry Andric
8670fca6ea1SDimitry Andric // Custom lowering for inline asm with 128-bit operands
8680fca6ea1SDimitry Andric setOperationAction(ISD::CopyToReg, MVT::i128, Custom);
8690fca6ea1SDimitry Andric setOperationAction(ISD::CopyFromReg, MVT::i128, Custom);
8700fca6ea1SDimitry Andric
8710b57cec5SDimitry Andric // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
8720b57cec5SDimitry Andric // No FPOW or FREM in PTX.
8730b57cec5SDimitry Andric
8740b57cec5SDimitry Andric // Now deduce the information based on the above mentioned
8750b57cec5SDimitry Andric // actions
8760b57cec5SDimitry Andric computeRegisterProperties(STI.getRegisterInfo());
87781ad6265SDimitry Andric
87881ad6265SDimitry Andric setMinCmpXchgSizeInBits(32);
8791db9f3b2SDimitry Andric setMaxAtomicSizeInBitsSupported(64);
8800fca6ea1SDimitry Andric setMaxDivRemBitWidthSupported(64);
8810b57cec5SDimitry Andric }
8820b57cec5SDimitry Andric
getTargetNodeName(unsigned Opcode) const8830b57cec5SDimitry Andric const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
8840fca6ea1SDimitry Andric
8850fca6ea1SDimitry Andric #define MAKE_CASE(V) \
8860fca6ea1SDimitry Andric case V: \
8870fca6ea1SDimitry Andric return #V;
8880fca6ea1SDimitry Andric
8890b57cec5SDimitry Andric switch ((NVPTXISD::NodeType)Opcode) {
8900b57cec5SDimitry Andric case NVPTXISD::FIRST_NUMBER:
8910b57cec5SDimitry Andric break;
8920b57cec5SDimitry Andric
8930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CALL)
8940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::RET_GLUE)
8950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LOAD_PARAM)
8960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Wrapper)
8970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareParam)
8980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareScalarParam)
8990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareRet)
9000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareScalarRet)
9010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareRetParam)
9020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintCall)
9030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintConvergentCall)
9040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintCallUni)
9050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintConvergentCallUni)
9060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParam)
9070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParamV2)
9080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParamV4)
9090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParam)
9100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamV2)
9110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamV4)
9120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamS32)
9130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamU32)
9140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArgBegin)
9150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArg)
9160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LastCallArg)
9170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArgEnd)
9180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallVoid)
9190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallVal)
9200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSymbol)
9210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Prototype)
9220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MoveParam)
9230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetval)
9240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetvalV2)
9250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetvalV4)
9260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PseudoUseParam)
9270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::RETURN)
9280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSeqBegin)
9290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSeqEnd)
9300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallPrototype)
9310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::ProxyReg)
9320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadV2)
9330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadV4)
9340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDGV2)
9350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDGV4)
9360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDUV2)
9370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDUV4)
9380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreV2)
9390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreV4)
9400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::FUN_SHFL_CLAMP)
9410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::FUN_SHFR_CLAMP)
9420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::IMAD)
9430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::BFE)
9440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::BFI)
9450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PRMT)
9460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
9470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::SETP_F16X2)
9480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::SETP_BF16X2)
9490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Dummy)
9500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
9510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
9520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatS32)
9530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloat)
9540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloatLevel)
9550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloatGrad)
9560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32S32)
9570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32Float)
9580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32FloatLevel)
9590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32FloatGrad)
9600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32S32)
9610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32Float)
9620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32FloatLevel)
9630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32FloatGrad)
9640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatS32)
9650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloat)
9660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatLevel)
9670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatGrad)
9680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32S32)
9690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32Float)
9700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatLevel)
9710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatGrad)
9720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32S32)
9730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32Float)
9740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatLevel)
9750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatGrad)
9760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatS32)
9770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloat)
9780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloatLevel)
9790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloatGrad)
9800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32S32)
9810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32Float)
9820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32FloatLevel)
9830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32FloatGrad)
9840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32S32)
9850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32Float)
9860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32FloatLevel)
9870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32FloatGrad)
9880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatS32)
9890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloat)
9900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatLevel)
9910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatGrad)
9920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32S32)
9930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32Float)
9940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatLevel)
9950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatGrad)
9960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32S32)
9970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32Float)
9980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatLevel)
9990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatGrad)
10000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatS32)
10010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloat)
10020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloatLevel)
10030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloatGrad)
10040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32S32)
10050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32Float)
10060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32FloatLevel)
10070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32FloatGrad)
10080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32S32)
10090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32Float)
10100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32FloatLevel)
10110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32FloatGrad)
10120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeFloatFloat)
10130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeFloatFloatLevel)
10140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeS32Float)
10150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeS32FloatLevel)
10160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeU32Float)
10170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeU32FloatLevel)
10180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloat)
10190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloatLevel)
10200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayS32Float)
10210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayS32FloatLevel)
10220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayU32Float)
10230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayU32FloatLevel)
10240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DFloatFloat)
10250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DFloatFloat)
10260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DFloatFloat)
10270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DFloatFloat)
10280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DS64Float)
10290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DS64Float)
10300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DS64Float)
10310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DS64Float)
10320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DU64Float)
10330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DU64Float)
10340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DU64Float)
10350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DU64Float)
10360b57cec5SDimitry Andric
10370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatS32)
10380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloat)
10390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatLevel)
10400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatGrad)
10410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32S32)
10420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32Float)
10430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32FloatLevel)
10440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32FloatGrad)
10450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32S32)
10460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32Float)
10470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32FloatLevel)
10480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32FloatGrad)
10490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatS32)
10500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloat)
10510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatLevel)
10520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatGrad)
10530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32S32)
10540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32Float)
10550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatLevel)
10560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatGrad)
10570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32S32)
10580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32Float)
10590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatLevel)
10600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatGrad)
10610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatS32)
10620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloat)
10630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatLevel)
10640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatGrad)
10650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32S32)
10660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32Float)
10670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32FloatLevel)
10680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32FloatGrad)
10690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32S32)
10700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32Float)
10710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32FloatLevel)
10720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32FloatGrad)
10730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatS32)
10740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloat)
10750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatLevel)
10760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatGrad)
10770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32S32)
10780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32Float)
10790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatLevel)
10800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatGrad)
10810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32S32)
10820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32Float)
10830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatLevel)
10840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatGrad)
10850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatS32)
10860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloat)
10870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatLevel)
10880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatGrad)
10890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32S32)
10900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32Float)
10910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32FloatLevel)
10920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32FloatGrad)
10930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32S32)
10940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32Float)
10950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32FloatLevel)
10960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32FloatGrad)
10970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloat)
10980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatLevel)
10990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32Float)
11000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatLevel)
11010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32Float)
11020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatLevel)
11030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloat)
11040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel)
11050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32Float)
11060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatLevel)
11070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32Float)
11080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatLevel)
11090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatGrad)
11100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatGrad)
11110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatGrad)
11120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad)
11130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatGrad)
11140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatGrad)
11150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DFloatFloat)
11160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DFloatFloat)
11170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DFloatFloat)
11180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DFloatFloat)
11190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DS64Float)
11200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DS64Float)
11210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DS64Float)
11220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DS64Float)
11230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DU64Float)
11240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DU64Float)
11250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DU64Float)
11260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DU64Float)
11270b57cec5SDimitry Andric
11280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Clamp)
11290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Clamp)
11300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Clamp)
11310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Clamp)
11320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Clamp)
11330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Clamp)
11340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Clamp)
11350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Clamp)
11360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Clamp)
11370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Clamp)
11380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Clamp)
11390b57cec5SDimitry Andric
11400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Clamp)
11410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Clamp)
11420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Clamp)
11430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Clamp)
11440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Clamp)
11450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Clamp)
11460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Clamp)
11470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Clamp)
11480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Clamp)
11490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Clamp)
11500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Clamp)
11510b57cec5SDimitry Andric
11520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Clamp)
11530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Clamp)
11540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Clamp)
11550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Clamp)
11560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Clamp)
11570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Clamp)
11580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Clamp)
11590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Clamp)
11600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Clamp)
11610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Clamp)
11620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Clamp)
11630b57cec5SDimitry Andric
11640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Clamp)
11650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Clamp)
11660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Clamp)
11670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Clamp)
11680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Clamp)
11690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Clamp)
11700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Clamp)
11710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Clamp)
11720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Clamp)
11730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Clamp)
11740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Clamp)
11750b57cec5SDimitry Andric
11760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Clamp)
11770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Clamp)
11780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Clamp)
11790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Clamp)
11800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Clamp)
11810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Clamp)
11820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Clamp)
11830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Clamp)
11840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Clamp)
11850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Clamp)
11860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Clamp)
11870b57cec5SDimitry Andric
11880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Trap)
11890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Trap)
11900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Trap)
11910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Trap)
11920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Trap)
11930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Trap)
11940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Trap)
11950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Trap)
11960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Trap)
11970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Trap)
11980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Trap)
11990b57cec5SDimitry Andric
12000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Trap)
12010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Trap)
12020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Trap)
12030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Trap)
12040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Trap)
12050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Trap)
12060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Trap)
12070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Trap)
12080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Trap)
12090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Trap)
12100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Trap)
12110b57cec5SDimitry Andric
12120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Trap)
12130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Trap)
12140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Trap)
12150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Trap)
12160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Trap)
12170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Trap)
12180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Trap)
12190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Trap)
12200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Trap)
12210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Trap)
12220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Trap)
12230b57cec5SDimitry Andric
12240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Trap)
12250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Trap)
12260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Trap)
12270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Trap)
12280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Trap)
12290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Trap)
12300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Trap)
12310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Trap)
12320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Trap)
12330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Trap)
12340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Trap)
12350b57cec5SDimitry Andric
12360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Trap)
12370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Trap)
12380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Trap)
12390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Trap)
12400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Trap)
12410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Trap)
12420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Trap)
12430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Trap)
12440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Trap)
12450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Trap)
12460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Trap)
12470b57cec5SDimitry Andric
12480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Zero)
12490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Zero)
12500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Zero)
12510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Zero)
12520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Zero)
12530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Zero)
12540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Zero)
12550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Zero)
12560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Zero)
12570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Zero)
12580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Zero)
12590b57cec5SDimitry Andric
12600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Zero)
12610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Zero)
12620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Zero)
12630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Zero)
12640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Zero)
12650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Zero)
12660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Zero)
12670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Zero)
12680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Zero)
12690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Zero)
12700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Zero)
12710b57cec5SDimitry Andric
12720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Zero)
12730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Zero)
12740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Zero)
12750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Zero)
12760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Zero)
12770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Zero)
12780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Zero)
12790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Zero)
12800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Zero)
12810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Zero)
12820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Zero)
12830b57cec5SDimitry Andric
12840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Zero)
12850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Zero)
12860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Zero)
12870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Zero)
12880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Zero)
12890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Zero)
12900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Zero)
12910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Zero)
12920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Zero)
12930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Zero)
12940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Zero)
12950fca6ea1SDimitry Andric
12960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Zero)
12970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Zero)
12980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Zero)
12990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Zero)
13000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Zero)
13010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Zero)
13020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Zero)
13030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Zero)
13040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Zero)
13050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Zero)
13060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Zero)
13070b57cec5SDimitry Andric }
13080b57cec5SDimitry Andric return nullptr;
13090fca6ea1SDimitry Andric
13100fca6ea1SDimitry Andric #undef MAKE_CASE
13110b57cec5SDimitry Andric }
13120b57cec5SDimitry Andric
13130b57cec5SDimitry Andric TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const13140b57cec5SDimitry Andric NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
1315fe6060f1SDimitry Andric if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
1316fe6060f1SDimitry Andric VT.getScalarType() == MVT::i1)
13170b57cec5SDimitry Andric return TypeSplitVector;
13185f757f3fSDimitry Andric if (Isv2x16VT(VT))
13190b57cec5SDimitry Andric return TypeLegal;
13200b57cec5SDimitry Andric return TargetLoweringBase::getPreferredVectorAction(VT);
13210b57cec5SDimitry Andric }
13220b57cec5SDimitry Andric
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & ExtraSteps,bool & UseOneConst,bool Reciprocal) const13230b57cec5SDimitry Andric SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13240b57cec5SDimitry Andric int Enabled, int &ExtraSteps,
13250b57cec5SDimitry Andric bool &UseOneConst,
13260b57cec5SDimitry Andric bool Reciprocal) const {
13270b57cec5SDimitry Andric if (!(Enabled == ReciprocalEstimate::Enabled ||
13280b57cec5SDimitry Andric (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
13290b57cec5SDimitry Andric return SDValue();
13300b57cec5SDimitry Andric
13310b57cec5SDimitry Andric if (ExtraSteps == ReciprocalEstimate::Unspecified)
13320b57cec5SDimitry Andric ExtraSteps = 0;
13330b57cec5SDimitry Andric
13340b57cec5SDimitry Andric SDLoc DL(Operand);
13350b57cec5SDimitry Andric EVT VT = Operand.getValueType();
13360b57cec5SDimitry Andric bool Ftz = useF32FTZ(DAG.getMachineFunction());
13370b57cec5SDimitry Andric
13380b57cec5SDimitry Andric auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
13390b57cec5SDimitry Andric return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13400b57cec5SDimitry Andric DAG.getConstant(IID, DL, MVT::i32), Operand);
13410b57cec5SDimitry Andric };
13420b57cec5SDimitry Andric
13430b57cec5SDimitry Andric // The sqrt and rsqrt refinement processes assume we always start out with an
13440b57cec5SDimitry Andric // approximation of the rsqrt. Therefore, if we're going to do any refinement
13450b57cec5SDimitry Andric // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
13460b57cec5SDimitry Andric // any refinement, we must return a regular sqrt.
13470b57cec5SDimitry Andric if (Reciprocal || ExtraSteps > 0) {
13480b57cec5SDimitry Andric if (VT == MVT::f32)
13490b57cec5SDimitry Andric return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
13500b57cec5SDimitry Andric : Intrinsic::nvvm_rsqrt_approx_f);
13510b57cec5SDimitry Andric else if (VT == MVT::f64)
13520b57cec5SDimitry Andric return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
13530b57cec5SDimitry Andric else
13540b57cec5SDimitry Andric return SDValue();
13550b57cec5SDimitry Andric } else {
13560b57cec5SDimitry Andric if (VT == MVT::f32)
13570b57cec5SDimitry Andric return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
13580b57cec5SDimitry Andric : Intrinsic::nvvm_sqrt_approx_f);
13590b57cec5SDimitry Andric else {
13600b57cec5SDimitry Andric // There's no sqrt.approx.f64 instruction, so we emit
13610b57cec5SDimitry Andric // reciprocal(rsqrt(x)). This is faster than
13620b57cec5SDimitry Andric // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
13630b57cec5SDimitry Andric // x * rsqrt(x).)
13640b57cec5SDimitry Andric return DAG.getNode(
13650b57cec5SDimitry Andric ISD::INTRINSIC_WO_CHAIN, DL, VT,
13660b57cec5SDimitry Andric DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
13670b57cec5SDimitry Andric MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
13680b57cec5SDimitry Andric }
13690b57cec5SDimitry Andric }
13700b57cec5SDimitry Andric }
13710b57cec5SDimitry Andric
13720b57cec5SDimitry Andric SDValue
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const13730b57cec5SDimitry Andric NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
13740b57cec5SDimitry Andric SDLoc dl(Op);
13750b57cec5SDimitry Andric const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
13760b57cec5SDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
13770b57cec5SDimitry Andric Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
13780b57cec5SDimitry Andric return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
13790b57cec5SDimitry Andric }
13800b57cec5SDimitry Andric
IsTypePassedAsArray(const Type * Ty)138106c3fb27SDimitry Andric static bool IsTypePassedAsArray(const Type *Ty) {
138206c3fb27SDimitry Andric return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
138306c3fb27SDimitry Andric Ty->isHalfTy() || Ty->isBFloatTy();
138406c3fb27SDimitry Andric }
138506c3fb27SDimitry Andric
getPrototype(const DataLayout & DL,Type * retTy,const ArgListTy & Args,const SmallVectorImpl<ISD::OutputArg> & Outs,MaybeAlign retAlignment,std::optional<std::pair<unsigned,const APInt &>> VAInfo,const CallBase & CB,unsigned UniqueCallSite) const13860b57cec5SDimitry Andric std::string NVPTXTargetLowering::getPrototype(
13870b57cec5SDimitry Andric const DataLayout &DL, Type *retTy, const ArgListTy &Args,
13885ffd83dbSDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
1389bdd1243dSDimitry Andric std::optional<std::pair<unsigned, const APInt &>> VAInfo,
1390e8d8bef9SDimitry Andric const CallBase &CB, unsigned UniqueCallSite) const {
13910b57cec5SDimitry Andric auto PtrVT = getPointerTy(DL);
13920b57cec5SDimitry Andric
13930b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20);
13940b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported");
13950b57cec5SDimitry Andric if (!isABI)
13960b57cec5SDimitry Andric return "";
13970b57cec5SDimitry Andric
1398bdd1243dSDimitry Andric std::string Prototype;
1399bdd1243dSDimitry Andric raw_string_ostream O(Prototype);
1400e8d8bef9SDimitry Andric O << "prototype_" << UniqueCallSite << " : .callprototype ";
14010b57cec5SDimitry Andric
14020b57cec5SDimitry Andric if (retTy->getTypeID() == Type::VoidTyID) {
14030b57cec5SDimitry Andric O << "()";
14040b57cec5SDimitry Andric } else {
14050b57cec5SDimitry Andric O << "(";
140606c3fb27SDimitry Andric if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) &&
140706c3fb27SDimitry Andric !IsTypePassedAsArray(retTy)) {
14080b57cec5SDimitry Andric unsigned size = 0;
14090b57cec5SDimitry Andric if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
14100b57cec5SDimitry Andric size = ITy->getBitWidth();
14110b57cec5SDimitry Andric } else {
14120b57cec5SDimitry Andric assert(retTy->isFloatingPointTy() &&
14130b57cec5SDimitry Andric "Floating point type expected here");
14140b57cec5SDimitry Andric size = retTy->getPrimitiveSizeInBits();
14150b57cec5SDimitry Andric }
14160b57cec5SDimitry Andric // PTX ABI requires all scalar return values to be at least 32
14170b57cec5SDimitry Andric // bits in size. fp16 normally uses .b16 as its storage type in
14180b57cec5SDimitry Andric // PTX, so its size must be adjusted here, too.
1419fcaf7f86SDimitry Andric size = promoteScalarArgumentSize(size);
14200b57cec5SDimitry Andric
14210b57cec5SDimitry Andric O << ".param .b" << size << " _";
14220b57cec5SDimitry Andric } else if (isa<PointerType>(retTy)) {
14230b57cec5SDimitry Andric O << ".param .b" << PtrVT.getSizeInBits() << " _";
142406c3fb27SDimitry Andric } else if (IsTypePassedAsArray(retTy)) {
14255ffd83dbSDimitry Andric O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
14265ffd83dbSDimitry Andric << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
14270b57cec5SDimitry Andric } else {
14280b57cec5SDimitry Andric llvm_unreachable("Unknown return type");
14290b57cec5SDimitry Andric }
14300b57cec5SDimitry Andric O << ") ";
14310b57cec5SDimitry Andric }
14320b57cec5SDimitry Andric O << "_ (";
14330b57cec5SDimitry Andric
14340b57cec5SDimitry Andric bool first = true;
14350b57cec5SDimitry Andric
1436bdd1243dSDimitry Andric unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1437bdd1243dSDimitry Andric for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
14380b57cec5SDimitry Andric Type *Ty = Args[i].Ty;
14390b57cec5SDimitry Andric if (!first) {
14400b57cec5SDimitry Andric O << ", ";
14410b57cec5SDimitry Andric }
14420b57cec5SDimitry Andric first = false;
14430b57cec5SDimitry Andric
14440b57cec5SDimitry Andric if (!Outs[OIdx].Flags.isByVal()) {
144506c3fb27SDimitry Andric if (IsTypePassedAsArray(Ty)) {
14460fca6ea1SDimitry Andric Align ParamAlign =
14470fca6ea1SDimitry Andric getArgumentAlignment(&CB, Ty, i + AttributeList::FirstArgIndex, DL);
14480fca6ea1SDimitry Andric O << ".param .align " << ParamAlign.value() << " .b8 ";
14490b57cec5SDimitry Andric O << "_";
145081ad6265SDimitry Andric O << "[" << DL.getTypeAllocSize(Ty) << "]";
14510b57cec5SDimitry Andric // update the index for Outs
14520b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts;
14530b57cec5SDimitry Andric ComputeValueVTs(*this, DL, Ty, vtparts);
14540b57cec5SDimitry Andric if (unsigned len = vtparts.size())
14550b57cec5SDimitry Andric OIdx += len - 1;
14560b57cec5SDimitry Andric continue;
14570b57cec5SDimitry Andric }
14580b57cec5SDimitry Andric // i8 types in IR will be i16 types in SDAG
14590b57cec5SDimitry Andric assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
14600b57cec5SDimitry Andric (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
14610b57cec5SDimitry Andric "type mismatch between callee prototype and arguments");
14620b57cec5SDimitry Andric // scalar type
14630b57cec5SDimitry Andric unsigned sz = 0;
14640b57cec5SDimitry Andric if (isa<IntegerType>(Ty)) {
14650b57cec5SDimitry Andric sz = cast<IntegerType>(Ty)->getBitWidth();
1466fcaf7f86SDimitry Andric sz = promoteScalarArgumentSize(sz);
14670b57cec5SDimitry Andric } else if (isa<PointerType>(Ty)) {
14680b57cec5SDimitry Andric sz = PtrVT.getSizeInBits();
146906c3fb27SDimitry Andric } else {
14700b57cec5SDimitry Andric sz = Ty->getPrimitiveSizeInBits();
147106c3fb27SDimitry Andric }
14720b57cec5SDimitry Andric O << ".param .b" << sz << " ";
14730b57cec5SDimitry Andric O << "_";
14740b57cec5SDimitry Andric continue;
14750b57cec5SDimitry Andric }
14760b57cec5SDimitry Andric
147736b606aeSDimitry Andric // Indirect calls need strict ABI alignment so we disable optimizations by
147836b606aeSDimitry Andric // not providing a function to optimize.
147981ad6265SDimitry Andric Type *ETy = Args[i].IndirectType;
1480bdd1243dSDimitry Andric Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1481bdd1243dSDimitry Andric Align ParamByValAlign =
148236b606aeSDimitry Andric getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
148381ad6265SDimitry Andric
148481ad6265SDimitry Andric O << ".param .align " << ParamByValAlign.value() << " .b8 ";
14850b57cec5SDimitry Andric O << "_";
148681ad6265SDimitry Andric O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
14870b57cec5SDimitry Andric }
1488bdd1243dSDimitry Andric
1489bdd1243dSDimitry Andric if (VAInfo)
1490bdd1243dSDimitry Andric O << (first ? "" : ",") << " .param .align " << VAInfo->second
1491bdd1243dSDimitry Andric << " .b8 _[]\n";
1492bdd1243dSDimitry Andric O << ")";
1493bdd1243dSDimitry Andric if (shouldEmitPTXNoReturn(&CB, *nvTM))
1494bdd1243dSDimitry Andric O << " .noreturn";
1495bdd1243dSDimitry Andric O << ";";
1496bdd1243dSDimitry Andric
1497bdd1243dSDimitry Andric return Prototype;
14980b57cec5SDimitry Andric }
14990b57cec5SDimitry Andric
getFunctionArgumentAlignment(const Function * F,Type * Ty,unsigned Idx,const DataLayout & DL) const15000fca6ea1SDimitry Andric Align NVPTXTargetLowering::getFunctionArgumentAlignment(
15010fca6ea1SDimitry Andric const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL) const {
15020fca6ea1SDimitry Andric return getAlign(*F, Idx).value_or(getFunctionParamOptimizedAlign(F, Ty, DL));
15030fca6ea1SDimitry Andric }
15040fca6ea1SDimitry Andric
getArgumentAlignment(const CallBase * CB,Type * Ty,unsigned Idx,const DataLayout & DL) const15057a6dacacSDimitry Andric Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
15065ffd83dbSDimitry Andric unsigned Idx,
15070b57cec5SDimitry Andric const DataLayout &DL) const {
15085ffd83dbSDimitry Andric if (!CB) {
15090b57cec5SDimitry Andric // CallSite is zero, fallback to ABI type alignment
15105ffd83dbSDimitry Andric return DL.getABITypeAlign(Ty);
15110b57cec5SDimitry Andric }
15120b57cec5SDimitry Andric
15135ffd83dbSDimitry Andric const Function *DirectCallee = CB->getCalledFunction();
15140b57cec5SDimitry Andric
15150b57cec5SDimitry Andric if (!DirectCallee) {
15160b57cec5SDimitry Andric // We don't have a direct function symbol, but that may be because of
15170b57cec5SDimitry Andric // constant cast instructions in the call.
15180b57cec5SDimitry Andric
15190b57cec5SDimitry Andric // With bitcast'd call targets, the instruction will be the call
15205ffd83dbSDimitry Andric if (const auto *CI = dyn_cast<CallInst>(CB)) {
15210b57cec5SDimitry Andric // Check if we have call alignment metadata
15220fca6ea1SDimitry Andric if (MaybeAlign StackAlign = getAlign(*CI, Idx))
15230fca6ea1SDimitry Andric return StackAlign.value();
15240b57cec5SDimitry Andric }
1525bdd1243dSDimitry Andric DirectCallee = getMaybeBitcastedCallee(CB);
15260b57cec5SDimitry Andric }
15270b57cec5SDimitry Andric
15280b57cec5SDimitry Andric // Check for function alignment information if we found that the
15290b57cec5SDimitry Andric // ultimate target is a Function
15300fca6ea1SDimitry Andric if (DirectCallee)
15310fca6ea1SDimitry Andric return getFunctionArgumentAlignment(DirectCallee, Ty, Idx, DL);
15320b57cec5SDimitry Andric
153381ad6265SDimitry Andric // Call is indirect, fall back to the ABI type alignment
15345ffd83dbSDimitry Andric return DL.getABITypeAlign(Ty);
15350b57cec5SDimitry Andric }
15360b57cec5SDimitry Andric
adjustElementType(EVT & ElementType)15370fca6ea1SDimitry Andric static bool adjustElementType(EVT &ElementType) {
15380fca6ea1SDimitry Andric switch (ElementType.getSimpleVT().SimpleTy) {
15390fca6ea1SDimitry Andric default:
15400fca6ea1SDimitry Andric return false;
15410fca6ea1SDimitry Andric case MVT::f16:
15420fca6ea1SDimitry Andric case MVT::bf16:
15430fca6ea1SDimitry Andric ElementType = MVT::i16;
15440fca6ea1SDimitry Andric return true;
15450fca6ea1SDimitry Andric case MVT::f32:
15460fca6ea1SDimitry Andric case MVT::v2f16:
15470fca6ea1SDimitry Andric case MVT::v2bf16:
15480fca6ea1SDimitry Andric ElementType = MVT::i32;
15490fca6ea1SDimitry Andric return true;
15500fca6ea1SDimitry Andric case MVT::f64:
15510fca6ea1SDimitry Andric ElementType = MVT::i64;
15520fca6ea1SDimitry Andric return true;
15530fca6ea1SDimitry Andric }
15540fca6ea1SDimitry Andric }
15550fca6ea1SDimitry Andric
15560fca6ea1SDimitry Andric // Use byte-store when the param address of the argument value is unaligned.
15570fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure.
15580fca6ea1SDimitry Andric //
15590fca6ea1SDimitry Andric // This is called in LowerCall() when passing the param values.
LowerUnalignedStoreParam(SelectionDAG & DAG,SDValue Chain,uint64_t Offset,EVT ElementType,SDValue StVal,SDValue & InGlue,unsigned ArgID,const SDLoc & dl)15600fca6ea1SDimitry Andric static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain,
15610fca6ea1SDimitry Andric uint64_t Offset, EVT ElementType,
15620fca6ea1SDimitry Andric SDValue StVal, SDValue &InGlue,
15630fca6ea1SDimitry Andric unsigned ArgID, const SDLoc &dl) {
15640fca6ea1SDimitry Andric // Bit logic only works on integer types
15650fca6ea1SDimitry Andric if (adjustElementType(ElementType))
15660fca6ea1SDimitry Andric StVal = DAG.getNode(ISD::BITCAST, dl, ElementType, StVal);
15670fca6ea1SDimitry Andric
15680fca6ea1SDimitry Andric // Store each byte
15690fca6ea1SDimitry Andric SDVTList StoreVTs = DAG.getVTList(MVT::Other, MVT::Glue);
15700fca6ea1SDimitry Andric for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
15710fca6ea1SDimitry Andric // Shift the byte to the last byte position
15720fca6ea1SDimitry Andric SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, StVal,
15730fca6ea1SDimitry Andric DAG.getConstant(i * 8, dl, MVT::i32));
15740fca6ea1SDimitry Andric SDValue StoreOperands[] = {Chain, DAG.getConstant(ArgID, dl, MVT::i32),
15750fca6ea1SDimitry Andric DAG.getConstant(Offset + i, dl, MVT::i32),
15760fca6ea1SDimitry Andric ShiftVal, InGlue};
15770fca6ea1SDimitry Andric // Trunc store only the last byte by using
15780fca6ea1SDimitry Andric // st.param.b8
15790fca6ea1SDimitry Andric // The register type can be larger than b8.
15800fca6ea1SDimitry Andric Chain = DAG.getMemIntrinsicNode(
15810fca6ea1SDimitry Andric NVPTXISD::StoreParam, dl, StoreVTs, StoreOperands, MVT::i8,
15820fca6ea1SDimitry Andric MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
15830fca6ea1SDimitry Andric InGlue = Chain.getValue(1);
15840fca6ea1SDimitry Andric }
15850fca6ea1SDimitry Andric return Chain;
15860fca6ea1SDimitry Andric }
15870fca6ea1SDimitry Andric
15880fca6ea1SDimitry Andric // Use byte-load when the param adress of the returned value is unaligned.
15890fca6ea1SDimitry Andric // This may happen when the returned value is a field of a packed structure.
15900fca6ea1SDimitry Andric static SDValue
LowerUnalignedLoadRetParam(SelectionDAG & DAG,SDValue & Chain,uint64_t Offset,EVT ElementType,SDValue & InGlue,SmallVectorImpl<SDValue> & TempProxyRegOps,const SDLoc & dl)15910fca6ea1SDimitry Andric LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
15920fca6ea1SDimitry Andric EVT ElementType, SDValue &InGlue,
15930fca6ea1SDimitry Andric SmallVectorImpl<SDValue> &TempProxyRegOps,
15940fca6ea1SDimitry Andric const SDLoc &dl) {
15950fca6ea1SDimitry Andric // Bit logic only works on integer types
15960fca6ea1SDimitry Andric EVT MergedType = ElementType;
15970fca6ea1SDimitry Andric adjustElementType(MergedType);
15980fca6ea1SDimitry Andric
15990fca6ea1SDimitry Andric // Load each byte and construct the whole value. Initial value to 0
16000fca6ea1SDimitry Andric SDValue RetVal = DAG.getConstant(0, dl, MergedType);
16010fca6ea1SDimitry Andric // LoadParamMemI8 loads into i16 register only
16020fca6ea1SDimitry Andric SDVTList LoadVTs = DAG.getVTList(MVT::i16, MVT::Other, MVT::Glue);
16030fca6ea1SDimitry Andric for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
16040fca6ea1SDimitry Andric SDValue LoadOperands[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
16050fca6ea1SDimitry Andric DAG.getConstant(Offset + i, dl, MVT::i32),
16060fca6ea1SDimitry Andric InGlue};
16070fca6ea1SDimitry Andric // This will be selected to LoadParamMemI8
16080fca6ea1SDimitry Andric SDValue LdVal =
16090fca6ea1SDimitry Andric DAG.getMemIntrinsicNode(NVPTXISD::LoadParam, dl, LoadVTs, LoadOperands,
16100fca6ea1SDimitry Andric MVT::i8, MachinePointerInfo(), Align(1));
16110fca6ea1SDimitry Andric SDValue TmpLdVal = LdVal.getValue(0);
16120fca6ea1SDimitry Andric Chain = LdVal.getValue(1);
16130fca6ea1SDimitry Andric InGlue = LdVal.getValue(2);
16140fca6ea1SDimitry Andric
16150fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(NVPTXISD::ProxyReg, dl,
16160fca6ea1SDimitry Andric TmpLdVal.getSimpleValueType(), TmpLdVal);
16170fca6ea1SDimitry Andric TempProxyRegOps.push_back(TmpLdVal);
16180fca6ea1SDimitry Andric
16190fca6ea1SDimitry Andric SDValue CMask = DAG.getConstant(255, dl, MergedType);
16200fca6ea1SDimitry Andric SDValue CShift = DAG.getConstant(i * 8, dl, MVT::i32);
16210fca6ea1SDimitry Andric // Need to extend the i16 register to the whole width.
16220fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MergedType, TmpLdVal);
16230fca6ea1SDimitry Andric // Mask off the high bits. Leave only the lower 8bits.
16240fca6ea1SDimitry Andric // Do this because we are using loadparam.b8.
16250fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::AND, dl, MergedType, TmpLdVal, CMask);
16260fca6ea1SDimitry Andric // Shift and merge
16270fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::SHL, dl, MergedType, TmpLdVal, CShift);
16280fca6ea1SDimitry Andric RetVal = DAG.getNode(ISD::OR, dl, MergedType, RetVal, TmpLdVal);
16290fca6ea1SDimitry Andric }
16300fca6ea1SDimitry Andric if (ElementType != MergedType)
16310fca6ea1SDimitry Andric RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
16320fca6ea1SDimitry Andric
16330fca6ea1SDimitry Andric return RetVal;
16340fca6ea1SDimitry Andric }
16350fca6ea1SDimitry Andric
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const16360b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
16370b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const {
1638bdd1243dSDimitry Andric
1639bdd1243dSDimitry Andric if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30))
1640bdd1243dSDimitry Andric report_fatal_error(
1641bdd1243dSDimitry Andric "Support for variadic functions (unsized array parameter) introduced "
1642bdd1243dSDimitry Andric "in PTX ISA version 6.0 and requires target sm_30.");
1643bdd1243dSDimitry Andric
16440b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG;
16450b57cec5SDimitry Andric SDLoc dl = CLI.DL;
16460b57cec5SDimitry Andric SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
16470b57cec5SDimitry Andric SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
16480b57cec5SDimitry Andric SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
16490b57cec5SDimitry Andric SDValue Chain = CLI.Chain;
16500b57cec5SDimitry Andric SDValue Callee = CLI.Callee;
16510b57cec5SDimitry Andric bool &isTailCall = CLI.IsTailCall;
16520b57cec5SDimitry Andric ArgListTy &Args = CLI.getArgs();
16530b57cec5SDimitry Andric Type *RetTy = CLI.RetTy;
16545ffd83dbSDimitry Andric const CallBase *CB = CLI.CB;
16550b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout();
16560b57cec5SDimitry Andric
16570b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20);
16580b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported");
16590b57cec5SDimitry Andric if (!isABI)
16600b57cec5SDimitry Andric return Chain;
16610b57cec5SDimitry Andric
1662bdd1243dSDimitry Andric // Variadic arguments.
1663bdd1243dSDimitry Andric //
1664bdd1243dSDimitry Andric // Normally, for each argument, we declare a param scalar or a param
1665bdd1243dSDimitry Andric // byte array in the .param space, and store the argument value to that
1666bdd1243dSDimitry Andric // param scalar or array starting at offset 0.
1667bdd1243dSDimitry Andric //
1668bdd1243dSDimitry Andric // In the case of the first variadic argument, we declare a vararg byte array
1669bdd1243dSDimitry Andric // with size 0. The exact size of this array isn't known at this point, so
1670bdd1243dSDimitry Andric // it'll be patched later. All the variadic arguments will be stored to this
1671bdd1243dSDimitry Andric // array at a certain offset (which gets tracked by 'VAOffset'). The offset is
1672bdd1243dSDimitry Andric // initially set to 0, so it can be used for non-variadic arguments (which use
1673bdd1243dSDimitry Andric // 0 offset) to simplify the code.
1674bdd1243dSDimitry Andric //
1675bdd1243dSDimitry Andric // After all vararg is processed, 'VAOffset' holds the size of the
1676bdd1243dSDimitry Andric // vararg byte array.
1677bdd1243dSDimitry Andric
1678bdd1243dSDimitry Andric SDValue VADeclareParam; // vararg byte array
1679bdd1243dSDimitry Andric unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic
1680bdd1243dSDimitry Andric unsigned VAOffset = 0; // current offset in the param array
1681bdd1243dSDimitry Andric
1682e8d8bef9SDimitry Andric unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
168381ad6265SDimitry Andric SDValue TempChain = Chain;
1684e8d8bef9SDimitry Andric Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
168506c3fb27SDimitry Andric SDValue InGlue = Chain.getValue(1);
16860b57cec5SDimitry Andric
168781ad6265SDimitry Andric unsigned ParamCount = 0;
16880b57cec5SDimitry Andric // Args.size() and Outs.size() need not match.
16890b57cec5SDimitry Andric // Outs.size() will be larger
16900b57cec5SDimitry Andric // * if there is an aggregate argument with multiple fields (each field
16910b57cec5SDimitry Andric // showing up separately in Outs)
16920b57cec5SDimitry Andric // * if there is a vector argument with more than typical vector-length
16930b57cec5SDimitry Andric // elements (generally if more than 4) where each vector element is
16940b57cec5SDimitry Andric // individually present in Outs.
16950b57cec5SDimitry Andric // So a different index should be used for indexing into Outs/OutVals.
16960b57cec5SDimitry Andric // See similar issue in LowerFormalArguments.
16970b57cec5SDimitry Andric unsigned OIdx = 0;
16980b57cec5SDimitry Andric // Declare the .params or .reg need to pass values
16990b57cec5SDimitry Andric // to the function
17000b57cec5SDimitry Andric for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
17010b57cec5SDimitry Andric EVT VT = Outs[OIdx].VT;
17020b57cec5SDimitry Andric Type *Ty = Args[i].Ty;
1703bdd1243dSDimitry Andric bool IsVAArg = (i >= CLI.NumFixedArgs);
170481ad6265SDimitry Andric bool IsByVal = Outs[OIdx].Flags.isByVal();
17050b57cec5SDimitry Andric
17060b57cec5SDimitry Andric SmallVector<EVT, 16> VTs;
17070b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets;
170881ad6265SDimitry Andric
170981ad6265SDimitry Andric assert((!IsByVal || Args[i].IndirectType) &&
171081ad6265SDimitry Andric "byval arg must have indirect type");
171181ad6265SDimitry Andric Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
1712bdd1243dSDimitry Andric ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
171381ad6265SDimitry Andric
171481ad6265SDimitry Andric Align ArgAlign;
171581ad6265SDimitry Andric if (IsByVal) {
171681ad6265SDimitry Andric // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
171781ad6265SDimitry Andric // so we don't need to worry whether it's naturally aligned or not.
171881ad6265SDimitry Andric // See TargetLowering::LowerCallTo().
1719bdd1243dSDimitry Andric Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1720bdd1243dSDimitry Andric ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
1721bdd1243dSDimitry Andric InitialAlign, DL);
1722bdd1243dSDimitry Andric if (IsVAArg)
1723bdd1243dSDimitry Andric VAOffset = alignTo(VAOffset, ArgAlign);
172481ad6265SDimitry Andric } else {
17257a6dacacSDimitry Andric ArgAlign = getArgumentAlignment(CB, Ty, ParamCount + 1, DL);
172681ad6265SDimitry Andric }
172781ad6265SDimitry Andric
172881ad6265SDimitry Andric unsigned TypeSize =
172981ad6265SDimitry Andric (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
17300b57cec5SDimitry Andric SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
173181ad6265SDimitry Andric
17320b57cec5SDimitry Andric bool NeedAlign; // Does argument declaration specify alignment?
173306c3fb27SDimitry Andric bool PassAsArray = IsByVal || IsTypePassedAsArray(Ty);
1734bdd1243dSDimitry Andric if (IsVAArg) {
1735bdd1243dSDimitry Andric if (ParamCount == FirstVAArg) {
1736bdd1243dSDimitry Andric SDValue DeclareParamOps[] = {
1737bdd1243dSDimitry Andric Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
1738bdd1243dSDimitry Andric DAG.getConstant(ParamCount, dl, MVT::i32),
173906c3fb27SDimitry Andric DAG.getConstant(1, dl, MVT::i32), InGlue};
1740bdd1243dSDimitry Andric VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
1741bdd1243dSDimitry Andric DeclareParamVTs, DeclareParamOps);
1742bdd1243dSDimitry Andric }
174306c3fb27SDimitry Andric NeedAlign = PassAsArray;
174406c3fb27SDimitry Andric } else if (PassAsArray) {
17450b57cec5SDimitry Andric // declare .param .align <align> .b8 .param<n>[<size>];
17460b57cec5SDimitry Andric SDValue DeclareParamOps[] = {
17475ffd83dbSDimitry Andric Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
174881ad6265SDimitry Andric DAG.getConstant(ParamCount, dl, MVT::i32),
174906c3fb27SDimitry Andric DAG.getConstant(TypeSize, dl, MVT::i32), InGlue};
17500b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
17510b57cec5SDimitry Andric DeclareParamOps);
17520b57cec5SDimitry Andric NeedAlign = true;
17530b57cec5SDimitry Andric } else {
17540b57cec5SDimitry Andric // declare .param .b<size> .param<n>;
1755fcaf7f86SDimitry Andric if (VT.isInteger() || VT.isFloatingPoint()) {
17560b57cec5SDimitry Andric // PTX ABI requires integral types to be at least 32 bits in
17570b57cec5SDimitry Andric // size. FP16 is loaded/stored using i16, so it's handled
17580b57cec5SDimitry Andric // here as well.
1759fcaf7f86SDimitry Andric TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
17600b57cec5SDimitry Andric }
17610b57cec5SDimitry Andric SDValue DeclareScalarParamOps[] = {
176281ad6265SDimitry Andric Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
176381ad6265SDimitry Andric DAG.getConstant(TypeSize * 8, dl, MVT::i32),
176406c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue};
17650b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
17660b57cec5SDimitry Andric DeclareScalarParamOps);
17670b57cec5SDimitry Andric NeedAlign = false;
17680b57cec5SDimitry Andric }
176906c3fb27SDimitry Andric InGlue = Chain.getValue(1);
17700b57cec5SDimitry Andric
17710b57cec5SDimitry Andric // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
17720b57cec5SDimitry Andric // than 32-bits are sign extended or zero extended, depending on
17730b57cec5SDimitry Andric // whether they are signed or unsigned types. This case applies
17740b57cec5SDimitry Andric // only to scalar parameters and not to aggregate values.
17750b57cec5SDimitry Andric bool ExtendIntegerParam =
17760b57cec5SDimitry Andric Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
17770b57cec5SDimitry Andric
1778bdd1243dSDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
17790b57cec5SDimitry Andric SmallVector<SDValue, 6> StoreOperands;
17800b57cec5SDimitry Andric for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
178181ad6265SDimitry Andric EVT EltVT = VTs[j];
178281ad6265SDimitry Andric int CurOffset = Offsets[j];
178381ad6265SDimitry Andric MaybeAlign PartAlign;
178481ad6265SDimitry Andric if (NeedAlign)
178581ad6265SDimitry Andric PartAlign = commonAlignment(ArgAlign, CurOffset);
178681ad6265SDimitry Andric
17870b57cec5SDimitry Andric SDValue StVal = OutVals[OIdx];
1788fcaf7f86SDimitry Andric
1789fcaf7f86SDimitry Andric MVT PromotedVT;
1790fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
1791fcaf7f86SDimitry Andric EltVT = EVT(PromotedVT);
1792fcaf7f86SDimitry Andric }
1793fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
1794fcaf7f86SDimitry Andric llvm::ISD::NodeType Ext =
1795fcaf7f86SDimitry Andric Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1796fcaf7f86SDimitry Andric StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
1797fcaf7f86SDimitry Andric }
1798fcaf7f86SDimitry Andric
179981ad6265SDimitry Andric if (IsByVal) {
180081ad6265SDimitry Andric auto PtrVT = getPointerTy(DL);
180181ad6265SDimitry Andric SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
180281ad6265SDimitry Andric DAG.getConstant(CurOffset, dl, PtrVT));
180381ad6265SDimitry Andric StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
180481ad6265SDimitry Andric PartAlign);
180581ad6265SDimitry Andric } else if (ExtendIntegerParam) {
18060b57cec5SDimitry Andric assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
18070b57cec5SDimitry Andric // zext/sext to i32
18080b57cec5SDimitry Andric StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
18090b57cec5SDimitry Andric : ISD::ZERO_EXTEND,
18100b57cec5SDimitry Andric dl, MVT::i32, StVal);
181181ad6265SDimitry Andric }
181281ad6265SDimitry Andric
181381ad6265SDimitry Andric if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
18140b57cec5SDimitry Andric // Use 16-bit registers for small stores as it's the
18150b57cec5SDimitry Andric // smallest general purpose register size supported by NVPTX.
18160b57cec5SDimitry Andric StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
18170b57cec5SDimitry Andric }
18180b57cec5SDimitry Andric
18190fca6ea1SDimitry Andric // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
18200fca6ea1SDimitry Andric // scalar store. In such cases, fall back to byte stores.
18210fca6ea1SDimitry Andric if (VectorInfo[j] == PVF_SCALAR && !IsVAArg && PartAlign.has_value() &&
18220fca6ea1SDimitry Andric PartAlign.value() <
18230fca6ea1SDimitry Andric DL.getABITypeAlign(EltVT.getTypeForEVT(*DAG.getContext()))) {
18240fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Unfinished preceeding store.");
18250fca6ea1SDimitry Andric Chain = LowerUnalignedStoreParam(
18260fca6ea1SDimitry Andric DAG, Chain, IsByVal ? CurOffset + VAOffset : CurOffset, EltVT,
18270fca6ea1SDimitry Andric StVal, InGlue, ParamCount, dl);
18280fca6ea1SDimitry Andric
18290fca6ea1SDimitry Andric // LowerUnalignedStoreParam took care of inserting the necessary nodes
18300fca6ea1SDimitry Andric // into the SDAG, so just move on to the next element.
18310fca6ea1SDimitry Andric if (!IsByVal)
18320fca6ea1SDimitry Andric ++OIdx;
18330fca6ea1SDimitry Andric continue;
18340fca6ea1SDimitry Andric }
18350fca6ea1SDimitry Andric
18360fca6ea1SDimitry Andric // New store.
18370fca6ea1SDimitry Andric if (VectorInfo[j] & PVF_FIRST) {
18380fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Unfinished preceding store.");
18390fca6ea1SDimitry Andric StoreOperands.push_back(Chain);
18400fca6ea1SDimitry Andric StoreOperands.push_back(
18410fca6ea1SDimitry Andric DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
18420fca6ea1SDimitry Andric
18430fca6ea1SDimitry Andric StoreOperands.push_back(DAG.getConstant(
18440fca6ea1SDimitry Andric IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
18450fca6ea1SDimitry Andric dl, MVT::i32));
18460fca6ea1SDimitry Andric }
18470fca6ea1SDimitry Andric
18480b57cec5SDimitry Andric // Record the value to store.
18490b57cec5SDimitry Andric StoreOperands.push_back(StVal);
18500b57cec5SDimitry Andric
18510b57cec5SDimitry Andric if (VectorInfo[j] & PVF_LAST) {
18520b57cec5SDimitry Andric unsigned NumElts = StoreOperands.size() - 3;
18530b57cec5SDimitry Andric NVPTXISD::NodeType Op;
18540b57cec5SDimitry Andric switch (NumElts) {
18550b57cec5SDimitry Andric case 1:
18560b57cec5SDimitry Andric Op = NVPTXISD::StoreParam;
18570b57cec5SDimitry Andric break;
18580b57cec5SDimitry Andric case 2:
18590b57cec5SDimitry Andric Op = NVPTXISD::StoreParamV2;
18600b57cec5SDimitry Andric break;
18610b57cec5SDimitry Andric case 4:
18620b57cec5SDimitry Andric Op = NVPTXISD::StoreParamV4;
18630b57cec5SDimitry Andric break;
18640b57cec5SDimitry Andric default:
18650b57cec5SDimitry Andric llvm_unreachable("Invalid vector info.");
18660b57cec5SDimitry Andric }
18670b57cec5SDimitry Andric
186806c3fb27SDimitry Andric StoreOperands.push_back(InGlue);
18690b57cec5SDimitry Andric
18700b57cec5SDimitry Andric // Adjust type of the store op if we've extended the scalar
18710b57cec5SDimitry Andric // return value.
187281ad6265SDimitry Andric EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
18730b57cec5SDimitry Andric
18740b57cec5SDimitry Andric Chain = DAG.getMemIntrinsicNode(
18750b57cec5SDimitry Andric Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
187681ad6265SDimitry Andric TheStoreType, MachinePointerInfo(), PartAlign,
18770b57cec5SDimitry Andric MachineMemOperand::MOStore);
187806c3fb27SDimitry Andric InGlue = Chain.getValue(1);
18790b57cec5SDimitry Andric
18800b57cec5SDimitry Andric // Cleanup.
18810b57cec5SDimitry Andric StoreOperands.clear();
1882bdd1243dSDimitry Andric
1883bdd1243dSDimitry Andric // TODO: We may need to support vector types that can be passed
1884bdd1243dSDimitry Andric // as scalars in variadic arguments.
1885bdd1243dSDimitry Andric if (!IsByVal && IsVAArg) {
1886bdd1243dSDimitry Andric assert(NumElts == 1 &&
1887bdd1243dSDimitry Andric "Vectorization is expected to be disabled for variadics.");
1888bdd1243dSDimitry Andric VAOffset += DL.getTypeAllocSize(
1889bdd1243dSDimitry Andric TheStoreType.getTypeForEVT(*DAG.getContext()));
1890bdd1243dSDimitry Andric }
18910b57cec5SDimitry Andric }
189281ad6265SDimitry Andric if (!IsByVal)
18930b57cec5SDimitry Andric ++OIdx;
18940b57cec5SDimitry Andric }
18950b57cec5SDimitry Andric assert(StoreOperands.empty() && "Unfinished parameter store.");
189681ad6265SDimitry Andric if (!IsByVal && VTs.size() > 0)
18970b57cec5SDimitry Andric --OIdx;
189881ad6265SDimitry Andric ++ParamCount;
1899bdd1243dSDimitry Andric if (IsByVal && IsVAArg)
1900bdd1243dSDimitry Andric VAOffset += TypeSize;
19010b57cec5SDimitry Andric }
19020b57cec5SDimitry Andric
19030b57cec5SDimitry Andric GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1904bdd1243dSDimitry Andric MaybeAlign retAlignment = std::nullopt;
19050b57cec5SDimitry Andric
19060b57cec5SDimitry Andric // Handle Result
19070b57cec5SDimitry Andric if (Ins.size() > 0) {
19080b57cec5SDimitry Andric SmallVector<EVT, 16> resvtparts;
19090b57cec5SDimitry Andric ComputeValueVTs(*this, DL, RetTy, resvtparts);
19100b57cec5SDimitry Andric
19110b57cec5SDimitry Andric // Declare
191206c3fb27SDimitry Andric // .param .align N .b8 retval0[<size-in-bytes>], or
19130b57cec5SDimitry Andric // .param .b<size-in-bits> retval0
19140b57cec5SDimitry Andric unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
191506c3fb27SDimitry Andric if (!IsTypePassedAsArray(RetTy)) {
1916fcaf7f86SDimitry Andric resultsz = promoteScalarArgumentSize(resultsz);
19170b57cec5SDimitry Andric SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19180b57cec5SDimitry Andric SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
19190b57cec5SDimitry Andric DAG.getConstant(resultsz, dl, MVT::i32),
192006c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue };
19210b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
19220b57cec5SDimitry Andric DeclareRetOps);
192306c3fb27SDimitry Andric InGlue = Chain.getValue(1);
19240b57cec5SDimitry Andric } else {
19257a6dacacSDimitry Andric retAlignment = getArgumentAlignment(CB, RetTy, 0, DL);
19265ffd83dbSDimitry Andric assert(retAlignment && "retAlignment is guaranteed to be set");
19270b57cec5SDimitry Andric SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19285ffd83dbSDimitry Andric SDValue DeclareRetOps[] = {
19295ffd83dbSDimitry Andric Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
19300b57cec5SDimitry Andric DAG.getConstant(resultsz / 8, dl, MVT::i32),
193106c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue};
19320b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
19330b57cec5SDimitry Andric DeclareRetOps);
193406c3fb27SDimitry Andric InGlue = Chain.getValue(1);
19350b57cec5SDimitry Andric }
19360b57cec5SDimitry Andric }
19370b57cec5SDimitry Andric
1938bdd1243dSDimitry Andric bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
1939bdd1243dSDimitry Andric // Set the size of the vararg param byte array if the callee is a variadic
1940bdd1243dSDimitry Andric // function and the variadic part is not empty.
1941bdd1243dSDimitry Andric if (HasVAArgs) {
1942bdd1243dSDimitry Andric SDValue DeclareParamOps[] = {
1943bdd1243dSDimitry Andric VADeclareParam.getOperand(0), VADeclareParam.getOperand(1),
1944bdd1243dSDimitry Andric VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32),
1945bdd1243dSDimitry Andric VADeclareParam.getOperand(4)};
1946bdd1243dSDimitry Andric DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(),
1947bdd1243dSDimitry Andric VADeclareParam->getVTList(), DeclareParamOps);
1948bdd1243dSDimitry Andric }
1949bdd1243dSDimitry Andric
19500b57cec5SDimitry Andric // Both indirect calls and libcalls have nullptr Func. In order to distinguish
19510b57cec5SDimitry Andric // between them we must rely on the call site value which is valid for
19520b57cec5SDimitry Andric // indirect calls but is always null for libcalls.
19535ffd83dbSDimitry Andric bool isIndirectCall = !Func && CB;
19540b57cec5SDimitry Andric
19550b57cec5SDimitry Andric if (isa<ExternalSymbolSDNode>(Callee)) {
19560b57cec5SDimitry Andric Function* CalleeFunc = nullptr;
19570b57cec5SDimitry Andric
19580b57cec5SDimitry Andric // Try to find the callee in the current module.
19590b57cec5SDimitry Andric Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
19600b57cec5SDimitry Andric assert(CalleeFunc != nullptr && "Libcall callee must be set.");
19610b57cec5SDimitry Andric
19620b57cec5SDimitry Andric // Set the "libcall callee" attribute to indicate that the function
19630b57cec5SDimitry Andric // must always have a declaration.
19640b57cec5SDimitry Andric CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
19650b57cec5SDimitry Andric }
19660b57cec5SDimitry Andric
19670b57cec5SDimitry Andric if (isIndirectCall) {
19680b57cec5SDimitry Andric // This is indirect function call case : PTX requires a prototype of the
19690b57cec5SDimitry Andric // form
19700b57cec5SDimitry Andric // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
19710b57cec5SDimitry Andric // to be emitted, and the label has to used as the last arg of call
19720b57cec5SDimitry Andric // instruction.
19730b57cec5SDimitry Andric // The prototype is embedded in a string and put as the operand for a
19740b57cec5SDimitry Andric // CallPrototype SDNode which will print out to the value of the string.
19750b57cec5SDimitry Andric SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1976bdd1243dSDimitry Andric std::string Proto = getPrototype(
1977bdd1243dSDimitry Andric DL, RetTy, Args, Outs, retAlignment,
1978bdd1243dSDimitry Andric HasVAArgs
1979bdd1243dSDimitry Andric ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
1980297eecfbSDimitry Andric CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1)))
1981bdd1243dSDimitry Andric : std::nullopt,
1982bdd1243dSDimitry Andric *CB, UniqueCallSite);
1983bdd1243dSDimitry Andric const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
19840b57cec5SDimitry Andric SDValue ProtoOps[] = {
1985bdd1243dSDimitry Andric Chain,
1986bdd1243dSDimitry Andric DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
198706c3fb27SDimitry Andric InGlue,
19880b57cec5SDimitry Andric };
19890b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
199006c3fb27SDimitry Andric InGlue = Chain.getValue(1);
19910b57cec5SDimitry Andric }
19920b57cec5SDimitry Andric // Op to just print "call"
19930b57cec5SDimitry Andric SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19940b57cec5SDimitry Andric SDValue PrintCallOps[] = {
199506c3fb27SDimitry Andric Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue
19960b57cec5SDimitry Andric };
19970b57cec5SDimitry Andric // We model convergent calls as separate opcodes.
19980b57cec5SDimitry Andric unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
19990b57cec5SDimitry Andric if (CLI.IsConvergent)
20000b57cec5SDimitry Andric Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
20010b57cec5SDimitry Andric : NVPTXISD::PrintConvergentCall;
20020b57cec5SDimitry Andric Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
200306c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20040b57cec5SDimitry Andric
20050b57cec5SDimitry Andric // Ops to print out the function name
20060b57cec5SDimitry Andric SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
200706c3fb27SDimitry Andric SDValue CallVoidOps[] = { Chain, Callee, InGlue };
20080b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
200906c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20100b57cec5SDimitry Andric
20110b57cec5SDimitry Andric // Ops to print out the param list
20120b57cec5SDimitry Andric SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
201306c3fb27SDimitry Andric SDValue CallArgBeginOps[] = { Chain, InGlue };
20140b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
20150b57cec5SDimitry Andric CallArgBeginOps);
201606c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20170b57cec5SDimitry Andric
2018bdd1243dSDimitry Andric for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
2019bdd1243dSDimitry Andric ++i) {
20200b57cec5SDimitry Andric unsigned opcode;
20210b57cec5SDimitry Andric if (i == (e - 1))
20220b57cec5SDimitry Andric opcode = NVPTXISD::LastCallArg;
20230b57cec5SDimitry Andric else
20240b57cec5SDimitry Andric opcode = NVPTXISD::CallArg;
20250b57cec5SDimitry Andric SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20260b57cec5SDimitry Andric SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
202706c3fb27SDimitry Andric DAG.getConstant(i, dl, MVT::i32), InGlue };
20280b57cec5SDimitry Andric Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
202906c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20300b57cec5SDimitry Andric }
20310b57cec5SDimitry Andric SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20320b57cec5SDimitry Andric SDValue CallArgEndOps[] = { Chain,
20330b57cec5SDimitry Andric DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
203406c3fb27SDimitry Andric InGlue };
20350b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
203606c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20370b57cec5SDimitry Andric
20380b57cec5SDimitry Andric if (isIndirectCall) {
20390b57cec5SDimitry Andric SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
2040e8d8bef9SDimitry Andric SDValue PrototypeOps[] = {
204106c3fb27SDimitry Andric Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
20420b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
204306c3fb27SDimitry Andric InGlue = Chain.getValue(1);
20440b57cec5SDimitry Andric }
20450b57cec5SDimitry Andric
20460b57cec5SDimitry Andric SmallVector<SDValue, 16> ProxyRegOps;
2047bdd1243dSDimitry Andric SmallVector<std::optional<MVT>, 16> ProxyRegTruncates;
20480fca6ea1SDimitry Andric // An item of the vector is filled if the element does not need a ProxyReg
20490fca6ea1SDimitry Andric // operation on it and should be added to InVals as is. ProxyRegOps and
20500fca6ea1SDimitry Andric // ProxyRegTruncates contain empty/none items at the same index.
20510fca6ea1SDimitry Andric SmallVector<SDValue, 16> RetElts;
  // Temporary ProxyReg operations inserted in `LowerUnalignedLoadRetParam()`
  // to use the values of `LoadParam`s; they are replaced later, when
  // `CALLSEQ_END` is added.
20550fca6ea1SDimitry Andric SmallVector<SDValue, 16> TempProxyRegOps;
20560b57cec5SDimitry Andric
20570b57cec5SDimitry Andric // Generate loads from param memory/moves from registers for result
20580b57cec5SDimitry Andric if (Ins.size() > 0) {
20590b57cec5SDimitry Andric SmallVector<EVT, 16> VTs;
20600b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets;
20610b57cec5SDimitry Andric ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
20620b57cec5SDimitry Andric assert(VTs.size() == Ins.size() && "Bad value decomposition");
20630b57cec5SDimitry Andric
20647a6dacacSDimitry Andric Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
20650b57cec5SDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
20660b57cec5SDimitry Andric
20670b57cec5SDimitry Andric SmallVector<EVT, 6> LoadVTs;
20680b57cec5SDimitry Andric int VecIdx = -1; // Index of the first element of the vector.
20690b57cec5SDimitry Andric
20700b57cec5SDimitry Andric // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
20710b57cec5SDimitry Andric // 32-bits are sign extended or zero extended, depending on whether
20720b57cec5SDimitry Andric // they are signed or unsigned types.
20730b57cec5SDimitry Andric bool ExtendIntegerRetVal =
20740b57cec5SDimitry Andric RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
20750b57cec5SDimitry Andric
20760b57cec5SDimitry Andric for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
20770b57cec5SDimitry Andric bool needTruncate = false;
20780b57cec5SDimitry Andric EVT TheLoadType = VTs[i];
20790b57cec5SDimitry Andric EVT EltType = Ins[i].VT;
20805ffd83dbSDimitry Andric Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
2081fcaf7f86SDimitry Andric MVT PromotedVT;
2082fcaf7f86SDimitry Andric
2083fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
2084fcaf7f86SDimitry Andric TheLoadType = EVT(PromotedVT);
2085fcaf7f86SDimitry Andric EltType = EVT(PromotedVT);
2086fcaf7f86SDimitry Andric needTruncate = true;
2087fcaf7f86SDimitry Andric }
2088fcaf7f86SDimitry Andric
20890b57cec5SDimitry Andric if (ExtendIntegerRetVal) {
20900b57cec5SDimitry Andric TheLoadType = MVT::i32;
20910b57cec5SDimitry Andric EltType = MVT::i32;
20920b57cec5SDimitry Andric needTruncate = true;
20930b57cec5SDimitry Andric } else if (TheLoadType.getSizeInBits() < 16) {
20940b57cec5SDimitry Andric if (VTs[i].isInteger())
20950b57cec5SDimitry Andric needTruncate = true;
20960b57cec5SDimitry Andric EltType = MVT::i16;
20970b57cec5SDimitry Andric }
20980b57cec5SDimitry Andric
20990fca6ea1SDimitry Andric // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
21000fca6ea1SDimitry Andric // scalar load. In such cases, fall back to byte loads.
21010fca6ea1SDimitry Andric if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() &&
21020fca6ea1SDimitry Andric EltAlign < DL.getABITypeAlign(
21030fca6ea1SDimitry Andric TheLoadType.getTypeForEVT(*DAG.getContext()))) {
21040fca6ea1SDimitry Andric assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
21050fca6ea1SDimitry Andric SDValue Ret = LowerUnalignedLoadRetParam(
21060fca6ea1SDimitry Andric DAG, Chain, Offsets[i], TheLoadType, InGlue, TempProxyRegOps, dl);
21070fca6ea1SDimitry Andric ProxyRegOps.push_back(SDValue());
21080fca6ea1SDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>());
21090fca6ea1SDimitry Andric RetElts.resize(i);
21100fca6ea1SDimitry Andric RetElts.push_back(Ret);
21110fca6ea1SDimitry Andric
21120fca6ea1SDimitry Andric continue;
21130fca6ea1SDimitry Andric }
21140fca6ea1SDimitry Andric
21150b57cec5SDimitry Andric // Record index of the very first element of the vector.
21160b57cec5SDimitry Andric if (VectorInfo[i] & PVF_FIRST) {
21170b57cec5SDimitry Andric assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
21180b57cec5SDimitry Andric VecIdx = i;
21190b57cec5SDimitry Andric }
21200b57cec5SDimitry Andric
21210b57cec5SDimitry Andric LoadVTs.push_back(EltType);
21220b57cec5SDimitry Andric
21230b57cec5SDimitry Andric if (VectorInfo[i] & PVF_LAST) {
21240b57cec5SDimitry Andric unsigned NumElts = LoadVTs.size();
21250b57cec5SDimitry Andric LoadVTs.push_back(MVT::Other);
21260b57cec5SDimitry Andric LoadVTs.push_back(MVT::Glue);
21270b57cec5SDimitry Andric NVPTXISD::NodeType Op;
21280b57cec5SDimitry Andric switch (NumElts) {
21290b57cec5SDimitry Andric case 1:
21300b57cec5SDimitry Andric Op = NVPTXISD::LoadParam;
21310b57cec5SDimitry Andric break;
21320b57cec5SDimitry Andric case 2:
21330b57cec5SDimitry Andric Op = NVPTXISD::LoadParamV2;
21340b57cec5SDimitry Andric break;
21350b57cec5SDimitry Andric case 4:
21360b57cec5SDimitry Andric Op = NVPTXISD::LoadParamV4;
21370b57cec5SDimitry Andric break;
21380b57cec5SDimitry Andric default:
21390b57cec5SDimitry Andric llvm_unreachable("Invalid vector info.");
21400b57cec5SDimitry Andric }
21410b57cec5SDimitry Andric
21420b57cec5SDimitry Andric SDValue LoadOperands[] = {
21430b57cec5SDimitry Andric Chain, DAG.getConstant(1, dl, MVT::i32),
214406c3fb27SDimitry Andric DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};
21450b57cec5SDimitry Andric SDValue RetVal = DAG.getMemIntrinsicNode(
21460b57cec5SDimitry Andric Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
21470b57cec5SDimitry Andric MachinePointerInfo(), EltAlign,
21480b57cec5SDimitry Andric MachineMemOperand::MOLoad);
21490b57cec5SDimitry Andric
21500b57cec5SDimitry Andric for (unsigned j = 0; j < NumElts; ++j) {
21510b57cec5SDimitry Andric ProxyRegOps.push_back(RetVal.getValue(j));
21520b57cec5SDimitry Andric
21530b57cec5SDimitry Andric if (needTruncate)
2154bdd1243dSDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
21550b57cec5SDimitry Andric else
2156bdd1243dSDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>());
21570b57cec5SDimitry Andric }
21580b57cec5SDimitry Andric
21590b57cec5SDimitry Andric Chain = RetVal.getValue(NumElts);
216006c3fb27SDimitry Andric InGlue = RetVal.getValue(NumElts + 1);
21610b57cec5SDimitry Andric
21620b57cec5SDimitry Andric // Cleanup
21630b57cec5SDimitry Andric VecIdx = -1;
21640b57cec5SDimitry Andric LoadVTs.clear();
21650b57cec5SDimitry Andric }
21660b57cec5SDimitry Andric }
21670b57cec5SDimitry Andric }
21680b57cec5SDimitry Andric
2169bdd1243dSDimitry Andric Chain =
217006c3fb27SDimitry Andric DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
217106c3fb27SDimitry Andric InGlue = Chain.getValue(1);
21720b57cec5SDimitry Andric
21730b57cec5SDimitry Andric // Append ProxyReg instructions to the chain to make sure that `callseq_end`
21740b57cec5SDimitry Andric // will not get lost. Otherwise, during libcalls expansion, the nodes can become
21750b57cec5SDimitry Andric // dangling.
21760b57cec5SDimitry Andric for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
21770fca6ea1SDimitry Andric if (i < RetElts.size() && RetElts[i]) {
21780fca6ea1SDimitry Andric InVals.push_back(RetElts[i]);
21790fca6ea1SDimitry Andric continue;
21800fca6ea1SDimitry Andric }
21810fca6ea1SDimitry Andric
21820b57cec5SDimitry Andric SDValue Ret = DAG.getNode(
21830b57cec5SDimitry Andric NVPTXISD::ProxyReg, dl,
21840b57cec5SDimitry Andric DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
218506c3fb27SDimitry Andric { Chain, ProxyRegOps[i], InGlue }
21860b57cec5SDimitry Andric );
21870b57cec5SDimitry Andric
21880b57cec5SDimitry Andric Chain = Ret.getValue(1);
218906c3fb27SDimitry Andric InGlue = Ret.getValue(2);
21900b57cec5SDimitry Andric
219181ad6265SDimitry Andric if (ProxyRegTruncates[i]) {
2192bdd1243dSDimitry Andric Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
21930b57cec5SDimitry Andric }
21940b57cec5SDimitry Andric
21950b57cec5SDimitry Andric InVals.push_back(Ret);
21960b57cec5SDimitry Andric }
21970b57cec5SDimitry Andric
21980fca6ea1SDimitry Andric for (SDValue &T : TempProxyRegOps) {
21990fca6ea1SDimitry Andric SDValue Repl = DAG.getNode(
22000fca6ea1SDimitry Andric NVPTXISD::ProxyReg, dl,
22010fca6ea1SDimitry Andric DAG.getVTList(T.getSimpleValueType(), MVT::Other, MVT::Glue),
22020fca6ea1SDimitry Andric {Chain, T.getOperand(0), InGlue});
22030fca6ea1SDimitry Andric DAG.ReplaceAllUsesWith(T, Repl);
22040fca6ea1SDimitry Andric DAG.RemoveDeadNode(T.getNode());
22050fca6ea1SDimitry Andric
22060fca6ea1SDimitry Andric Chain = Repl.getValue(1);
22070fca6ea1SDimitry Andric InGlue = Repl.getValue(2);
22080fca6ea1SDimitry Andric }
22090fca6ea1SDimitry Andric
22100b57cec5SDimitry Andric // set isTailCall to false for now, until we figure out how to express
22110b57cec5SDimitry Andric // tail call optimization in PTX
22120b57cec5SDimitry Andric isTailCall = false;
22130b57cec5SDimitry Andric return Chain;
22140b57cec5SDimitry Andric }
22150b57cec5SDimitry Andric
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const22165f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
22175f757f3fSDimitry Andric SelectionDAG &DAG) const {
22180fca6ea1SDimitry Andric
22190fca6ea1SDimitry Andric if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
22205f757f3fSDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction();
22215f757f3fSDimitry Andric
22225f757f3fSDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca(
22230fca6ea1SDimitry Andric Fn,
22240fca6ea1SDimitry Andric "Support for dynamic alloca introduced in PTX ISA version 7.3 and "
22250fca6ea1SDimitry Andric "requires target sm_52.",
22265f757f3fSDimitry Andric SDLoc(Op).getDebugLoc());
22275f757f3fSDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca);
22280fca6ea1SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()),
22290fca6ea1SDimitry Andric Op.getOperand(0)};
22305f757f3fSDimitry Andric return DAG.getMergeValues(Ops, SDLoc());
22315f757f3fSDimitry Andric }
22325f757f3fSDimitry Andric
22330fca6ea1SDimitry Andric SDValue Chain = Op.getOperand(0);
22340fca6ea1SDimitry Andric SDValue Size = Op.getOperand(1);
22350fca6ea1SDimitry Andric uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
22360fca6ea1SDimitry Andric SDLoc DL(Op.getNode());
22370fca6ea1SDimitry Andric
22380fca6ea1SDimitry Andric // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
22390fca6ea1SDimitry Andric if (nvTM->is64Bit())
22400fca6ea1SDimitry Andric Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64);
22410fca6ea1SDimitry Andric else
22420fca6ea1SDimitry Andric Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
22430fca6ea1SDimitry Andric
22440fca6ea1SDimitry Andric SDValue AllocOps[] = {Chain, Size,
22450fca6ea1SDimitry Andric DAG.getTargetConstant(Align, DL, MVT::i32)};
22460fca6ea1SDimitry Andric SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL,
22470fca6ea1SDimitry Andric nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps);
22480fca6ea1SDimitry Andric
22490fca6ea1SDimitry Andric SDValue MergeOps[] = {Alloca, Chain};
22500fca6ea1SDimitry Andric return DAG.getMergeValues(MergeOps, DL);
22510fca6ea1SDimitry Andric }
22520fca6ea1SDimitry Andric
22530b57cec5SDimitry Andric // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
22540b57cec5SDimitry Andric // (see LegalizeDAG.cpp). This is slow and uses local memory.
22550b57cec5SDimitry Andric // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
22560b57cec5SDimitry Andric SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const22570b57cec5SDimitry Andric NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
22580b57cec5SDimitry Andric SDNode *Node = Op.getNode();
22590b57cec5SDimitry Andric SDLoc dl(Node);
22600b57cec5SDimitry Andric SmallVector<SDValue, 8> Ops;
22610b57cec5SDimitry Andric unsigned NumOperands = Node->getNumOperands();
22620b57cec5SDimitry Andric for (unsigned i = 0; i < NumOperands; ++i) {
22630b57cec5SDimitry Andric SDValue SubOp = Node->getOperand(i);
22640b57cec5SDimitry Andric EVT VVT = SubOp.getNode()->getValueType(0);
22650b57cec5SDimitry Andric EVT EltVT = VVT.getVectorElementType();
22660b57cec5SDimitry Andric unsigned NumSubElem = VVT.getVectorNumElements();
22670b57cec5SDimitry Andric for (unsigned j = 0; j < NumSubElem; ++j) {
22680b57cec5SDimitry Andric Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
22690b57cec5SDimitry Andric DAG.getIntPtrConstant(j, dl)));
22700b57cec5SDimitry Andric }
22710b57cec5SDimitry Andric }
22720b57cec5SDimitry Andric return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
22730b57cec5SDimitry Andric }
22740b57cec5SDimitry Andric
22755f757f3fSDimitry Andric // We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it
22760b57cec5SDimitry Andric // would get lowered as two constant loads and vector-packing move.
22770b57cec5SDimitry Andric // Instead we want just a constant move:
22785f757f3fSDimitry Andric // mov.b32 %r2, 0x40003C00
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const22790b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
22800b57cec5SDimitry Andric SelectionDAG &DAG) const {
22815f757f3fSDimitry Andric EVT VT = Op->getValueType(0);
22825f757f3fSDimitry Andric if (!(Isv2x16VT(VT) || VT == MVT::v4i8))
22830b57cec5SDimitry Andric return Op;
22840b57cec5SDimitry Andric
22855f757f3fSDimitry Andric SDLoc DL(Op);
22865f757f3fSDimitry Andric
22875f757f3fSDimitry Andric if (!llvm::all_of(Op->ops(), [](SDValue Operand) {
22885f757f3fSDimitry Andric return Operand->isUndef() || isa<ConstantSDNode>(Operand) ||
22895f757f3fSDimitry Andric isa<ConstantFPSDNode>(Operand);
22905f757f3fSDimitry Andric })) {
22915f757f3fSDimitry Andric // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us
22925f757f3fSDimitry Andric // to optimize calculation of constant parts.
22935f757f3fSDimitry Andric if (VT == MVT::v4i8) {
22945f757f3fSDimitry Andric SDValue C8 = DAG.getConstant(8, DL, MVT::i32);
22955f757f3fSDimitry Andric SDValue E01 = DAG.getNode(
22965f757f3fSDimitry Andric NVPTXISD::BFI, DL, MVT::i32,
22975f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32),
22985f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8);
22995f757f3fSDimitry Andric SDValue E012 =
23005f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23015f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32),
23025f757f3fSDimitry Andric E01, DAG.getConstant(16, DL, MVT::i32), C8);
23035f757f3fSDimitry Andric SDValue E0123 =
23045f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23055f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32),
23065f757f3fSDimitry Andric E012, DAG.getConstant(24, DL, MVT::i32), C8);
23075f757f3fSDimitry Andric return DAG.getNode(ISD::BITCAST, DL, VT, E0123);
23085f757f3fSDimitry Andric }
23095f757f3fSDimitry Andric return Op;
23105f757f3fSDimitry Andric }
23115f757f3fSDimitry Andric
23125f757f3fSDimitry Andric // Get value or the Nth operand as an APInt(32). Undef values treated as 0.
23135f757f3fSDimitry Andric auto GetOperand = [](SDValue Op, int N) -> APInt {
23145f757f3fSDimitry Andric const SDValue &Operand = Op->getOperand(N);
23155f757f3fSDimitry Andric EVT VT = Op->getValueType(0);
23165f757f3fSDimitry Andric if (Operand->isUndef())
23175f757f3fSDimitry Andric return APInt(32, 0);
23185f757f3fSDimitry Andric APInt Value;
23195f757f3fSDimitry Andric if (VT == MVT::v2f16 || VT == MVT::v2bf16)
23205f757f3fSDimitry Andric Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
23215f757f3fSDimitry Andric else if (VT == MVT::v2i16 || VT == MVT::v4i8)
2322297eecfbSDimitry Andric Value = Operand->getAsAPIntVal();
23235f757f3fSDimitry Andric else
23245f757f3fSDimitry Andric llvm_unreachable("Unsupported type");
23255f757f3fSDimitry Andric // i8 values are carried around as i16, so we need to zero out upper bits,
23265f757f3fSDimitry Andric // so they do not get in the way of combining individual byte values
23275f757f3fSDimitry Andric if (VT == MVT::v4i8)
23285f757f3fSDimitry Andric Value = Value.trunc(8);
23295f757f3fSDimitry Andric return Value.zext(32);
23305f757f3fSDimitry Andric };
23315f757f3fSDimitry Andric APInt Value;
23325f757f3fSDimitry Andric if (Isv2x16VT(VT)) {
23335f757f3fSDimitry Andric Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16);
23345f757f3fSDimitry Andric } else if (VT == MVT::v4i8) {
23355f757f3fSDimitry Andric Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) |
23365f757f3fSDimitry Andric GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24);
23375f757f3fSDimitry Andric } else {
23385f757f3fSDimitry Andric llvm_unreachable("Unsupported type");
23395f757f3fSDimitry Andric }
23405f757f3fSDimitry Andric SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32);
234106c3fb27SDimitry Andric return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const);
23420b57cec5SDimitry Andric }
23430b57cec5SDimitry Andric
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const23440b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
23450b57cec5SDimitry Andric SelectionDAG &DAG) const {
23460b57cec5SDimitry Andric SDValue Index = Op->getOperand(1);
23475f757f3fSDimitry Andric SDValue Vector = Op->getOperand(0);
23485f757f3fSDimitry Andric SDLoc DL(Op);
23495f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType();
23505f757f3fSDimitry Andric
23515f757f3fSDimitry Andric if (VectorVT == MVT::v4i8) {
23525f757f3fSDimitry Andric SDValue BFE =
23535f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFE, DL, MVT::i32,
23545f757f3fSDimitry Andric {Vector,
23555f757f3fSDimitry Andric DAG.getNode(ISD::MUL, DL, MVT::i32,
23565f757f3fSDimitry Andric DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23575f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)),
23585f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)});
23595f757f3fSDimitry Andric return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0));
23605f757f3fSDimitry Andric }
23615f757f3fSDimitry Andric
23620b57cec5SDimitry Andric // Constant index will be matched by tablegen.
23630b57cec5SDimitry Andric if (isa<ConstantSDNode>(Index.getNode()))
23640b57cec5SDimitry Andric return Op;
23650b57cec5SDimitry Andric
23660b57cec5SDimitry Andric // Extract individual elements and select one of them.
23675f757f3fSDimitry Andric assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
23680b57cec5SDimitry Andric EVT EltVT = VectorVT.getVectorElementType();
23690b57cec5SDimitry Andric
23700b57cec5SDimitry Andric SDLoc dl(Op.getNode());
23710b57cec5SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23720b57cec5SDimitry Andric DAG.getIntPtrConstant(0, dl));
23730b57cec5SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23740b57cec5SDimitry Andric DAG.getIntPtrConstant(1, dl));
23750b57cec5SDimitry Andric return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
23760b57cec5SDimitry Andric ISD::CondCode::SETEQ);
23770b57cec5SDimitry Andric }
23780b57cec5SDimitry Andric
LowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const23795f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
23805f757f3fSDimitry Andric SelectionDAG &DAG) const {
23815f757f3fSDimitry Andric SDValue Vector = Op->getOperand(0);
23825f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType();
23835f757f3fSDimitry Andric
23845f757f3fSDimitry Andric if (VectorVT != MVT::v4i8)
23855f757f3fSDimitry Andric return Op;
23865f757f3fSDimitry Andric SDLoc DL(Op);
23875f757f3fSDimitry Andric SDValue Value = Op->getOperand(1);
23885f757f3fSDimitry Andric if (Value->isUndef())
23895f757f3fSDimitry Andric return Vector;
23905f757f3fSDimitry Andric
23915f757f3fSDimitry Andric SDValue Index = Op->getOperand(2);
23925f757f3fSDimitry Andric
23935f757f3fSDimitry Andric SDValue BFI =
23945f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23955f757f3fSDimitry Andric {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector,
23965f757f3fSDimitry Andric DAG.getNode(ISD::MUL, DL, MVT::i32,
23975f757f3fSDimitry Andric DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23985f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)),
23995f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)});
24005f757f3fSDimitry Andric return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI);
24015f757f3fSDimitry Andric }
24025f757f3fSDimitry Andric
LowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const24035f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
24045f757f3fSDimitry Andric SelectionDAG &DAG) const {
24055f757f3fSDimitry Andric SDValue V1 = Op.getOperand(0);
24065f757f3fSDimitry Andric EVT VectorVT = V1.getValueType();
24075f757f3fSDimitry Andric if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8)
24085f757f3fSDimitry Andric return Op;
24095f757f3fSDimitry Andric
24105f757f3fSDimitry Andric // Lower shuffle to PRMT instruction.
24115f757f3fSDimitry Andric const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
24125f757f3fSDimitry Andric SDValue V2 = Op.getOperand(1);
24135f757f3fSDimitry Andric uint32_t Selector = 0;
24147a6dacacSDimitry Andric for (auto I : llvm::enumerate(SVN->getMask())) {
24157a6dacacSDimitry Andric if (I.value() != -1) // -1 is a placeholder for undef.
24165f757f3fSDimitry Andric Selector |= (I.value() << (I.index() * 4));
24177a6dacacSDimitry Andric }
24185f757f3fSDimitry Andric
24195f757f3fSDimitry Andric SDLoc DL(Op);
24205f757f3fSDimitry Andric return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
24215f757f3fSDimitry Andric DAG.getConstant(Selector, DL, MVT::i32),
24225f757f3fSDimitry Andric DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32));
24235f757f3fSDimitry Andric }
24240b57cec5SDimitry Andric /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
24250b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24260b57cec5SDimitry Andric /// amount, or
24270b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24280b57cec5SDimitry Andric /// amount.
LowerShiftRightParts(SDValue Op,SelectionDAG & DAG) const24290b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
24300b57cec5SDimitry Andric SelectionDAG &DAG) const {
24310b57cec5SDimitry Andric assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24320b57cec5SDimitry Andric assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
24330b57cec5SDimitry Andric
24340b57cec5SDimitry Andric EVT VT = Op.getValueType();
24350b57cec5SDimitry Andric unsigned VTBits = VT.getSizeInBits();
24360b57cec5SDimitry Andric SDLoc dl(Op);
24370b57cec5SDimitry Andric SDValue ShOpLo = Op.getOperand(0);
24380b57cec5SDimitry Andric SDValue ShOpHi = Op.getOperand(1);
24390b57cec5SDimitry Andric SDValue ShAmt = Op.getOperand(2);
24400b57cec5SDimitry Andric unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
24410b57cec5SDimitry Andric
24420b57cec5SDimitry Andric if (VTBits == 32 && STI.getSmVersion() >= 35) {
24430b57cec5SDimitry Andric // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
24440b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} >> Amt
24450b57cec5SDimitry Andric // dHi = aHi >> Amt
24460b57cec5SDimitry Andric // dLo = shf.r.clamp aLo, aHi, Amt
24470b57cec5SDimitry Andric
24480b57cec5SDimitry Andric SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24490b57cec5SDimitry Andric SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
24500b57cec5SDimitry Andric ShAmt);
24510b57cec5SDimitry Andric
24520b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi };
24530b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl);
24540b57cec5SDimitry Andric }
24550b57cec5SDimitry Andric else {
24560b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} >> Amt
24570b57cec5SDimitry Andric // - if (Amt>=size) then
24580b57cec5SDimitry Andric // dLo = aHi >> (Amt-size)
24590b57cec5SDimitry Andric // dHi = aHi >> Amt (this is either all 0 or all 1)
24600b57cec5SDimitry Andric // else
24610b57cec5SDimitry Andric // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
24620b57cec5SDimitry Andric // dHi = aHi >> Amt
24630b57cec5SDimitry Andric
24640b57cec5SDimitry Andric SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
24650b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32),
24660b57cec5SDimitry Andric ShAmt);
24670b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
24680b57cec5SDimitry Andric SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
24690b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32));
24700b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
24710b57cec5SDimitry Andric SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
24720b57cec5SDimitry Andric SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
24730b57cec5SDimitry Andric
24740b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
24750b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32),
24760b57cec5SDimitry Andric ISD::SETGE);
24770b57cec5SDimitry Andric SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24780b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
24790b57cec5SDimitry Andric
24800b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi };
24810b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl);
24820b57cec5SDimitry Andric }
24830b57cec5SDimitry Andric }
24840b57cec5SDimitry Andric
24850b57cec5SDimitry Andric /// LowerShiftLeftParts - Lower SHL_PARTS, which
24860b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24870b57cec5SDimitry Andric /// amount, or
24880b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24890b57cec5SDimitry Andric /// amount.
LowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const24900b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
24910b57cec5SDimitry Andric SelectionDAG &DAG) const {
24920b57cec5SDimitry Andric assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24930b57cec5SDimitry Andric assert(Op.getOpcode() == ISD::SHL_PARTS);
24940b57cec5SDimitry Andric
24950b57cec5SDimitry Andric EVT VT = Op.getValueType();
24960b57cec5SDimitry Andric unsigned VTBits = VT.getSizeInBits();
24970b57cec5SDimitry Andric SDLoc dl(Op);
24980b57cec5SDimitry Andric SDValue ShOpLo = Op.getOperand(0);
24990b57cec5SDimitry Andric SDValue ShOpHi = Op.getOperand(1);
25000b57cec5SDimitry Andric SDValue ShAmt = Op.getOperand(2);
25010b57cec5SDimitry Andric
25020b57cec5SDimitry Andric if (VTBits == 32 && STI.getSmVersion() >= 35) {
25030b57cec5SDimitry Andric // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
25040b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} << Amt
25050b57cec5SDimitry Andric // dHi = shf.l.clamp aLo, aHi, Amt
25060b57cec5SDimitry Andric // dLo = aLo << Amt
25070b57cec5SDimitry Andric
25080b57cec5SDimitry Andric SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
25090b57cec5SDimitry Andric ShAmt);
25100b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
25110b57cec5SDimitry Andric
25120b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi };
25130b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl);
25140b57cec5SDimitry Andric }
25150b57cec5SDimitry Andric else {
25160b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} << Amt
25170b57cec5SDimitry Andric // - if (Amt>=size) then
25180b57cec5SDimitry Andric // dLo = aLo << Amt (all 0)
25190b57cec5SDimitry Andric // dLo = aLo << (Amt-size)
25200b57cec5SDimitry Andric // else
25210b57cec5SDimitry Andric // dLo = aLo << Amt
25220b57cec5SDimitry Andric // dHi = (aHi << Amt) | (aLo >> (size-Amt))
25230b57cec5SDimitry Andric
25240b57cec5SDimitry Andric SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
25250b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32),
25260b57cec5SDimitry Andric ShAmt);
25270b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
25280b57cec5SDimitry Andric SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
25290b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32));
25300b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
25310b57cec5SDimitry Andric SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
25320b57cec5SDimitry Andric SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
25330b57cec5SDimitry Andric
25340b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
25350b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32),
25360b57cec5SDimitry Andric ISD::SETGE);
25370b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
25380b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
25390b57cec5SDimitry Andric
25400b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi };
25410b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl);
25420b57cec5SDimitry Andric }
25430b57cec5SDimitry Andric }
25440b57cec5SDimitry Andric
LowerFROUND(SDValue Op,SelectionDAG & DAG) const25450b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
25460b57cec5SDimitry Andric EVT VT = Op.getValueType();
25470b57cec5SDimitry Andric
25480b57cec5SDimitry Andric if (VT == MVT::f32)
25490b57cec5SDimitry Andric return LowerFROUND32(Op, DAG);
25500b57cec5SDimitry Andric
25510b57cec5SDimitry Andric if (VT == MVT::f64)
25520b57cec5SDimitry Andric return LowerFROUND64(Op, DAG);
25530b57cec5SDimitry Andric
25540b57cec5SDimitry Andric llvm_unreachable("unhandled type");
25550b57cec5SDimitry Andric }
25560b57cec5SDimitry Andric
25570b57cec5SDimitry Andric // This is the the rounding method used in CUDA libdevice in C like code:
25580b57cec5SDimitry Andric // float roundf(float A)
25590b57cec5SDimitry Andric // {
25600b57cec5SDimitry Andric // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
25610b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25620b57cec5SDimitry Andric // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25630b57cec5SDimitry Andric // }
LowerFROUND32(SDValue Op,SelectionDAG & DAG) const25640b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
25650b57cec5SDimitry Andric SelectionDAG &DAG) const {
25660b57cec5SDimitry Andric SDLoc SL(Op);
25670b57cec5SDimitry Andric SDValue A = Op.getOperand(0);
25680b57cec5SDimitry Andric EVT VT = Op.getValueType();
25690b57cec5SDimitry Andric
25700b57cec5SDimitry Andric SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
25710b57cec5SDimitry Andric
25720b57cec5SDimitry Andric // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
25730b57cec5SDimitry Andric SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
25740b57cec5SDimitry Andric const int SignBitMask = 0x80000000;
25750b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
25760b57cec5SDimitry Andric DAG.getConstant(SignBitMask, SL, MVT::i32));
25770b57cec5SDimitry Andric const int PointFiveInBits = 0x3F000000;
25780b57cec5SDimitry Andric SDValue PointFiveWithSignRaw =
25790b57cec5SDimitry Andric DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
25800b57cec5SDimitry Andric DAG.getConstant(PointFiveInBits, SL, MVT::i32));
25810b57cec5SDimitry Andric SDValue PointFiveWithSign =
25820b57cec5SDimitry Andric DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
25830b57cec5SDimitry Andric SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
25840b57cec5SDimitry Andric SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
25850b57cec5SDimitry Andric
25860b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25870b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
25880b57cec5SDimitry Andric SDValue IsLarge =
25890b57cec5SDimitry Andric DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
25900b57cec5SDimitry Andric ISD::SETOGT);
25910b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
25920b57cec5SDimitry Andric
25930b57cec5SDimitry Andric // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25940b57cec5SDimitry Andric SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
25950b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
25960b57cec5SDimitry Andric SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
25970b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
25980b57cec5SDimitry Andric }
25990b57cec5SDimitry Andric
26000b57cec5SDimitry Andric // The implementation of round(double) is similar to that of round(float) in
26010b57cec5SDimitry Andric // that they both separate the value range into three regions and use a method
26020b57cec5SDimitry Andric // specific to the region to round the values. However, round(double) first
26030b57cec5SDimitry Andric // calculates the round of the absolute value and then adds the sign back while
26040b57cec5SDimitry Andric // round(float) directly rounds the value with sign.
LowerFROUND64(SDValue Op,SelectionDAG & DAG) const26050b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
26060b57cec5SDimitry Andric SelectionDAG &DAG) const {
26070b57cec5SDimitry Andric SDLoc SL(Op);
26080b57cec5SDimitry Andric SDValue A = Op.getOperand(0);
26090b57cec5SDimitry Andric EVT VT = Op.getValueType();
26100b57cec5SDimitry Andric
26110b57cec5SDimitry Andric SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
26120b57cec5SDimitry Andric
26130b57cec5SDimitry Andric // double RoundedA = (double) (int) (abs(A) + 0.5f);
26140b57cec5SDimitry Andric SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
26150b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT));
26160b57cec5SDimitry Andric SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
26170b57cec5SDimitry Andric
26180b57cec5SDimitry Andric // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
26190b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
26200b57cec5SDimitry Andric SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
26210b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
26220b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
26230b57cec5SDimitry Andric DAG.getConstantFP(0, SL, VT),
26240b57cec5SDimitry Andric RoundedA);
26250b57cec5SDimitry Andric
26260b57cec5SDimitry Andric // Add sign to rounded_A
26270b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
26280b57cec5SDimitry Andric DAG.getNode(ISD::FTRUNC, SL, VT, A);
26290b57cec5SDimitry Andric
26300b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
26310b57cec5SDimitry Andric SDValue IsLarge =
26320b57cec5SDimitry Andric DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
26330b57cec5SDimitry Andric ISD::SETOGT);
26340b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
26350b57cec5SDimitry Andric }
26360b57cec5SDimitry Andric
LowerINT_TO_FP(SDValue Op,SelectionDAG & DAG) const26375f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
26385f757f3fSDimitry Andric SelectionDAG &DAG) const {
26395f757f3fSDimitry Andric assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26400b57cec5SDimitry Andric
26415f757f3fSDimitry Andric if (Op.getValueType() == MVT::bf16) {
26425f757f3fSDimitry Andric SDLoc Loc(Op);
26435f757f3fSDimitry Andric return DAG.getNode(
26445f757f3fSDimitry Andric ISD::FP_ROUND, Loc, MVT::bf16,
26455f757f3fSDimitry Andric DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)),
26465f757f3fSDimitry Andric DAG.getIntPtrConstant(0, Loc));
26475f757f3fSDimitry Andric }
26485f757f3fSDimitry Andric
26495f757f3fSDimitry Andric // Everything else is considered legal.
26505f757f3fSDimitry Andric return Op;
26515f757f3fSDimitry Andric }
26525f757f3fSDimitry Andric
LowerFP_TO_INT(SDValue Op,SelectionDAG & DAG) const26535f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
26545f757f3fSDimitry Andric SelectionDAG &DAG) const {
26555f757f3fSDimitry Andric assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26565f757f3fSDimitry Andric
26575f757f3fSDimitry Andric if (Op.getOperand(0).getValueType() == MVT::bf16) {
26585f757f3fSDimitry Andric SDLoc Loc(Op);
26595f757f3fSDimitry Andric return DAG.getNode(
26605f757f3fSDimitry Andric Op.getOpcode(), Loc, Op.getValueType(),
26615f757f3fSDimitry Andric DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
26625f757f3fSDimitry Andric }
26635f757f3fSDimitry Andric
26645f757f3fSDimitry Andric // Everything else is considered legal.
26655f757f3fSDimitry Andric return Op;
26665f757f3fSDimitry Andric }
26675f757f3fSDimitry Andric
LowerFP_ROUND(SDValue Op,SelectionDAG & DAG) const26680fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op,
26690fca6ea1SDimitry Andric SelectionDAG &DAG) const {
26700fca6ea1SDimitry Andric EVT NarrowVT = Op.getValueType();
26710fca6ea1SDimitry Andric SDValue Wide = Op.getOperand(0);
26720fca6ea1SDimitry Andric EVT WideVT = Wide.getValueType();
26730fca6ea1SDimitry Andric if (NarrowVT.getScalarType() == MVT::bf16) {
26740fca6ea1SDimitry Andric const TargetLowering *TLI = STI.getTargetLowering();
26750fca6ea1SDimitry Andric if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 70) {
26760fca6ea1SDimitry Andric return TLI->expandFP_ROUND(Op.getNode(), DAG);
26770fca6ea1SDimitry Andric }
26780fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
26790fca6ea1SDimitry Andric // This combination was the first to support f32 -> bf16.
26800fca6ea1SDimitry Andric if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70) {
26810fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f32) {
26820fca6ea1SDimitry Andric return Op;
26830fca6ea1SDimitry Andric }
26840fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f64) {
26850fca6ea1SDimitry Andric SDLoc Loc(Op);
26860fca6ea1SDimitry Andric // Round-inexact-to-odd f64 to f32, then do the final rounding using
26870fca6ea1SDimitry Andric // the hardware f32 -> bf16 instruction.
26880fca6ea1SDimitry Andric SDValue rod = TLI->expandRoundInexactToOdd(
26890fca6ea1SDimitry Andric WideVT.isVector() ? WideVT.changeVectorElementType(MVT::f32)
26900fca6ea1SDimitry Andric : MVT::f32,
26910fca6ea1SDimitry Andric Wide, Loc, DAG);
26920fca6ea1SDimitry Andric return DAG.getFPExtendOrRound(rod, Loc, NarrowVT);
26930fca6ea1SDimitry Andric }
26940fca6ea1SDimitry Andric }
26950fca6ea1SDimitry Andric return TLI->expandFP_ROUND(Op.getNode(), DAG);
26960fca6ea1SDimitry Andric }
26970fca6ea1SDimitry Andric }
26980fca6ea1SDimitry Andric
26990fca6ea1SDimitry Andric // Everything else is considered legal.
27000fca6ea1SDimitry Andric return Op;
27010fca6ea1SDimitry Andric }
27020fca6ea1SDimitry Andric
LowerFP_EXTEND(SDValue Op,SelectionDAG & DAG) const27030fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_EXTEND(SDValue Op,
27040fca6ea1SDimitry Andric SelectionDAG &DAG) const {
27050fca6ea1SDimitry Andric SDValue Narrow = Op.getOperand(0);
27060fca6ea1SDimitry Andric EVT NarrowVT = Narrow.getValueType();
27070fca6ea1SDimitry Andric EVT WideVT = Op.getValueType();
27080fca6ea1SDimitry Andric if (NarrowVT.getScalarType() == MVT::bf16) {
27090fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f32 &&
27100fca6ea1SDimitry Andric (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71)) {
27110fca6ea1SDimitry Andric SDLoc Loc(Op);
27120fca6ea1SDimitry Andric return DAG.getNode(ISD::BF16_TO_FP, Loc, WideVT, Narrow);
27130fca6ea1SDimitry Andric }
27140fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f64 &&
27150fca6ea1SDimitry Andric (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78)) {
27160fca6ea1SDimitry Andric EVT F32 = NarrowVT.isVector() ? NarrowVT.changeVectorElementType(MVT::f32)
27170fca6ea1SDimitry Andric : MVT::f32;
27180fca6ea1SDimitry Andric SDLoc Loc(Op);
27190fca6ea1SDimitry Andric if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 71) {
27200fca6ea1SDimitry Andric Op = DAG.getNode(ISD::FP_EXTEND, Loc, F32, Narrow);
27210fca6ea1SDimitry Andric } else {
27220fca6ea1SDimitry Andric Op = DAG.getNode(ISD::BF16_TO_FP, Loc, F32, Narrow);
27230fca6ea1SDimitry Andric }
27240fca6ea1SDimitry Andric return DAG.getNode(ISD::FP_EXTEND, Loc, WideVT, Op);
27250fca6ea1SDimitry Andric }
27260fca6ea1SDimitry Andric }
27270fca6ea1SDimitry Andric
27280fca6ea1SDimitry Andric // Everything else is considered legal.
27290fca6ea1SDimitry Andric return Op;
27300fca6ea1SDimitry Andric }
27310fca6ea1SDimitry Andric
LowerVectorArith(SDValue Op,SelectionDAG & DAG)27325f757f3fSDimitry Andric static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) {
27335f757f3fSDimitry Andric SDLoc DL(Op);
27345f757f3fSDimitry Andric if (Op.getValueType() != MVT::v2i16)
27355f757f3fSDimitry Andric return Op;
27365f757f3fSDimitry Andric EVT EltVT = Op.getValueType().getVectorElementType();
27375f757f3fSDimitry Andric SmallVector<SDValue> VecElements;
27385f757f3fSDimitry Andric for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {
27395f757f3fSDimitry Andric SmallVector<SDValue> ScalarArgs;
27405f757f3fSDimitry Andric llvm::transform(Op->ops(), std::back_inserter(ScalarArgs),
27415f757f3fSDimitry Andric [&](const SDUse &O) {
27425f757f3fSDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
27435f757f3fSDimitry Andric O.get(), DAG.getIntPtrConstant(I, DL));
27445f757f3fSDimitry Andric });
27455f757f3fSDimitry Andric VecElements.push_back(DAG.getNode(Op.getOpcode(), DL, EltVT, ScalarArgs));
27465f757f3fSDimitry Andric }
27475f757f3fSDimitry Andric SDValue V =
27485f757f3fSDimitry Andric DAG.getNode(ISD::BUILD_VECTOR, DL, Op.getValueType(), VecElements);
27495f757f3fSDimitry Andric return V;
27505f757f3fSDimitry Andric }
27510b57cec5SDimitry Andric
27520b57cec5SDimitry Andric SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const27530b57cec5SDimitry Andric NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
27540b57cec5SDimitry Andric switch (Op.getOpcode()) {
27550b57cec5SDimitry Andric case ISD::RETURNADDR:
27560b57cec5SDimitry Andric return SDValue();
27570b57cec5SDimitry Andric case ISD::FRAMEADDR:
27580b57cec5SDimitry Andric return SDValue();
27590b57cec5SDimitry Andric case ISD::GlobalAddress:
27600b57cec5SDimitry Andric return LowerGlobalAddress(Op, DAG);
27610b57cec5SDimitry Andric case ISD::INTRINSIC_W_CHAIN:
27620b57cec5SDimitry Andric return Op;
27630b57cec5SDimitry Andric case ISD::BUILD_VECTOR:
27640b57cec5SDimitry Andric return LowerBUILD_VECTOR(Op, DAG);
27650b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR:
27660b57cec5SDimitry Andric return Op;
27670b57cec5SDimitry Andric case ISD::EXTRACT_VECTOR_ELT:
27680b57cec5SDimitry Andric return LowerEXTRACT_VECTOR_ELT(Op, DAG);
27695f757f3fSDimitry Andric case ISD::INSERT_VECTOR_ELT:
27705f757f3fSDimitry Andric return LowerINSERT_VECTOR_ELT(Op, DAG);
27715f757f3fSDimitry Andric case ISD::VECTOR_SHUFFLE:
27725f757f3fSDimitry Andric return LowerVECTOR_SHUFFLE(Op, DAG);
27730b57cec5SDimitry Andric case ISD::CONCAT_VECTORS:
27740b57cec5SDimitry Andric return LowerCONCAT_VECTORS(Op, DAG);
27750b57cec5SDimitry Andric case ISD::STORE:
27760b57cec5SDimitry Andric return LowerSTORE(Op, DAG);
27770b57cec5SDimitry Andric case ISD::LOAD:
27780b57cec5SDimitry Andric return LowerLOAD(Op, DAG);
27790b57cec5SDimitry Andric case ISD::SHL_PARTS:
27800b57cec5SDimitry Andric return LowerShiftLeftParts(Op, DAG);
27810b57cec5SDimitry Andric case ISD::SRA_PARTS:
27820b57cec5SDimitry Andric case ISD::SRL_PARTS:
27830b57cec5SDimitry Andric return LowerShiftRightParts(Op, DAG);
27840b57cec5SDimitry Andric case ISD::SELECT:
27850b57cec5SDimitry Andric return LowerSelect(Op, DAG);
27860b57cec5SDimitry Andric case ISD::FROUND:
27870b57cec5SDimitry Andric return LowerFROUND(Op, DAG);
27885f757f3fSDimitry Andric case ISD::SINT_TO_FP:
27895f757f3fSDimitry Andric case ISD::UINT_TO_FP:
27905f757f3fSDimitry Andric return LowerINT_TO_FP(Op, DAG);
27915f757f3fSDimitry Andric case ISD::FP_TO_SINT:
27925f757f3fSDimitry Andric case ISD::FP_TO_UINT:
27935f757f3fSDimitry Andric return LowerFP_TO_INT(Op, DAG);
27940fca6ea1SDimitry Andric case ISD::FP_ROUND:
27950fca6ea1SDimitry Andric return LowerFP_ROUND(Op, DAG);
27960fca6ea1SDimitry Andric case ISD::FP_EXTEND:
27970fca6ea1SDimitry Andric return LowerFP_EXTEND(Op, DAG);
2798bdd1243dSDimitry Andric case ISD::VAARG:
2799bdd1243dSDimitry Andric return LowerVAARG(Op, DAG);
2800bdd1243dSDimitry Andric case ISD::VASTART:
2801bdd1243dSDimitry Andric return LowerVASTART(Op, DAG);
28025f757f3fSDimitry Andric case ISD::ABS:
28035f757f3fSDimitry Andric case ISD::SMIN:
28045f757f3fSDimitry Andric case ISD::SMAX:
28055f757f3fSDimitry Andric case ISD::UMIN:
28065f757f3fSDimitry Andric case ISD::UMAX:
28075f757f3fSDimitry Andric case ISD::ADD:
28085f757f3fSDimitry Andric case ISD::SUB:
28095f757f3fSDimitry Andric case ISD::MUL:
28105f757f3fSDimitry Andric case ISD::SHL:
28115f757f3fSDimitry Andric case ISD::SREM:
28125f757f3fSDimitry Andric case ISD::UREM:
28135f757f3fSDimitry Andric return LowerVectorArith(Op, DAG);
28145f757f3fSDimitry Andric case ISD::DYNAMIC_STACKALLOC:
28155f757f3fSDimitry Andric return LowerDYNAMIC_STACKALLOC(Op, DAG);
28160fca6ea1SDimitry Andric case ISD::CopyToReg:
28170fca6ea1SDimitry Andric return LowerCopyToReg_128(Op, DAG);
28180b57cec5SDimitry Andric default:
28190b57cec5SDimitry Andric llvm_unreachable("Custom lowering not defined for operation");
28200b57cec5SDimitry Andric }
28210b57cec5SDimitry Andric }
28220b57cec5SDimitry Andric
2823bdd1243dSDimitry Andric // This function is almost a copy of SelectionDAG::expandVAArg().
2824bdd1243dSDimitry Andric // The only diff is that this one produces loads from local address space.
LowerVAARG(SDValue Op,SelectionDAG & DAG) const2825bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2826bdd1243dSDimitry Andric const TargetLowering *TLI = STI.getTargetLowering();
2827bdd1243dSDimitry Andric SDLoc DL(Op);
2828bdd1243dSDimitry Andric
2829bdd1243dSDimitry Andric SDNode *Node = Op.getNode();
2830bdd1243dSDimitry Andric const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2831bdd1243dSDimitry Andric EVT VT = Node->getValueType(0);
2832bdd1243dSDimitry Andric auto *Ty = VT.getTypeForEVT(*DAG.getContext());
2833bdd1243dSDimitry Andric SDValue Tmp1 = Node->getOperand(0);
2834bdd1243dSDimitry Andric SDValue Tmp2 = Node->getOperand(1);
2835bdd1243dSDimitry Andric const MaybeAlign MA(Node->getConstantOperandVal(3));
2836bdd1243dSDimitry Andric
2837bdd1243dSDimitry Andric SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL,
2838bdd1243dSDimitry Andric Tmp1, Tmp2, MachinePointerInfo(V));
2839bdd1243dSDimitry Andric SDValue VAList = VAListLoad;
2840bdd1243dSDimitry Andric
2841bdd1243dSDimitry Andric if (MA && *MA > TLI->getMinStackArgumentAlignment()) {
2842bdd1243dSDimitry Andric VAList = DAG.getNode(
2843bdd1243dSDimitry Andric ISD::ADD, DL, VAList.getValueType(), VAList,
2844bdd1243dSDimitry Andric DAG.getConstant(MA->value() - 1, DL, VAList.getValueType()));
2845bdd1243dSDimitry Andric
2846bdd1243dSDimitry Andric VAList = DAG.getNode(
2847bdd1243dSDimitry Andric ISD::AND, DL, VAList.getValueType(), VAList,
2848bdd1243dSDimitry Andric DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType()));
2849bdd1243dSDimitry Andric }
2850bdd1243dSDimitry Andric
2851bdd1243dSDimitry Andric // Increment the pointer, VAList, to the next vaarg
2852bdd1243dSDimitry Andric Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
2853bdd1243dSDimitry Andric DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty),
2854bdd1243dSDimitry Andric DL, VAList.getValueType()));
2855bdd1243dSDimitry Andric
2856bdd1243dSDimitry Andric // Store the incremented VAList to the legalized pointer
2857bdd1243dSDimitry Andric Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2,
2858bdd1243dSDimitry Andric MachinePointerInfo(V));
2859bdd1243dSDimitry Andric
2860bdd1243dSDimitry Andric const Value *SrcV =
2861bdd1243dSDimitry Andric Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL));
2862bdd1243dSDimitry Andric
2863bdd1243dSDimitry Andric // Load the actual argument out of the pointer VAList
2864bdd1243dSDimitry Andric return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV));
2865bdd1243dSDimitry Andric }
2866bdd1243dSDimitry Andric
LowerVASTART(SDValue Op,SelectionDAG & DAG) const2867bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2868bdd1243dSDimitry Andric const TargetLowering *TLI = STI.getTargetLowering();
2869bdd1243dSDimitry Andric SDLoc DL(Op);
2870bdd1243dSDimitry Andric EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
2871bdd1243dSDimitry Andric
2872bdd1243dSDimitry Andric // Store the address of unsized array <function>_vararg[] in the ap object.
2873bdd1243dSDimitry Andric SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
2874bdd1243dSDimitry Andric SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
2875bdd1243dSDimitry Andric
2876bdd1243dSDimitry Andric const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2877bdd1243dSDimitry Andric return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
2878bdd1243dSDimitry Andric MachinePointerInfo(SV));
2879bdd1243dSDimitry Andric }
2880bdd1243dSDimitry Andric
LowerSelect(SDValue Op,SelectionDAG & DAG) const28810b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
28820b57cec5SDimitry Andric SDValue Op0 = Op->getOperand(0);
28830b57cec5SDimitry Andric SDValue Op1 = Op->getOperand(1);
28840b57cec5SDimitry Andric SDValue Op2 = Op->getOperand(2);
28850b57cec5SDimitry Andric SDLoc DL(Op.getNode());
28860b57cec5SDimitry Andric
28870b57cec5SDimitry Andric assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
28880b57cec5SDimitry Andric
28890b57cec5SDimitry Andric Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
28900b57cec5SDimitry Andric Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
28910b57cec5SDimitry Andric SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
28920b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
28930b57cec5SDimitry Andric
28940b57cec5SDimitry Andric return Trunc;
28950b57cec5SDimitry Andric }
28960b57cec5SDimitry Andric
LowerLOAD(SDValue Op,SelectionDAG & DAG) const28970b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
28980b57cec5SDimitry Andric if (Op.getValueType() == MVT::i1)
28990b57cec5SDimitry Andric return LowerLOADi1(Op, DAG);
29000b57cec5SDimitry Andric
29015f757f3fSDimitry Andric // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle
29025f757f3fSDimitry Andric // unaligned loads and have to handle it here.
29035f757f3fSDimitry Andric EVT VT = Op.getValueType();
29045f757f3fSDimitry Andric if (Isv2x16VT(VT) || VT == MVT::v4i8) {
29050b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op);
29060b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT();
29078bcb0991SDimitry Andric if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
29088bcb0991SDimitry Andric MemVT, *Load->getMemOperand())) {
29090b57cec5SDimitry Andric SDValue Ops[2];
29100b57cec5SDimitry Andric std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
29110b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc(Op));
29120b57cec5SDimitry Andric }
29130b57cec5SDimitry Andric }
29140b57cec5SDimitry Andric
29150b57cec5SDimitry Andric return SDValue();
29160b57cec5SDimitry Andric }
29170b57cec5SDimitry Andric
29180b57cec5SDimitry Andric // v = ld i1* addr
29190b57cec5SDimitry Andric // =>
29200b57cec5SDimitry Andric // v1 = ld i8* addr (-> i16)
29210b57cec5SDimitry Andric // v = trunc i16 to i1
LowerLOADi1(SDValue Op,SelectionDAG & DAG) const29220b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
29230b57cec5SDimitry Andric SDNode *Node = Op.getNode();
29240b57cec5SDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(Node);
29250b57cec5SDimitry Andric SDLoc dl(Node);
29260b57cec5SDimitry Andric assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
29270b57cec5SDimitry Andric assert(Node->getValueType(0) == MVT::i1 &&
29280b57cec5SDimitry Andric "Custom lowering for i1 load only");
29290fca6ea1SDimitry Andric SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
29300fca6ea1SDimitry Andric LD->getBasePtr(), LD->getPointerInfo(),
29310fca6ea1SDimitry Andric MVT::i8, LD->getAlign(),
29320b57cec5SDimitry Andric LD->getMemOperand()->getFlags());
29330b57cec5SDimitry Andric SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
29340b57cec5SDimitry Andric // The legalizer (the caller) is expecting two values from the legalized
29350b57cec5SDimitry Andric // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
29360b57cec5SDimitry Andric // in LegalizeDAG.cpp which also uses MergeValues.
29370b57cec5SDimitry Andric SDValue Ops[] = { result, LD->getChain() };
29380b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl);
29390b57cec5SDimitry Andric }
29400b57cec5SDimitry Andric
LowerSTORE(SDValue Op,SelectionDAG & DAG) const29410b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
29420b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op);
29430b57cec5SDimitry Andric EVT VT = Store->getMemoryVT();
29440b57cec5SDimitry Andric
29450b57cec5SDimitry Andric if (VT == MVT::i1)
29460b57cec5SDimitry Andric return LowerSTOREi1(Op, DAG);
29470b57cec5SDimitry Andric
29480b57cec5SDimitry Andric // v2f16 is legal, so we can't rely on legalizer to handle unaligned
29490b57cec5SDimitry Andric // stores and have to handle it here.
29505f757f3fSDimitry Andric if ((Isv2x16VT(VT) || VT == MVT::v4i8) &&
29518bcb0991SDimitry Andric !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
29528bcb0991SDimitry Andric VT, *Store->getMemOperand()))
29530b57cec5SDimitry Andric return expandUnalignedStore(Store, DAG);
29540b57cec5SDimitry Andric
29555f757f3fSDimitry Andric // v2f16, v2bf16 and v2i16 don't need special handling.
29565f757f3fSDimitry Andric if (Isv2x16VT(VT) || VT == MVT::v4i8)
295706c3fb27SDimitry Andric return SDValue();
295806c3fb27SDimitry Andric
29590b57cec5SDimitry Andric if (VT.isVector())
29600b57cec5SDimitry Andric return LowerSTOREVector(Op, DAG);
29610b57cec5SDimitry Andric
29620b57cec5SDimitry Andric return SDValue();
29630b57cec5SDimitry Andric }
29640b57cec5SDimitry Andric
29650b57cec5SDimitry Andric SDValue
LowerSTOREVector(SDValue Op,SelectionDAG & DAG) const29660b57cec5SDimitry Andric NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
29670b57cec5SDimitry Andric SDNode *N = Op.getNode();
29680b57cec5SDimitry Andric SDValue Val = N->getOperand(1);
29690b57cec5SDimitry Andric SDLoc DL(N);
29700b57cec5SDimitry Andric EVT ValVT = Val.getValueType();
29710b57cec5SDimitry Andric
29720b57cec5SDimitry Andric if (ValVT.isVector()) {
29730b57cec5SDimitry Andric // We only handle "native" vector sizes for now, e.g. <4 x double> is not
29740b57cec5SDimitry Andric // legal. We can (and should) split that into 2 stores of <2 x double> here
29750b57cec5SDimitry Andric // but I'm leaving that as a TODO for now.
29760b57cec5SDimitry Andric if (!ValVT.isSimple())
29770b57cec5SDimitry Andric return SDValue();
29780b57cec5SDimitry Andric switch (ValVT.getSimpleVT().SimpleTy) {
29790b57cec5SDimitry Andric default:
29800b57cec5SDimitry Andric return SDValue();
29810b57cec5SDimitry Andric case MVT::v2i8:
29820b57cec5SDimitry Andric case MVT::v2i16:
29830b57cec5SDimitry Andric case MVT::v2i32:
29840b57cec5SDimitry Andric case MVT::v2i64:
29850b57cec5SDimitry Andric case MVT::v2f16:
2986bdd1243dSDimitry Andric case MVT::v2bf16:
29870b57cec5SDimitry Andric case MVT::v2f32:
29880b57cec5SDimitry Andric case MVT::v2f64:
29890b57cec5SDimitry Andric case MVT::v4i8:
29900b57cec5SDimitry Andric case MVT::v4i16:
29910b57cec5SDimitry Andric case MVT::v4i32:
29920b57cec5SDimitry Andric case MVT::v4f16:
2993bdd1243dSDimitry Andric case MVT::v4bf16:
29940b57cec5SDimitry Andric case MVT::v4f32:
29950b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2>
2996bdd1243dSDimitry Andric case MVT::v8bf16: // <4 x bf16x2>
29975f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2>
29980b57cec5SDimitry Andric // This is a "native" vector type
29990b57cec5SDimitry Andric break;
30000b57cec5SDimitry Andric }
30010b57cec5SDimitry Andric
30020b57cec5SDimitry Andric MemSDNode *MemSD = cast<MemSDNode>(N);
30030b57cec5SDimitry Andric const DataLayout &TD = DAG.getDataLayout();
30040b57cec5SDimitry Andric
30055ffd83dbSDimitry Andric Align Alignment = MemSD->getAlign();
30065ffd83dbSDimitry Andric Align PrefAlign =
30075ffd83dbSDimitry Andric TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
30085ffd83dbSDimitry Andric if (Alignment < PrefAlign) {
30090b57cec5SDimitry Andric // This store is not sufficiently aligned, so bail out and let this vector
30100b57cec5SDimitry Andric // store be scalarized. Note that we may still be able to emit smaller
30110b57cec5SDimitry Andric // vector stores. For example, if we are storing a <4 x float> with an
30120b57cec5SDimitry Andric // alignment of 8, this check will fail but the legalizer will try again
30130b57cec5SDimitry Andric // with 2 x <2 x float>, which will succeed with an alignment of 8.
30140b57cec5SDimitry Andric return SDValue();
30150b57cec5SDimitry Andric }
30160b57cec5SDimitry Andric
30170b57cec5SDimitry Andric unsigned Opcode = 0;
30180b57cec5SDimitry Andric EVT EltVT = ValVT.getVectorElementType();
30190b57cec5SDimitry Andric unsigned NumElts = ValVT.getVectorNumElements();
30200b57cec5SDimitry Andric
30210b57cec5SDimitry Andric // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
30220b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the
30230b57cec5SDimitry Andric // stored type to i16 and propagate the "real" type as the memory type.
30240b57cec5SDimitry Andric bool NeedExt = false;
30250b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16)
30260b57cec5SDimitry Andric NeedExt = true;
30270b57cec5SDimitry Andric
30280b57cec5SDimitry Andric bool StoreF16x2 = false;
30290b57cec5SDimitry Andric switch (NumElts) {
30300b57cec5SDimitry Andric default:
30310b57cec5SDimitry Andric return SDValue();
30320b57cec5SDimitry Andric case 2:
30330b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV2;
30340b57cec5SDimitry Andric break;
30350b57cec5SDimitry Andric case 4:
30360b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV4;
30370b57cec5SDimitry Andric break;
30380b57cec5SDimitry Andric case 8:
30390b57cec5SDimitry Andric // v8f16 is a special case. PTX doesn't have st.v8.f16
30400b57cec5SDimitry Andric // instruction. Instead, we split the vector into v2f16 chunks and
30410b57cec5SDimitry Andric // store them with st.v4.b32.
30425f757f3fSDimitry Andric assert(Is16bitsType(EltVT.getSimpleVT()) && "Wrong type for the vector.");
30430b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV4;
30440b57cec5SDimitry Andric StoreF16x2 = true;
30450b57cec5SDimitry Andric break;
30460b57cec5SDimitry Andric }
30470b57cec5SDimitry Andric
30480b57cec5SDimitry Andric SmallVector<SDValue, 8> Ops;
30490b57cec5SDimitry Andric
30500b57cec5SDimitry Andric // First is the chain
30510b57cec5SDimitry Andric Ops.push_back(N->getOperand(0));
30520b57cec5SDimitry Andric
30530b57cec5SDimitry Andric if (StoreF16x2) {
30540b57cec5SDimitry Andric // Combine f16,f16 -> v2f16
30550b57cec5SDimitry Andric NumElts /= 2;
30560b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) {
305706c3fb27SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30580b57cec5SDimitry Andric DAG.getIntPtrConstant(i * 2, DL));
305906c3fb27SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30600b57cec5SDimitry Andric DAG.getIntPtrConstant(i * 2 + 1, DL));
306106c3fb27SDimitry Andric EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 2);
306206c3fb27SDimitry Andric SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, E0, E1);
30630b57cec5SDimitry Andric Ops.push_back(V2);
30640b57cec5SDimitry Andric }
30650b57cec5SDimitry Andric } else {
30660b57cec5SDimitry Andric // Then the split values
30670b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) {
30680b57cec5SDimitry Andric SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30690b57cec5SDimitry Andric DAG.getIntPtrConstant(i, DL));
30700b57cec5SDimitry Andric if (NeedExt)
30710b57cec5SDimitry Andric ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
30720b57cec5SDimitry Andric Ops.push_back(ExtVal);
30730b57cec5SDimitry Andric }
30740b57cec5SDimitry Andric }
30750b57cec5SDimitry Andric
30760b57cec5SDimitry Andric // Then any remaining arguments
30770b57cec5SDimitry Andric Ops.append(N->op_begin() + 2, N->op_end());
30780b57cec5SDimitry Andric
30790b57cec5SDimitry Andric SDValue NewSt =
30800b57cec5SDimitry Andric DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
30810b57cec5SDimitry Andric MemSD->getMemoryVT(), MemSD->getMemOperand());
30820b57cec5SDimitry Andric
30830b57cec5SDimitry Andric // return DCI.CombineTo(N, NewSt, true);
30840b57cec5SDimitry Andric return NewSt;
30850b57cec5SDimitry Andric }
30860b57cec5SDimitry Andric
30870b57cec5SDimitry Andric return SDValue();
30880b57cec5SDimitry Andric }
30890b57cec5SDimitry Andric
30900b57cec5SDimitry Andric // st i1 v, addr
30910b57cec5SDimitry Andric // =>
30920b57cec5SDimitry Andric // v1 = zxt v to i16
30930b57cec5SDimitry Andric // st.u8 i16, addr
LowerSTOREi1(SDValue Op,SelectionDAG & DAG) const30940b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
30950b57cec5SDimitry Andric SDNode *Node = Op.getNode();
30960b57cec5SDimitry Andric SDLoc dl(Node);
30970b57cec5SDimitry Andric StoreSDNode *ST = cast<StoreSDNode>(Node);
30980b57cec5SDimitry Andric SDValue Tmp1 = ST->getChain();
30990b57cec5SDimitry Andric SDValue Tmp2 = ST->getBasePtr();
31000b57cec5SDimitry Andric SDValue Tmp3 = ST->getValue();
31010b57cec5SDimitry Andric assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
31020b57cec5SDimitry Andric Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
31030b57cec5SDimitry Andric SDValue Result =
31040b57cec5SDimitry Andric DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
310581ad6265SDimitry Andric ST->getAlign(), ST->getMemOperand()->getFlags());
31060b57cec5SDimitry Andric return Result;
31070b57cec5SDimitry Andric }
31080b57cec5SDimitry Andric
LowerCopyToReg_128(SDValue Op,SelectionDAG & DAG) const31090fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerCopyToReg_128(SDValue Op,
31100fca6ea1SDimitry Andric SelectionDAG &DAG) const {
31110fca6ea1SDimitry Andric // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit
31120fca6ea1SDimitry Andric // operand so that it can pass the legalization.
31130fca6ea1SDimitry Andric
31140fca6ea1SDimitry Andric assert(Op.getOperand(1).getValueType() == MVT::i128 &&
31150fca6ea1SDimitry Andric "Custom lowering for 128-bit CopyToReg only");
31160fca6ea1SDimitry Andric
31170fca6ea1SDimitry Andric SDNode *Node = Op.getNode();
31180fca6ea1SDimitry Andric SDLoc DL(Node);
31190fca6ea1SDimitry Andric
31200fca6ea1SDimitry Andric SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2));
31210fca6ea1SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast,
31220fca6ea1SDimitry Andric DAG.getIntPtrConstant(0, DL));
31230fca6ea1SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast,
31240fca6ea1SDimitry Andric DAG.getIntPtrConstant(1, DL));
31250fca6ea1SDimitry Andric
31260fca6ea1SDimitry Andric SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1);
31270fca6ea1SDimitry Andric SmallVector<EVT, 3> ResultsType(Node->values());
31280fca6ea1SDimitry Andric
31290fca6ea1SDimitry Andric NewOps[0] = Op->getOperand(0); // Chain
31300fca6ea1SDimitry Andric NewOps[1] = Op->getOperand(1); // Dst Reg
31310fca6ea1SDimitry Andric NewOps[2] = Lo; // Lower 64-bit
31320fca6ea1SDimitry Andric NewOps[3] = Hi; // Higher 64-bit
31330fca6ea1SDimitry Andric if (Op.getNumOperands() == 4)
31340fca6ea1SDimitry Andric NewOps[4] = Op->getOperand(3); // Glue if exists
31350fca6ea1SDimitry Andric
31360fca6ea1SDimitry Andric return DAG.getNode(ISD::CopyToReg, DL, ResultsType, NewOps);
31370fca6ea1SDimitry Andric }
31380fca6ea1SDimitry Andric
getNumRegisters(LLVMContext & Context,EVT VT,std::optional<MVT> RegisterVT=std::nullopt) const31390fca6ea1SDimitry Andric unsigned NVPTXTargetLowering::getNumRegisters(
31400fca6ea1SDimitry Andric LLVMContext &Context, EVT VT,
31410fca6ea1SDimitry Andric std::optional<MVT> RegisterVT = std::nullopt) const {
31420fca6ea1SDimitry Andric if (VT == MVT::i128 && RegisterVT == MVT::i128)
31430fca6ea1SDimitry Andric return 1;
31440fca6ea1SDimitry Andric return TargetLoweringBase::getNumRegisters(Context, VT, RegisterVT);
31450fca6ea1SDimitry Andric }
31460fca6ea1SDimitry Andric
splitValueIntoRegisterParts(SelectionDAG & DAG,const SDLoc & DL,SDValue Val,SDValue * Parts,unsigned NumParts,MVT PartVT,std::optional<CallingConv::ID> CC) const31470fca6ea1SDimitry Andric bool NVPTXTargetLowering::splitValueIntoRegisterParts(
31480fca6ea1SDimitry Andric SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
31490fca6ea1SDimitry Andric unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
31500fca6ea1SDimitry Andric if (Val.getValueType() == MVT::i128 && NumParts == 1) {
31510fca6ea1SDimitry Andric Parts[0] = Val;
31520fca6ea1SDimitry Andric return true;
31530fca6ea1SDimitry Andric }
31540fca6ea1SDimitry Andric return false;
31550fca6ea1SDimitry Andric }
31560fca6ea1SDimitry Andric
3157bdd1243dSDimitry Andric // This creates target external symbol for a function parameter.
3158bdd1243dSDimitry Andric // Name of the symbol is composed from its index and the function name.
3159bdd1243dSDimitry Andric // Negative index corresponds to special parameter (unsized array) used for
3160bdd1243dSDimitry Andric // passing variable arguments.
getParamSymbol(SelectionDAG & DAG,int idx,EVT v) const3161bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
3162bdd1243dSDimitry Andric EVT v) const {
316306c3fb27SDimitry Andric StringRef SavedStr = nvTM->getStrPool().save(
316406c3fb27SDimitry Andric getParamName(&DAG.getMachineFunction().getFunction(), idx));
3165bdd1243dSDimitry Andric return DAG.getTargetExternalSymbol(SavedStr.data(), v);
31660b57cec5SDimitry Andric }
31670b57cec5SDimitry Andric
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const31680b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFormalArguments(
31690b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
31700b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
31710b57cec5SDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
31720b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
31730b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout();
31740b57cec5SDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout());
31750b57cec5SDimitry Andric
31760b57cec5SDimitry Andric const Function *F = &MF.getFunction();
31770b57cec5SDimitry Andric const AttributeList &PAL = F->getAttributes();
31780b57cec5SDimitry Andric const TargetLowering *TLI = STI.getTargetLowering();
31790b57cec5SDimitry Andric
31800b57cec5SDimitry Andric SDValue Root = DAG.getRoot();
31810b57cec5SDimitry Andric std::vector<SDValue> OutChains;
31820b57cec5SDimitry Andric
31830b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20);
31840b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported");
31850b57cec5SDimitry Andric if (!isABI)
31860b57cec5SDimitry Andric return Chain;
31870b57cec5SDimitry Andric
31880b57cec5SDimitry Andric std::vector<Type *> argTypes;
31890b57cec5SDimitry Andric std::vector<const Argument *> theArgs;
31900b57cec5SDimitry Andric for (const Argument &I : F->args()) {
31910b57cec5SDimitry Andric theArgs.push_back(&I);
31920b57cec5SDimitry Andric argTypes.push_back(I.getType());
31930b57cec5SDimitry Andric }
31940b57cec5SDimitry Andric // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
31950b57cec5SDimitry Andric // Ins.size() will be larger
31960b57cec5SDimitry Andric // * if there is an aggregate argument with multiple fields (each field
31970b57cec5SDimitry Andric // showing up separately in Ins)
31980b57cec5SDimitry Andric // * if there is a vector argument with more than typical vector-length
31990b57cec5SDimitry Andric // elements (generally if more than 4) where each vector element is
32000b57cec5SDimitry Andric // individually present in Ins.
32010b57cec5SDimitry Andric // So a different index should be used for indexing into Ins.
32020b57cec5SDimitry Andric // See similar issue in LowerCall.
32030b57cec5SDimitry Andric unsigned InsIdx = 0;
32040b57cec5SDimitry Andric
32050fca6ea1SDimitry Andric for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++InsIdx) {
32060b57cec5SDimitry Andric Type *Ty = argTypes[i];
32070b57cec5SDimitry Andric
32080b57cec5SDimitry Andric if (theArgs[i]->use_empty()) {
32090b57cec5SDimitry Andric // argument is dead
321006c3fb27SDimitry Andric if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) {
32110b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts;
32120b57cec5SDimitry Andric
32130b57cec5SDimitry Andric ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
321406c3fb27SDimitry Andric if (vtparts.empty())
321506c3fb27SDimitry Andric report_fatal_error("Empty parameter types are not supported");
321606c3fb27SDimitry Andric
32170b57cec5SDimitry Andric for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
32180b57cec5SDimitry Andric ++parti) {
32190b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32200b57cec5SDimitry Andric ++InsIdx;
32210b57cec5SDimitry Andric }
32220b57cec5SDimitry Andric if (vtparts.size() > 0)
32230b57cec5SDimitry Andric --InsIdx;
32240b57cec5SDimitry Andric continue;
32250b57cec5SDimitry Andric }
32260b57cec5SDimitry Andric if (Ty->isVectorTy()) {
32270b57cec5SDimitry Andric EVT ObjectVT = getValueType(DL, Ty);
32280b57cec5SDimitry Andric unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
32290b57cec5SDimitry Andric for (unsigned parti = 0; parti < NumRegs; ++parti) {
32300b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32310b57cec5SDimitry Andric ++InsIdx;
32320b57cec5SDimitry Andric }
32330b57cec5SDimitry Andric if (NumRegs > 0)
32340b57cec5SDimitry Andric --InsIdx;
32350b57cec5SDimitry Andric continue;
32360b57cec5SDimitry Andric }
32370b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32380b57cec5SDimitry Andric continue;
32390b57cec5SDimitry Andric }
32400b57cec5SDimitry Andric
32410fca6ea1SDimitry Andric // In the following cases, assign a node order of "i+1"
32420b57cec5SDimitry Andric // to newly created nodes. The SDNodes for params have to
32430b57cec5SDimitry Andric // appear in the same order as their order of appearance
32440fca6ea1SDimitry Andric // in the original function. "i+1" holds that order.
3245349cc55cSDimitry Andric if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
32460b57cec5SDimitry Andric bool aggregateIsPacked = false;
32470b57cec5SDimitry Andric if (StructType *STy = dyn_cast<StructType>(Ty))
32480b57cec5SDimitry Andric aggregateIsPacked = STy->isPacked();
32490b57cec5SDimitry Andric
32500b57cec5SDimitry Andric SmallVector<EVT, 16> VTs;
32510b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets;
32520b57cec5SDimitry Andric ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
325306c3fb27SDimitry Andric if (VTs.empty())
325406c3fb27SDimitry Andric report_fatal_error("Empty parameter types are not supported");
325506c3fb27SDimitry Andric
32560fca6ea1SDimitry Andric Align ArgAlign = getFunctionArgumentAlignment(
32570fca6ea1SDimitry Andric F, Ty, i + AttributeList::FirstArgIndex, DL);
32580fca6ea1SDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
32590b57cec5SDimitry Andric
32600fca6ea1SDimitry Andric SDValue Arg = getParamSymbol(DAG, i, PtrVT);
32610b57cec5SDimitry Andric int VecIdx = -1; // Index of the first element of the current vector.
32620b57cec5SDimitry Andric for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
32630b57cec5SDimitry Andric if (VectorInfo[parti] & PVF_FIRST) {
32640b57cec5SDimitry Andric assert(VecIdx == -1 && "Orphaned vector.");
32650b57cec5SDimitry Andric VecIdx = parti;
32660b57cec5SDimitry Andric }
32670b57cec5SDimitry Andric
32680b57cec5SDimitry Andric // That's the last element of this store op.
32690b57cec5SDimitry Andric if (VectorInfo[parti] & PVF_LAST) {
32700b57cec5SDimitry Andric unsigned NumElts = parti - VecIdx + 1;
32710b57cec5SDimitry Andric EVT EltVT = VTs[parti];
32720b57cec5SDimitry Andric // i1 is loaded/stored as i8.
32730b57cec5SDimitry Andric EVT LoadVT = EltVT;
32740b57cec5SDimitry Andric if (EltVT == MVT::i1)
32750b57cec5SDimitry Andric LoadVT = MVT::i8;
32765f757f3fSDimitry Andric else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8)
32770b57cec5SDimitry Andric // getLoad needs a vector type, but it can't handle
327806c3fb27SDimitry Andric // vectors which contain v2f16 or v2bf16 elements. So we must load
32790b57cec5SDimitry Andric // using i32 here and then bitcast back.
32800b57cec5SDimitry Andric LoadVT = MVT::i32;
32810b57cec5SDimitry Andric
32820b57cec5SDimitry Andric EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
32830b57cec5SDimitry Andric SDValue VecAddr =
32840b57cec5SDimitry Andric DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
32850b57cec5SDimitry Andric DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
32860b57cec5SDimitry Andric Value *srcValue = Constant::getNullValue(PointerType::get(
32870b57cec5SDimitry Andric EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
32880fca6ea1SDimitry Andric
32890fca6ea1SDimitry Andric const MaybeAlign PartAlign = [&]() -> MaybeAlign {
32900fca6ea1SDimitry Andric if (aggregateIsPacked)
32910fca6ea1SDimitry Andric return Align(1);
32920fca6ea1SDimitry Andric if (NumElts != 1)
32930fca6ea1SDimitry Andric return std::nullopt;
32940fca6ea1SDimitry Andric Align PartAlign =
32950fca6ea1SDimitry Andric DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext()));
32960fca6ea1SDimitry Andric return commonAlignment(PartAlign, Offsets[parti]);
32970fca6ea1SDimitry Andric }();
3298bdd1243dSDimitry Andric SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr,
32990fca6ea1SDimitry Andric MachinePointerInfo(srcValue), PartAlign,
33000b57cec5SDimitry Andric MachineMemOperand::MODereferenceable |
33010b57cec5SDimitry Andric MachineMemOperand::MOInvariant);
33020b57cec5SDimitry Andric if (P.getNode())
33030fca6ea1SDimitry Andric P.getNode()->setIROrder(i + 1);
33040b57cec5SDimitry Andric for (unsigned j = 0; j < NumElts; ++j) {
33050b57cec5SDimitry Andric SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
33060b57cec5SDimitry Andric DAG.getIntPtrConstant(j, dl));
33070b57cec5SDimitry Andric // We've loaded i1 as an i8 and now must truncate it back to i1
33080b57cec5SDimitry Andric if (EltVT == MVT::i1)
33090b57cec5SDimitry Andric Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
33100b57cec5SDimitry Andric // v2f16 was loaded as an i32. Now we must bitcast it back.
33115f757f3fSDimitry Andric else if (EltVT != LoadVT)
331206c3fb27SDimitry Andric Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt);
3313fcaf7f86SDimitry Andric
3314fcaf7f86SDimitry Andric // If a promoted integer type is used, truncate down to the original
3315fcaf7f86SDimitry Andric MVT PromotedVT;
3316fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
3317fcaf7f86SDimitry Andric Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
3318fcaf7f86SDimitry Andric }
3319fcaf7f86SDimitry Andric
33200b57cec5SDimitry Andric // Extend the element if necessary (e.g. an i8 is loaded
33210b57cec5SDimitry Andric // into an i16 register)
33220b57cec5SDimitry Andric if (Ins[InsIdx].VT.isInteger() &&
3323e8d8bef9SDimitry Andric Ins[InsIdx].VT.getFixedSizeInBits() >
3324e8d8bef9SDimitry Andric LoadVT.getFixedSizeInBits()) {
33250b57cec5SDimitry Andric unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
33260b57cec5SDimitry Andric : ISD::ZERO_EXTEND;
33270b57cec5SDimitry Andric Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
33280b57cec5SDimitry Andric }
33290b57cec5SDimitry Andric InVals.push_back(Elt);
33300b57cec5SDimitry Andric }
33310b57cec5SDimitry Andric
33320b57cec5SDimitry Andric // Reset vector tracking state.
33330b57cec5SDimitry Andric VecIdx = -1;
33340b57cec5SDimitry Andric }
33350b57cec5SDimitry Andric ++InsIdx;
33360b57cec5SDimitry Andric }
33370b57cec5SDimitry Andric if (VTs.size() > 0)
33380b57cec5SDimitry Andric --InsIdx;
33390b57cec5SDimitry Andric continue;
33400b57cec5SDimitry Andric }
33410b57cec5SDimitry Andric
33420b57cec5SDimitry Andric // Param has ByVal attribute
33430b57cec5SDimitry Andric // Return MoveParam(param symbol).
33440b57cec5SDimitry Andric // Ideally, the param symbol can be returned directly,
33450b57cec5SDimitry Andric // but when SDNode builder decides to use it in a CopyToReg(),
33460b57cec5SDimitry Andric // machine instruction fails because TargetExternalSymbol
33470b57cec5SDimitry Andric // (not lowered) is target dependent, and CopyToReg assumes
33480b57cec5SDimitry Andric // the source is lowered.
33490b57cec5SDimitry Andric EVT ObjectVT = getValueType(DL, Ty);
33500b57cec5SDimitry Andric assert(ObjectVT == Ins[InsIdx].VT &&
33510b57cec5SDimitry Andric "Ins type did not match function type");
33520fca6ea1SDimitry Andric SDValue Arg = getParamSymbol(DAG, i, PtrVT);
33530b57cec5SDimitry Andric SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
33540b57cec5SDimitry Andric if (p.getNode())
33550fca6ea1SDimitry Andric p.getNode()->setIROrder(i + 1);
33560b57cec5SDimitry Andric InVals.push_back(p);
33570b57cec5SDimitry Andric }
33580b57cec5SDimitry Andric
33590b57cec5SDimitry Andric if (!OutChains.empty())
33600b57cec5SDimitry Andric DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
33610b57cec5SDimitry Andric
33620b57cec5SDimitry Andric return Chain;
33630b57cec5SDimitry Andric }
33640b57cec5SDimitry Andric
33650fca6ea1SDimitry Andric // Use byte-store when the param adress of the return value is unaligned.
33660fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure.
LowerUnalignedStoreRet(SelectionDAG & DAG,SDValue Chain,uint64_t Offset,EVT ElementType,SDValue RetVal,const SDLoc & dl)33670fca6ea1SDimitry Andric static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain,
33680fca6ea1SDimitry Andric uint64_t Offset, EVT ElementType,
33690fca6ea1SDimitry Andric SDValue RetVal, const SDLoc &dl) {
33700fca6ea1SDimitry Andric // Bit logic only works on integer types
33710fca6ea1SDimitry Andric if (adjustElementType(ElementType))
33720fca6ea1SDimitry Andric RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
33730fca6ea1SDimitry Andric
33740fca6ea1SDimitry Andric // Store each byte
33750fca6ea1SDimitry Andric for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
33760fca6ea1SDimitry Andric // Shift the byte to the last byte position
33770fca6ea1SDimitry Andric SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, RetVal,
33780fca6ea1SDimitry Andric DAG.getConstant(i * 8, dl, MVT::i32));
33790fca6ea1SDimitry Andric SDValue StoreOperands[] = {Chain, DAG.getConstant(Offset + i, dl, MVT::i32),
33800fca6ea1SDimitry Andric ShiftVal};
33810fca6ea1SDimitry Andric // Trunc store only the last byte by using
33820fca6ea1SDimitry Andric // st.param.b8
33830fca6ea1SDimitry Andric // The register type can be larger than b8.
33840fca6ea1SDimitry Andric Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
33850fca6ea1SDimitry Andric DAG.getVTList(MVT::Other), StoreOperands,
33860fca6ea1SDimitry Andric MVT::i8, MachinePointerInfo(), std::nullopt,
33870fca6ea1SDimitry Andric MachineMemOperand::MOStore);
33880fca6ea1SDimitry Andric }
33890fca6ea1SDimitry Andric return Chain;
33900fca6ea1SDimitry Andric }
33910fca6ea1SDimitry Andric
33920b57cec5SDimitry Andric SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const33930b57cec5SDimitry Andric NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
33940b57cec5SDimitry Andric bool isVarArg,
33950b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs,
33960b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals,
33970b57cec5SDimitry Andric const SDLoc &dl, SelectionDAG &DAG) const {
339881ad6265SDimitry Andric const MachineFunction &MF = DAG.getMachineFunction();
339981ad6265SDimitry Andric const Function &F = MF.getFunction();
34000b57cec5SDimitry Andric Type *RetTy = MF.getFunction().getReturnType();
34010b57cec5SDimitry Andric
34020b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20);
34030b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported");
34040b57cec5SDimitry Andric if (!isABI)
34050b57cec5SDimitry Andric return Chain;
34060b57cec5SDimitry Andric
3407fe6060f1SDimitry Andric const DataLayout &DL = DAG.getDataLayout();
3408fcaf7f86SDimitry Andric SmallVector<SDValue, 16> PromotedOutVals;
34090b57cec5SDimitry Andric SmallVector<EVT, 16> VTs;
34100b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets;
34110b57cec5SDimitry Andric ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
34120b57cec5SDimitry Andric assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
34130b57cec5SDimitry Andric
3414fcaf7f86SDimitry Andric for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
3415fcaf7f86SDimitry Andric SDValue PromotedOutVal = OutVals[i];
3416fcaf7f86SDimitry Andric MVT PromotedVT;
3417fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
3418fcaf7f86SDimitry Andric VTs[i] = EVT(PromotedVT);
3419fcaf7f86SDimitry Andric }
3420fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
3421fcaf7f86SDimitry Andric llvm::ISD::NodeType Ext =
3422fcaf7f86SDimitry Andric Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3423fcaf7f86SDimitry Andric PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
3424fcaf7f86SDimitry Andric }
3425fcaf7f86SDimitry Andric PromotedOutVals.push_back(PromotedOutVal);
3426fcaf7f86SDimitry Andric }
3427fcaf7f86SDimitry Andric
34280b57cec5SDimitry Andric auto VectorInfo = VectorizePTXValueVTs(
342981ad6265SDimitry Andric VTs, Offsets,
343081ad6265SDimitry Andric RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
343181ad6265SDimitry Andric : Align(1));
34320b57cec5SDimitry Andric
34330b57cec5SDimitry Andric // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
34340b57cec5SDimitry Andric // 32-bits are sign extended or zero extended, depending on whether
34350b57cec5SDimitry Andric // they are signed or unsigned types.
34360b57cec5SDimitry Andric bool ExtendIntegerRetVal =
34370b57cec5SDimitry Andric RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
34380b57cec5SDimitry Andric
34390b57cec5SDimitry Andric SmallVector<SDValue, 6> StoreOperands;
34400b57cec5SDimitry Andric for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
3441fcaf7f86SDimitry Andric SDValue OutVal = OutVals[i];
3442fcaf7f86SDimitry Andric SDValue RetVal = PromotedOutVals[i];
3443fcaf7f86SDimitry Andric
34440b57cec5SDimitry Andric if (ExtendIntegerRetVal) {
34450b57cec5SDimitry Andric RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
34460b57cec5SDimitry Andric : ISD::ZERO_EXTEND,
34470b57cec5SDimitry Andric dl, MVT::i32, RetVal);
3448fcaf7f86SDimitry Andric } else if (OutVal.getValueSizeInBits() < 16) {
34490b57cec5SDimitry Andric // Use 16-bit registers for small load-stores as it's the
34500b57cec5SDimitry Andric // smallest general purpose register size supported by NVPTX.
34510b57cec5SDimitry Andric RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
34520b57cec5SDimitry Andric }
34530b57cec5SDimitry Andric
34540fca6ea1SDimitry Andric // If we have a PVF_SCALAR entry, it may not even be sufficiently aligned
34550fca6ea1SDimitry Andric // for a scalar store. In such cases, fall back to byte stores.
34560fca6ea1SDimitry Andric if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) {
34570fca6ea1SDimitry Andric EVT ElementType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
34580fca6ea1SDimitry Andric Align ElementTypeAlign =
34590fca6ea1SDimitry Andric DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext()));
34600fca6ea1SDimitry Andric Align ElementAlign =
34610fca6ea1SDimitry Andric commonAlignment(DL.getABITypeAlign(RetTy), Offsets[i]);
34620fca6ea1SDimitry Andric if (ElementAlign < ElementTypeAlign) {
34630fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Orphaned operand list.");
34640fca6ea1SDimitry Andric Chain = LowerUnalignedStoreRet(DAG, Chain, Offsets[i], ElementType,
34650fca6ea1SDimitry Andric RetVal, dl);
34660fca6ea1SDimitry Andric
34670fca6ea1SDimitry Andric // The call to LowerUnalignedStoreRet inserted the necessary SDAG nodes
34680fca6ea1SDimitry Andric // into the graph, so just move on to the next element.
34690fca6ea1SDimitry Andric continue;
34700fca6ea1SDimitry Andric }
34710fca6ea1SDimitry Andric }
34720fca6ea1SDimitry Andric
34730fca6ea1SDimitry Andric // New load/store. Record chain and offset operands.
34740fca6ea1SDimitry Andric if (VectorInfo[i] & PVF_FIRST) {
34750fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Orphaned operand list.");
34760fca6ea1SDimitry Andric StoreOperands.push_back(Chain);
34770fca6ea1SDimitry Andric StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
34780fca6ea1SDimitry Andric }
34790fca6ea1SDimitry Andric
34800b57cec5SDimitry Andric // Record the value to return.
34810b57cec5SDimitry Andric StoreOperands.push_back(RetVal);
34820b57cec5SDimitry Andric
34830b57cec5SDimitry Andric // That's the last element of this store op.
34840b57cec5SDimitry Andric if (VectorInfo[i] & PVF_LAST) {
34850b57cec5SDimitry Andric NVPTXISD::NodeType Op;
34860b57cec5SDimitry Andric unsigned NumElts = StoreOperands.size() - 2;
34870b57cec5SDimitry Andric switch (NumElts) {
34880b57cec5SDimitry Andric case 1:
34890b57cec5SDimitry Andric Op = NVPTXISD::StoreRetval;
34900b57cec5SDimitry Andric break;
34910b57cec5SDimitry Andric case 2:
34920b57cec5SDimitry Andric Op = NVPTXISD::StoreRetvalV2;
34930b57cec5SDimitry Andric break;
34940b57cec5SDimitry Andric case 4:
34950b57cec5SDimitry Andric Op = NVPTXISD::StoreRetvalV4;
34960b57cec5SDimitry Andric break;
34970b57cec5SDimitry Andric default:
34980b57cec5SDimitry Andric llvm_unreachable("Invalid vector info.");
34990b57cec5SDimitry Andric }
35000b57cec5SDimitry Andric
35010b57cec5SDimitry Andric // Adjust type of load/store op if we've extended the scalar
35020b57cec5SDimitry Andric // return value.
35030b57cec5SDimitry Andric EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
35045ffd83dbSDimitry Andric Chain = DAG.getMemIntrinsicNode(
35055ffd83dbSDimitry Andric Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
35065ffd83dbSDimitry Andric MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
35070b57cec5SDimitry Andric // Cleanup vector state.
35080b57cec5SDimitry Andric StoreOperands.clear();
35090b57cec5SDimitry Andric }
35100b57cec5SDimitry Andric }
35110b57cec5SDimitry Andric
351206c3fb27SDimitry Andric return DAG.getNode(NVPTXISD::RET_GLUE, dl, MVT::Other, Chain);
35130b57cec5SDimitry Andric }
35140b57cec5SDimitry Andric
LowerAsmOperandForConstraint(SDValue Op,StringRef Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const35150b57cec5SDimitry Andric void NVPTXTargetLowering::LowerAsmOperandForConstraint(
35165f757f3fSDimitry Andric SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
35170b57cec5SDimitry Andric SelectionDAG &DAG) const {
35185f757f3fSDimitry Andric if (Constraint.size() > 1)
35190b57cec5SDimitry Andric return;
35200b57cec5SDimitry Andric TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
35210b57cec5SDimitry Andric }
35220b57cec5SDimitry Andric
getOpcForTextureInstr(unsigned Intrinsic)35230b57cec5SDimitry Andric static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
35240b57cec5SDimitry Andric switch (Intrinsic) {
35250b57cec5SDimitry Andric default:
35260b57cec5SDimitry Andric return 0;
35270b57cec5SDimitry Andric
35280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4f32_s32:
35290b57cec5SDimitry Andric return NVPTXISD::Tex1DFloatS32;
35300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4f32_f32:
35310b57cec5SDimitry Andric return NVPTXISD::Tex1DFloatFloat;
35320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
35330b57cec5SDimitry Andric return NVPTXISD::Tex1DFloatFloatLevel;
35340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
35350b57cec5SDimitry Andric return NVPTXISD::Tex1DFloatFloatGrad;
35360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_s32:
35370b57cec5SDimitry Andric return NVPTXISD::Tex1DS32S32;
35380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_f32:
35390b57cec5SDimitry Andric return NVPTXISD::Tex1DS32Float;
35400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
35410b57cec5SDimitry Andric return NVPTXISD::Tex1DS32FloatLevel;
35420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
35430b57cec5SDimitry Andric return NVPTXISD::Tex1DS32FloatGrad;
35440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_s32:
35450b57cec5SDimitry Andric return NVPTXISD::Tex1DU32S32;
35460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_f32:
35470b57cec5SDimitry Andric return NVPTXISD::Tex1DU32Float;
35480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
35490b57cec5SDimitry Andric return NVPTXISD::Tex1DU32FloatLevel;
35500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
35510b57cec5SDimitry Andric return NVPTXISD::Tex1DU32FloatGrad;
35520b57cec5SDimitry Andric
35530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
35540b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatS32;
35550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
35560b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloat;
35570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
35580b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloatLevel;
35590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
35600b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloatGrad;
35610b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
35620b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32S32;
35630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
35640b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32Float;
35650b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
35660b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32FloatLevel;
35670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
35680b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32FloatGrad;
35690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
35700b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32S32;
35710b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
35720b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32Float;
35730b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
35740b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32FloatLevel;
35750b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
35760b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32FloatGrad;
35770b57cec5SDimitry Andric
35780b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_s32:
35790b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatS32;
35800b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_f32:
35810b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloat;
35820b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
35830b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloatLevel;
35840b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
35850b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloatGrad;
35860b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_s32:
35870b57cec5SDimitry Andric return NVPTXISD::Tex2DS32S32;
35880b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_f32:
35890b57cec5SDimitry Andric return NVPTXISD::Tex2DS32Float;
35900b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
35910b57cec5SDimitry Andric return NVPTXISD::Tex2DS32FloatLevel;
35920b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
35930b57cec5SDimitry Andric return NVPTXISD::Tex2DS32FloatGrad;
35940b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_s32:
35950b57cec5SDimitry Andric return NVPTXISD::Tex2DU32S32;
35960b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_f32:
35970b57cec5SDimitry Andric return NVPTXISD::Tex2DU32Float;
35980b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
35990b57cec5SDimitry Andric return NVPTXISD::Tex2DU32FloatLevel;
36000b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
36010b57cec5SDimitry Andric return NVPTXISD::Tex2DU32FloatGrad;
36020b57cec5SDimitry Andric
36030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
36040b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatS32;
36050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
36060b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloat;
36070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
36080b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloatLevel;
36090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
36100b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloatGrad;
36110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
36120b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32S32;
36130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
36140b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32Float;
36150b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
36160b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32FloatLevel;
36170b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
36180b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32FloatGrad;
36190b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
36200b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32S32;
36210b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
36220b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32Float;
36230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
36240b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32FloatLevel;
36250b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
36260b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32FloatGrad;
36270b57cec5SDimitry Andric
36280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_s32:
36290b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatS32;
36300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_f32:
36310b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloat;
36320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
36330b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloatLevel;
36340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
36350b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloatGrad;
36360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_s32:
36370b57cec5SDimitry Andric return NVPTXISD::Tex3DS32S32;
36380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_f32:
36390b57cec5SDimitry Andric return NVPTXISD::Tex3DS32Float;
36400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
36410b57cec5SDimitry Andric return NVPTXISD::Tex3DS32FloatLevel;
36420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
36430b57cec5SDimitry Andric return NVPTXISD::Tex3DS32FloatGrad;
36440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_s32:
36450b57cec5SDimitry Andric return NVPTXISD::Tex3DU32S32;
36460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_f32:
36470b57cec5SDimitry Andric return NVPTXISD::Tex3DU32Float;
36480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
36490b57cec5SDimitry Andric return NVPTXISD::Tex3DU32FloatLevel;
36500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
36510b57cec5SDimitry Andric return NVPTXISD::Tex3DU32FloatGrad;
36520b57cec5SDimitry Andric
36530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4f32_f32:
36540b57cec5SDimitry Andric return NVPTXISD::TexCubeFloatFloat;
36550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
36560b57cec5SDimitry Andric return NVPTXISD::TexCubeFloatFloatLevel;
36570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4s32_f32:
36580b57cec5SDimitry Andric return NVPTXISD::TexCubeS32Float;
36590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
36600b57cec5SDimitry Andric return NVPTXISD::TexCubeS32FloatLevel;
36610b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4u32_f32:
36620b57cec5SDimitry Andric return NVPTXISD::TexCubeU32Float;
36630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
36640b57cec5SDimitry Andric return NVPTXISD::TexCubeU32FloatLevel;
36650b57cec5SDimitry Andric
36660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
36670b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayFloatFloat;
36680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
36690b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayFloatFloatLevel;
36700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
36710b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayS32Float;
36720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
36730b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayS32FloatLevel;
36740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
36750b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayU32Float;
36760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
36770b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayU32FloatLevel;
36780b57cec5SDimitry Andric
36790b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
36800b57cec5SDimitry Andric return NVPTXISD::Tld4R2DFloatFloat;
36810b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
36820b57cec5SDimitry Andric return NVPTXISD::Tld4G2DFloatFloat;
36830b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
36840b57cec5SDimitry Andric return NVPTXISD::Tld4B2DFloatFloat;
36850b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
36860b57cec5SDimitry Andric return NVPTXISD::Tld4A2DFloatFloat;
36870b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
36880b57cec5SDimitry Andric return NVPTXISD::Tld4R2DS64Float;
36890b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
36900b57cec5SDimitry Andric return NVPTXISD::Tld4G2DS64Float;
36910b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
36920b57cec5SDimitry Andric return NVPTXISD::Tld4B2DS64Float;
36930b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
36940b57cec5SDimitry Andric return NVPTXISD::Tld4A2DS64Float;
36950b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
36960b57cec5SDimitry Andric return NVPTXISD::Tld4R2DU64Float;
36970b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
36980b57cec5SDimitry Andric return NVPTXISD::Tld4G2DU64Float;
36990b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
37000b57cec5SDimitry Andric return NVPTXISD::Tld4B2DU64Float;
37010b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
37020b57cec5SDimitry Andric return NVPTXISD::Tld4A2DU64Float;
37030b57cec5SDimitry Andric
37040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
37050b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatS32;
37060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
37070b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloat;
37080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
37090b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloatLevel;
37100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
37110b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloatGrad;
37120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
37130b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32S32;
37140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
37150b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32Float;
37160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
37170b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32FloatLevel;
37180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
37190b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32FloatGrad;
37200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
37210b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32S32;
37220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
37230b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32Float;
37240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
37250b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32FloatLevel;
37260b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
37270b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32FloatGrad;
37280b57cec5SDimitry Andric
37290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
37300b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatS32;
37310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
37320b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloat;
37330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
37340b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
37350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
37360b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
37370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
37380b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32S32;
37390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
37400b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32Float;
37410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
37420b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32FloatLevel;
37430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
37440b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32FloatGrad;
37450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
37460b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32S32;
37470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
37480b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32Float;
37490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
37500b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32FloatLevel;
37510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
37520b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32FloatGrad;
37530b57cec5SDimitry Andric
37540b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
37550b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatS32;
37560b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
37570b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloat;
37580b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
37590b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloatLevel;
37600b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
37610b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloatGrad;
37620b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
37630b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32S32;
37640b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
37650b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32Float;
37660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
37670b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32FloatLevel;
37680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
37690b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32FloatGrad;
37700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
37710b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32S32;
37720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
37730b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32Float;
37740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
37750b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32FloatLevel;
37760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
37770b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32FloatGrad;
37780b57cec5SDimitry Andric
37790b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
37800b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatS32;
37810b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
37820b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloat;
37830b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
37840b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
37850b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
37860b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
37870b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
37880b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32S32;
37890b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
37900b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32Float;
37910b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
37920b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32FloatLevel;
37930b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
37940b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32FloatGrad;
37950b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
37960b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32S32;
37970b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
37980b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32Float;
37990b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
38000b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32FloatLevel;
38010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
38020b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32FloatGrad;
38030b57cec5SDimitry Andric
38040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
38050b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatS32;
38060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
38070b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloat;
38080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
38090b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloatLevel;
38100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
38110b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloatGrad;
38120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
38130b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32S32;
38140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
38150b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32Float;
38160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
38170b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32FloatLevel;
38180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
38190b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32FloatGrad;
38200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
38210b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32S32;
38220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
38230b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32Float;
38240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
38250b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32FloatLevel;
38260b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
38270b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32FloatGrad;
38280b57cec5SDimitry Andric
38290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
38300b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloat;
38310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
38320b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
38330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
38340b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeS32Float;
38350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
38360b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeS32FloatLevel;
38370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
38380b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeU32Float;
38390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
38400b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeU32FloatLevel;
38410b57cec5SDimitry Andric
38420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
38430b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
38440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
38450b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
38460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
38470b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32Float;
38480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
38490b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
38500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
38510b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32Float;
38520b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
38530b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
38540b57cec5SDimitry Andric
38557a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
38567a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
38577a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
38587a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeS32FloatGrad;
38597a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
38607a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeU32FloatGrad;
38617a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
38627a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
38637a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
38647a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
38657a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
38667a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
38677a6dacacSDimitry Andric
38680b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
38690b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DFloatFloat;
38700b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
38710b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DFloatFloat;
38720b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
38730b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DFloatFloat;
38740b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
38750b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DFloatFloat;
38760b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
38770b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DS64Float;
38780b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
38790b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DS64Float;
38800b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
38810b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DS64Float;
38820b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
38830b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DS64Float;
38840b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
38850b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DU64Float;
38860b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
38870b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DU64Float;
38880b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
38890b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DU64Float;
38900b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
38910b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DU64Float;
38920b57cec5SDimitry Andric }
38930b57cec5SDimitry Andric }
38940b57cec5SDimitry Andric
getOpcForSurfaceInstr(unsigned Intrinsic)38950b57cec5SDimitry Andric static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
38960b57cec5SDimitry Andric switch (Intrinsic) {
38970b57cec5SDimitry Andric default:
38980b57cec5SDimitry Andric return 0;
38990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_clamp:
39000b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Clamp;
39010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_clamp:
39020b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Clamp;
39030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_clamp:
39040b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Clamp;
39050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_clamp:
39060b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Clamp;
39070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_clamp:
39080b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Clamp;
39090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_clamp:
39100b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Clamp;
39110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_clamp:
39120b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Clamp;
39130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_clamp:
39140b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Clamp;
39150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_clamp:
39160b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Clamp;
39170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_clamp:
39180b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Clamp;
39190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_clamp:
39200b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Clamp;
39210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_clamp:
39220b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Clamp;
39230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_clamp:
39240b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Clamp;
39250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_clamp:
39260b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Clamp;
39270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_clamp:
39280b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Clamp;
39290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
39300b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Clamp;
39310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
39320b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Clamp;
39330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
39340b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Clamp;
39350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
39360b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Clamp;
39370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
39380b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Clamp;
39390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
39400b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Clamp;
39410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
39420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Clamp;
39430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_clamp:
39440b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Clamp;
39450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_clamp:
39460b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Clamp;
39470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_clamp:
39480b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Clamp;
39490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_clamp:
39500b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Clamp;
39510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_clamp:
39520b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Clamp;
39530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_clamp:
39540b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Clamp;
39550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_clamp:
39560b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Clamp;
39570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_clamp:
39580b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Clamp;
39590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_clamp:
39600b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Clamp;
39610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_clamp:
39620b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Clamp;
39630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_clamp:
39640b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Clamp;
39650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_clamp:
39660b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Clamp;
39670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_clamp:
39680b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Clamp;
39690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_clamp:
39700b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Clamp;
39710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_clamp:
39720b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Clamp;
39730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
39740b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Clamp;
39750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
39760b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Clamp;
39770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
39780b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Clamp;
39790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
39800b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Clamp;
39810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
39820b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Clamp;
39830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
39840b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Clamp;
39850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
39860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Clamp;
39870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_clamp:
39880b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Clamp;
39890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_clamp:
39900b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Clamp;
39910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_clamp:
39920b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Clamp;
39930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_clamp:
39940b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Clamp;
39950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_clamp:
39960b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Clamp;
39970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_clamp:
39980b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I16Clamp;
39990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_clamp:
40000b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Clamp;
40010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_clamp:
40020b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Clamp;
40030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_clamp:
40040b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Clamp;
40050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_clamp:
40060b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Clamp;
40070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_clamp:
40080b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Clamp;
40090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_trap:
40100b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Trap;
40110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_trap:
40120b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Trap;
40130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_trap:
40140b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Trap;
40150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_trap:
40160b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Trap;
40170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_trap:
40180b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Trap;
40190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_trap:
40200b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Trap;
40210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_trap:
40220b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Trap;
40230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_trap:
40240b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Trap;
40250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_trap:
40260b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Trap;
40270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_trap:
40280b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Trap;
40290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_trap:
40300b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Trap;
40310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_trap:
40320b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Trap;
40330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_trap:
40340b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Trap;
40350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_trap:
40360b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Trap;
40370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_trap:
40380b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Trap;
40390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
40400b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Trap;
40410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
40420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Trap;
40430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
40440b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Trap;
40450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
40460b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Trap;
40470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
40480b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Trap;
40490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
40500b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Trap;
40510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
40520b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Trap;
40530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_trap:
40540b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Trap;
40550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_trap:
40560b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Trap;
40570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_trap:
40580b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Trap;
40590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_trap:
40600b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Trap;
40610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_trap:
40620b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Trap;
40630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_trap:
40640b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Trap;
40650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_trap:
40660b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Trap;
40670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_trap:
40680b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Trap;
40690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_trap:
40700b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Trap;
40710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_trap:
40720b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Trap;
40730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_trap:
40740b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Trap;
40750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_trap:
40760b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Trap;
40770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_trap:
40780b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Trap;
40790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_trap:
40800b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Trap;
40810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_trap:
40820b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Trap;
40830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
40840b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Trap;
40850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
40860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Trap;
40870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
40880b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Trap;
40890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
40900b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Trap;
40910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
40920b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Trap;
40930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
40940b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Trap;
40950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
40960b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Trap;
40970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_trap:
40980b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Trap;
40990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_trap:
41000b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Trap;
41010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_trap:
41020b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Trap;
41030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_trap:
41040b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Trap;
41050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_trap:
41060b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Trap;
41070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_trap:
41080b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I16Trap;
41090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_trap:
41100b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Trap;
41110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_trap:
41120b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Trap;
41130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_trap:
41140b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Trap;
41150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_trap:
41160b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Trap;
41170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_trap:
41180b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Trap;
41190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_zero:
41200b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Zero;
41210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_zero:
41220b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Zero;
41230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_zero:
41240b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Zero;
41250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_zero:
41260b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Zero;
41270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_zero:
41280b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Zero;
41290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_zero:
41300b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Zero;
41310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_zero:
41320b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Zero;
41330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_zero:
41340b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Zero;
41350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_zero:
41360b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Zero;
41370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_zero:
41380b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Zero;
41390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_zero:
41400b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Zero;
41410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_zero:
41420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Zero;
41430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_zero:
41440b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Zero;
41450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_zero:
41460b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Zero;
41470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_zero:
41480b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Zero;
41490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
41500b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Zero;
41510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
41520b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Zero;
41530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
41540b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Zero;
41550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
41560b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Zero;
41570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
41580b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Zero;
41590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
41600b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Zero;
41610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
41620b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Zero;
41630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_zero:
41640b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Zero;
41650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_zero:
41660b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Zero;
41670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_zero:
41680b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Zero;
41690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_zero:
41700b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Zero;
41710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_zero:
41720b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Zero;
41730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_zero:
41740b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Zero;
41750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_zero:
41760b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Zero;
41770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_zero:
41780b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Zero;
41790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_zero:
41800b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Zero;
41810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_zero:
41820b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Zero;
41830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_zero:
41840b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Zero;
41850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_zero:
41860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Zero;
41870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_zero:
41880b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Zero;
41890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_zero:
41900b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Zero;
41910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_zero:
41920b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Zero;
41930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
41940b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Zero;
41950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
41960b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Zero;
41970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
41980b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Zero;
41990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
42000b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Zero;
42010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
42020b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Zero;
42030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
42040b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Zero;
42050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
42060b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Zero;
42070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_zero:
42080b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Zero;
42090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_zero:
42100b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Zero;
42110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_zero:
42120b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Zero;
42130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_zero:
42140b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Zero;
42150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_zero:
42160b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Zero;
42170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_zero:
42180b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I16Zero;
42190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_zero:
42200b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Zero;
42210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_zero:
42220b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Zero;
42230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_zero:
42240b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Zero;
42250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_zero:
42260b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Zero;
42270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_zero:
42280b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Zero;
42290b57cec5SDimitry Andric }
42300b57cec5SDimitry Andric }
42310b57cec5SDimitry Andric
42320b57cec5SDimitry Andric // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
42330b57cec5SDimitry Andric // TgtMemIntrinsic because we need information that is only available in
42340b57cec5SDimitry Andric // the "Value" type of the destination pointer.
42350b57cec5SDimitry Andric // In particular, we need the destination pointer's address space
42360b57cec5SDimitry Andric // information.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const42370b57cec5SDimitry Andric bool NVPTXTargetLowering::getTgtMemIntrinsic(
42380b57cec5SDimitry Andric IntrinsicInfo &Info, const CallInst &I,
42390b57cec5SDimitry Andric MachineFunction &MF, unsigned Intrinsic) const {
42400b57cec5SDimitry Andric switch (Intrinsic) {
42410b57cec5SDimitry Andric default:
42420b57cec5SDimitry Andric return false;
42430b57cec5SDimitry Andric case Intrinsic::nvvm_match_all_sync_i32p:
42440b57cec5SDimitry Andric case Intrinsic::nvvm_match_all_sync_i64p:
42450b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
42460b57cec5SDimitry Andric // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
42470b57cec5SDimitry Andric // in order to model data exchange with other threads, but perform no real
42480b57cec5SDimitry Andric // memory accesses.
42490b57cec5SDimitry Andric Info.memVT = MVT::i1;
42500b57cec5SDimitry Andric
42510b57cec5SDimitry Andric // Our result depends on both our and other thread's arguments.
42520b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
42530b57cec5SDimitry Andric return true;
42540b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
42550b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
42560b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
42570b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
42580b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
42590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
42600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
42610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
42620b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
42630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
42640b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
42650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
42660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
42670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
42680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
42690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
42700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
42710b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
42720b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
42730b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
42740b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
42750b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
42760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
42770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
42780b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
42790b57cec5SDimitry Andric Info.memVT = MVT::v8f16;
42800b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
42810b57cec5SDimitry Andric Info.offset = 0;
42820b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
42838bcb0991SDimitry Andric Info.align = Align(16);
42840b57cec5SDimitry Andric return true;
42850b57cec5SDimitry Andric }
42860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
42870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
42880b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
42890b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
42900b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
42910b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
42920b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
42930b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
4294fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
4295fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
4296fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
4297fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
42980b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
42990b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
43000b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
43010b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
43020b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
43030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
43040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
4305fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
4306fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
4307fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
4308fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
4309fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
43100b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
43110b57cec5SDimitry Andric Info.memVT = MVT::v2i32;
43120b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
43130b57cec5SDimitry Andric Info.offset = 0;
43140b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
43158bcb0991SDimitry Andric Info.align = Align(8);
43160b57cec5SDimitry Andric return true;
43170b57cec5SDimitry Andric }
43180b57cec5SDimitry Andric
43190b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
43200b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
43210b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
43220b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
43230b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
43240b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
43250b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
43260b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
4327fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
4328fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
4329fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
4330fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
4331fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
4332fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
4333fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
4334fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
43350b57cec5SDimitry Andric
43360b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
43370b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
43380b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
43390b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
43400b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
43410b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
43420b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
4343fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
4344fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
4345fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
4346fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
4347fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
4348fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
4349fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
4350fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
4351349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
4352349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
4353349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
43540b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
43550b57cec5SDimitry Andric Info.memVT = MVT::v4i32;
43560b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
43570b57cec5SDimitry Andric Info.offset = 0;
43580b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
43598bcb0991SDimitry Andric Info.align = Align(16);
43600b57cec5SDimitry Andric return true;
43610b57cec5SDimitry Andric }
43620b57cec5SDimitry Andric
43630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
43640b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
43650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
43660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
43670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
43680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
43690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
43700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
43710b57cec5SDimitry Andric
43720b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
43730b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
43740b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
43750b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
43760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
43770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
43780b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
43790b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
43800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
43810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
43820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
43830b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
43840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
43850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
43860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
43870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
43880b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
43890b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
43900b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
4391349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
4392349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
4393349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
43940b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
43950b57cec5SDimitry Andric Info.memVT = MVT::i32;
43960b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
43970b57cec5SDimitry Andric Info.offset = 0;
43980b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
43998bcb0991SDimitry Andric Info.align = Align(4);
44000b57cec5SDimitry Andric return true;
44010b57cec5SDimitry Andric }
44020b57cec5SDimitry Andric
44030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
44040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
44050b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
44060b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
44070b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
44080b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
44090b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
44100b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
44110b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
44120b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
44130b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
44140b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
44150b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
44160b57cec5SDimitry Andric Info.memVT = MVT::v4f16;
44170b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
44180b57cec5SDimitry Andric Info.offset = 0;
44190b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
44208bcb0991SDimitry Andric Info.align = Align(16);
44210b57cec5SDimitry Andric return true;
44220b57cec5SDimitry Andric }
44230b57cec5SDimitry Andric
44240b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
44250b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
44260b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
44270b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
44280b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
44290b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
44300b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
44310b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
44320b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
44330b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
44340b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
4435fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
4436fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
4437fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
4438fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
4439fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
44400b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
44410b57cec5SDimitry Andric Info.memVT = MVT::v8f32;
44420b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
44430b57cec5SDimitry Andric Info.offset = 0;
44440b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
44458bcb0991SDimitry Andric Info.align = Align(16);
44460b57cec5SDimitry Andric return true;
44470b57cec5SDimitry Andric }
44480b57cec5SDimitry Andric
4449fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
4450fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
4451fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
4452fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
4453fe6060f1SDimitry Andric
4454fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
4455fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
4456fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
4457fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
4458fe6060f1SDimitry Andric
44590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
44600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
44610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
44620b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
44630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
44640b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
44650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
44660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
44670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
44680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
44690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
44700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
44710b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
44720b57cec5SDimitry Andric Info.memVT = MVT::v8i32;
44730b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
44740b57cec5SDimitry Andric Info.offset = 0;
44750b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
44768bcb0991SDimitry Andric Info.align = Align(16);
44770b57cec5SDimitry Andric return true;
44780b57cec5SDimitry Andric }
44790b57cec5SDimitry Andric
44800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
44810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
44820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
44830b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
44840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
44850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
44860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
4487349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
4488349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
4489349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
44900b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
44910b57cec5SDimitry Andric Info.memVT = MVT::v2i32;
44920b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
44930b57cec5SDimitry Andric Info.offset = 0;
44940b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
44958bcb0991SDimitry Andric Info.align = Align(8);
44960b57cec5SDimitry Andric return true;
44970b57cec5SDimitry Andric }
44980b57cec5SDimitry Andric
4499fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
4500fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
4501fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
4502fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
4503fe6060f1SDimitry Andric
4504fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
4505fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
4506fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
4507fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
4508fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
4509fe6060f1SDimitry Andric Info.memVT = MVT::f64;
4510fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0);
4511fe6060f1SDimitry Andric Info.offset = 0;
4512fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
4513fe6060f1SDimitry Andric Info.align = Align(8);
4514fe6060f1SDimitry Andric return true;
4515fe6060f1SDimitry Andric }
4516fe6060f1SDimitry Andric
4517fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
4518fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
4519fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
4520fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
4521fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
4522fe6060f1SDimitry Andric Info.memVT = MVT::v2f64;
4523fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0);
4524fe6060f1SDimitry Andric Info.offset = 0;
4525fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
4526fe6060f1SDimitry Andric Info.align = Align(16);
4527fe6060f1SDimitry Andric return true;
4528fe6060f1SDimitry Andric }
4529fe6060f1SDimitry Andric
45300b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
45310b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
45320b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
45330b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
45340b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
45350b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
45360b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
45370b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
45380b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
45390b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
45400b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
45410b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
45420b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID;
45430b57cec5SDimitry Andric Info.memVT = MVT::v4f16;
45440b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
45450b57cec5SDimitry Andric Info.offset = 0;
45460b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore;
45478bcb0991SDimitry Andric Info.align = Align(16);
45480b57cec5SDimitry Andric return true;
45490b57cec5SDimitry Andric }
45500b57cec5SDimitry Andric
45510b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
45520b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
45530b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
45540b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
45550b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
45560b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
45570b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
45580b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
45590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
45600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
45610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
4562fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
4563fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
4564fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
4565fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
4566fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
45670b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID;
45680b57cec5SDimitry Andric Info.memVT = MVT::v8f32;
45690b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
45700b57cec5SDimitry Andric Info.offset = 0;
45710b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore;
45728bcb0991SDimitry Andric Info.align = Align(16);
45730b57cec5SDimitry Andric return true;
45740b57cec5SDimitry Andric }
45750b57cec5SDimitry Andric
45760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
45770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
45780b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
45790b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
45800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
45810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
45820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
45830b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
45840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
45850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
45860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
45870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
45880b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID;
45890b57cec5SDimitry Andric Info.memVT = MVT::v8i32;
45900b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
45910b57cec5SDimitry Andric Info.offset = 0;
45920b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore;
45938bcb0991SDimitry Andric Info.align = Align(16);
45940b57cec5SDimitry Andric return true;
45950b57cec5SDimitry Andric }
45960b57cec5SDimitry Andric
45970b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
45980b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
45990b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
46000b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
46010b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
46020b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
46030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
46040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
46050b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID;
46060b57cec5SDimitry Andric Info.memVT = MVT::v2i32;
46070b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
46080b57cec5SDimitry Andric Info.offset = 0;
46090b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore;
46108bcb0991SDimitry Andric Info.align = Align(8);
46110b57cec5SDimitry Andric return true;
46120b57cec5SDimitry Andric }
46130b57cec5SDimitry Andric
4614fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
4615fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
4616fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
4617fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
4618fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_VOID;
4619fe6060f1SDimitry Andric Info.memVT = MVT::v2f64;
4620fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0);
4621fe6060f1SDimitry Andric Info.offset = 0;
4622fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOStore;
4623fe6060f1SDimitry Andric Info.align = Align(16);
4624fe6060f1SDimitry Andric return true;
4625fe6060f1SDimitry Andric }
4626fe6060f1SDimitry Andric
46270b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_load_inc_32:
46280b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_load_dec_32:
46290b57cec5SDimitry Andric
46300b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_f_cta:
46310b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_f_sys:
46320b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_i_cta:
46330b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_i_sys:
46340b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_and_gen_i_cta:
46350b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_and_gen_i_sys:
46360b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_cas_gen_i_cta:
46370b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_cas_gen_i_sys:
46380b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_dec_gen_i_cta:
46390b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_dec_gen_i_sys:
46400b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_inc_gen_i_cta:
46410b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_inc_gen_i_sys:
46420b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_max_gen_i_cta:
46430b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_max_gen_i_sys:
46440b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_min_gen_i_cta:
46450b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_min_gen_i_sys:
46460b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_or_gen_i_cta:
46470b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_or_gen_i_sys:
46480b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_exch_gen_i_cta:
46490b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_exch_gen_i_sys:
46500b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_xor_gen_i_cta:
46510b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
46520fca6ea1SDimitry Andric auto &DL = I.getDataLayout();
46530b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
46540b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType());
46550b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
46560b57cec5SDimitry Andric Info.offset = 0;
46570b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
46588bcb0991SDimitry Andric Info.align.reset();
46590b57cec5SDimitry Andric return true;
46600b57cec5SDimitry Andric }
46610b57cec5SDimitry Andric
46620b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i:
46630b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f:
46640b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: {
46650fca6ea1SDimitry Andric auto &DL = I.getDataLayout();
46660b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
46670b57cec5SDimitry Andric if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
46680b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType());
46690b57cec5SDimitry Andric else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
46700b57cec5SDimitry Andric Info.memVT = getPointerTy(DL);
46710b57cec5SDimitry Andric else
46720b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType());
46730b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
46740b57cec5SDimitry Andric Info.offset = 0;
46750b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
46765ffd83dbSDimitry Andric Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
46770b57cec5SDimitry Andric
46780b57cec5SDimitry Andric return true;
46790b57cec5SDimitry Andric }
46800b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i:
46810b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f:
46820b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p: {
46830fca6ea1SDimitry Andric auto &DL = I.getDataLayout();
46840b57cec5SDimitry Andric
46850b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN;
46860b57cec5SDimitry Andric if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
46870b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType());
46880b57cec5SDimitry Andric else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
46890b57cec5SDimitry Andric Info.memVT = getPointerTy(DL);
46900b57cec5SDimitry Andric else
46910b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType());
46920b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0);
46930b57cec5SDimitry Andric Info.offset = 0;
46940b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
46955ffd83dbSDimitry Andric Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
46960b57cec5SDimitry Andric
46970b57cec5SDimitry Andric return true;
46980b57cec5SDimitry Andric }
46990b57cec5SDimitry Andric
47000b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4f32_s32:
47010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4f32_f32:
47020b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
47030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
47040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
47050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
47060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
47070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
47080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_s32:
47090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_f32:
47100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
47110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
47120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
47130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
47140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
47150b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
47160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_s32:
47170b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_f32:
47180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
47190b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
47200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4f32_f32:
47210b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
47220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
47230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
47240b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
47250b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
47260b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
47270b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
47280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
47290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
47300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
47310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
47320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
47330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
47340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
47350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
47360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
47370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
47380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
47390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
47400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
47410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
47420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
47430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
47440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
47450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
47460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
47470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
47480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
47490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
47500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
47510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
47527a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
47537a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
47540b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
47550b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
47560b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
47570b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
47580b57cec5SDimitry Andric Info.opc = getOpcForTextureInstr(Intrinsic);
47590b57cec5SDimitry Andric Info.memVT = MVT::v4f32;
47600b57cec5SDimitry Andric Info.ptrVal = nullptr;
47610b57cec5SDimitry Andric Info.offset = 0;
47620b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
47638bcb0991SDimitry Andric Info.align = Align(16);
47640b57cec5SDimitry Andric return true;
47650b57cec5SDimitry Andric
47660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_s32:
47670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_f32:
47680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
47690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
47700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
47710b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
47720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
47730b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
47740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_s32:
47750b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_f32:
47760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
47770b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
47780b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
47790b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
47800b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
47810b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
47820b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_s32:
47830b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_f32:
47840b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
47850b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
47860b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4s32_f32:
47870b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
47880b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
47890b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
47900b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4u32_f32:
47910b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
47920b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
47930b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
47940b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_s32:
47950b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_f32:
47960b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
47970b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
47980b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
47990b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
48000b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
48010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
48020b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_s32:
48030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_f32:
48040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
48050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
48060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
48070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
48080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
48090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
48100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_s32:
48110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_f32:
48120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
48130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
48140b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
48150b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
48160b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
48170b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
48180b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
48190b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
48200b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
48210b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
48220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
48230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
48240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
48250b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
48260b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
48270b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
48280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
48290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
48300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
48310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
48320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
48330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
48340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
48350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
48360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
48370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
48380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
48390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
48400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
48410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
48420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
48430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
48440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
48450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
48460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
48470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
48480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
48490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
48500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
48510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
48520b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
48530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
48540b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
48550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
48560b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
48570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
48580b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
48590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
48600b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
48610b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
48620b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
48630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
48640b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
48650b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
48660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
48670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
48680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
48690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
48707a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
48717a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
48727a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
48737a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
48740b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
48750b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
48760b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
48770b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
48780b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
48790b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
48800b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
48810b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
48820b57cec5SDimitry Andric Info.opc = getOpcForTextureInstr(Intrinsic);
48830b57cec5SDimitry Andric Info.memVT = MVT::v4i32;
48840b57cec5SDimitry Andric Info.ptrVal = nullptr;
48850b57cec5SDimitry Andric Info.offset = 0;
48860b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
48878bcb0991SDimitry Andric Info.align = Align(16);
48880b57cec5SDimitry Andric return true;
48890b57cec5SDimitry Andric
48900b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_clamp:
48910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_clamp:
48920b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_clamp:
48930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_clamp:
48940b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
48950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
48960b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_clamp:
48970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_clamp:
48980b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_clamp:
48990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_clamp:
49000b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
49010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
49020b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_clamp:
49030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_clamp:
49040b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_clamp:
49050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_trap:
49060b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_trap:
49070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_trap:
49080b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_trap:
49090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
49100b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
49110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_trap:
49120b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_trap:
49130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_trap:
49140b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_trap:
49150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
49160b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
49170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_trap:
49180b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_trap:
49190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_trap:
49200b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_zero:
49210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_zero:
49220b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_zero:
49230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_zero:
49240b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
49250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
49260b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_zero:
49270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_zero:
49280b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_zero:
49290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_zero:
49300b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
49310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
49320b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_zero:
49330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_zero:
49340b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_zero:
49350b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic);
49360b57cec5SDimitry Andric Info.memVT = MVT::i8;
49370b57cec5SDimitry Andric Info.ptrVal = nullptr;
49380b57cec5SDimitry Andric Info.offset = 0;
49390b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
49408bcb0991SDimitry Andric Info.align = Align(16);
49410b57cec5SDimitry Andric return true;
49420b57cec5SDimitry Andric
49430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_clamp:
49440b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_clamp:
49450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_clamp:
49460b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_clamp:
49470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
49480b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
49490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_clamp:
49500b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_clamp:
49510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_clamp:
49520b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_clamp:
49530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
49540b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
49550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_clamp:
49560b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_clamp:
49570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_clamp:
49580b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_trap:
49590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_trap:
49600b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_trap:
49610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_trap:
49620b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
49630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
49640b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_trap:
49650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_trap:
49660b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_trap:
49670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_trap:
49680b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
49690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
49700b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_trap:
49710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_trap:
49720b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_trap:
49730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_zero:
49740b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_zero:
49750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_zero:
49760b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_zero:
49770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
49780b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
49790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_zero:
49800b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_zero:
49810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_zero:
49820b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_zero:
49830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
49840b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
49850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_zero:
49860b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_zero:
49870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_zero:
49880b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic);
49890b57cec5SDimitry Andric Info.memVT = MVT::i16;
49900b57cec5SDimitry Andric Info.ptrVal = nullptr;
49910b57cec5SDimitry Andric Info.offset = 0;
49920b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
49938bcb0991SDimitry Andric Info.align = Align(16);
49940b57cec5SDimitry Andric return true;
49950b57cec5SDimitry Andric
49960b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_clamp:
49970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_clamp:
49980b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_clamp:
49990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_clamp:
50000b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
50010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
50020b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_clamp:
50030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_clamp:
50040b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_clamp:
50050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_clamp:
50060b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
50070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
50080b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_clamp:
50090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_clamp:
50100b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_clamp:
50110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_trap:
50120b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_trap:
50130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_trap:
50140b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_trap:
50150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
50160b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
50170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_trap:
50180b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_trap:
50190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_trap:
50200b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_trap:
50210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
50220b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
50230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_trap:
50240b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_trap:
50250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_trap:
50260b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_zero:
50270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_zero:
50280b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_zero:
50290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_zero:
50300b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
50310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
50320b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_zero:
50330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_zero:
50340b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_zero:
50350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_zero:
50360b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
50370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
50380b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_zero:
50390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_zero:
50400b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_zero:
50410b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic);
50420b57cec5SDimitry Andric Info.memVT = MVT::i32;
50430b57cec5SDimitry Andric Info.ptrVal = nullptr;
50440b57cec5SDimitry Andric Info.offset = 0;
50450b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
50468bcb0991SDimitry Andric Info.align = Align(16);
50470b57cec5SDimitry Andric return true;
50480b57cec5SDimitry Andric
50490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_clamp:
50500b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_clamp:
50510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_clamp:
50520b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
50530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_clamp:
50540b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_clamp:
50550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_clamp:
50560b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
50570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_clamp:
50580b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_clamp:
50590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_trap:
50600b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_trap:
50610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_trap:
50620b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
50630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_trap:
50640b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_trap:
50650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_trap:
50660b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
50670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_trap:
50680b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_trap:
50690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_zero:
50700b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_zero:
50710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_zero:
50720b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
50730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_zero:
50740b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_zero:
50750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_zero:
50760b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
50770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_zero:
50780b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_zero:
50790b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic);
50800b57cec5SDimitry Andric Info.memVT = MVT::i64;
50810b57cec5SDimitry Andric Info.ptrVal = nullptr;
50820b57cec5SDimitry Andric Info.offset = 0;
50830b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad;
50848bcb0991SDimitry Andric Info.align = Align(16);
50850b57cec5SDimitry Andric return true;
50860b57cec5SDimitry Andric }
50870b57cec5SDimitry Andric return false;
50880b57cec5SDimitry Andric }
50890b57cec5SDimitry Andric
509081ad6265SDimitry Andric /// getFunctionParamOptimizedAlign - since function arguments are passed via
509181ad6265SDimitry Andric /// .param space, we may want to increase their alignment in a way that
509281ad6265SDimitry Andric /// ensures that we can effectively vectorize their loads & stores. We can
509381ad6265SDimitry Andric /// increase alignment only if the function has internal or has private
509481ad6265SDimitry Andric /// linkage as for other linkage types callers may already rely on default
509581ad6265SDimitry Andric /// alignment. To allow using 128-bit vectorized loads/stores, this function
509681ad6265SDimitry Andric /// ensures that alignment is 16 or greater.
getFunctionParamOptimizedAlign(const Function * F,Type * ArgTy,const DataLayout & DL) const509781ad6265SDimitry Andric Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
509881ad6265SDimitry Andric const Function *F, Type *ArgTy, const DataLayout &DL) const {
50990fca6ea1SDimitry Andric // Capping the alignment to 128 bytes as that is the maximum alignment
51000fca6ea1SDimitry Andric // supported by PTX.
51010fca6ea1SDimitry Andric const Align ABITypeAlign = std::min(Align(128), DL.getABITypeAlign(ArgTy));
510281ad6265SDimitry Andric
510381ad6265SDimitry Andric // If a function has linkage different from internal or private, we
5104bdd1243dSDimitry Andric // must use default ABI alignment as external users rely on it. Same
5105bdd1243dSDimitry Andric // for a function that may be called from a function pointer.
5106bdd1243dSDimitry Andric if (!F || !F->hasLocalLinkage() ||
5107bdd1243dSDimitry Andric F->hasAddressTaken(/*Users=*/nullptr,
5108bdd1243dSDimitry Andric /*IgnoreCallbackUses=*/false,
5109bdd1243dSDimitry Andric /*IgnoreAssumeLikeCalls=*/true,
5110bdd1243dSDimitry Andric /*IgnoreLLVMUsed=*/true))
51110fca6ea1SDimitry Andric return ABITypeAlign;
511281ad6265SDimitry Andric
511381ad6265SDimitry Andric assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
51140fca6ea1SDimitry Andric return std::max(Align(16), ABITypeAlign);
511581ad6265SDimitry Andric }
511681ad6265SDimitry Andric
5117bdd1243dSDimitry Andric /// Helper for computing alignment of a device function byval parameter.
getFunctionByValParamAlign(const Function * F,Type * ArgTy,Align InitialAlign,const DataLayout & DL) const5118bdd1243dSDimitry Andric Align NVPTXTargetLowering::getFunctionByValParamAlign(
5119bdd1243dSDimitry Andric const Function *F, Type *ArgTy, Align InitialAlign,
5120bdd1243dSDimitry Andric const DataLayout &DL) const {
5121bdd1243dSDimitry Andric Align ArgAlign = InitialAlign;
5122bdd1243dSDimitry Andric // Try to increase alignment to enhance vectorization options.
5123bdd1243dSDimitry Andric if (F)
5124bdd1243dSDimitry Andric ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
5125bdd1243dSDimitry Andric
512606c3fb27SDimitry Andric // Old ptx versions have a bug. When PTX code takes address of
5127bdd1243dSDimitry Andric // byval parameter with alignment < 4, ptxas generates code to
5128bdd1243dSDimitry Andric // spill argument into memory. Alas on sm_50+ ptxas generates
5129bdd1243dSDimitry Andric // SASS code that fails with misaligned access. To work around
5130bdd1243dSDimitry Andric // the problem, make sure that we align byval parameters by at
513106c3fb27SDimitry Andric // least 4. This bug seems to be fixed at least starting from
513206c3fb27SDimitry Andric // ptxas > 9.0.
513306c3fb27SDimitry Andric // TODO: remove this after verifying the bug is not reproduced
513406c3fb27SDimitry Andric // on non-deprecated ptxas versions.
513506c3fb27SDimitry Andric if (ForceMinByValParamAlign)
5136bdd1243dSDimitry Andric ArgAlign = std::max(ArgAlign, Align(4));
5137bdd1243dSDimitry Andric
5138bdd1243dSDimitry Andric return ArgAlign;
5139bdd1243dSDimitry Andric }
5140bdd1243dSDimitry Andric
514106c3fb27SDimitry Andric // Helper for getting a function parameter name. Name is composed from
514206c3fb27SDimitry Andric // its index and the function name. Negative index corresponds to special
514306c3fb27SDimitry Andric // parameter (unsized array) used for passing variable arguments.
getParamName(const Function * F,int Idx) const514406c3fb27SDimitry Andric std::string NVPTXTargetLowering::getParamName(const Function *F,
514506c3fb27SDimitry Andric int Idx) const {
514606c3fb27SDimitry Andric std::string ParamName;
514706c3fb27SDimitry Andric raw_string_ostream ParamStr(ParamName);
514806c3fb27SDimitry Andric
514906c3fb27SDimitry Andric ParamStr << getTargetMachine().getSymbol(F)->getName();
515006c3fb27SDimitry Andric if (Idx < 0)
515106c3fb27SDimitry Andric ParamStr << "_vararg";
515206c3fb27SDimitry Andric else
515306c3fb27SDimitry Andric ParamStr << "_param_" << Idx;
515406c3fb27SDimitry Andric
515506c3fb27SDimitry Andric return ParamName;
515606c3fb27SDimitry Andric }
515706c3fb27SDimitry Andric
51580b57cec5SDimitry Andric /// isLegalAddressingMode - Return true if the addressing mode represented
51590b57cec5SDimitry Andric /// by AM is legal for this target, for a load/store of the specified type.
51600b57cec5SDimitry Andric /// Used to guide target specific optimizations, like loop strength reduction
51610b57cec5SDimitry Andric /// (LoopStrengthReduce.cpp) and memory optimization for address mode
51620b57cec5SDimitry Andric /// (CodeGenPrepare.cpp)
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const51630b57cec5SDimitry Andric bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
51640b57cec5SDimitry Andric const AddrMode &AM, Type *Ty,
51650b57cec5SDimitry Andric unsigned AS, Instruction *I) const {
51660b57cec5SDimitry Andric // AddrMode - This represents an addressing mode of:
51670b57cec5SDimitry Andric // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
51680b57cec5SDimitry Andric //
51690b57cec5SDimitry Andric // The legal address modes are
51700b57cec5SDimitry Andric // - [avar]
51710b57cec5SDimitry Andric // - [areg]
51720b57cec5SDimitry Andric // - [areg+immoff]
51730b57cec5SDimitry Andric // - [immAddr]
51740b57cec5SDimitry Andric
51750fca6ea1SDimitry Andric // immoff must fit in a signed 32-bit int
51760fca6ea1SDimitry Andric if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
51770fca6ea1SDimitry Andric return false;
51780fca6ea1SDimitry Andric
51790fca6ea1SDimitry Andric if (AM.BaseGV)
51800b57cec5SDimitry Andric return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
51810b57cec5SDimitry Andric
51820b57cec5SDimitry Andric switch (AM.Scale) {
51830b57cec5SDimitry Andric case 0: // "r", "r+i" or "i" is allowed
51840b57cec5SDimitry Andric break;
51850b57cec5SDimitry Andric case 1:
51860b57cec5SDimitry Andric if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
51870b57cec5SDimitry Andric return false;
51880b57cec5SDimitry Andric // Otherwise we have r+i.
51890b57cec5SDimitry Andric break;
51900b57cec5SDimitry Andric default:
51910b57cec5SDimitry Andric // No scale > 1 is allowed
51920b57cec5SDimitry Andric return false;
51930b57cec5SDimitry Andric }
51940b57cec5SDimitry Andric return true;
51950b57cec5SDimitry Andric }
51960b57cec5SDimitry Andric
51970b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
51980b57cec5SDimitry Andric // NVPTX Inline Assembly Support
51990b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52000b57cec5SDimitry Andric
52010b57cec5SDimitry Andric /// getConstraintType - Given a constraint letter, return the type of
52020b57cec5SDimitry Andric /// constraint it is for this target.
52030b57cec5SDimitry Andric NVPTXTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const52040b57cec5SDimitry Andric NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
52050b57cec5SDimitry Andric if (Constraint.size() == 1) {
52060b57cec5SDimitry Andric switch (Constraint[0]) {
52070b57cec5SDimitry Andric default:
52080b57cec5SDimitry Andric break;
52090b57cec5SDimitry Andric case 'b':
52100b57cec5SDimitry Andric case 'r':
52110b57cec5SDimitry Andric case 'h':
52120b57cec5SDimitry Andric case 'c':
52130b57cec5SDimitry Andric case 'l':
52140b57cec5SDimitry Andric case 'f':
52150b57cec5SDimitry Andric case 'd':
52160fca6ea1SDimitry Andric case 'q':
52170b57cec5SDimitry Andric case '0':
52180b57cec5SDimitry Andric case 'N':
52190b57cec5SDimitry Andric return C_RegisterClass;
52200b57cec5SDimitry Andric }
52210b57cec5SDimitry Andric }
52220b57cec5SDimitry Andric return TargetLowering::getConstraintType(Constraint);
52230b57cec5SDimitry Andric }
52240b57cec5SDimitry Andric
52250b57cec5SDimitry Andric std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const52260b57cec5SDimitry Andric NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
52270b57cec5SDimitry Andric StringRef Constraint,
52280b57cec5SDimitry Andric MVT VT) const {
52290b57cec5SDimitry Andric if (Constraint.size() == 1) {
52300b57cec5SDimitry Andric switch (Constraint[0]) {
52310b57cec5SDimitry Andric case 'b':
52320b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
52330b57cec5SDimitry Andric case 'c':
52340b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
52350b57cec5SDimitry Andric case 'h':
52360b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
52370b57cec5SDimitry Andric case 'r':
52380b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
52390b57cec5SDimitry Andric case 'l':
52400b57cec5SDimitry Andric case 'N':
52410b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
52420fca6ea1SDimitry Andric case 'q': {
52430fca6ea1SDimitry Andric if (STI.getSmVersion() < 70)
52440fca6ea1SDimitry Andric report_fatal_error("Inline asm with 128 bit operands is only "
52450fca6ea1SDimitry Andric "supported for sm_70 and higher!");
52460fca6ea1SDimitry Andric return std::make_pair(0U, &NVPTX::Int128RegsRegClass);
52470fca6ea1SDimitry Andric }
52480b57cec5SDimitry Andric case 'f':
52490b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
52500b57cec5SDimitry Andric case 'd':
52510b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
52520b57cec5SDimitry Andric }
52530b57cec5SDimitry Andric }
52540b57cec5SDimitry Andric return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
52550b57cec5SDimitry Andric }
52560b57cec5SDimitry Andric
52570b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52580b57cec5SDimitry Andric // NVPTX DAG Combining
52590b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52600b57cec5SDimitry Andric
allowFMA(MachineFunction & MF,CodeGenOptLevel OptLevel) const52610b57cec5SDimitry Andric bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
52625f757f3fSDimitry Andric CodeGenOptLevel OptLevel) const {
52630b57cec5SDimitry Andric // Always honor command-line argument
52640b57cec5SDimitry Andric if (FMAContractLevelOpt.getNumOccurrences() > 0)
52650b57cec5SDimitry Andric return FMAContractLevelOpt > 0;
52660b57cec5SDimitry Andric
52670b57cec5SDimitry Andric // Do not contract if we're not optimizing the code.
52685f757f3fSDimitry Andric if (OptLevel == CodeGenOptLevel::None)
52690b57cec5SDimitry Andric return false;
52700b57cec5SDimitry Andric
52710b57cec5SDimitry Andric // Honor TargetOptions flags that explicitly say fusion is okay.
52720b57cec5SDimitry Andric if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
52730b57cec5SDimitry Andric return true;
52740b57cec5SDimitry Andric
52750b57cec5SDimitry Andric return allowUnsafeFPMath(MF);
52760b57cec5SDimitry Andric }
52770b57cec5SDimitry Andric
allowUnsafeFPMath(MachineFunction & MF) const52780b57cec5SDimitry Andric bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
52790b57cec5SDimitry Andric // Honor TargetOptions flags that explicitly say unsafe math is okay.
52800b57cec5SDimitry Andric if (MF.getTarget().Options.UnsafeFPMath)
52810b57cec5SDimitry Andric return true;
52820b57cec5SDimitry Andric
52830b57cec5SDimitry Andric // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
52840b57cec5SDimitry Andric const Function &F = MF.getFunction();
5285fe6060f1SDimitry Andric return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
52860b57cec5SDimitry Andric }
52870b57cec5SDimitry Andric
isConstZero(const SDValue & Operand)52880fca6ea1SDimitry Andric static bool isConstZero(const SDValue &Operand) {
52890fca6ea1SDimitry Andric const auto *Const = dyn_cast<ConstantSDNode>(Operand);
52900fca6ea1SDimitry Andric return Const && Const->getZExtValue() == 0;
52910fca6ea1SDimitry Andric }
52920fca6ea1SDimitry Andric
52930b57cec5SDimitry Andric /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
52940b57cec5SDimitry Andric /// operands N0 and N1. This is a helper for PerformADDCombine that is
52950b57cec5SDimitry Andric /// called with the default operands, and if that fails, with commuted
52960b57cec5SDimitry Andric /// operands.
52970fca6ea1SDimitry Andric static SDValue
PerformADDCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI)52980fca6ea1SDimitry Andric PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
52990fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
53000b57cec5SDimitry Andric EVT VT = N0.getValueType();
53010fca6ea1SDimitry Andric
53020fca6ea1SDimitry Andric // Since integer multiply-add costs the same as integer multiply
53030fca6ea1SDimitry Andric // but is more costly than integer add, do the fusion only when
53040fca6ea1SDimitry Andric // the mul is only used in the add.
53050fca6ea1SDimitry Andric // TODO: this may not be true for later architectures, consider relaxing this
53060fca6ea1SDimitry Andric if (!N0.getNode()->hasOneUse())
53070b57cec5SDimitry Andric return SDValue();
53080b57cec5SDimitry Andric
53090b57cec5SDimitry Andric // fold (add (mul a, b), c) -> (mad a, b, c)
53100b57cec5SDimitry Andric //
53110fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::MUL)
53120fca6ea1SDimitry Andric return DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, N0.getOperand(0),
53130fca6ea1SDimitry Andric N0.getOperand(1), N1);
53140fca6ea1SDimitry Andric
53150fca6ea1SDimitry Andric // fold (add (select cond, 0, (mul a, b)), c)
53160fca6ea1SDimitry Andric // -> (select cond, c, (mad a, b, c))
53170fca6ea1SDimitry Andric //
53180fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::SELECT) {
53190fca6ea1SDimitry Andric unsigned ZeroOpNum;
53200fca6ea1SDimitry Andric if (isConstZero(N0->getOperand(1)))
53210fca6ea1SDimitry Andric ZeroOpNum = 1;
53220fca6ea1SDimitry Andric else if (isConstZero(N0->getOperand(2)))
53230fca6ea1SDimitry Andric ZeroOpNum = 2;
53240fca6ea1SDimitry Andric else
53250b57cec5SDimitry Andric return SDValue();
53260b57cec5SDimitry Andric
53270fca6ea1SDimitry Andric SDValue M = N0->getOperand((ZeroOpNum == 1) ? 2 : 1);
53280fca6ea1SDimitry Andric if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse())
53290fca6ea1SDimitry Andric return SDValue();
53300fca6ea1SDimitry Andric
53310fca6ea1SDimitry Andric SDValue MAD = DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
53320fca6ea1SDimitry Andric M->getOperand(0), M->getOperand(1), N1);
53330fca6ea1SDimitry Andric return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0),
53340fca6ea1SDimitry Andric ((ZeroOpNum == 1) ? N1 : MAD),
53350fca6ea1SDimitry Andric ((ZeroOpNum == 1) ? MAD : N1));
53360b57cec5SDimitry Andric }
53370fca6ea1SDimitry Andric
53380fca6ea1SDimitry Andric return SDValue();
53390fca6ea1SDimitry Andric }
53400fca6ea1SDimitry Andric
53410fca6ea1SDimitry Andric static SDValue
PerformFADDCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)53420fca6ea1SDimitry Andric PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
53430fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
53440fca6ea1SDimitry Andric CodeGenOptLevel OptLevel) {
53450fca6ea1SDimitry Andric EVT VT = N0.getValueType();
53460fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::FMUL) {
53470b57cec5SDimitry Andric const auto *TLI = static_cast<const NVPTXTargetLowering *>(
53480fca6ea1SDimitry Andric &DCI.DAG.getTargetLoweringInfo());
53490fca6ea1SDimitry Andric if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel))
53500b57cec5SDimitry Andric return SDValue();
53510b57cec5SDimitry Andric
53520b57cec5SDimitry Andric // For floating point:
53530b57cec5SDimitry Andric // Do the fusion only when the mul has less than 5 uses and all
53540b57cec5SDimitry Andric // are add.
53550b57cec5SDimitry Andric // The heuristic is that if a use is not an add, then that use
53560b57cec5SDimitry Andric // cannot be fused into fma, therefore mul is still needed anyway.
53570b57cec5SDimitry Andric // If there are more than 4 uses, even if they are all add, fusing
53580b57cec5SDimitry Andric // them will increase register pressue.
53590b57cec5SDimitry Andric //
53600b57cec5SDimitry Andric int numUses = 0;
53610b57cec5SDimitry Andric int nonAddCount = 0;
5362349cc55cSDimitry Andric for (const SDNode *User : N0.getNode()->uses()) {
53630b57cec5SDimitry Andric numUses++;
53640b57cec5SDimitry Andric if (User->getOpcode() != ISD::FADD)
53650b57cec5SDimitry Andric ++nonAddCount;
53660b57cec5SDimitry Andric if (numUses >= 5)
53670b57cec5SDimitry Andric return SDValue();
53680fca6ea1SDimitry Andric }
53690b57cec5SDimitry Andric if (nonAddCount) {
53700b57cec5SDimitry Andric int orderNo = N->getIROrder();
53710b57cec5SDimitry Andric int orderNo2 = N0.getNode()->getIROrder();
53720b57cec5SDimitry Andric // simple heuristics here for considering potential register
53730b57cec5SDimitry Andric // pressure, the logics here is that the differnce are used
53740b57cec5SDimitry Andric // to measure the distance between def and use, the longer distance
53750b57cec5SDimitry Andric // more likely cause register pressure.
53760b57cec5SDimitry Andric if (orderNo - orderNo2 < 500)
53770b57cec5SDimitry Andric return SDValue();
53780b57cec5SDimitry Andric
53790fca6ea1SDimitry Andric // Now, check if at least one of the FMUL's operands is live beyond the
53800fca6ea1SDimitry Andric // node N, which guarantees that the FMA will not increase register
53810fca6ea1SDimitry Andric // pressure at node N.
53820b57cec5SDimitry Andric bool opIsLive = false;
53830b57cec5SDimitry Andric const SDNode *left = N0.getOperand(0).getNode();
53840b57cec5SDimitry Andric const SDNode *right = N0.getOperand(1).getNode();
53850b57cec5SDimitry Andric
53860b57cec5SDimitry Andric if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
53870b57cec5SDimitry Andric opIsLive = true;
53880b57cec5SDimitry Andric
53890b57cec5SDimitry Andric if (!opIsLive)
5390349cc55cSDimitry Andric for (const SDNode *User : left->uses()) {
53910b57cec5SDimitry Andric int orderNo3 = User->getIROrder();
53920b57cec5SDimitry Andric if (orderNo3 > orderNo) {
53930b57cec5SDimitry Andric opIsLive = true;
53940b57cec5SDimitry Andric break;
53950b57cec5SDimitry Andric }
53960b57cec5SDimitry Andric }
53970b57cec5SDimitry Andric
53980b57cec5SDimitry Andric if (!opIsLive)
5399349cc55cSDimitry Andric for (const SDNode *User : right->uses()) {
54000b57cec5SDimitry Andric int orderNo3 = User->getIROrder();
54010b57cec5SDimitry Andric if (orderNo3 > orderNo) {
54020b57cec5SDimitry Andric opIsLive = true;
54030b57cec5SDimitry Andric break;
54040b57cec5SDimitry Andric }
54050b57cec5SDimitry Andric }
54060b57cec5SDimitry Andric
54070b57cec5SDimitry Andric if (!opIsLive)
54080b57cec5SDimitry Andric return SDValue();
54090b57cec5SDimitry Andric }
54100b57cec5SDimitry Andric
54110fca6ea1SDimitry Andric return DCI.DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0),
54120fca6ea1SDimitry Andric N0.getOperand(1), N1);
54130b57cec5SDimitry Andric }
54140b57cec5SDimitry Andric
54150b57cec5SDimitry Andric return SDValue();
54160b57cec5SDimitry Andric }
54170b57cec5SDimitry Andric
PerformStoreCombineHelper(SDNode * N,std::size_t Front,std::size_t Back)54180fca6ea1SDimitry Andric static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
54190fca6ea1SDimitry Andric std::size_t Back) {
54200fca6ea1SDimitry Andric if (all_of(N->ops().drop_front(Front).drop_back(Back),
54210fca6ea1SDimitry Andric [](const SDUse &U) { return U.get()->isUndef(); }))
54220fca6ea1SDimitry Andric // Operand 0 is the previous value in the chain. Cannot return EntryToken
54230fca6ea1SDimitry Andric // as the previous value will become unused and eliminated later.
54240fca6ea1SDimitry Andric return N->getOperand(0);
54250fca6ea1SDimitry Andric
54260fca6ea1SDimitry Andric return SDValue();
54270fca6ea1SDimitry Andric }
54280fca6ea1SDimitry Andric
PerformStoreParamCombine(SDNode * N)54290fca6ea1SDimitry Andric static SDValue PerformStoreParamCombine(SDNode *N) {
54300fca6ea1SDimitry Andric // Operands from the 3rd to the 2nd last one are the values to be stored.
54310fca6ea1SDimitry Andric // {Chain, ArgID, Offset, Val, Glue}
54320fca6ea1SDimitry Andric return PerformStoreCombineHelper(N, 3, 1);
54330fca6ea1SDimitry Andric }
54340fca6ea1SDimitry Andric
PerformStoreRetvalCombine(SDNode * N)543581ad6265SDimitry Andric static SDValue PerformStoreRetvalCombine(SDNode *N) {
543681ad6265SDimitry Andric // Operands from the 2nd to the last one are the values to be stored
54370fca6ea1SDimitry Andric return PerformStoreCombineHelper(N, 2, 0);
543881ad6265SDimitry Andric }
543981ad6265SDimitry Andric
54400b57cec5SDimitry Andric /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
54410b57cec5SDimitry Andric ///
PerformADDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)54420b57cec5SDimitry Andric static SDValue PerformADDCombine(SDNode *N,
54430b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
54440fca6ea1SDimitry Andric CodeGenOptLevel OptLevel) {
54450fca6ea1SDimitry Andric if (OptLevel == CodeGenOptLevel::None)
54460fca6ea1SDimitry Andric return SDValue();
54470fca6ea1SDimitry Andric
54480fca6ea1SDimitry Andric SDValue N0 = N->getOperand(0);
54490fca6ea1SDimitry Andric SDValue N1 = N->getOperand(1);
54500fca6ea1SDimitry Andric
54510fca6ea1SDimitry Andric // Skip non-integer, non-scalar case
54520fca6ea1SDimitry Andric EVT VT = N0.getValueType();
54530fca6ea1SDimitry Andric if (VT.isVector() || VT != MVT::i32)
54540fca6ea1SDimitry Andric return SDValue();
54550fca6ea1SDimitry Andric
54560fca6ea1SDimitry Andric // First try with the default operand order.
54570fca6ea1SDimitry Andric if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI))
54580fca6ea1SDimitry Andric return Result;
54590fca6ea1SDimitry Andric
54600fca6ea1SDimitry Andric // If that didn't work, try again with the operands commuted.
54610fca6ea1SDimitry Andric return PerformADDCombineWithOperands(N, N1, N0, DCI);
54620fca6ea1SDimitry Andric }
54630fca6ea1SDimitry Andric
54640fca6ea1SDimitry Andric /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
54650fca6ea1SDimitry Andric ///
PerformFADDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)54660fca6ea1SDimitry Andric static SDValue PerformFADDCombine(SDNode *N,
54670fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
54685f757f3fSDimitry Andric CodeGenOptLevel OptLevel) {
54690b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
54700b57cec5SDimitry Andric SDValue N1 = N->getOperand(1);
54710b57cec5SDimitry Andric
54720fca6ea1SDimitry Andric EVT VT = N0.getValueType();
54730fca6ea1SDimitry Andric if (VT.isVector() || !(VT == MVT::f32 || VT == MVT::f64))
54740fca6ea1SDimitry Andric return SDValue();
54750fca6ea1SDimitry Andric
54760b57cec5SDimitry Andric // First try with the default operand order.
54770fca6ea1SDimitry Andric if (SDValue Result = PerformFADDCombineWithOperands(N, N0, N1, DCI, OptLevel))
54780b57cec5SDimitry Andric return Result;
54790b57cec5SDimitry Andric
54800b57cec5SDimitry Andric // If that didn't work, try again with the operands commuted.
54810fca6ea1SDimitry Andric return PerformFADDCombineWithOperands(N, N1, N0, DCI, OptLevel);
54820b57cec5SDimitry Andric }
54830b57cec5SDimitry Andric
PerformANDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)54840b57cec5SDimitry Andric static SDValue PerformANDCombine(SDNode *N,
54850b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
54860b57cec5SDimitry Andric // The type legalizer turns a vector load of i8 values into a zextload to i16
54870b57cec5SDimitry Andric // registers, optionally ANY_EXTENDs it (if target type is integer),
54880b57cec5SDimitry Andric // and ANDs off the high 8 bits. Since we turn this load into a
54890b57cec5SDimitry Andric // target-specific DAG node, the DAG combiner fails to eliminate these AND
54900b57cec5SDimitry Andric // nodes. Do that here.
54910b57cec5SDimitry Andric SDValue Val = N->getOperand(0);
54920b57cec5SDimitry Andric SDValue Mask = N->getOperand(1);
54930b57cec5SDimitry Andric
54940b57cec5SDimitry Andric if (isa<ConstantSDNode>(Val)) {
54950b57cec5SDimitry Andric std::swap(Val, Mask);
54960b57cec5SDimitry Andric }
54970b57cec5SDimitry Andric
54980b57cec5SDimitry Andric SDValue AExt;
54995f757f3fSDimitry Andric
55005f757f3fSDimitry Andric // Convert BFE-> truncate i16 -> and 255
55015f757f3fSDimitry Andric // To just BFE-> truncate i16, as the value already has all the bits in the
55025f757f3fSDimitry Andric // right places.
55035f757f3fSDimitry Andric if (Val.getOpcode() == ISD::TRUNCATE) {
55045f757f3fSDimitry Andric SDValue BFE = Val.getOperand(0);
55055f757f3fSDimitry Andric if (BFE.getOpcode() != NVPTXISD::BFE)
55065f757f3fSDimitry Andric return SDValue();
55075f757f3fSDimitry Andric
55085f757f3fSDimitry Andric ConstantSDNode *BFEBits = dyn_cast<ConstantSDNode>(BFE.getOperand(0));
55095f757f3fSDimitry Andric if (!BFEBits)
55105f757f3fSDimitry Andric return SDValue();
55115f757f3fSDimitry Andric uint64_t BFEBitsVal = BFEBits->getZExtValue();
55125f757f3fSDimitry Andric
55135f757f3fSDimitry Andric ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
55145f757f3fSDimitry Andric if (!MaskCnst) {
55155f757f3fSDimitry Andric // Not an AND with a constant
55165f757f3fSDimitry Andric return SDValue();
55175f757f3fSDimitry Andric }
55185f757f3fSDimitry Andric uint64_t MaskVal = MaskCnst->getZExtValue();
55195f757f3fSDimitry Andric
55205f757f3fSDimitry Andric if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1)
55215f757f3fSDimitry Andric return SDValue();
55225f757f3fSDimitry Andric // If we get here, the AND is unnecessary. Just replace it with the trunc
55235f757f3fSDimitry Andric DCI.CombineTo(N, Val, false);
55245f757f3fSDimitry Andric }
55250b57cec5SDimitry Andric // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
55260b57cec5SDimitry Andric if (Val.getOpcode() == ISD::ANY_EXTEND) {
55270b57cec5SDimitry Andric AExt = Val;
55280b57cec5SDimitry Andric Val = Val->getOperand(0);
55290b57cec5SDimitry Andric }
55300b57cec5SDimitry Andric
55310b57cec5SDimitry Andric if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
55320b57cec5SDimitry Andric Val = Val->getOperand(0);
55330b57cec5SDimitry Andric }
55340b57cec5SDimitry Andric
55350b57cec5SDimitry Andric if (Val->getOpcode() == NVPTXISD::LoadV2 ||
55360b57cec5SDimitry Andric Val->getOpcode() == NVPTXISD::LoadV4) {
55370b57cec5SDimitry Andric ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
55380b57cec5SDimitry Andric if (!MaskCnst) {
55390b57cec5SDimitry Andric // Not an AND with a constant
55400b57cec5SDimitry Andric return SDValue();
55410b57cec5SDimitry Andric }
55420b57cec5SDimitry Andric
55430b57cec5SDimitry Andric uint64_t MaskVal = MaskCnst->getZExtValue();
55440b57cec5SDimitry Andric if (MaskVal != 0xff) {
55450b57cec5SDimitry Andric // Not an AND that chops off top 8 bits
55460b57cec5SDimitry Andric return SDValue();
55470b57cec5SDimitry Andric }
55480b57cec5SDimitry Andric
55490b57cec5SDimitry Andric MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
55500b57cec5SDimitry Andric if (!Mem) {
55510b57cec5SDimitry Andric // Not a MemSDNode?!?
55520b57cec5SDimitry Andric return SDValue();
55530b57cec5SDimitry Andric }
55540b57cec5SDimitry Andric
55550b57cec5SDimitry Andric EVT MemVT = Mem->getMemoryVT();
55560b57cec5SDimitry Andric if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
55570b57cec5SDimitry Andric // We only handle the i8 case
55580b57cec5SDimitry Andric return SDValue();
55590b57cec5SDimitry Andric }
55600b57cec5SDimitry Andric
55617a6dacacSDimitry Andric unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1);
55620b57cec5SDimitry Andric if (ExtType == ISD::SEXTLOAD) {
55630b57cec5SDimitry Andric // If for some reason the load is a sextload, the and is needed to zero
55640b57cec5SDimitry Andric // out the high 8 bits
55650b57cec5SDimitry Andric return SDValue();
55660b57cec5SDimitry Andric }
55670b57cec5SDimitry Andric
55680b57cec5SDimitry Andric bool AddTo = false;
55690b57cec5SDimitry Andric if (AExt.getNode() != nullptr) {
55700b57cec5SDimitry Andric // Re-insert the ext as a zext.
55710b57cec5SDimitry Andric Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
55720b57cec5SDimitry Andric AExt.getValueType(), Val);
55730b57cec5SDimitry Andric AddTo = true;
55740b57cec5SDimitry Andric }
55750b57cec5SDimitry Andric
55760b57cec5SDimitry Andric // If we get here, the AND is unnecessary. Just replace it with the load
55770b57cec5SDimitry Andric DCI.CombineTo(N, Val, AddTo);
55780b57cec5SDimitry Andric }
55790b57cec5SDimitry Andric
55800b57cec5SDimitry Andric return SDValue();
55810b57cec5SDimitry Andric }
55820b57cec5SDimitry Andric
PerformREMCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)55830b57cec5SDimitry Andric static SDValue PerformREMCombine(SDNode *N,
55840b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
55855f757f3fSDimitry Andric CodeGenOptLevel OptLevel) {
55860b57cec5SDimitry Andric assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
55870b57cec5SDimitry Andric
55880b57cec5SDimitry Andric // Don't do anything at less than -O2.
55895f757f3fSDimitry Andric if (OptLevel < CodeGenOptLevel::Default)
55900b57cec5SDimitry Andric return SDValue();
55910b57cec5SDimitry Andric
55920b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
55930b57cec5SDimitry Andric SDLoc DL(N);
55940b57cec5SDimitry Andric EVT VT = N->getValueType(0);
55950b57cec5SDimitry Andric bool IsSigned = N->getOpcode() == ISD::SREM;
55960b57cec5SDimitry Andric unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
55970b57cec5SDimitry Andric
55980b57cec5SDimitry Andric const SDValue &Num = N->getOperand(0);
55990b57cec5SDimitry Andric const SDValue &Den = N->getOperand(1);
56000b57cec5SDimitry Andric
56010b57cec5SDimitry Andric for (const SDNode *U : Num->uses()) {
56020b57cec5SDimitry Andric if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
56030b57cec5SDimitry Andric U->getOperand(1) == Den) {
56040b57cec5SDimitry Andric // Num % Den -> Num - (Num / Den) * Den
56050b57cec5SDimitry Andric return DAG.getNode(ISD::SUB, DL, VT, Num,
56060b57cec5SDimitry Andric DAG.getNode(ISD::MUL, DL, VT,
56070b57cec5SDimitry Andric DAG.getNode(DivOpc, DL, VT, Num, Den),
56080b57cec5SDimitry Andric Den));
56090b57cec5SDimitry Andric }
56100b57cec5SDimitry Andric }
56110b57cec5SDimitry Andric return SDValue();
56120b57cec5SDimitry Andric }
56130b57cec5SDimitry Andric
/// Signedness of a candidate mul.wide operand, as reported by
/// IsMulWideOperandDemotable.
enum OperandSignedness {
  Signed = 0, // Reported for SIGN_EXTEND / SIGN_EXTEND_INREG operands.
  Unsigned,   // Reported for ZERO_EXTEND operands.
  Unknown     // Signedness could not be determined.
};
56190b57cec5SDimitry Andric
56200b57cec5SDimitry Andric /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
56210b57cec5SDimitry Andric /// that can be demoted to \p OptSize bits without loss of information. The
56220b57cec5SDimitry Andric /// signedness of the operand, if determinable, is placed in \p S.
IsMulWideOperandDemotable(SDValue Op,unsigned OptSize,OperandSignedness & S)56230b57cec5SDimitry Andric static bool IsMulWideOperandDemotable(SDValue Op,
56240b57cec5SDimitry Andric unsigned OptSize,
56250b57cec5SDimitry Andric OperandSignedness &S) {
56260b57cec5SDimitry Andric S = Unknown;
56270b57cec5SDimitry Andric
56280b57cec5SDimitry Andric if (Op.getOpcode() == ISD::SIGN_EXTEND ||
56290b57cec5SDimitry Andric Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
56300b57cec5SDimitry Andric EVT OrigVT = Op.getOperand(0).getValueType();
5631e8d8bef9SDimitry Andric if (OrigVT.getFixedSizeInBits() <= OptSize) {
56320b57cec5SDimitry Andric S = Signed;
56330b57cec5SDimitry Andric return true;
56340b57cec5SDimitry Andric }
56350b57cec5SDimitry Andric } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
56360b57cec5SDimitry Andric EVT OrigVT = Op.getOperand(0).getValueType();
5637e8d8bef9SDimitry Andric if (OrigVT.getFixedSizeInBits() <= OptSize) {
56380b57cec5SDimitry Andric S = Unsigned;
56390b57cec5SDimitry Andric return true;
56400b57cec5SDimitry Andric }
56410b57cec5SDimitry Andric }
56420b57cec5SDimitry Andric
56430b57cec5SDimitry Andric return false;
56440b57cec5SDimitry Andric }
56450b57cec5SDimitry Andric
56460b57cec5SDimitry Andric /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
56470b57cec5SDimitry Andric /// be demoted to \p OptSize bits without loss of information. If the operands
56480b57cec5SDimitry Andric /// contain a constant, it should appear as the RHS operand. The signedness of
56490b57cec5SDimitry Andric /// the operands is placed in \p IsSigned.
AreMulWideOperandsDemotable(SDValue LHS,SDValue RHS,unsigned OptSize,bool & IsSigned)56500b57cec5SDimitry Andric static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
56510b57cec5SDimitry Andric unsigned OptSize,
56520b57cec5SDimitry Andric bool &IsSigned) {
56530b57cec5SDimitry Andric OperandSignedness LHSSign;
56540b57cec5SDimitry Andric
56550b57cec5SDimitry Andric // The LHS operand must be a demotable op
56560b57cec5SDimitry Andric if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
56570b57cec5SDimitry Andric return false;
56580b57cec5SDimitry Andric
56590b57cec5SDimitry Andric // We should have been able to determine the signedness from the LHS
56600b57cec5SDimitry Andric if (LHSSign == Unknown)
56610b57cec5SDimitry Andric return false;
56620b57cec5SDimitry Andric
56630b57cec5SDimitry Andric IsSigned = (LHSSign == Signed);
56640b57cec5SDimitry Andric
56650b57cec5SDimitry Andric // The RHS can be a demotable op or a constant
56660b57cec5SDimitry Andric if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
56670b57cec5SDimitry Andric const APInt &Val = CI->getAPIntValue();
56680b57cec5SDimitry Andric if (LHSSign == Unsigned) {
56690b57cec5SDimitry Andric return Val.isIntN(OptSize);
56700b57cec5SDimitry Andric } else {
56710b57cec5SDimitry Andric return Val.isSignedIntN(OptSize);
56720b57cec5SDimitry Andric }
56730b57cec5SDimitry Andric } else {
56740b57cec5SDimitry Andric OperandSignedness RHSSign;
56750b57cec5SDimitry Andric if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
56760b57cec5SDimitry Andric return false;
56770b57cec5SDimitry Andric
56780b57cec5SDimitry Andric return LHSSign == RHSSign;
56790b57cec5SDimitry Andric }
56800b57cec5SDimitry Andric }
56810b57cec5SDimitry Andric
56820b57cec5SDimitry Andric /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
56830b57cec5SDimitry Andric /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
56840b57cec5SDimitry Andric /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
56850b57cec5SDimitry Andric /// amount.
TryMULWIDECombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)56860b57cec5SDimitry Andric static SDValue TryMULWIDECombine(SDNode *N,
56870b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
56880b57cec5SDimitry Andric EVT MulType = N->getValueType(0);
56890b57cec5SDimitry Andric if (MulType != MVT::i32 && MulType != MVT::i64) {
56900b57cec5SDimitry Andric return SDValue();
56910b57cec5SDimitry Andric }
56920b57cec5SDimitry Andric
56930b57cec5SDimitry Andric SDLoc DL(N);
56940b57cec5SDimitry Andric unsigned OptSize = MulType.getSizeInBits() >> 1;
56950b57cec5SDimitry Andric SDValue LHS = N->getOperand(0);
56960b57cec5SDimitry Andric SDValue RHS = N->getOperand(1);
56970b57cec5SDimitry Andric
56980b57cec5SDimitry Andric // Canonicalize the multiply so the constant (if any) is on the right
56990b57cec5SDimitry Andric if (N->getOpcode() == ISD::MUL) {
57000b57cec5SDimitry Andric if (isa<ConstantSDNode>(LHS)) {
57010b57cec5SDimitry Andric std::swap(LHS, RHS);
57020b57cec5SDimitry Andric }
57030b57cec5SDimitry Andric }
57040b57cec5SDimitry Andric
57050b57cec5SDimitry Andric // If we have a SHL, determine the actual multiply amount
57060b57cec5SDimitry Andric if (N->getOpcode() == ISD::SHL) {
57070b57cec5SDimitry Andric ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
57080b57cec5SDimitry Andric if (!ShlRHS) {
57090b57cec5SDimitry Andric return SDValue();
57100b57cec5SDimitry Andric }
57110b57cec5SDimitry Andric
57120b57cec5SDimitry Andric APInt ShiftAmt = ShlRHS->getAPIntValue();
57130b57cec5SDimitry Andric unsigned BitWidth = MulType.getSizeInBits();
57140b57cec5SDimitry Andric if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
57150b57cec5SDimitry Andric APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
57160b57cec5SDimitry Andric RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
57170b57cec5SDimitry Andric } else {
57180b57cec5SDimitry Andric return SDValue();
57190b57cec5SDimitry Andric }
57200b57cec5SDimitry Andric }
57210b57cec5SDimitry Andric
57220b57cec5SDimitry Andric bool Signed;
57230b57cec5SDimitry Andric // Verify that our operands are demotable
57240b57cec5SDimitry Andric if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
57250b57cec5SDimitry Andric return SDValue();
57260b57cec5SDimitry Andric }
57270b57cec5SDimitry Andric
57280b57cec5SDimitry Andric EVT DemotedVT;
57290b57cec5SDimitry Andric if (MulType == MVT::i32) {
57300b57cec5SDimitry Andric DemotedVT = MVT::i16;
57310b57cec5SDimitry Andric } else {
57320b57cec5SDimitry Andric DemotedVT = MVT::i32;
57330b57cec5SDimitry Andric }
57340b57cec5SDimitry Andric
57350b57cec5SDimitry Andric // Truncate the operands to the correct size. Note that these are just for
57360b57cec5SDimitry Andric // type consistency and will (likely) be eliminated in later phases.
57370b57cec5SDimitry Andric SDValue TruncLHS =
57380b57cec5SDimitry Andric DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
57390b57cec5SDimitry Andric SDValue TruncRHS =
57400b57cec5SDimitry Andric DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
57410b57cec5SDimitry Andric
57420b57cec5SDimitry Andric unsigned Opc;
57430b57cec5SDimitry Andric if (Signed) {
57440b57cec5SDimitry Andric Opc = NVPTXISD::MUL_WIDE_SIGNED;
57450b57cec5SDimitry Andric } else {
57460b57cec5SDimitry Andric Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
57470b57cec5SDimitry Andric }
57480b57cec5SDimitry Andric
57490b57cec5SDimitry Andric return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
57500b57cec5SDimitry Andric }
57510b57cec5SDimitry Andric
isConstOne(const SDValue & Operand)57520fca6ea1SDimitry Andric static bool isConstOne(const SDValue &Operand) {
57530fca6ea1SDimitry Andric const auto *Const = dyn_cast<ConstantSDNode>(Operand);
57540fca6ea1SDimitry Andric return Const && Const->getZExtValue() == 1;
57550fca6ea1SDimitry Andric }
57560fca6ea1SDimitry Andric
matchMADConstOnePattern(SDValue Add)57570fca6ea1SDimitry Andric static SDValue matchMADConstOnePattern(SDValue Add) {
57580fca6ea1SDimitry Andric if (Add->getOpcode() != ISD::ADD)
57590fca6ea1SDimitry Andric return SDValue();
57600fca6ea1SDimitry Andric
57610fca6ea1SDimitry Andric if (isConstOne(Add->getOperand(0)))
57620fca6ea1SDimitry Andric return Add->getOperand(1);
57630fca6ea1SDimitry Andric
57640fca6ea1SDimitry Andric if (isConstOne(Add->getOperand(1)))
57650fca6ea1SDimitry Andric return Add->getOperand(0);
57660fca6ea1SDimitry Andric
57670fca6ea1SDimitry Andric return SDValue();
57680fca6ea1SDimitry Andric }
57690fca6ea1SDimitry Andric
combineMADConstOne(SDValue X,SDValue Add,EVT VT,SDLoc DL,TargetLowering::DAGCombinerInfo & DCI)57700fca6ea1SDimitry Andric static SDValue combineMADConstOne(SDValue X, SDValue Add, EVT VT, SDLoc DL,
57710fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
57720fca6ea1SDimitry Andric
57730fca6ea1SDimitry Andric if (SDValue Y = matchMADConstOnePattern(Add))
57740fca6ea1SDimitry Andric return DCI.DAG.getNode(NVPTXISD::IMAD, DL, VT, X, Y, X);
57750fca6ea1SDimitry Andric
57760fca6ea1SDimitry Andric return SDValue();
57770fca6ea1SDimitry Andric }
57780fca6ea1SDimitry Andric
combineMulSelectConstOne(SDValue X,SDValue Select,EVT VT,SDLoc DL,TargetLowering::DAGCombinerInfo & DCI)57790fca6ea1SDimitry Andric static SDValue combineMulSelectConstOne(SDValue X, SDValue Select, EVT VT,
57800fca6ea1SDimitry Andric SDLoc DL,
57810fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
57820fca6ea1SDimitry Andric if (Select->getOpcode() != ISD::SELECT)
57830fca6ea1SDimitry Andric return SDValue();
57840fca6ea1SDimitry Andric
57850fca6ea1SDimitry Andric SDValue Cond = Select->getOperand(0);
57860fca6ea1SDimitry Andric
57870fca6ea1SDimitry Andric unsigned ConstOpNo;
57880fca6ea1SDimitry Andric if (isConstOne(Select->getOperand(1)))
57890fca6ea1SDimitry Andric ConstOpNo = 1;
57900fca6ea1SDimitry Andric else if (isConstOne(Select->getOperand(2)))
57910fca6ea1SDimitry Andric ConstOpNo = 2;
57920fca6ea1SDimitry Andric else
57930fca6ea1SDimitry Andric return SDValue();
57940fca6ea1SDimitry Andric
57950fca6ea1SDimitry Andric SDValue Y = Select->getOperand((ConstOpNo == 1) ? 2 : 1);
57960fca6ea1SDimitry Andric
57970fca6ea1SDimitry Andric // Do not combine if the resulting sequence is not obviously profitable.
57980fca6ea1SDimitry Andric if (!matchMADConstOnePattern(Y))
57990fca6ea1SDimitry Andric return SDValue();
58000fca6ea1SDimitry Andric
58010fca6ea1SDimitry Andric SDValue NewMul = DCI.DAG.getNode(ISD::MUL, DL, VT, X, Y);
58020fca6ea1SDimitry Andric
58030fca6ea1SDimitry Andric return DCI.DAG.getNode(ISD::SELECT, DL, VT, Cond,
58040fca6ea1SDimitry Andric (ConstOpNo == 1) ? X : NewMul,
58050fca6ea1SDimitry Andric (ConstOpNo == 1) ? NewMul : X);
58060fca6ea1SDimitry Andric }
58070fca6ea1SDimitry Andric
58080fca6ea1SDimitry Andric static SDValue
PerformMULCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI)58090fca6ea1SDimitry Andric PerformMULCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
58100fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
58110fca6ea1SDimitry Andric
58120fca6ea1SDimitry Andric EVT VT = N0.getValueType();
58130fca6ea1SDimitry Andric if (VT.isVector())
58140fca6ea1SDimitry Andric return SDValue();
58150fca6ea1SDimitry Andric
58160fca6ea1SDimitry Andric if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
58170fca6ea1SDimitry Andric return SDValue();
58180fca6ea1SDimitry Andric
58190fca6ea1SDimitry Andric SDLoc DL(N);
58200fca6ea1SDimitry Andric
58210fca6ea1SDimitry Andric // (mul x, (add y, 1)) -> (mad x, y, x)
58220fca6ea1SDimitry Andric if (SDValue Res = combineMADConstOne(N0, N1, VT, DL, DCI))
58230fca6ea1SDimitry Andric return Res;
58240fca6ea1SDimitry Andric if (SDValue Res = combineMADConstOne(N1, N0, VT, DL, DCI))
58250fca6ea1SDimitry Andric return Res;
58260fca6ea1SDimitry Andric
58270fca6ea1SDimitry Andric // (mul x, (select y, 1)) -> (select (mul x, y), x)
58280fca6ea1SDimitry Andric if (SDValue Res = combineMulSelectConstOne(N0, N1, VT, DL, DCI))
58290fca6ea1SDimitry Andric return Res;
58300fca6ea1SDimitry Andric if (SDValue Res = combineMulSelectConstOne(N1, N0, VT, DL, DCI))
58310fca6ea1SDimitry Andric return Res;
58320fca6ea1SDimitry Andric
58330fca6ea1SDimitry Andric return SDValue();
58340fca6ea1SDimitry Andric }
58350fca6ea1SDimitry Andric
58360b57cec5SDimitry Andric /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
PerformMULCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)58370b57cec5SDimitry Andric static SDValue PerformMULCombine(SDNode *N,
58380b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
58395f757f3fSDimitry Andric CodeGenOptLevel OptLevel) {
58400fca6ea1SDimitry Andric if (OptLevel == CodeGenOptLevel::None)
58410fca6ea1SDimitry Andric return SDValue();
58420fca6ea1SDimitry Andric
58430b57cec5SDimitry Andric if (SDValue Ret = TryMULWIDECombine(N, DCI))
58440b57cec5SDimitry Andric return Ret;
58450b57cec5SDimitry Andric
58460fca6ea1SDimitry Andric SDValue N0 = N->getOperand(0);
58470fca6ea1SDimitry Andric SDValue N1 = N->getOperand(1);
58480fca6ea1SDimitry Andric return PerformMULCombineWithOperands(N, N0, N1, DCI);
58490b57cec5SDimitry Andric }
58500b57cec5SDimitry Andric
58510b57cec5SDimitry Andric /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
PerformSHLCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)58520b57cec5SDimitry Andric static SDValue PerformSHLCombine(SDNode *N,
58530b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
58545f757f3fSDimitry Andric CodeGenOptLevel OptLevel) {
58555f757f3fSDimitry Andric if (OptLevel > CodeGenOptLevel::None) {
58560b57cec5SDimitry Andric // Try mul.wide combining at OptLevel > 0
58570b57cec5SDimitry Andric if (SDValue Ret = TryMULWIDECombine(N, DCI))
58580b57cec5SDimitry Andric return Ret;
58590b57cec5SDimitry Andric }
58600b57cec5SDimitry Andric
58610b57cec5SDimitry Andric return SDValue();
58620b57cec5SDimitry Andric }
58630b57cec5SDimitry Andric
PerformSETCCCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,unsigned int SmVersion)58640b57cec5SDimitry Andric static SDValue PerformSETCCCombine(SDNode *N,
58655f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI,
58665f757f3fSDimitry Andric unsigned int SmVersion) {
58670b57cec5SDimitry Andric EVT CCType = N->getValueType(0);
58680b57cec5SDimitry Andric SDValue A = N->getOperand(0);
58690b57cec5SDimitry Andric SDValue B = N->getOperand(1);
58700b57cec5SDimitry Andric
58715f757f3fSDimitry Andric EVT AType = A.getValueType();
58725f757f3fSDimitry Andric if (!(CCType == MVT::v2i1 && (AType == MVT::v2f16 || AType == MVT::v2bf16)))
58735f757f3fSDimitry Andric return SDValue();
58745f757f3fSDimitry Andric
58755f757f3fSDimitry Andric if (A.getValueType() == MVT::v2bf16 && SmVersion < 90)
58760b57cec5SDimitry Andric return SDValue();
58770b57cec5SDimitry Andric
58780b57cec5SDimitry Andric SDLoc DL(N);
58790b57cec5SDimitry Andric // setp.f16x2 returns two scalar predicates, which we need to
58800b57cec5SDimitry Andric // convert back to v2i1. The returned result will be scalarized by
58810b57cec5SDimitry Andric // the legalizer, but the comparison will remain a single vector
58820b57cec5SDimitry Andric // instruction.
58835f757f3fSDimitry Andric SDValue CCNode = DCI.DAG.getNode(
58845f757f3fSDimitry Andric A.getValueType() == MVT::v2f16 ? NVPTXISD::SETP_F16X2
58855f757f3fSDimitry Andric : NVPTXISD::SETP_BF16X2,
58865f757f3fSDimitry Andric DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)});
58870b57cec5SDimitry Andric return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
58880b57cec5SDimitry Andric CCNode.getValue(1));
58890b57cec5SDimitry Andric }
58900b57cec5SDimitry Andric
PerformEXTRACTCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)58915f757f3fSDimitry Andric static SDValue PerformEXTRACTCombine(SDNode *N,
58925f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
58935f757f3fSDimitry Andric SDValue Vector = N->getOperand(0);
58945f757f3fSDimitry Andric SDLoc DL(N);
58955f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType();
58965f757f3fSDimitry Andric if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
58975f757f3fSDimitry Andric IsPTXVectorType(VectorVT.getSimpleVT()))
58985f757f3fSDimitry Andric return SDValue(); // Native vector loads already combine nicely w/
58990fca6ea1SDimitry Andric // extract_vector_elt.
59000fca6ea1SDimitry Andric // Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already
59010fca6ea1SDimitry Andric // handle them OK.
59025f757f3fSDimitry Andric if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
59030fca6ea1SDimitry Andric VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
59040fca6ea1SDimitry Andric return SDValue();
59050fca6ea1SDimitry Andric
59060fca6ea1SDimitry Andric // Don't mess with undef values as sra may be simplified to 0, not undef.
59070fca6ea1SDimitry Andric if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode()))
59085f757f3fSDimitry Andric return SDValue();
59095f757f3fSDimitry Andric
59105f757f3fSDimitry Andric uint64_t VectorBits = VectorVT.getSizeInBits();
59115f757f3fSDimitry Andric // We only handle the types we can extract in-register.
59125f757f3fSDimitry Andric if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
59135f757f3fSDimitry Andric return SDValue();
59145f757f3fSDimitry Andric
59155f757f3fSDimitry Andric ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1));
59165f757f3fSDimitry Andric // Index == 0 is handled by generic DAG combiner.
59175f757f3fSDimitry Andric if (!Index || Index->getZExtValue() == 0)
59185f757f3fSDimitry Andric return SDValue();
59195f757f3fSDimitry Andric
59205f757f3fSDimitry Andric MVT IVT = MVT::getIntegerVT(VectorBits);
59215f757f3fSDimitry Andric EVT EltVT = VectorVT.getVectorElementType();
59225f757f3fSDimitry Andric EVT EltIVT = EltVT.changeTypeToInteger();
59235f757f3fSDimitry Andric uint64_t EltBits = EltVT.getScalarSizeInBits();
59245f757f3fSDimitry Andric
59255f757f3fSDimitry Andric SDValue Result = DCI.DAG.getNode(
59265f757f3fSDimitry Andric ISD::TRUNCATE, DL, EltIVT,
59275f757f3fSDimitry Andric DCI.DAG.getNode(
59285f757f3fSDimitry Andric ISD::SRA, DL, IVT, DCI.DAG.getNode(ISD::BITCAST, DL, IVT, Vector),
59295f757f3fSDimitry Andric DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT)));
59305f757f3fSDimitry Andric
59315f757f3fSDimitry Andric // If element has non-integer type, bitcast it back to the expected type.
59325f757f3fSDimitry Andric if (EltVT != EltIVT)
59335f757f3fSDimitry Andric Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result);
59345f757f3fSDimitry Andric // Past legalizer, we may need to extent i8 -> i16 to match the register type.
59355f757f3fSDimitry Andric if (EltVT != N->getValueType(0))
59365f757f3fSDimitry Andric Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result);
59375f757f3fSDimitry Andric
59385f757f3fSDimitry Andric return Result;
59395f757f3fSDimitry Andric }
59405f757f3fSDimitry Andric
PerformVSELECTCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)59415f757f3fSDimitry Andric static SDValue PerformVSELECTCombine(SDNode *N,
59425f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
59435f757f3fSDimitry Andric SDValue VA = N->getOperand(1);
59445f757f3fSDimitry Andric EVT VectorVT = VA.getValueType();
59455f757f3fSDimitry Andric if (VectorVT != MVT::v4i8)
59465f757f3fSDimitry Andric return SDValue();
59475f757f3fSDimitry Andric
59485f757f3fSDimitry Andric // We need to split vselect into individual per-element operations Because we
59495f757f3fSDimitry Andric // use BFE/BFI instruction for byte extraction/insertion, we do end up with
59505f757f3fSDimitry Andric // 32-bit values, so we may as well do comparison as i32 to avoid conversions
59515f757f3fSDimitry Andric // to/from i16 normally used for i8 values.
59525f757f3fSDimitry Andric SmallVector<SDValue, 4> E;
59535f757f3fSDimitry Andric SDLoc DL(N);
59545f757f3fSDimitry Andric SDValue VCond = N->getOperand(0);
59555f757f3fSDimitry Andric SDValue VB = N->getOperand(2);
59565f757f3fSDimitry Andric for (int I = 0; I < 4; ++I) {
59575f757f3fSDimitry Andric SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond,
59585f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32));
59595f757f3fSDimitry Andric SDValue EA = DCI.DAG.getAnyExtOrTrunc(
59605f757f3fSDimitry Andric DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA,
59615f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32)),
59625f757f3fSDimitry Andric DL, MVT::i32);
59635f757f3fSDimitry Andric SDValue EB = DCI.DAG.getAnyExtOrTrunc(
59645f757f3fSDimitry Andric DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB,
59655f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32)),
59665f757f3fSDimitry Andric DL, MVT::i32);
59675f757f3fSDimitry Andric E.push_back(DCI.DAG.getAnyExtOrTrunc(
59685f757f3fSDimitry Andric DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8));
59695f757f3fSDimitry Andric }
59705f757f3fSDimitry Andric return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E);
59715f757f3fSDimitry Andric }
59725f757f3fSDimitry Andric
PerformLOADCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)59735f757f3fSDimitry Andric static SDValue PerformLOADCombine(SDNode *N,
59745f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
59755f757f3fSDimitry Andric SelectionDAG &DAG = DCI.DAG;
59765f757f3fSDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(N);
59775f757f3fSDimitry Andric
59785f757f3fSDimitry Andric // Lower a v16i8 load into a LoadV4 operation with i32 results instead of
59795f757f3fSDimitry Andric // letting ReplaceLoadVector split it into smaller loads during legalization.
59805f757f3fSDimitry Andric // This is done at dag-combine1 time, so that vector operations with i8
59815f757f3fSDimitry Andric // elements can be optimised away instead of being needlessly split during
59825f757f3fSDimitry Andric // legalization, which involves storing to the stack and loading it back.
59835f757f3fSDimitry Andric EVT VT = N->getValueType(0);
59845f757f3fSDimitry Andric if (VT != MVT::v16i8)
59855f757f3fSDimitry Andric return SDValue();
59865f757f3fSDimitry Andric
59875f757f3fSDimitry Andric SDLoc DL(N);
59885f757f3fSDimitry Andric
59895f757f3fSDimitry Andric // Create a v4i32 vector load operation, effectively <4 x v4i8>.
59905f757f3fSDimitry Andric unsigned Opc = NVPTXISD::LoadV4;
59915f757f3fSDimitry Andric EVT NewVT = MVT::v4i32;
59925f757f3fSDimitry Andric EVT EltVT = NewVT.getVectorElementType();
59935f757f3fSDimitry Andric unsigned NumElts = NewVT.getVectorNumElements();
59945f757f3fSDimitry Andric EVT RetVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other};
59955f757f3fSDimitry Andric SDVTList RetVTList = DAG.getVTList(RetVTs);
59965f757f3fSDimitry Andric SmallVector<SDValue, 8> Ops(N->ops());
59975f757f3fSDimitry Andric Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
59985f757f3fSDimitry Andric SDValue NewLoad = DAG.getMemIntrinsicNode(Opc, DL, RetVTList, Ops, NewVT,
59995f757f3fSDimitry Andric LD->getMemOperand());
60005f757f3fSDimitry Andric SDValue NewChain = NewLoad.getValue(NumElts);
60015f757f3fSDimitry Andric
60025f757f3fSDimitry Andric // Create a vector of the same type returned by the original load.
60035f757f3fSDimitry Andric SmallVector<SDValue, 4> Elts;
60045f757f3fSDimitry Andric for (unsigned i = 0; i < NumElts; i++)
60055f757f3fSDimitry Andric Elts.push_back(NewLoad.getValue(i));
60065f757f3fSDimitry Andric return DCI.DAG.getMergeValues(
60075f757f3fSDimitry Andric {DCI.DAG.getBitcast(VT, DCI.DAG.getBuildVector(NewVT, DL, Elts)),
60085f757f3fSDimitry Andric NewChain},
60095f757f3fSDimitry Andric DL);
60105f757f3fSDimitry Andric }
60115f757f3fSDimitry Andric
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const60120b57cec5SDimitry Andric SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
60130b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
60145f757f3fSDimitry Andric CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
60150b57cec5SDimitry Andric switch (N->getOpcode()) {
60160b57cec5SDimitry Andric default: break;
60170b57cec5SDimitry Andric case ISD::ADD:
60180fca6ea1SDimitry Andric return PerformADDCombine(N, DCI, OptLevel);
60190b57cec5SDimitry Andric case ISD::FADD:
60200fca6ea1SDimitry Andric return PerformFADDCombine(N, DCI, OptLevel);
60210b57cec5SDimitry Andric case ISD::MUL:
60220b57cec5SDimitry Andric return PerformMULCombine(N, DCI, OptLevel);
60230b57cec5SDimitry Andric case ISD::SHL:
60240b57cec5SDimitry Andric return PerformSHLCombine(N, DCI, OptLevel);
60250b57cec5SDimitry Andric case ISD::AND:
60260b57cec5SDimitry Andric return PerformANDCombine(N, DCI);
60270b57cec5SDimitry Andric case ISD::UREM:
60280b57cec5SDimitry Andric case ISD::SREM:
60290b57cec5SDimitry Andric return PerformREMCombine(N, DCI, OptLevel);
60300b57cec5SDimitry Andric case ISD::SETCC:
60315f757f3fSDimitry Andric return PerformSETCCCombine(N, DCI, STI.getSmVersion());
60325f757f3fSDimitry Andric case ISD::LOAD:
60335f757f3fSDimitry Andric return PerformLOADCombine(N, DCI);
603481ad6265SDimitry Andric case NVPTXISD::StoreRetval:
603581ad6265SDimitry Andric case NVPTXISD::StoreRetvalV2:
603681ad6265SDimitry Andric case NVPTXISD::StoreRetvalV4:
603781ad6265SDimitry Andric return PerformStoreRetvalCombine(N);
60380fca6ea1SDimitry Andric case NVPTXISD::StoreParam:
60390fca6ea1SDimitry Andric case NVPTXISD::StoreParamV2:
60400fca6ea1SDimitry Andric case NVPTXISD::StoreParamV4:
60410fca6ea1SDimitry Andric return PerformStoreParamCombine(N);
60425f757f3fSDimitry Andric case ISD::EXTRACT_VECTOR_ELT:
60435f757f3fSDimitry Andric return PerformEXTRACTCombine(N, DCI);
60445f757f3fSDimitry Andric case ISD::VSELECT:
60455f757f3fSDimitry Andric return PerformVSELECTCombine(N, DCI);
60460b57cec5SDimitry Andric }
60470b57cec5SDimitry Andric return SDValue();
60480b57cec5SDimitry Andric }
60490b57cec5SDimitry Andric
60500b57cec5SDimitry Andric /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
ReplaceLoadVector(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)60510b57cec5SDimitry Andric static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
60520b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) {
60530b57cec5SDimitry Andric EVT ResVT = N->getValueType(0);
60540b57cec5SDimitry Andric SDLoc DL(N);
60550b57cec5SDimitry Andric
60560b57cec5SDimitry Andric assert(ResVT.isVector() && "Vector load must have vector type");
60570b57cec5SDimitry Andric
60580b57cec5SDimitry Andric // We only handle "native" vector sizes for now, e.g. <4 x double> is not
60590b57cec5SDimitry Andric // legal. We can (and should) split that into 2 loads of <2 x double> here
60600b57cec5SDimitry Andric // but I'm leaving that as a TODO for now.
60610b57cec5SDimitry Andric assert(ResVT.isSimple() && "Can only handle simple types");
60620b57cec5SDimitry Andric switch (ResVT.getSimpleVT().SimpleTy) {
60630b57cec5SDimitry Andric default:
60640b57cec5SDimitry Andric return;
60650b57cec5SDimitry Andric case MVT::v2i8:
60660b57cec5SDimitry Andric case MVT::v2i16:
60670b57cec5SDimitry Andric case MVT::v2i32:
60680b57cec5SDimitry Andric case MVT::v2i64:
60690b57cec5SDimitry Andric case MVT::v2f16:
60700b57cec5SDimitry Andric case MVT::v2f32:
60710b57cec5SDimitry Andric case MVT::v2f64:
60720b57cec5SDimitry Andric case MVT::v4i8:
60730b57cec5SDimitry Andric case MVT::v4i16:
60740b57cec5SDimitry Andric case MVT::v4i32:
60750b57cec5SDimitry Andric case MVT::v4f16:
60760b57cec5SDimitry Andric case MVT::v4f32:
60770b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2>
60785f757f3fSDimitry Andric case MVT::v8bf16: // <4 x bf16x2>
60795f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2>
60800b57cec5SDimitry Andric // This is a "native" vector type
60810b57cec5SDimitry Andric break;
60820b57cec5SDimitry Andric }
60830b57cec5SDimitry Andric
60840b57cec5SDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(N);
60850b57cec5SDimitry Andric
60865ffd83dbSDimitry Andric Align Alignment = LD->getAlign();
60870b57cec5SDimitry Andric auto &TD = DAG.getDataLayout();
608806c3fb27SDimitry Andric Align PrefAlign =
608906c3fb27SDimitry Andric TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
60905ffd83dbSDimitry Andric if (Alignment < PrefAlign) {
60910b57cec5SDimitry Andric // This load is not sufficiently aligned, so bail out and let this vector
60920b57cec5SDimitry Andric // load be scalarized. Note that we may still be able to emit smaller
60930b57cec5SDimitry Andric // vector loads. For example, if we are loading a <4 x float> with an
60940b57cec5SDimitry Andric // alignment of 8, this check will fail but the legalizer will try again
60950b57cec5SDimitry Andric // with 2 x <2 x float>, which will succeed with an alignment of 8.
60960b57cec5SDimitry Andric return;
60970b57cec5SDimitry Andric }
60980b57cec5SDimitry Andric
60990b57cec5SDimitry Andric EVT EltVT = ResVT.getVectorElementType();
61000b57cec5SDimitry Andric unsigned NumElts = ResVT.getVectorNumElements();
61010b57cec5SDimitry Andric
61020b57cec5SDimitry Andric // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
61030b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the
61040b57cec5SDimitry Andric // loaded type to i16 and propagate the "real" type as the memory type.
61050b57cec5SDimitry Andric bool NeedTrunc = false;
61060b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16) {
61070b57cec5SDimitry Andric EltVT = MVT::i16;
61080b57cec5SDimitry Andric NeedTrunc = true;
61090b57cec5SDimitry Andric }
61100b57cec5SDimitry Andric
61110b57cec5SDimitry Andric unsigned Opcode = 0;
61120b57cec5SDimitry Andric SDVTList LdResVTs;
61135f757f3fSDimitry Andric bool Load16x2 = false;
61140b57cec5SDimitry Andric
61150b57cec5SDimitry Andric switch (NumElts) {
61160b57cec5SDimitry Andric default:
61170b57cec5SDimitry Andric return;
61180b57cec5SDimitry Andric case 2:
61190b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV2;
61200b57cec5SDimitry Andric LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
61210b57cec5SDimitry Andric break;
61220b57cec5SDimitry Andric case 4: {
61230b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV4;
61240b57cec5SDimitry Andric EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
61250b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs);
61260b57cec5SDimitry Andric break;
61270b57cec5SDimitry Andric }
61280b57cec5SDimitry Andric case 8: {
61290b57cec5SDimitry Andric // v8f16 is a special case. PTX doesn't have ld.v8.f16
61300b57cec5SDimitry Andric // instruction. Instead, we split the vector into v2f16 chunks and
61310b57cec5SDimitry Andric // load them with ld.v4.b32.
61325f757f3fSDimitry Andric assert(Is16bitsType(EltVT.getSimpleVT()) && "Unsupported v8 vector type.");
61335f757f3fSDimitry Andric Load16x2 = true;
61340b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV4;
61355f757f3fSDimitry Andric EVT VVT;
61365f757f3fSDimitry Andric switch (EltVT.getSimpleVT().SimpleTy) {
61375f757f3fSDimitry Andric case MVT::f16:
61385f757f3fSDimitry Andric VVT = MVT::v2f16;
61395f757f3fSDimitry Andric break;
61405f757f3fSDimitry Andric case MVT::bf16:
61415f757f3fSDimitry Andric VVT = MVT::v2bf16;
61425f757f3fSDimitry Andric break;
61435f757f3fSDimitry Andric case MVT::i16:
61445f757f3fSDimitry Andric VVT = MVT::v2i16;
61455f757f3fSDimitry Andric break;
61465f757f3fSDimitry Andric default:
61475f757f3fSDimitry Andric llvm_unreachable("Unsupported v8 vector type.");
61485f757f3fSDimitry Andric }
6149bdd1243dSDimitry Andric EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
61500b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs);
61510b57cec5SDimitry Andric break;
61520b57cec5SDimitry Andric }
61530b57cec5SDimitry Andric }
61540b57cec5SDimitry Andric
61550b57cec5SDimitry Andric // Copy regular operands
61560b57cec5SDimitry Andric SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
61570b57cec5SDimitry Andric
61580b57cec5SDimitry Andric // The select routine does not have access to the LoadSDNode instance, so
61590b57cec5SDimitry Andric // pass along the extension information
61600b57cec5SDimitry Andric OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
61610b57cec5SDimitry Andric
61620b57cec5SDimitry Andric SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
61630b57cec5SDimitry Andric LD->getMemoryVT(),
61640b57cec5SDimitry Andric LD->getMemOperand());
61650b57cec5SDimitry Andric
61660b57cec5SDimitry Andric SmallVector<SDValue, 8> ScalarRes;
61675f757f3fSDimitry Andric if (Load16x2) {
61680b57cec5SDimitry Andric // Split v2f16 subvectors back into individual elements.
61690b57cec5SDimitry Andric NumElts /= 2;
61700b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) {
61710b57cec5SDimitry Andric SDValue SubVector = NewLD.getValue(i);
61720b57cec5SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
61730b57cec5SDimitry Andric DAG.getIntPtrConstant(0, DL));
61740b57cec5SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
61750b57cec5SDimitry Andric DAG.getIntPtrConstant(1, DL));
61760b57cec5SDimitry Andric ScalarRes.push_back(E0);
61770b57cec5SDimitry Andric ScalarRes.push_back(E1);
61780b57cec5SDimitry Andric }
61790b57cec5SDimitry Andric } else {
61800b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) {
61810b57cec5SDimitry Andric SDValue Res = NewLD.getValue(i);
61820b57cec5SDimitry Andric if (NeedTrunc)
61830b57cec5SDimitry Andric Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
61840b57cec5SDimitry Andric ScalarRes.push_back(Res);
61850b57cec5SDimitry Andric }
61860b57cec5SDimitry Andric }
61870b57cec5SDimitry Andric
61880b57cec5SDimitry Andric SDValue LoadChain = NewLD.getValue(NumElts);
61890b57cec5SDimitry Andric
61900b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
61910b57cec5SDimitry Andric
61920b57cec5SDimitry Andric Results.push_back(BuildVec);
61930b57cec5SDimitry Andric Results.push_back(LoadChain);
61940b57cec5SDimitry Andric }
61950b57cec5SDimitry Andric
ReplaceINTRINSIC_W_CHAIN(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)61960b57cec5SDimitry Andric static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
61970b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) {
61980b57cec5SDimitry Andric SDValue Chain = N->getOperand(0);
61990b57cec5SDimitry Andric SDValue Intrin = N->getOperand(1);
62000b57cec5SDimitry Andric SDLoc DL(N);
62010b57cec5SDimitry Andric
62020b57cec5SDimitry Andric // Get the intrinsic ID
62031db9f3b2SDimitry Andric unsigned IntrinNo = Intrin.getNode()->getAsZExtVal();
62040b57cec5SDimitry Andric switch (IntrinNo) {
62050b57cec5SDimitry Andric default:
62060b57cec5SDimitry Andric return;
62070b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i:
62080b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f:
62090b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p:
62100b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i:
62110b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f:
62120b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: {
62130b57cec5SDimitry Andric EVT ResVT = N->getValueType(0);
62140b57cec5SDimitry Andric
62150b57cec5SDimitry Andric if (ResVT.isVector()) {
62160b57cec5SDimitry Andric // Vector LDG/LDU
62170b57cec5SDimitry Andric
62180b57cec5SDimitry Andric unsigned NumElts = ResVT.getVectorNumElements();
62190b57cec5SDimitry Andric EVT EltVT = ResVT.getVectorElementType();
62200b57cec5SDimitry Andric
62210b57cec5SDimitry Andric // Since LDU/LDG are target nodes, we cannot rely on DAG type
62220b57cec5SDimitry Andric // legalization.
62230b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the
62240b57cec5SDimitry Andric // loaded type to i16 and propagate the "real" type as the memory type.
62250b57cec5SDimitry Andric bool NeedTrunc = false;
62260b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16) {
62270b57cec5SDimitry Andric EltVT = MVT::i16;
62280b57cec5SDimitry Andric NeedTrunc = true;
62290b57cec5SDimitry Andric }
62300b57cec5SDimitry Andric
62310b57cec5SDimitry Andric unsigned Opcode = 0;
62320b57cec5SDimitry Andric SDVTList LdResVTs;
62330b57cec5SDimitry Andric
62340b57cec5SDimitry Andric switch (NumElts) {
62350b57cec5SDimitry Andric default:
62360b57cec5SDimitry Andric return;
62370b57cec5SDimitry Andric case 2:
62380b57cec5SDimitry Andric switch (IntrinNo) {
62390b57cec5SDimitry Andric default:
62400b57cec5SDimitry Andric return;
62410b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i:
62420b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f:
62430b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p:
62440b57cec5SDimitry Andric Opcode = NVPTXISD::LDGV2;
62450b57cec5SDimitry Andric break;
62460b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i:
62470b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f:
62480b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p:
62490b57cec5SDimitry Andric Opcode = NVPTXISD::LDUV2;
62500b57cec5SDimitry Andric break;
62510b57cec5SDimitry Andric }
62520b57cec5SDimitry Andric LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
62530b57cec5SDimitry Andric break;
62540b57cec5SDimitry Andric case 4: {
62550b57cec5SDimitry Andric switch (IntrinNo) {
62560b57cec5SDimitry Andric default:
62570b57cec5SDimitry Andric return;
62580b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i:
62590b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f:
62600b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p:
62610b57cec5SDimitry Andric Opcode = NVPTXISD::LDGV4;
62620b57cec5SDimitry Andric break;
62630b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i:
62640b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f:
62650b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p:
62660b57cec5SDimitry Andric Opcode = NVPTXISD::LDUV4;
62670b57cec5SDimitry Andric break;
62680b57cec5SDimitry Andric }
62690b57cec5SDimitry Andric EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
62700b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs);
62710b57cec5SDimitry Andric break;
62720b57cec5SDimitry Andric }
62730b57cec5SDimitry Andric }
62740b57cec5SDimitry Andric
62750b57cec5SDimitry Andric SmallVector<SDValue, 8> OtherOps;
62760b57cec5SDimitry Andric
62770b57cec5SDimitry Andric // Copy regular operands
62780b57cec5SDimitry Andric
62790b57cec5SDimitry Andric OtherOps.push_back(Chain); // Chain
62800b57cec5SDimitry Andric // Skip operand 1 (intrinsic ID)
62810b57cec5SDimitry Andric // Others
62820b57cec5SDimitry Andric OtherOps.append(N->op_begin() + 2, N->op_end());
62830b57cec5SDimitry Andric
62840b57cec5SDimitry Andric MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
62850b57cec5SDimitry Andric
62860b57cec5SDimitry Andric SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
62870b57cec5SDimitry Andric MemSD->getMemoryVT(),
62880b57cec5SDimitry Andric MemSD->getMemOperand());
62890b57cec5SDimitry Andric
62900b57cec5SDimitry Andric SmallVector<SDValue, 4> ScalarRes;
62910b57cec5SDimitry Andric
62920b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) {
62930b57cec5SDimitry Andric SDValue Res = NewLD.getValue(i);
62940b57cec5SDimitry Andric if (NeedTrunc)
62950b57cec5SDimitry Andric Res =
62960b57cec5SDimitry Andric DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
62970b57cec5SDimitry Andric ScalarRes.push_back(Res);
62980b57cec5SDimitry Andric }
62990b57cec5SDimitry Andric
63000b57cec5SDimitry Andric SDValue LoadChain = NewLD.getValue(NumElts);
63010b57cec5SDimitry Andric
63020b57cec5SDimitry Andric SDValue BuildVec =
63030b57cec5SDimitry Andric DAG.getBuildVector(ResVT, DL, ScalarRes);
63040b57cec5SDimitry Andric
63050b57cec5SDimitry Andric Results.push_back(BuildVec);
63060b57cec5SDimitry Andric Results.push_back(LoadChain);
63070b57cec5SDimitry Andric } else {
63080b57cec5SDimitry Andric // i8 LDG/LDU
63090b57cec5SDimitry Andric assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
63100b57cec5SDimitry Andric "Custom handling of non-i8 ldu/ldg?");
63110b57cec5SDimitry Andric
63120b57cec5SDimitry Andric // Just copy all operands as-is
63130b57cec5SDimitry Andric SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
63140b57cec5SDimitry Andric
63150b57cec5SDimitry Andric // Force output to i16
63160b57cec5SDimitry Andric SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
63170b57cec5SDimitry Andric
63180b57cec5SDimitry Andric MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
63190b57cec5SDimitry Andric
63200b57cec5SDimitry Andric // We make sure the memory type is i8, which will be used during isel
63210b57cec5SDimitry Andric // to select the proper instruction.
63220b57cec5SDimitry Andric SDValue NewLD =
63230b57cec5SDimitry Andric DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
63240b57cec5SDimitry Andric MVT::i8, MemSD->getMemOperand());
63250b57cec5SDimitry Andric
63260b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
63270b57cec5SDimitry Andric NewLD.getValue(0)));
63280b57cec5SDimitry Andric Results.push_back(NewLD.getValue(1));
63290b57cec5SDimitry Andric }
63300b57cec5SDimitry Andric }
63310b57cec5SDimitry Andric }
63320b57cec5SDimitry Andric }
63330b57cec5SDimitry Andric
ReplaceCopyFromReg_128(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)63340fca6ea1SDimitry Andric static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG,
63350fca6ea1SDimitry Andric SmallVectorImpl<SDValue> &Results) {
63360fca6ea1SDimitry Andric // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit
63370fca6ea1SDimitry Andric // result so that it can pass the legalization
63380fca6ea1SDimitry Andric SDLoc DL(N);
63390fca6ea1SDimitry Andric SDValue Chain = N->getOperand(0);
63400fca6ea1SDimitry Andric SDValue Reg = N->getOperand(1);
63410fca6ea1SDimitry Andric SDValue Glue = N->getOperand(2);
63420fca6ea1SDimitry Andric
63430fca6ea1SDimitry Andric assert(Reg.getValueType() == MVT::i128 &&
63440fca6ea1SDimitry Andric "Custom lowering for CopyFromReg with 128-bit reg only");
63450fca6ea1SDimitry Andric SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1),
63460fca6ea1SDimitry Andric N->getValueType(2)};
63470fca6ea1SDimitry Andric SmallVector<SDValue, 3> NewOps = {Chain, Reg, Glue};
63480fca6ea1SDimitry Andric
63490fca6ea1SDimitry Andric SDValue NewValue = DAG.getNode(ISD::CopyFromReg, DL, ResultsType, NewOps);
63500fca6ea1SDimitry Andric SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
63510fca6ea1SDimitry Andric {NewValue.getValue(0), NewValue.getValue(1)});
63520fca6ea1SDimitry Andric
63530fca6ea1SDimitry Andric Results.push_back(Pair);
63540fca6ea1SDimitry Andric Results.push_back(NewValue.getValue(2));
63550fca6ea1SDimitry Andric Results.push_back(NewValue.getValue(3));
63560fca6ea1SDimitry Andric }
63570fca6ea1SDimitry Andric
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const63580b57cec5SDimitry Andric void NVPTXTargetLowering::ReplaceNodeResults(
63590b57cec5SDimitry Andric SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
63600b57cec5SDimitry Andric switch (N->getOpcode()) {
63610b57cec5SDimitry Andric default:
63620b57cec5SDimitry Andric report_fatal_error("Unhandled custom legalization");
63630b57cec5SDimitry Andric case ISD::LOAD:
63640b57cec5SDimitry Andric ReplaceLoadVector(N, DAG, Results);
63650b57cec5SDimitry Andric return;
63660b57cec5SDimitry Andric case ISD::INTRINSIC_W_CHAIN:
63670b57cec5SDimitry Andric ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
63680b57cec5SDimitry Andric return;
63690fca6ea1SDimitry Andric case ISD::CopyFromReg:
63700fca6ea1SDimitry Andric ReplaceCopyFromReg_128(N, DAG, Results);
63710fca6ea1SDimitry Andric return;
63720b57cec5SDimitry Andric }
63730b57cec5SDimitry Andric }
63740b57cec5SDimitry Andric
637581ad6265SDimitry Andric NVPTXTargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const637681ad6265SDimitry Andric NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
637781ad6265SDimitry Andric Type *Ty = AI->getValOperand()->getType();
637881ad6265SDimitry Andric
637981ad6265SDimitry Andric if (AI->isFloatingPointOperation()) {
638081ad6265SDimitry Andric if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
63810fca6ea1SDimitry Andric if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
63820fca6ea1SDimitry Andric STI.getPTXVersion() >= 63)
63830fca6ea1SDimitry Andric return AtomicExpansionKind::None;
63840fca6ea1SDimitry Andric if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 &&
63850fca6ea1SDimitry Andric STI.getPTXVersion() >= 78)
63860fca6ea1SDimitry Andric return AtomicExpansionKind::None;
638781ad6265SDimitry Andric if (Ty->isFloatTy())
638881ad6265SDimitry Andric return AtomicExpansionKind::None;
638981ad6265SDimitry Andric if (Ty->isDoubleTy() && STI.hasAtomAddF64())
639081ad6265SDimitry Andric return AtomicExpansionKind::None;
639181ad6265SDimitry Andric }
639281ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
639381ad6265SDimitry Andric }
639481ad6265SDimitry Andric
639581ad6265SDimitry Andric assert(Ty->isIntegerTy() && "Ty should be integer at this point");
639681ad6265SDimitry Andric auto ITy = cast<llvm::IntegerType>(Ty);
639781ad6265SDimitry Andric
639881ad6265SDimitry Andric switch (AI->getOperation()) {
639981ad6265SDimitry Andric default:
640081ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
640181ad6265SDimitry Andric case AtomicRMWInst::BinOp::And:
640281ad6265SDimitry Andric case AtomicRMWInst::BinOp::Or:
640381ad6265SDimitry Andric case AtomicRMWInst::BinOp::Xor:
640481ad6265SDimitry Andric case AtomicRMWInst::BinOp::Xchg:
640581ad6265SDimitry Andric switch (ITy->getBitWidth()) {
640681ad6265SDimitry Andric case 8:
640781ad6265SDimitry Andric case 16:
640881ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
640981ad6265SDimitry Andric case 32:
641081ad6265SDimitry Andric return AtomicExpansionKind::None;
641181ad6265SDimitry Andric case 64:
641281ad6265SDimitry Andric if (STI.hasAtomBitwise64())
641381ad6265SDimitry Andric return AtomicExpansionKind::None;
641481ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
641581ad6265SDimitry Andric default:
641681ad6265SDimitry Andric llvm_unreachable("unsupported width encountered");
641781ad6265SDimitry Andric }
641881ad6265SDimitry Andric case AtomicRMWInst::BinOp::Add:
641981ad6265SDimitry Andric case AtomicRMWInst::BinOp::Sub:
642081ad6265SDimitry Andric case AtomicRMWInst::BinOp::Max:
642181ad6265SDimitry Andric case AtomicRMWInst::BinOp::Min:
642281ad6265SDimitry Andric case AtomicRMWInst::BinOp::UMax:
642381ad6265SDimitry Andric case AtomicRMWInst::BinOp::UMin:
642481ad6265SDimitry Andric switch (ITy->getBitWidth()) {
642581ad6265SDimitry Andric case 8:
642681ad6265SDimitry Andric case 16:
642781ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
642881ad6265SDimitry Andric case 32:
642981ad6265SDimitry Andric return AtomicExpansionKind::None;
643081ad6265SDimitry Andric case 64:
643181ad6265SDimitry Andric if (STI.hasAtomMinMax64())
643281ad6265SDimitry Andric return AtomicExpansionKind::None;
643381ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
643481ad6265SDimitry Andric default:
643581ad6265SDimitry Andric llvm_unreachable("unsupported width encountered");
643681ad6265SDimitry Andric }
643781ad6265SDimitry Andric }
643881ad6265SDimitry Andric
643981ad6265SDimitry Andric return AtomicExpansionKind::CmpXChg;
644081ad6265SDimitry Andric }
644181ad6265SDimitry Andric
64420b57cec5SDimitry Andric // Pin NVPTXTargetObjectFile's vtables to this file.
644381ad6265SDimitry Andric NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
64440b57cec5SDimitry Andric
SelectSectionForGlobal(const GlobalObject * GO,SectionKind Kind,const TargetMachine & TM) const64450b57cec5SDimitry Andric MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
64460b57cec5SDimitry Andric const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
64470b57cec5SDimitry Andric return getDataSection();
64480b57cec5SDimitry Andric }
6449