10b57cec5SDimitry Andric //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file defines the interfaces that NVPTX uses to lower LLVM code into a 100b57cec5SDimitry Andric // selection DAG. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 150b57cec5SDimitry Andric #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "NVPTX.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h" 190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric namespace llvm { 220b57cec5SDimitry Andric namespace NVPTXISD { 230b57cec5SDimitry Andric enum NodeType : unsigned { 240b57cec5SDimitry Andric // Start the numbering from where ISD NodeType finishes. 250b57cec5SDimitry Andric FIRST_NUMBER = ISD::BUILTIN_OP_END, 260b57cec5SDimitry Andric Wrapper, 270b57cec5SDimitry Andric CALL, 2806c3fb27SDimitry Andric RET_GLUE, 290b57cec5SDimitry Andric LOAD_PARAM, 300b57cec5SDimitry Andric DeclareParam, 310b57cec5SDimitry Andric DeclareScalarParam, 320b57cec5SDimitry Andric DeclareRetParam, 330b57cec5SDimitry Andric DeclareRet, 340b57cec5SDimitry Andric DeclareScalarRet, 350b57cec5SDimitry Andric PrintCall, 360b57cec5SDimitry Andric PrintConvergentCall, 370b57cec5SDimitry Andric PrintCallUni, 380b57cec5SDimitry Andric PrintConvergentCallUni, 390b57cec5SDimitry Andric CallArgBegin, 400b57cec5SDimitry Andric CallArg, 410b57cec5SDimitry Andric LastCallArg, 420b57cec5SDimitry Andric CallArgEnd, 430b57cec5SDimitry Andric CallVoid, 440b57cec5SDimitry Andric CallVal, 450b57cec5SDimitry Andric CallSymbol, 460b57cec5SDimitry Andric Prototype, 470b57cec5SDimitry Andric MoveParam, 480b57cec5SDimitry Andric PseudoUseParam, 490b57cec5SDimitry Andric RETURN, 500b57cec5SDimitry Andric CallSeqBegin, 510b57cec5SDimitry Andric CallSeqEnd, 520b57cec5SDimitry Andric CallPrototype, 530b57cec5SDimitry Andric ProxyReg, 540b57cec5SDimitry Andric FUN_SHFL_CLAMP, 550b57cec5SDimitry Andric FUN_SHFR_CLAMP, 560b57cec5SDimitry Andric MUL_WIDE_SIGNED, 570b57cec5SDimitry Andric MUL_WIDE_UNSIGNED, 580b57cec5SDimitry Andric IMAD, 590b57cec5SDimitry Andric SETP_F16X2, 605f757f3fSDimitry Andric SETP_BF16X2, 615f757f3fSDimitry Andric BFE, 625f757f3fSDimitry Andric BFI, 635f757f3fSDimitry Andric PRMT, 64*0fca6ea1SDimitry Andric DYNAMIC_STACKALLOC, 650b57cec5SDimitry Andric Dummy, 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, 680b57cec5SDimitry Andric LoadV4, 690b57cec5SDimitry Andric LDGV2, // LDG.v2 700b57cec5SDimitry Andric LDGV4, // LDG.v4 710b57cec5SDimitry Andric LDUV2, // LDU.v2 720b57cec5SDimitry Andric LDUV4, // LDU.v4 730b57cec5SDimitry Andric StoreV2, 740b57cec5SDimitry Andric StoreV4, 750b57cec5SDimitry Andric LoadParam, 760b57cec5SDimitry Andric LoadParamV2, 770b57cec5SDimitry Andric LoadParamV4, 780b57cec5SDimitry Andric StoreParam, 790b57cec5SDimitry Andric StoreParamV2, 800b57cec5SDimitry Andric StoreParamV4, 810b57cec5SDimitry Andric StoreParamS32, // to sext and store a <32bit value, not used currently 820b57cec5SDimitry Andric StoreParamU32, // to zext and store a <32bit value, not used currently 830b57cec5SDimitry Andric StoreRetval, 840b57cec5SDimitry Andric StoreRetvalV2, 850b57cec5SDimitry Andric StoreRetvalV4, 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric // Texture intrinsics 880b57cec5SDimitry Andric Tex1DFloatS32, 890b57cec5SDimitry Andric Tex1DFloatFloat, 900b57cec5SDimitry Andric Tex1DFloatFloatLevel, 910b57cec5SDimitry Andric Tex1DFloatFloatGrad, 920b57cec5SDimitry Andric Tex1DS32S32, 930b57cec5SDimitry Andric Tex1DS32Float, 940b57cec5SDimitry Andric Tex1DS32FloatLevel, 950b57cec5SDimitry Andric Tex1DS32FloatGrad, 960b57cec5SDimitry Andric Tex1DU32S32, 970b57cec5SDimitry Andric Tex1DU32Float, 980b57cec5SDimitry Andric Tex1DU32FloatLevel, 990b57cec5SDimitry Andric Tex1DU32FloatGrad, 1000b57cec5SDimitry Andric Tex1DArrayFloatS32, 1010b57cec5SDimitry Andric Tex1DArrayFloatFloat, 1020b57cec5SDimitry Andric Tex1DArrayFloatFloatLevel, 1030b57cec5SDimitry Andric Tex1DArrayFloatFloatGrad, 1040b57cec5SDimitry Andric Tex1DArrayS32S32, 1050b57cec5SDimitry Andric Tex1DArrayS32Float, 1060b57cec5SDimitry Andric Tex1DArrayS32FloatLevel, 1070b57cec5SDimitry Andric Tex1DArrayS32FloatGrad, 1080b57cec5SDimitry Andric Tex1DArrayU32S32, 1090b57cec5SDimitry Andric Tex1DArrayU32Float, 1100b57cec5SDimitry Andric Tex1DArrayU32FloatLevel, 1110b57cec5SDimitry Andric Tex1DArrayU32FloatGrad, 1120b57cec5SDimitry Andric Tex2DFloatS32, 1130b57cec5SDimitry Andric Tex2DFloatFloat, 1140b57cec5SDimitry Andric Tex2DFloatFloatLevel, 1150b57cec5SDimitry Andric Tex2DFloatFloatGrad, 1160b57cec5SDimitry Andric Tex2DS32S32, 1170b57cec5SDimitry Andric Tex2DS32Float, 1180b57cec5SDimitry Andric Tex2DS32FloatLevel, 1190b57cec5SDimitry Andric Tex2DS32FloatGrad, 1200b57cec5SDimitry Andric Tex2DU32S32, 1210b57cec5SDimitry Andric Tex2DU32Float, 1220b57cec5SDimitry Andric Tex2DU32FloatLevel, 1230b57cec5SDimitry Andric Tex2DU32FloatGrad, 1240b57cec5SDimitry Andric Tex2DArrayFloatS32, 1250b57cec5SDimitry Andric Tex2DArrayFloatFloat, 1260b57cec5SDimitry Andric Tex2DArrayFloatFloatLevel, 1270b57cec5SDimitry Andric Tex2DArrayFloatFloatGrad, 1280b57cec5SDimitry Andric Tex2DArrayS32S32, 1290b57cec5SDimitry Andric Tex2DArrayS32Float, 1300b57cec5SDimitry Andric Tex2DArrayS32FloatLevel, 1310b57cec5SDimitry Andric Tex2DArrayS32FloatGrad, 1320b57cec5SDimitry Andric Tex2DArrayU32S32, 1330b57cec5SDimitry Andric Tex2DArrayU32Float, 1340b57cec5SDimitry Andric Tex2DArrayU32FloatLevel, 1350b57cec5SDimitry Andric Tex2DArrayU32FloatGrad, 1360b57cec5SDimitry Andric Tex3DFloatS32, 1370b57cec5SDimitry Andric Tex3DFloatFloat, 1380b57cec5SDimitry Andric Tex3DFloatFloatLevel, 1390b57cec5SDimitry Andric Tex3DFloatFloatGrad, 1400b57cec5SDimitry Andric Tex3DS32S32, 1410b57cec5SDimitry Andric Tex3DS32Float, 1420b57cec5SDimitry Andric Tex3DS32FloatLevel, 1430b57cec5SDimitry Andric Tex3DS32FloatGrad, 1440b57cec5SDimitry Andric Tex3DU32S32, 1450b57cec5SDimitry Andric Tex3DU32Float, 1460b57cec5SDimitry Andric Tex3DU32FloatLevel, 1470b57cec5SDimitry Andric Tex3DU32FloatGrad, 1480b57cec5SDimitry Andric TexCubeFloatFloat, 1490b57cec5SDimitry Andric TexCubeFloatFloatLevel, 1500b57cec5SDimitry Andric TexCubeS32Float, 1510b57cec5SDimitry Andric TexCubeS32FloatLevel, 1520b57cec5SDimitry Andric TexCubeU32Float, 1530b57cec5SDimitry Andric TexCubeU32FloatLevel, 1540b57cec5SDimitry Andric TexCubeArrayFloatFloat, 1550b57cec5SDimitry Andric TexCubeArrayFloatFloatLevel, 1560b57cec5SDimitry Andric TexCubeArrayS32Float, 1570b57cec5SDimitry Andric TexCubeArrayS32FloatLevel, 1580b57cec5SDimitry Andric TexCubeArrayU32Float, 1590b57cec5SDimitry Andric TexCubeArrayU32FloatLevel, 1600b57cec5SDimitry Andric Tld4R2DFloatFloat, 1610b57cec5SDimitry Andric Tld4G2DFloatFloat, 1620b57cec5SDimitry Andric Tld4B2DFloatFloat, 1630b57cec5SDimitry Andric Tld4A2DFloatFloat, 1640b57cec5SDimitry Andric Tld4R2DS64Float, 1650b57cec5SDimitry Andric Tld4G2DS64Float, 1660b57cec5SDimitry Andric Tld4B2DS64Float, 1670b57cec5SDimitry Andric Tld4A2DS64Float, 1680b57cec5SDimitry Andric Tld4R2DU64Float, 1690b57cec5SDimitry Andric Tld4G2DU64Float, 1700b57cec5SDimitry Andric Tld4B2DU64Float, 1710b57cec5SDimitry Andric Tld4A2DU64Float, 1720b57cec5SDimitry Andric TexUnified1DFloatS32, 1730b57cec5SDimitry Andric TexUnified1DFloatFloat, 1740b57cec5SDimitry Andric TexUnified1DFloatFloatLevel, 1750b57cec5SDimitry Andric TexUnified1DFloatFloatGrad, 1760b57cec5SDimitry Andric TexUnified1DS32S32, 1770b57cec5SDimitry Andric TexUnified1DS32Float, 1780b57cec5SDimitry Andric TexUnified1DS32FloatLevel, 1790b57cec5SDimitry Andric TexUnified1DS32FloatGrad, 1800b57cec5SDimitry Andric TexUnified1DU32S32, 1810b57cec5SDimitry Andric TexUnified1DU32Float, 1820b57cec5SDimitry Andric TexUnified1DU32FloatLevel, 1830b57cec5SDimitry Andric TexUnified1DU32FloatGrad, 1840b57cec5SDimitry Andric TexUnified1DArrayFloatS32, 1850b57cec5SDimitry Andric TexUnified1DArrayFloatFloat, 1860b57cec5SDimitry Andric TexUnified1DArrayFloatFloatLevel, 1870b57cec5SDimitry Andric TexUnified1DArrayFloatFloatGrad, 1880b57cec5SDimitry Andric TexUnified1DArrayS32S32, 1890b57cec5SDimitry Andric TexUnified1DArrayS32Float, 1900b57cec5SDimitry Andric TexUnified1DArrayS32FloatLevel, 1910b57cec5SDimitry Andric TexUnified1DArrayS32FloatGrad, 1920b57cec5SDimitry Andric TexUnified1DArrayU32S32, 1930b57cec5SDimitry Andric TexUnified1DArrayU32Float, 1940b57cec5SDimitry Andric TexUnified1DArrayU32FloatLevel, 1950b57cec5SDimitry Andric TexUnified1DArrayU32FloatGrad, 1960b57cec5SDimitry Andric TexUnified2DFloatS32, 1970b57cec5SDimitry Andric TexUnified2DFloatFloat, 1980b57cec5SDimitry Andric TexUnified2DFloatFloatLevel, 1990b57cec5SDimitry Andric TexUnified2DFloatFloatGrad, 2000b57cec5SDimitry Andric TexUnified2DS32S32, 2010b57cec5SDimitry Andric TexUnified2DS32Float, 2020b57cec5SDimitry Andric TexUnified2DS32FloatLevel, 2030b57cec5SDimitry Andric TexUnified2DS32FloatGrad, 2040b57cec5SDimitry Andric TexUnified2DU32S32, 2050b57cec5SDimitry Andric TexUnified2DU32Float, 2060b57cec5SDimitry Andric TexUnified2DU32FloatLevel, 2070b57cec5SDimitry Andric TexUnified2DU32FloatGrad, 2080b57cec5SDimitry Andric TexUnified2DArrayFloatS32, 2090b57cec5SDimitry Andric TexUnified2DArrayFloatFloat, 2100b57cec5SDimitry Andric TexUnified2DArrayFloatFloatLevel, 2110b57cec5SDimitry Andric TexUnified2DArrayFloatFloatGrad, 2120b57cec5SDimitry Andric TexUnified2DArrayS32S32, 2130b57cec5SDimitry Andric TexUnified2DArrayS32Float, 2140b57cec5SDimitry Andric TexUnified2DArrayS32FloatLevel, 2150b57cec5SDimitry Andric TexUnified2DArrayS32FloatGrad, 2160b57cec5SDimitry Andric TexUnified2DArrayU32S32, 2170b57cec5SDimitry Andric TexUnified2DArrayU32Float, 2180b57cec5SDimitry Andric TexUnified2DArrayU32FloatLevel, 2190b57cec5SDimitry Andric TexUnified2DArrayU32FloatGrad, 2200b57cec5SDimitry Andric TexUnified3DFloatS32, 2210b57cec5SDimitry Andric TexUnified3DFloatFloat, 2220b57cec5SDimitry Andric TexUnified3DFloatFloatLevel, 2230b57cec5SDimitry Andric TexUnified3DFloatFloatGrad, 2240b57cec5SDimitry Andric TexUnified3DS32S32, 2250b57cec5SDimitry Andric TexUnified3DS32Float, 2260b57cec5SDimitry Andric TexUnified3DS32FloatLevel, 2270b57cec5SDimitry Andric TexUnified3DS32FloatGrad, 2280b57cec5SDimitry Andric TexUnified3DU32S32, 2290b57cec5SDimitry Andric TexUnified3DU32Float, 2300b57cec5SDimitry Andric TexUnified3DU32FloatLevel, 2310b57cec5SDimitry Andric TexUnified3DU32FloatGrad, 2320b57cec5SDimitry Andric TexUnifiedCubeFloatFloat, 2330b57cec5SDimitry Andric TexUnifiedCubeFloatFloatLevel, 2340b57cec5SDimitry Andric TexUnifiedCubeS32Float, 2350b57cec5SDimitry Andric TexUnifiedCubeS32FloatLevel, 2360b57cec5SDimitry Andric TexUnifiedCubeU32Float, 2370b57cec5SDimitry Andric TexUnifiedCubeU32FloatLevel, 2380b57cec5SDimitry Andric TexUnifiedCubeArrayFloatFloat, 2390b57cec5SDimitry Andric TexUnifiedCubeArrayFloatFloatLevel, 2400b57cec5SDimitry Andric TexUnifiedCubeArrayS32Float, 2410b57cec5SDimitry Andric TexUnifiedCubeArrayS32FloatLevel, 2420b57cec5SDimitry Andric TexUnifiedCubeArrayU32Float, 2430b57cec5SDimitry Andric TexUnifiedCubeArrayU32FloatLevel, 2447a6dacacSDimitry Andric TexUnifiedCubeFloatFloatGrad, 2457a6dacacSDimitry Andric TexUnifiedCubeS32FloatGrad, 2467a6dacacSDimitry Andric TexUnifiedCubeU32FloatGrad, 2477a6dacacSDimitry Andric TexUnifiedCubeArrayFloatFloatGrad, 2487a6dacacSDimitry Andric TexUnifiedCubeArrayS32FloatGrad, 2497a6dacacSDimitry Andric TexUnifiedCubeArrayU32FloatGrad, 2500b57cec5SDimitry Andric Tld4UnifiedR2DFloatFloat, 2510b57cec5SDimitry Andric Tld4UnifiedG2DFloatFloat, 2520b57cec5SDimitry Andric Tld4UnifiedB2DFloatFloat, 2530b57cec5SDimitry Andric Tld4UnifiedA2DFloatFloat, 2540b57cec5SDimitry Andric Tld4UnifiedR2DS64Float, 2550b57cec5SDimitry Andric Tld4UnifiedG2DS64Float, 2560b57cec5SDimitry Andric Tld4UnifiedB2DS64Float, 2570b57cec5SDimitry Andric Tld4UnifiedA2DS64Float, 2580b57cec5SDimitry Andric Tld4UnifiedR2DU64Float, 2590b57cec5SDimitry Andric Tld4UnifiedG2DU64Float, 2600b57cec5SDimitry Andric Tld4UnifiedB2DU64Float, 2610b57cec5SDimitry Andric Tld4UnifiedA2DU64Float, 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric // Surface intrinsics 2640b57cec5SDimitry Andric Suld1DI8Clamp, 2650b57cec5SDimitry Andric Suld1DI16Clamp, 2660b57cec5SDimitry Andric Suld1DI32Clamp, 2670b57cec5SDimitry Andric Suld1DI64Clamp, 2680b57cec5SDimitry Andric Suld1DV2I8Clamp, 2690b57cec5SDimitry Andric Suld1DV2I16Clamp, 2700b57cec5SDimitry Andric Suld1DV2I32Clamp, 2710b57cec5SDimitry Andric Suld1DV2I64Clamp, 2720b57cec5SDimitry Andric Suld1DV4I8Clamp, 2730b57cec5SDimitry Andric Suld1DV4I16Clamp, 2740b57cec5SDimitry Andric Suld1DV4I32Clamp, 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric Suld1DArrayI8Clamp, 2770b57cec5SDimitry Andric Suld1DArrayI16Clamp, 2780b57cec5SDimitry Andric Suld1DArrayI32Clamp, 2790b57cec5SDimitry Andric Suld1DArrayI64Clamp, 2800b57cec5SDimitry Andric Suld1DArrayV2I8Clamp, 2810b57cec5SDimitry Andric Suld1DArrayV2I16Clamp, 2820b57cec5SDimitry Andric Suld1DArrayV2I32Clamp, 2830b57cec5SDimitry Andric Suld1DArrayV2I64Clamp, 2840b57cec5SDimitry Andric Suld1DArrayV4I8Clamp, 2850b57cec5SDimitry Andric Suld1DArrayV4I16Clamp, 2860b57cec5SDimitry Andric Suld1DArrayV4I32Clamp, 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric Suld2DI8Clamp, 2890b57cec5SDimitry Andric Suld2DI16Clamp, 2900b57cec5SDimitry Andric Suld2DI32Clamp, 2910b57cec5SDimitry Andric Suld2DI64Clamp, 2920b57cec5SDimitry Andric Suld2DV2I8Clamp, 2930b57cec5SDimitry Andric Suld2DV2I16Clamp, 2940b57cec5SDimitry Andric Suld2DV2I32Clamp, 2950b57cec5SDimitry Andric Suld2DV2I64Clamp, 2960b57cec5SDimitry Andric Suld2DV4I8Clamp, 2970b57cec5SDimitry Andric Suld2DV4I16Clamp, 2980b57cec5SDimitry Andric Suld2DV4I32Clamp, 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric Suld2DArrayI8Clamp, 3010b57cec5SDimitry Andric Suld2DArrayI16Clamp, 3020b57cec5SDimitry Andric Suld2DArrayI32Clamp, 3030b57cec5SDimitry Andric Suld2DArrayI64Clamp, 3040b57cec5SDimitry Andric Suld2DArrayV2I8Clamp, 3050b57cec5SDimitry Andric Suld2DArrayV2I16Clamp, 3060b57cec5SDimitry Andric Suld2DArrayV2I32Clamp, 3070b57cec5SDimitry Andric Suld2DArrayV2I64Clamp, 3080b57cec5SDimitry Andric Suld2DArrayV4I8Clamp, 3090b57cec5SDimitry Andric Suld2DArrayV4I16Clamp, 3100b57cec5SDimitry Andric Suld2DArrayV4I32Clamp, 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric Suld3DI8Clamp, 3130b57cec5SDimitry Andric Suld3DI16Clamp, 3140b57cec5SDimitry Andric Suld3DI32Clamp, 3150b57cec5SDimitry Andric Suld3DI64Clamp, 3160b57cec5SDimitry Andric Suld3DV2I8Clamp, 3170b57cec5SDimitry Andric Suld3DV2I16Clamp, 3180b57cec5SDimitry Andric Suld3DV2I32Clamp, 3190b57cec5SDimitry Andric Suld3DV2I64Clamp, 3200b57cec5SDimitry Andric Suld3DV4I8Clamp, 3210b57cec5SDimitry Andric Suld3DV4I16Clamp, 3220b57cec5SDimitry Andric Suld3DV4I32Clamp, 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric Suld1DI8Trap, 3250b57cec5SDimitry Andric Suld1DI16Trap, 3260b57cec5SDimitry Andric Suld1DI32Trap, 3270b57cec5SDimitry Andric Suld1DI64Trap, 3280b57cec5SDimitry Andric Suld1DV2I8Trap, 3290b57cec5SDimitry Andric Suld1DV2I16Trap, 3300b57cec5SDimitry Andric Suld1DV2I32Trap, 3310b57cec5SDimitry Andric Suld1DV2I64Trap, 3320b57cec5SDimitry Andric Suld1DV4I8Trap, 3330b57cec5SDimitry Andric Suld1DV4I16Trap, 3340b57cec5SDimitry Andric Suld1DV4I32Trap, 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric Suld1DArrayI8Trap, 3370b57cec5SDimitry Andric Suld1DArrayI16Trap, 3380b57cec5SDimitry Andric Suld1DArrayI32Trap, 3390b57cec5SDimitry Andric Suld1DArrayI64Trap, 3400b57cec5SDimitry Andric Suld1DArrayV2I8Trap, 3410b57cec5SDimitry Andric Suld1DArrayV2I16Trap, 3420b57cec5SDimitry Andric Suld1DArrayV2I32Trap, 3430b57cec5SDimitry Andric Suld1DArrayV2I64Trap, 3440b57cec5SDimitry Andric Suld1DArrayV4I8Trap, 3450b57cec5SDimitry Andric Suld1DArrayV4I16Trap, 3460b57cec5SDimitry Andric Suld1DArrayV4I32Trap, 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric Suld2DI8Trap, 3490b57cec5SDimitry Andric Suld2DI16Trap, 3500b57cec5SDimitry Andric Suld2DI32Trap, 3510b57cec5SDimitry Andric Suld2DI64Trap, 3520b57cec5SDimitry Andric Suld2DV2I8Trap, 3530b57cec5SDimitry Andric Suld2DV2I16Trap, 3540b57cec5SDimitry Andric Suld2DV2I32Trap, 3550b57cec5SDimitry Andric Suld2DV2I64Trap, 3560b57cec5SDimitry Andric Suld2DV4I8Trap, 3570b57cec5SDimitry Andric Suld2DV4I16Trap, 3580b57cec5SDimitry Andric Suld2DV4I32Trap, 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric Suld2DArrayI8Trap, 3610b57cec5SDimitry Andric Suld2DArrayI16Trap, 3620b57cec5SDimitry Andric Suld2DArrayI32Trap, 3630b57cec5SDimitry Andric Suld2DArrayI64Trap, 3640b57cec5SDimitry Andric Suld2DArrayV2I8Trap, 3650b57cec5SDimitry Andric Suld2DArrayV2I16Trap, 3660b57cec5SDimitry Andric Suld2DArrayV2I32Trap, 3670b57cec5SDimitry Andric Suld2DArrayV2I64Trap, 3680b57cec5SDimitry Andric Suld2DArrayV4I8Trap, 3690b57cec5SDimitry Andric Suld2DArrayV4I16Trap, 3700b57cec5SDimitry Andric Suld2DArrayV4I32Trap, 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric Suld3DI8Trap, 3730b57cec5SDimitry Andric Suld3DI16Trap, 3740b57cec5SDimitry Andric Suld3DI32Trap, 3750b57cec5SDimitry Andric Suld3DI64Trap, 3760b57cec5SDimitry Andric Suld3DV2I8Trap, 3770b57cec5SDimitry Andric Suld3DV2I16Trap, 3780b57cec5SDimitry Andric Suld3DV2I32Trap, 3790b57cec5SDimitry Andric Suld3DV2I64Trap, 3800b57cec5SDimitry Andric Suld3DV4I8Trap, 3810b57cec5SDimitry Andric Suld3DV4I16Trap, 3820b57cec5SDimitry Andric Suld3DV4I32Trap, 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric Suld1DI8Zero, 3850b57cec5SDimitry Andric Suld1DI16Zero, 3860b57cec5SDimitry Andric Suld1DI32Zero, 3870b57cec5SDimitry Andric Suld1DI64Zero, 3880b57cec5SDimitry Andric Suld1DV2I8Zero, 3890b57cec5SDimitry Andric Suld1DV2I16Zero, 3900b57cec5SDimitry Andric Suld1DV2I32Zero, 3910b57cec5SDimitry Andric Suld1DV2I64Zero, 3920b57cec5SDimitry Andric Suld1DV4I8Zero, 3930b57cec5SDimitry Andric Suld1DV4I16Zero, 3940b57cec5SDimitry Andric Suld1DV4I32Zero, 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric Suld1DArrayI8Zero, 3970b57cec5SDimitry Andric Suld1DArrayI16Zero, 3980b57cec5SDimitry Andric Suld1DArrayI32Zero, 3990b57cec5SDimitry Andric Suld1DArrayI64Zero, 4000b57cec5SDimitry Andric Suld1DArrayV2I8Zero, 4010b57cec5SDimitry Andric Suld1DArrayV2I16Zero, 4020b57cec5SDimitry Andric Suld1DArrayV2I32Zero, 4030b57cec5SDimitry Andric Suld1DArrayV2I64Zero, 4040b57cec5SDimitry Andric Suld1DArrayV4I8Zero, 4050b57cec5SDimitry Andric Suld1DArrayV4I16Zero, 4060b57cec5SDimitry Andric Suld1DArrayV4I32Zero, 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric Suld2DI8Zero, 4090b57cec5SDimitry Andric Suld2DI16Zero, 4100b57cec5SDimitry Andric Suld2DI32Zero, 4110b57cec5SDimitry Andric Suld2DI64Zero, 4120b57cec5SDimitry Andric Suld2DV2I8Zero, 4130b57cec5SDimitry Andric Suld2DV2I16Zero, 4140b57cec5SDimitry Andric Suld2DV2I32Zero, 4150b57cec5SDimitry Andric Suld2DV2I64Zero, 4160b57cec5SDimitry Andric Suld2DV4I8Zero, 4170b57cec5SDimitry Andric Suld2DV4I16Zero, 4180b57cec5SDimitry Andric Suld2DV4I32Zero, 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric Suld2DArrayI8Zero, 4210b57cec5SDimitry Andric Suld2DArrayI16Zero, 4220b57cec5SDimitry Andric Suld2DArrayI32Zero, 4230b57cec5SDimitry Andric Suld2DArrayI64Zero, 4240b57cec5SDimitry Andric Suld2DArrayV2I8Zero, 4250b57cec5SDimitry Andric Suld2DArrayV2I16Zero, 4260b57cec5SDimitry Andric Suld2DArrayV2I32Zero, 4270b57cec5SDimitry Andric Suld2DArrayV2I64Zero, 4280b57cec5SDimitry Andric Suld2DArrayV4I8Zero, 4290b57cec5SDimitry Andric Suld2DArrayV4I16Zero, 4300b57cec5SDimitry Andric Suld2DArrayV4I32Zero, 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric Suld3DI8Zero, 4330b57cec5SDimitry Andric Suld3DI16Zero, 4340b57cec5SDimitry Andric Suld3DI32Zero, 4350b57cec5SDimitry Andric Suld3DI64Zero, 4360b57cec5SDimitry Andric Suld3DV2I8Zero, 4370b57cec5SDimitry Andric Suld3DV2I16Zero, 4380b57cec5SDimitry Andric Suld3DV2I32Zero, 4390b57cec5SDimitry Andric Suld3DV2I64Zero, 4400b57cec5SDimitry Andric Suld3DV4I8Zero, 4410b57cec5SDimitry Andric Suld3DV4I16Zero, 4420b57cec5SDimitry Andric Suld3DV4I32Zero 4430b57cec5SDimitry Andric }; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric class NVPTXSubtarget; 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 4490b57cec5SDimitry Andric // TargetLowering Implementation 4500b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 4510b57cec5SDimitry Andric class NVPTXTargetLowering : public TargetLowering { 4520b57cec5SDimitry Andric public: 4530b57cec5SDimitry Andric explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, 4540b57cec5SDimitry Andric const NVPTXSubtarget &STI); 4550b57cec5SDimitry Andric SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric const char *getTargetNodeName(unsigned Opcode) const override; 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 4620b57cec5SDimitry Andric MachineFunction &MF, 4630b57cec5SDimitry Andric unsigned Intrinsic) const override; 4640b57cec5SDimitry Andric 465*0fca6ea1SDimitry Andric Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, 466*0fca6ea1SDimitry Andric const DataLayout &DL) const; 467*0fca6ea1SDimitry Andric 46881ad6265SDimitry Andric /// getFunctionParamOptimizedAlign - since function arguments are passed via 46981ad6265SDimitry Andric /// .param space, we may want to increase their alignment in a way that 47081ad6265SDimitry Andric /// ensures that we can effectively vectorize their loads & stores. We can 47181ad6265SDimitry Andric /// increase alignment only if the function has internal or has private 47281ad6265SDimitry Andric /// linkage as for other linkage types callers may already rely on default 47381ad6265SDimitry Andric /// alignment. To allow using 128-bit vectorized loads/stores, this function 47481ad6265SDimitry Andric /// ensures that alignment is 16 or greater. 47581ad6265SDimitry Andric Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, 47681ad6265SDimitry Andric const DataLayout &DL) const; 47781ad6265SDimitry Andric 478bdd1243dSDimitry Andric /// Helper for computing alignment of a device function byval parameter. 479bdd1243dSDimitry Andric Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, 480bdd1243dSDimitry Andric Align InitialAlign, 481bdd1243dSDimitry Andric const DataLayout &DL) const; 482bdd1243dSDimitry Andric 48306c3fb27SDimitry Andric // Helper for getting a function parameter name. Name is composed from 48406c3fb27SDimitry Andric // its index and the function name. Negative index corresponds to special 48506c3fb27SDimitry Andric // parameter (unsized array) used for passing variable arguments. 48606c3fb27SDimitry Andric std::string getParamName(const Function *F, int Idx) const; 48706c3fb27SDimitry Andric 4880b57cec5SDimitry Andric /// isLegalAddressingMode - Return true if the addressing mode represented 4890b57cec5SDimitry Andric /// by AM is legal for this target, for a load/store of the specified type 4900b57cec5SDimitry Andric /// Used to guide target specific optimizations, like loop strength 4910b57cec5SDimitry Andric /// reduction (LoopStrengthReduce.cpp) and memory optimization for 4920b57cec5SDimitry Andric /// address mode (CodeGenPrepare.cpp) 4930b57cec5SDimitry Andric bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, 4940b57cec5SDimitry Andric unsigned AS, 4950b57cec5SDimitry Andric Instruction *I = nullptr) const override; 4960b57cec5SDimitry Andric isTruncateFree(Type * SrcTy,Type * DstTy)4970b57cec5SDimitry Andric bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { 4980b57cec5SDimitry Andric // Truncating 64-bit to 32-bit is free in SASS. 4990b57cec5SDimitry Andric if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 5000b57cec5SDimitry Andric return false; 5010b57cec5SDimitry Andric return SrcTy->getPrimitiveSizeInBits() == 64 && 5020b57cec5SDimitry Andric DstTy->getPrimitiveSizeInBits() == 32; 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric getSetCCResultType(const DataLayout & DL,LLVMContext & Ctx,EVT VT)5050b57cec5SDimitry Andric EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, 5060b57cec5SDimitry Andric EVT VT) const override { 5070b57cec5SDimitry Andric if (VT.isVector()) 5080b57cec5SDimitry Andric return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); 5090b57cec5SDimitry Andric return MVT::i1; 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric ConstraintType getConstraintType(StringRef Constraint) const override; 5130b57cec5SDimitry Andric std::pair<unsigned, const TargetRegisterClass *> 5140b57cec5SDimitry Andric getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 5150b57cec5SDimitry Andric StringRef Constraint, MVT VT) const override; 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, 5180b57cec5SDimitry Andric bool isVarArg, 5190b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, 5200b57cec5SDimitry Andric const SDLoc &dl, SelectionDAG &DAG, 5210b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const override; 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric SDValue LowerCall(CallLoweringInfo &CLI, 5240b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const override; 5250b57cec5SDimitry Andric 5265f757f3fSDimitry Andric SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 5275f757f3fSDimitry Andric 528bdd1243dSDimitry Andric std::string 529bdd1243dSDimitry Andric getPrototype(const DataLayout &DL, Type *, const ArgListTy &, 530bdd1243dSDimitry Andric const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment, 531bdd1243dSDimitry Andric std::optional<std::pair<unsigned, const APInt &>> VAInfo, 532bdd1243dSDimitry Andric const CallBase &CB, unsigned UniqueCallSite) const; 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 5350b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, 5360b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, 5370b57cec5SDimitry Andric SelectionDAG &DAG) const override; 5380b57cec5SDimitry Andric 5395f757f3fSDimitry Andric void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, 5400b57cec5SDimitry Andric std::vector<SDValue> &Ops, 5410b57cec5SDimitry Andric SelectionDAG &DAG) const override; 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric const NVPTXTargetMachine *nvTM; 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric // PTX always uses 32-bit shift amounts getScalarShiftAmountTy(const DataLayout &,EVT)5460b57cec5SDimitry Andric MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { 5470b57cec5SDimitry Andric return MVT::i32; 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric TargetLoweringBase::LegalizeTypeAction 5510b57cec5SDimitry Andric getPreferredVectorAction(MVT VT) const override; 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric // Get the degree of precision we want from 32-bit floating point division 5540b57cec5SDimitry Andric // operations. 5550b57cec5SDimitry Andric // 5560b57cec5SDimitry Andric // 0 - Use ptx div.approx 5570b57cec5SDimitry Andric // 1 - Use ptx.div.full (approximate, but less so than div.approx) 5580b57cec5SDimitry Andric // 2 - Use IEEE-compliant div instructions, if available. 5590b57cec5SDimitry Andric int getDivF32Level() const; 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric // Get whether we should use a precise or approximate 32-bit floating point 5620b57cec5SDimitry Andric // sqrt instruction. 5630b57cec5SDimitry Andric bool usePrecSqrtF32() const; 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric // Get whether we should use instructions that flush floating-point denormals 5660b57cec5SDimitry Andric // to sign-preserving zero. 5670b57cec5SDimitry Andric bool useF32FTZ(const MachineFunction &MF) const; 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 5700b57cec5SDimitry Andric int &ExtraSteps, bool &UseOneConst, 5710b57cec5SDimitry Andric bool Reciprocal) const override; 5720b57cec5SDimitry Andric combineRepeatedFPDivisors()5730b57cec5SDimitry Andric unsigned combineRepeatedFPDivisors() const override { return 2; } 5740b57cec5SDimitry Andric 5755f757f3fSDimitry Andric bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const; 5760b57cec5SDimitry Andric bool allowUnsafeFPMath(MachineFunction &MF) const; 5770b57cec5SDimitry Andric isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT)578480093f4SDimitry Andric bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 579480093f4SDimitry Andric EVT) const override { 580480093f4SDimitry Andric return true; 581480093f4SDimitry Andric } 5820b57cec5SDimitry Andric enableAggressiveFMAFusion(EVT VT)5830b57cec5SDimitry Andric bool enableAggressiveFMAFusion(EVT VT) const override { return true; } 5840b57cec5SDimitry Andric 5850b57cec5SDimitry Andric // The default is to transform llvm.ctlz(x, false) (where false indicates that 5860b57cec5SDimitry Andric // x == 0 is not undefined behavior) into a branch that checks whether x is 0 5870b57cec5SDimitry Andric // and avoids calling ctlz in that case. We have a dedicated ctlz 5880b57cec5SDimitry Andric // instruction, so we say that ctlz is cheap to speculate. isCheapToSpeculateCtlz(Type * Ty)589bdd1243dSDimitry Andric bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } 5900b57cec5SDimitry Andric shouldCastAtomicLoadInIR(LoadInst * LI)59181ad6265SDimitry Andric AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { 59281ad6265SDimitry Andric return AtomicExpansionKind::None; 59381ad6265SDimitry Andric } 59481ad6265SDimitry Andric shouldCastAtomicStoreInIR(StoreInst * SI)59581ad6265SDimitry Andric AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { 59681ad6265SDimitry Andric return AtomicExpansionKind::None; 59781ad6265SDimitry Andric } 59881ad6265SDimitry Andric 59981ad6265SDimitry Andric AtomicExpansionKind 60081ad6265SDimitry Andric shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; 60181ad6265SDimitry Andric aggressivelyPreferBuildVectorSources(EVT VecVT)6025f757f3fSDimitry Andric bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { 6035f757f3fSDimitry Andric // There's rarely any point of packing something into a vector type if we 6045f757f3fSDimitry Andric // already have the source data. 6055f757f3fSDimitry Andric return true; 6065f757f3fSDimitry Andric } 6075f757f3fSDimitry Andric 6080b57cec5SDimitry Andric private: 6090b57cec5SDimitry Andric const NVPTXSubtarget &STI; // cache the subtarget here 6100b57cec5SDimitry Andric SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 6130b57cec5SDimitry Andric SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 6140b57cec5SDimitry Andric SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 6155f757f3fSDimitry Andric SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 6165f757f3fSDimitry Andric SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; 6190b57cec5SDimitry Andric SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; 6200b57cec5SDimitry Andric SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; 6210b57cec5SDimitry Andric 6225f757f3fSDimitry Andric SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 6235f757f3fSDimitry Andric SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 6245f757f3fSDimitry Andric 625*0fca6ea1SDimitry Andric SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 626*0fca6ea1SDimitry Andric SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; 627*0fca6ea1SDimitry Andric 6280b57cec5SDimitry Andric SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 6290b57cec5SDimitry Andric SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 6320b57cec5SDimitry Andric SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; 6330b57cec5SDimitry Andric SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; 6340b57cec5SDimitry Andric 6350b57cec5SDimitry Andric SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; 6360b57cec5SDimitry Andric SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; 6390b57cec5SDimitry Andric 640bdd1243dSDimitry Andric SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 641bdd1243dSDimitry Andric SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 642bdd1243dSDimitry Andric 643*0fca6ea1SDimitry Andric SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const; 644*0fca6ea1SDimitry Andric unsigned getNumRegisters(LLVMContext &Context, EVT VT, 645*0fca6ea1SDimitry Andric std::optional<MVT> RegisterVT) const override; 646*0fca6ea1SDimitry Andric bool 647*0fca6ea1SDimitry Andric splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, 648*0fca6ea1SDimitry Andric SDValue *Parts, unsigned NumParts, MVT PartVT, 649*0fca6ea1SDimitry Andric std::optional<CallingConv::ID> CC) const override; 650*0fca6ea1SDimitry Andric 6510b57cec5SDimitry Andric void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 6520b57cec5SDimitry Andric SelectionDAG &DAG) const override; 6530b57cec5SDimitry Andric SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 6540b57cec5SDimitry Andric 6557a6dacacSDimitry Andric Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx, 6567a6dacacSDimitry Andric const DataLayout &DL) const; 6570b57cec5SDimitry Andric }; 6585f757f3fSDimitry Andric 6590b57cec5SDimitry Andric } // namespace llvm 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric #endif 662