//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

include "NVPTXInstrFormats.td"

// Half-precision immediate operands. Both carry the OPERAND_IMMEDIATE operand
// type.
def f16imm : Operand<f16> {
  let OperandType = "OPERAND_IMMEDIATE";
}
def bf16imm : Operand<bf16> {
  let OperandType = "OPERAND_IMMEDIATE";
}

// List of vector specific properties
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;

//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

def brtarget : Operand<OtherVT>;

// CVT conversion modes
// These must match the enum in NVPTX.h
//
// Base modes use the values 0x0-0x9.
def CvtNONE : PatLeaf<(i32 0x0)>;
def CvtRNI  : PatLeaf<(i32 0x1)>;
def CvtRZI  : PatLeaf<(i32 0x2)>;
def CvtRMI  : PatLeaf<(i32 0x3)>;
def CvtRPI  : PatLeaf<(i32 0x4)>;
def CvtRN   : PatLeaf<(i32 0x5)>;
def CvtRZ   : PatLeaf<(i32 0x6)>;
def CvtRM   : PatLeaf<(i32 0x7)>;
def CvtRP   : PatLeaf<(i32 0x8)>;
def CvtRNA  : PatLeaf<(i32 0x9)>;

// FTZ variants: the matching base value plus 0x10. (No FTZ leaf is defined
// for CvtRNA.)
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ  : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ  : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ  : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ  : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ   : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ   : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ   : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ   : PatLeaf<(i32 0x18)>;

// Saturating variants: 0x20, and 0x20 + 0x10 for the FTZ form.
def CvtSAT     : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;

// RELU variants: the matching base value plus 0x40.
def CvtNONE_RELU : PatLeaf<(i32 0x40)>;
def CvtRN_RELU   : PatLeaf<(i32 0x45)>;
def CvtRZ_RELU   : PatLeaf<(i32 0x46)>;

// Operand carrying one of the Cvt* values above; printed by printCvtMode.
let PrintMethod = "printCvtMode" in
def CvtMode : Operand<i32>;

// Compare modes
// These must match the enum in NVPTX.h
def CmpEQ  : PatLeaf<(i32 0x0)>;
def CmpNE  : PatLeaf<(i32 0x1)>;
def CmpLT  : PatLeaf<(i32 0x2)>;
def CmpLE  : PatLeaf<(i32 0x3)>;
def CmpGT  : PatLeaf<(i32 0x4)>;
def CmpGE  : PatLeaf<(i32 0x5)>;
def CmpLO  : PatLeaf<(i32 0x6)>;
def CmpLS  : PatLeaf<(i32 0x7)>;
def CmpHI  : PatLeaf<(i32 0x8)>;
def CmpHS  : PatLeaf<(i32 0x9)>;
def CmpEQU : PatLeaf<(i32 0xA)>;
def CmpNEU : PatLeaf<(i32 0xB)>;
def CmpLTU : PatLeaf<(i32 0xC)>;
def CmpLEU : PatLeaf<(i32 0xD)>;
def CmpGTU : PatLeaf<(i32 0xE)>;
def CmpGEU : PatLeaf<(i32 0xF)>;
def CmpNUM : PatLeaf<(i32 0x10)>;
def CmpNAN : PatLeaf<(i32 0x11)>;

// FTZ variants: the matching base value plus 0x100. No FTZ leaves are defined
// for CmpLO/CmpLS/CmpHI/CmpHS.
def CmpEQ_FTZ  : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ  : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ  : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ  : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ  : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ  : PatLeaf<(i32 0x105)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// Operands printed by printCmpMode and printVecElement respectively.
let PrintMethod = "printCmpMode" in
def CmpMode : Operand<i32>;
let PrintMethod = "printVecElement" in
def VecElement : Operand<i32>;

// PRMT modes
// These must match the enum in NVPTX.h
def PrmtNONE : PatLeaf<(i32 0x0)>;
def PrmtF4E  : PatLeaf<(i32 0x1)>;
def PrmtB4E  : PatLeaf<(i32 0x2)>;
def PrmtRC8  : PatLeaf<(i32 0x3)>;
def PrmtECL  : PatLeaf<(i32 0x4)>;
def PrmtECR  : PatLeaf<(i32 0x5)>;
def PrmtRC16 : PatLeaf<(i32 0x6)>;

// Operand carrying one of the Prmt* values above; printed by printPrmtMode.
let PrintMethod = "printPrmtMode" in
def PrmtMode : Operand<i32>;


//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//


// Each predicate wraps the C++ expression given as its string argument.
def hasAtomAddF64    : Predicate<"Subtarget->hasAtomAddF64()">;
def hasAtomScope     : Predicate<"Subtarget->hasAtomScope()">;
def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
def hasAtomMinMax64  : Predicate<"Subtarget->hasAtomMinMax64()">;
def hasVote          : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasLDG    : Predicate<"Subtarget->hasLDG()">;
def hasLDU    : Predicate<"Subtarget->hasLDU()">;

// f32 flush-to-zero selection and its negation.
def doF32FTZ   : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
def doRsqrtOpt : Predicate<"doRsqrtOpt()">;

// NOTE(review): unlike its neighbours this expression has no call parentheses;
// presumably it names a data member rather than a function -- confirm against
// the instruction selector.
def doMulWide : Predicate<"doMulWide">;

// Complementary pairs: each "no*" predicate is the negation of its partner.
def allowFMA          : Predicate<"allowFMA()">;
def noFMA             : Predicate<"!allowFMA()">;
def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def noUnsafeFPMath    : Predicate<"!allowUnsafeFPMath()">;

def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL   : Predicate<"getDivF32Level()==1">;

def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN     : Predicate<"usePrecSqrtF32()">;

def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32  : Predicate<"!Subtarget->hasHWROT32()">;

// Constant predicates.
def True  : Predicate<"true">;
def False : Predicate<"false">;

// Minimum-version predicates: the template argument is pasted onto the end of
// the ">=" comparison, e.g. hasPTX<80> tests getPTXVersion() >= 80.
class hasPTX<int version>
  : Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>
  : Predicate<"Subtarget->getSmVersion() >= " # version>;

// Explicit records for arch-accelerated SM versions
def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;

// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
// (The expression is split over two adjacent string literals.)
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
                        "&& Subtarget->getPTXVersion() >= 64)">;

// True on 64-bit targets whose pointers into the named address space are
// 32 bits wide.
def useShortPtrLocal
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">;
def useShortPtrShared
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">;
def useShortPtrConst
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">;

def useFP16Math : Predicate<"Subtarget->allowFP16Math()">;
def hasBF16Math : Predicate<"Subtarget->hasBF16Math()">;

// Helper class to aid conversion between ValueType and a matching RegisterClass.
// `ret` is the register class selected by the record name of T. Scalar 16-bit
// types (i16, f16, bf16) map to Int16Regs and packed two-element types
// (v2i16, v2f16, v2bf16) map to Int32Regs. A name that matches none of the
// cases makes !cond fail, which TableGen reports as an error.

class ValueToRegClass<ValueType T> {
  // Record name of the value type (e.g. "i32"); the lookup key below.
  string name = !cast<string>(T);
  NVPTXRegClass ret = !cond(
    !eq(name, "i1"):     Int1Regs,
    !eq(name, "i16"):    Int16Regs,
    !eq(name, "v2i16"):  Int32Regs,
    !eq(name, "i32"):    Int32Regs,
    !eq(name, "i64"):    Int64Regs,
    !eq(name, "f16"):    Int16Regs,
    !eq(name, "v2f16"):  Int32Regs,
    !eq(name, "bf16"):   Int16Regs,
    !eq(name, "v2bf16"): Int32Regs,
    !eq(name, "f32"):    Float32Regs,
    !eq(name, "f64"):    Float64Regs,
    !eq(name, "ai32"):   Int32ArgRegs,
    !eq(name, "ai64"):   Int64ArgRegs,
    !eq(name, "af32"):   Float32ArgRegs,
    // "af64" follows the a<type> naming of the three cases above. The
    // historical key "if64" looks like a typo for it and is kept so that any
    // existing user of that spelling still resolves.
    !eq(name, "af64"):   Float64ArgRegs,
    !eq(name, "if64"):   Float64ArgRegs
  );
}


//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Template for instructions which take three int64, int32, or int16 args.
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string OpcStr, SDNode OpNode> {
  // For each width: "rr" takes two registers, "ri" a register and an
  // immediate. The width digits are appended directly to OpcStr.
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  // The 32-bit patterns spell out i32 on their register operands.
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}

// Packed 2 x i16 instruction, named "<OpcStr>16x2". Both sources and the
// result live in Int32Regs and the pattern operands are typed v2i16.
// Requires PTX version >= 80 and SM version >= 90.
class I16x2<string OpcStr, SDNode OpNode> :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            OpcStr # "16x2 \t$dst, $a, $b;",
            [(set Int32Regs:$dst,
                  (OpNode (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

// Template for instructions which take 3 int args. The instructions are
// named "<OpcStr>.s32" (e.g. "addc.cc.s32") and "<OpcStr>.s64"; the 64-bit
// forms additionally require PTX version >= 43. Every variant is marked
// hasSideEffects.
multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
  let hasSideEffects = 1 in {
    def i32rr :
      NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                OpcStr # ".s32 \t$dst, $a, $b;",
                [(set Int32Regs:$dst,
                      (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
    def i32ri :
      NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                OpcStr # ".s32 \t$dst, $a, $b;",
                [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
    def i64rr :
      NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                OpcStr # ".s64 \t$dst, $a, $b;",
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
      Requires<[hasPTX<43>]>;
    def i64ri :
      NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                OpcStr # ".s64 \t$dst, $a, $b;",
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
      Requires<[hasPTX<43>]>;
  }
}

//
Template for instructions which take three fp64 or fp32 args. The 2720b57cec5SDimitry Andric// instructions are named "<OpcStr>.f<Width>" (e.g. "min.f64"). 2730b57cec5SDimitry Andric// 2740b57cec5SDimitry Andric// Also defines ftz (flush subnormal inputs and results to sign-preserving 2750b57cec5SDimitry Andric// zero) variants for fp32 functions. 2760b57cec5SDimitry Andric// 2770b57cec5SDimitry Andric// This multiclass should be used for nodes that cannot be folded into FMAs. 2780b57cec5SDimitry Andric// For nodes that can be folded into FMAs (i.e. adds and muls), use 2790b57cec5SDimitry Andric// F3_fma_component. 2800b57cec5SDimitry Andricmulticlass F3<string OpcStr, SDNode OpNode> { 2810b57cec5SDimitry Andric def f64rr : 2820b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 2830b57cec5SDimitry Andric (ins Float64Regs:$a, Float64Regs:$b), 2840b57cec5SDimitry Andric !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 2850b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; 2860b57cec5SDimitry Andric def f64ri : 2870b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 2880b57cec5SDimitry Andric (ins Float64Regs:$a, f64imm:$b), 2890b57cec5SDimitry Andric !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 2900b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; 2910b57cec5SDimitry Andric def f32rr_ftz : 2920b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 2930b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 2940b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 2950b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, 2960b57cec5SDimitry Andric Requires<[doF32FTZ]>; 2970b57cec5SDimitry Andric def f32ri_ftz : 2980b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 2990b57cec5SDimitry Andric (ins Float32Regs:$a, f32imm:$b), 3000b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 3010b57cec5SDimitry 
Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 3020b57cec5SDimitry Andric Requires<[doF32FTZ]>; 3030b57cec5SDimitry Andric def f32rr : 3040b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 3050b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 3060b57cec5SDimitry Andric !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 3070b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; 3080b57cec5SDimitry Andric def f32ri : 3090b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 3100b57cec5SDimitry Andric (ins Float32Regs:$a, f32imm:$b), 3110b57cec5SDimitry Andric !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 3120b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; 31304eeddc0SDimitry Andric 31404eeddc0SDimitry Andric def f16rr_ftz : 31506c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 31606c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 31704eeddc0SDimitry Andric !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), 31806c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 31904eeddc0SDimitry Andric Requires<[useFP16Math, doF32FTZ]>; 32004eeddc0SDimitry Andric def f16rr : 32106c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 32206c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 32304eeddc0SDimitry Andric !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), 32406c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 32504eeddc0SDimitry Andric Requires<[useFP16Math]>; 32604eeddc0SDimitry Andric 32704eeddc0SDimitry Andric def f16x2rr_ftz : 32806c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 32906c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 33004eeddc0SDimitry Andric !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), 33106c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 33204eeddc0SDimitry Andric 
Requires<[useFP16Math, doF32FTZ]>; 33304eeddc0SDimitry Andric def f16x2rr : 33406c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 33506c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 33604eeddc0SDimitry Andric !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), 33706c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 33804eeddc0SDimitry Andric Requires<[useFP16Math]>; 33906c3fb27SDimitry Andric def bf16rr_ftz : 34006c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 34106c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 34206c3fb27SDimitry Andric !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"), 34306c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 34406c3fb27SDimitry Andric Requires<[hasBF16Math, doF32FTZ]>; 34506c3fb27SDimitry Andric def bf16rr : 34606c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 34706c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 34806c3fb27SDimitry Andric !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"), 34906c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 35006c3fb27SDimitry Andric Requires<[hasBF16Math]>; 35106c3fb27SDimitry Andric 35206c3fb27SDimitry Andric def bf16x2rr_ftz : 35306c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 35406c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 35506c3fb27SDimitry Andric !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, $b;"), 35606c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, 35706c3fb27SDimitry Andric Requires<[hasBF16Math, doF32FTZ]>; 35806c3fb27SDimitry Andric def bf16x2rr : 35906c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 36006c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 36106c3fb27SDimitry Andric !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"), 36206c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 
Int32Regs:$b)))]>, 36306c3fb27SDimitry Andric Requires<[hasBF16Math]>; 3640b57cec5SDimitry Andric} 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric// Template for instructions which take three FP args. The 3670b57cec5SDimitry Andric// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64"). 3680b57cec5SDimitry Andric// 3690b57cec5SDimitry Andric// Also defines ftz (flush subnormal inputs and results to sign-preserving 3700b57cec5SDimitry Andric// zero) variants for fp32/fp16 functions. 3710b57cec5SDimitry Andric// 3720b57cec5SDimitry Andric// This multiclass should be used for nodes that can be folded to make fma ops. 3730b57cec5SDimitry Andric// In this case, we use the ".rn" variant when FMA is disabled, as this behaves 3740b57cec5SDimitry Andric// just like the non ".rn" op, but prevents ptxas from creating FMAs. 3750b57cec5SDimitry Andricmulticlass F3_fma_component<string OpcStr, SDNode OpNode> { 3760b57cec5SDimitry Andric def f64rr : 3770b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 3780b57cec5SDimitry Andric (ins Float64Regs:$a, Float64Regs:$b), 3790b57cec5SDimitry Andric !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 3800b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, 3810b57cec5SDimitry Andric Requires<[allowFMA]>; 3820b57cec5SDimitry Andric def f64ri : 3830b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 3840b57cec5SDimitry Andric (ins Float64Regs:$a, f64imm:$b), 3850b57cec5SDimitry Andric !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 3860b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, 3870b57cec5SDimitry Andric Requires<[allowFMA]>; 3880b57cec5SDimitry Andric def f32rr_ftz : 3890b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 3900b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 3910b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 3920b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, 
Float32Regs:$b))]>, 3930b57cec5SDimitry Andric Requires<[allowFMA, doF32FTZ]>; 3940b57cec5SDimitry Andric def f32ri_ftz : 3950b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 3960b57cec5SDimitry Andric (ins Float32Regs:$a, f32imm:$b), 3970b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 3980b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 3990b57cec5SDimitry Andric Requires<[allowFMA, doF32FTZ]>; 4000b57cec5SDimitry Andric def f32rr : 4010b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4020b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 4030b57cec5SDimitry Andric !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 4040b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, 4050b57cec5SDimitry Andric Requires<[allowFMA]>; 4060b57cec5SDimitry Andric def f32ri : 4070b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4080b57cec5SDimitry Andric (ins Float32Regs:$a, f32imm:$b), 4090b57cec5SDimitry Andric !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 4100b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 4110b57cec5SDimitry Andric Requires<[allowFMA]>; 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric def f16rr_ftz : 41406c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 41506c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 4160b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), 41706c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 4180b57cec5SDimitry Andric Requires<[useFP16Math, allowFMA, doF32FTZ]>; 4190b57cec5SDimitry Andric def f16rr : 42006c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 42106c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 4220b57cec5SDimitry Andric !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), 42306c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 
4240b57cec5SDimitry Andric Requires<[useFP16Math, allowFMA]>; 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric def f16x2rr_ftz : 42706c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 42806c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 4290b57cec5SDimitry Andric !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), 43006c3fb27SDimitry Andric [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 4310b57cec5SDimitry Andric Requires<[useFP16Math, allowFMA, doF32FTZ]>; 4320b57cec5SDimitry Andric def f16x2rr : 43306c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 43406c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 4350b57cec5SDimitry Andric !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), 43606c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 4370b57cec5SDimitry Andric Requires<[useFP16Math, allowFMA]>; 43806c3fb27SDimitry Andric def bf16rr_ftz : 43906c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 44006c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 44106c3fb27SDimitry Andric !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"), 44206c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 44306c3fb27SDimitry Andric Requires<[hasBF16Math, allowFMA, doF32FTZ]>; 44406c3fb27SDimitry Andric def bf16rr : 44506c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 44606c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 44706c3fb27SDimitry Andric !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"), 44806c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 44906c3fb27SDimitry Andric Requires<[hasBF16Math, allowFMA]>; 4500b57cec5SDimitry Andric 45106c3fb27SDimitry Andric def bf16x2rr_ftz : 45206c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 45306c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 45406c3fb27SDimitry Andric !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, 
$b;"), 45506c3fb27SDimitry Andric [(set (v2bf16 Int32Regs:$dst), (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, 45606c3fb27SDimitry Andric Requires<[hasBF16Math, allowFMA, doF32FTZ]>; 45706c3fb27SDimitry Andric def bf16x2rr : 45806c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 45906c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 46006c3fb27SDimitry Andric !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"), 46106c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, 46206c3fb27SDimitry Andric Requires<[hasBF16Math, allowFMA]>; 4630b57cec5SDimitry Andric // These have strange names so we don't perturb existing mir tests. 4640b57cec5SDimitry Andric def _rnf64rr : 4650b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 4660b57cec5SDimitry Andric (ins Float64Regs:$a, Float64Regs:$b), 4670b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 4680b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, 4690b57cec5SDimitry Andric Requires<[noFMA]>; 4700b57cec5SDimitry Andric def _rnf64ri : 4710b57cec5SDimitry Andric NVPTXInst<(outs Float64Regs:$dst), 4720b57cec5SDimitry Andric (ins Float64Regs:$a, f64imm:$b), 4730b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 4740b57cec5SDimitry Andric [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, 4750b57cec5SDimitry Andric Requires<[noFMA]>; 4760b57cec5SDimitry Andric def _rnf32rr_ftz : 4770b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4780b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 4790b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 4800b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, 4810b57cec5SDimitry Andric Requires<[noFMA, doF32FTZ]>; 4820b57cec5SDimitry Andric def _rnf32ri_ftz : 4830b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4840b57cec5SDimitry Andric (ins 
Float32Regs:$a, f32imm:$b), 4850b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 4860b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 4870b57cec5SDimitry Andric Requires<[noFMA, doF32FTZ]>; 4880b57cec5SDimitry Andric def _rnf32rr : 4890b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4900b57cec5SDimitry Andric (ins Float32Regs:$a, Float32Regs:$b), 4910b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 4920b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, 4930b57cec5SDimitry Andric Requires<[noFMA]>; 4940b57cec5SDimitry Andric def _rnf32ri : 4950b57cec5SDimitry Andric NVPTXInst<(outs Float32Regs:$dst), 4960b57cec5SDimitry Andric (ins Float32Regs:$a, f32imm:$b), 4970b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 4980b57cec5SDimitry Andric [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 4990b57cec5SDimitry Andric Requires<[noFMA]>; 5000b57cec5SDimitry Andric def _rnf16rr_ftz : 50106c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 50206c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 5030b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), 50406c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 5050b57cec5SDimitry Andric Requires<[useFP16Math, noFMA, doF32FTZ]>; 5060b57cec5SDimitry Andric def _rnf16rr : 50706c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 50806c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 5090b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), 51006c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 5110b57cec5SDimitry Andric Requires<[useFP16Math, noFMA]>; 5120b57cec5SDimitry Andric def _rnf16x2rr_ftz : 51306c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 51406c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 
5150b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), 51606c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 5170b57cec5SDimitry Andric Requires<[useFP16Math, noFMA, doF32FTZ]>; 5180b57cec5SDimitry Andric def _rnf16x2rr : 51906c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 52006c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 5210b57cec5SDimitry Andric !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), 52206c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, 5230b57cec5SDimitry Andric Requires<[useFP16Math, noFMA]>; 52406c3fb27SDimitry Andric def _rnbf16rr_ftz : 52506c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 52606c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 52706c3fb27SDimitry Andric !strconcat(OpcStr, ".rn.ftz.bf16 \t$dst, $a, $b;"), 52806c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 52906c3fb27SDimitry Andric Requires<[hasBF16Math, noFMA, doF32FTZ]>; 53006c3fb27SDimitry Andric def _rnbf16rr : 53106c3fb27SDimitry Andric NVPTXInst<(outs Int16Regs:$dst), 53206c3fb27SDimitry Andric (ins Int16Regs:$a, Int16Regs:$b), 53306c3fb27SDimitry Andric !strconcat(OpcStr, ".rn.bf16 \t$dst, $a, $b;"), 53406c3fb27SDimitry Andric [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, 53506c3fb27SDimitry Andric Requires<[hasBF16Math, noFMA]>; 53606c3fb27SDimitry Andric def _rnbf16x2rr_ftz : 53706c3fb27SDimitry Andric NVPTXInst<(outs Int32Regs:$dst), 53806c3fb27SDimitry Andric (ins Int32Regs:$a, Int32Regs:$b), 53906c3fb27SDimitry Andric !strconcat(OpcStr, ".rn.ftz.bf16x2 \t$dst, $a, $b;"), 54006c3fb27SDimitry Andric [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, 54106c3fb27SDimitry Andric Requires<[hasBF16Math, noFMA, doF32FTZ]>; 54206c3fb27SDimitry Andric def _rnbf16x2rr : 54306c3fb27SDimitry Andric NVPTXInst<(outs 
Int32Regs:$dst),
               (ins Int32Regs:$a, Int32Regs:$b),
               !strconcat(OpcStr, ".rn.bf16x2 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
               Requires<[hasBF16Math, noFMA]>;
}

// Template for unary f32/f64 operations: each instruction has two operands in
// total, one destination ($dst) and one source ($a).  Provides three
// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
// subnormal inputs and results to zero).  The .ftz.f32 form is only available
// when the doF32FTZ predicate holds.
multiclass F2<string OpcStr, SDNode OpNode> {
  def f64 :     NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
                          !strconcat(OpcStr, ".f64 \t$dst, $a;"),
                          [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
  def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                          !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
                          [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
                          Requires<[doF32FTZ]>;
  def f32 :     NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                          !strconcat(OpcStr, ".f32 \t$dst, $a;"),
                          [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}

// Half-precision counterpart of F2: unary <OpcStr> on bf16, bf16x2, f16 and
// f16x2 values.  Scalar halves live in Int16Regs and the packed x2 forms in
// Int32Regs, so each pattern spells out the value type (bf16, v2bf16, f16,
// v2f16) explicitly.
//   * bf16 / bf16x2         require hasSM<80> and hasPTX<70>.
//   * f16 / f16x2           require hasSM<53> and hasPTX<65>.
//   * f16_ftz / f16x2_ftz   additionally require doF32FTZ.
// There is no .ftz form for the bf16 variants.
multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
  def bf16 :      NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
                            !strconcat(OpcStr, ".bf16 \t$dst, $a;"),
                            [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a)))]>,
                            Requires<[hasSM<80>, hasPTX<70>]>;
  def bf16x2 :    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
                            !strconcat(OpcStr, ".bf16x2 \t$dst, $a;"),
                            [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a)))]>,
                            Requires<[hasSM<80>, hasPTX<70>]>;
  def f16_ftz :   NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
                            !strconcat(OpcStr, ".ftz.f16 \t$dst, $a;"),
                            [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
                            Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
  def f16x2_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
                            !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a;"),
                            [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
                            Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
  def f16 :       NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
                            !strconcat(OpcStr, ".f16 \t$dst, $a;"),
                            [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
                            Requires<[hasSM<53>, hasPTX<65>]>;
  def f16x2 :     NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
                            !strconcat(OpcStr, ".f16x2 \t$dst, $a;"),
                            [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
                            Requires<[hasSM<53>, hasPTX<65>]>;

}

//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//

//-----------------------------------
// Type Conversion
//-----------------------------------

let hasSideEffects = false in {
  // Generate a cvt to the given type from all possible types.  Each instance
  // takes a CvtMode immediate that defines the conversion mode to use.  It can
  // be CvtNONE to omit a conversion mode.
  //
  //   ToType - PTX type suffix of the destination; spliced into the mnemonic
  //            as "cvt<modifiers>.<ToType>.<source type>".
  //   RC     - register class of the destination.
  //   Preds  - predicates attached (via Requires) to the generated variants.
  //            Two variants deliberately do not follow Preds:
  //              * _bf16 always uses its own predicate list, and
  //              * _f32 uses [hasPTX<70>, hasSM<80>] when ToType is "bf16".
  //
  // 8- and 16-bit integer sources as well as f16/bf16 sources are read from
  // Int16Regs.  None of the variants carries a selection pattern (the pattern
  // list is empty).  Only the _bf16 and _f32 sources print the ${mode:relu}
  // modifier.
  multiclass CVT_FROM_ALL<string ToType, RegisterClass RC, list<Predicate> Preds = []> {
    def _s8 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".s8 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _u8 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".u8 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _s16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".s16 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _u16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".u16 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _s32 :
      NVPTXInst<(outs RC:$dst),
                (ins Int32Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".s32 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _u32 :
      NVPTXInst<(outs RC:$dst),
                (ins Int32Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".u32 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _s64 :
      NVPTXInst<(outs RC:$dst),
                (ins Int64Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".s64 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _u64 :
      NVPTXInst<(outs RC:$dst),
                (ins Int64Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".u64 \t$dst, $src;"), []>,
      Requires<Preds>;
    def _f16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".f16 \t$dst, $src;"), []>,
      Requires<Preds>;
    // Note: Preds is not applied to this variant; the predicate list below
    // depends only on ToType.
    def _bf16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
                ToType, ".bf16 \t$dst, $src;"), []>,
      Requires<!if(!eq(ToType, "f32"),
                   // bf16->f32 was introduced early.
                   [hasPTX<71>, hasSM<80>],
                   // bf16->everything else needs sm90/ptx78
                   [hasPTX<78>, hasSM<90>])>;
    def _f32 :
      NVPTXInst<(outs RC:$dst),
                (ins Float32Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
                ToType, ".f32 \t$dst, $src;"), []>,
      Requires<!if(!eq(ToType, "bf16"),
                   // f32->bf16 was introduced early.
                   [hasPTX<70>, hasSM<80>],
                   Preds)>;
    def _f64 :
      NVPTXInst<(outs RC:$dst),
                (ins Float64Regs:$src, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                ToType, ".f64 \t$dst, $src;"), []>,
      Requires<Preds>;
  }

  // Generate cvts from all types to all types.
  defm CVT_s8  : CVT_FROM_ALL<"s8",  Int16Regs>;
  defm CVT_u8  : CVT_FROM_ALL<"u8",  Int16Regs>;
  defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
  defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
  defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
  defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
  defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
  defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
  defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
  defm CVT_bf16 : CVT_FROM_ALL<"bf16", Int16Regs, [hasPTX<78>, hasSM<90>]>;
  defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
  defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;

  // These cvts are different from those above: The source and dest registers
  // are of the same type.  They take no CvtMode operand and carry no
  // selection pattern.
  def CVT_INREG_s16_s8 :  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                                    "cvt.s16.s8 \t$dst, $src;", []>;
  def CVT_INREG_s32_s8 :  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                                    "cvt.s32.s8 \t$dst, $src;", []>;
  def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                                    "cvt.s32.s16 \t$dst, $src;", []>;
  def CVT_INREG_s64_s8 :  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                                    "cvt.s64.s8 \t$dst, $src;", []>;
  def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                                    "cvt.s64.s16 \t$dst, $src;", []>;
  def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                                    "cvt.s64.s32 \t$dst, $src;", []>;

  // Two-source cvt: converts a pair of f32 registers ($src1, $src2) into one
  // packed result in RC.  Despite its name, FromName is spliced into the
  // mnemonic in the destination-type position ("cvt<modifiers>.<FromName>.f32"),
  // i.e. it names the packed type being produced.  Requires PTX 7.0 / sm_80.
  multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
    def _f32 :
      NVPTXInst<(outs RC:$dst),
                (ins Float32Regs:$src1, Float32Regs:$src2, CvtMode:$mode),
                !strconcat("cvt${mode:base}${mode:relu}.",
                FromName, ".f32 \t$dst, $src1, $src2;"), []>,
      Requires<[hasPTX<70>, hasSM<80>]>;
  }

  defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>;
  defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
}

//-----------------------------------
// Selection instructions (selp)
//-----------------------------------
// TODO: Missing slct

let hasSideEffects = false in {
  // selp instructions that don't have any pattern matches; we explicitly use
  // them within this file.  Four operand shapes are generated per type:
  // rr, ri, ir and ii (register/immediate for $a and $b); the predicate $p is
  // always an Int1Regs register.
  multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr : NVPTXInst<(outs RC:$dst),
                       (ins RC:$a, RC:$b, Int1Regs:$p),
                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
    def ri : NVPTXInst<(outs RC:$dst),
                       (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
    def ir : NVPTXInst<(outs RC:$dst),
                       (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
    def ii : NVPTXInst<(outs RC:$dst),
                       (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
  }

  // Same four selp shapes as SELP, but each one also carries a pattern that
  // matches `select` on value type T.  ImmNode is the DAG node used for the
  // immediate operands (imm for integers, fpimm for floating point in the
  // instantiations below).
  multiclass SELP_PATTERN<string TypeStr, ValueType T, RegisterClass RC,
                          Operand ImmCls, SDNode ImmNode> {
    def rr :
      NVPTXInst<(outs RC:$dst),
                (ins RC:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
                [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T RC:$b)))]>;
    def ri :
      NVPTXInst<(outs RC:$dst),
                (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
                [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T ImmNode:$b)))]>;
    def ir :
      NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
                [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, (T RC:$b)))]>;
    def ii :
      NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
                [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
  }
}

// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
// good.
defm SELP_b16 : SELP_PATTERN<"b16", i16, Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
defm SELP_b32 : SELP_PATTERN<"b32", i32, Int32Regs, i32imm, imm>;
defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
// f16 and bf16 values live in Int16Regs, so they are selected with selp.b16.
defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
defm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;

defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;

// This does not work as tablegen fails to infer the type of 'imm'.
// def v2f16imm : Operand<v2f16>;
// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;

// Packed 32-bit vector types held in Int32Regs reuse the register-register
// selp.b32 instead of getting their own instruction.
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))),
          (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>;
}

//-----------------------------------
// Test Instructions
//-----------------------------------

// testp.infinite for f32/f64, with a register (r) or immediate (i) source.
// The result is written to a predicate register.  No selection patterns are
// attached here.
def TESTINF_f32r : NVPTXInst<(outs Int1Regs:$p), (ins Float32Regs:$a),
                             "testp.infinite.f32 \t$p, $a;",
                             []>;
def TESTINF_f32i : NVPTXInst<(outs Int1Regs:$p), (ins f32imm:$a),
                             "testp.infinite.f32 \t$p, $a;",
                             []>;
def TESTINF_f64r : NVPTXInst<(outs Int1Regs:$p), (ins Float64Regs:$a),
                             "testp.infinite.f64 \t$p, $a;",
                             []>;
def TESTINF_f64i : NVPTXInst<(outs Int1Regs:$p), (ins f64imm:$a),
                             "testp.infinite.f64 \t$p, $a;",
                             []>;

//-----------------------------------
// Integer Arithmetic
//-----------------------------------

// Template for xor masquerading as int1 arithmetic.
multiclass ADD_SUB_i1<SDNode OpNode> {
   // OpNode is the DAG node to match (add or sub in the instantiations
   // below); both variants emit xor.pred on predicate registers.
   // _rr takes two registers, _ri a register and an i1 immediate.
   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
                      "xor.pred \t$dst, $a, $b;",
                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
                      "xor.pred \t$dst, $a, $b;",
                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

// int1 addition and subtraction are both just xor.
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;

// int16, int32, and int64 signed addition and subtraction.  Since nvptx is
// 2's complement, we also use these for unsigned arithmetic.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

def ADD16x2 : I16x2<"add.s", add>;

// int32 and int64 addition and subtraction with carry-out.
defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;

// int32 and int64 addition and subtraction with carry-in and carry-out.
defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;

defm MULT : I3<"mul.lo.s", mul>;

defm MULTHS : I3<"mul.hi.s", mulhs>;
defm MULTHU : I3<"mul.hi.u", mulhu>;

defm SDIV : I3<"div.s", sdiv>;
defm UDIV : I3<"div.u", udiv>;

// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
// will lower it.
defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;

// Integer absolute value.  Matches the `abs` node on value type T and emits a
// single "abs<SizeName>" instruction on register class RC.  SizeName is the
// PTX type suffix including its leading dot (e.g. ".s32").
multiclass ABS<ValueType T, RegisterClass RC, string SizeName> {
  def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
                  !strconcat("abs", SizeName, " \t$dst, $a;"),
                  [(set (T RC:$dst), (abs (T RC:$a)))]>;
}
defm ABS_16 : ABS<i16, Int16Regs, ".s16">;
defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
defm ABS_64 : ABS<i64, Int64Regs, ".s64">;

// Integer min/max.
defm SMAX : I3<"max.s", smax>;
defm UMAX : I3<"max.u", umax>;
defm SMIN : I3<"min.s", smin>;
defm UMIN : I3<"min.u", umin>;

def SMAX16x2 : I16x2<"max.s", smax>;
def UMAX16x2 : I16x2<"max.u", umax>;
def SMIN16x2 : I16x2<"min.s", smin>;
def UMIN16x2 : I16x2<"min.u", umin>;


//
// Wide multiplication
//
// mul.wide takes two narrow sources and writes a destination of twice the
// width.  The instructions below carry no patterns of their own; they are
// selected through the explicit Pat<> definitions in this file.
//
// Naming: MULWIDE{S,U}{64,32} is signed/unsigned with a 64- or 32-bit result.
//   (no suffix)   - both sources are registers.
//   Imm           - $b is an immediate of the source width.
//   Imm64 / Imm32 - $b is an immediate of the result width.
//
def MULWIDES64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;

def MULWIDEU64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;

def MULWIDES32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;

def MULWIDEU32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;

// Type profile for the mul.wide nodes: one result, two operands, and the two
// operands must have the same type.  The result type is left unconstrained.
def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;

// Matchers for signed, unsigned mul.wide ISD nodes.
def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
          (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
          (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;

// 32 x 32 -> 64 variants.  All mul.wide selections are gated on doMulWide.
def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed (i32 Int32Regs:$a), imm:$b)),
          (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned (i32 Int32Regs:$a), imm:$b)),
          (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;

// Predicates used for converting some patterns to mul.wide.
def SInt32Const : PatLeaf<(imm), [{
  // Accept only immediates that fit in a signed 32-bit integer.
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(32);
}]>;

def UInt32Const : PatLeaf<(imm), [{
  // Accept only immediates that fit in an unsigned 32-bit integer.
  const APInt &v = N->getAPIntValue();
  return v.isIntN(32);
}]>;

def SInt16Const : PatLeaf<(imm), [{
  // Accept only immediates that fit in a signed 16-bit integer.
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(16);
}]>;

def UInt16Const : PatLeaf<(imm), [{
  // Accept only immediates that fit in an unsigned 16-bit integer.
  const APInt &v = N->getAPIntValue();
  return v.isIntN(16);
}]>;

def IntConst_0_30 : PatLeaf<(imm), [{
  // Check if 0 <= v < 31; only then will the result of (x << v) be an int32.
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(31);
}]>;

def IntConst_0_14 : PatLeaf<(imm), [{
  // Check if 0 <= v < 15; only then will the result of (x << v) be an int16.
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(15);
}]>;

// Rewrite a shift amount v into the equivalent multiplier (1 << v), emitted
// as a 32-bit target constant.
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(32, 1);
  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
}]>;

// Same as SHL2MUL32, but the multiplier is a 16-bit target constant.
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(16, 1);
  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;

// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
// The shift amount is restricted by IntConst_0_30 / IntConst_0_14 and turned
// into the multiplier operand by SHL2MUL32 / SHL2MUL16.
def : Pat<(shl (sext Int32Regs:$a), (i32 IntConst_0_30:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 IntConst_0_30:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;

def : Pat<(shl (sext Int16Regs:$a), (i16 IntConst_0_14:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 IntConst_0_14:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;

// Convert "sign/zero-extend then multiply" to mul.wide.
// 32-bit operands, signed: register x register, then register x immediate
// (immediate must satisfy SInt32Const).
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
      Requires<[doMulWide]>;

// 32-bit operands, unsigned (immediate must satisfy UInt32Const).
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
      Requires<[doMulWide]>;

// 16-bit operands, signed (immediate must satisfy SInt16Const).
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
      Requires<[doMulWide]>;

// 16-bit operands, unsigned (immediate must satisfy UInt16Const).
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
      Requires<[doMulWide]>;

//
// Integer multiply-add
//

// Type profile for imad: 1 result and 3 operands, all constrained to the same
// integer type.
def SDTIMAD :
  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>,
                       SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>;

// mad.lo instructions selected from imad.  The name suffix spells the operand
// kinds of $a, $b, $c: 'r' is a register, 'i' is an immediate.

// 16-bit forms.
def MAD16rrr :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
def MAD16rri :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
def MAD16rir :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
def MAD16rii :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (imad Int16Regs:$a, imm:$b, imm:$c))]>;

// 32-bit forms.  Register operands carry an explicit i32 type in the pattern.
def MAD32rrr :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set (i32 Int32Regs:$dst),
               (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b),
                     (i32 Int32Regs:$c)))]>;
def MAD32rri :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set (i32 Int32Regs:$dst),
               (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), imm:$c))]>;
def MAD32rir :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set (i32 Int32Regs:$dst),
               (imad (i32 Int32Regs:$a), imm:$b, (i32 Int32Regs:$c)))]>;
def MAD32rii :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set (i32 Int32Regs:$dst),
               (imad (i32 Int32Regs:$a), imm:$b, imm:$c))]>;

// 64-bit forms.
def MAD64rrr :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
def MAD64rri :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
def MAD64rir :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
def MAD64rii :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (imad Int64Regs:$a, imm:$b, imm:$c))]>;

// Integer negation (neg.s16 / neg.s32 / neg.s64), selected from ineg.
def INEG16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "neg.s16 \t$dst, $src;",
            [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "neg.s32 \t$dst, $src;",
            [(set (i32 Int32Regs:$dst), (ineg (i32 Int32Regs:$src)))]>;
def INEG64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "neg.s64 \t$dst, $src;",
            [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;

//-----------------------------------
// Floating Point Arithmetic
//-----------------------------------

// Constant 1.0f: the value must use IEEEsingle semantics and equal 1.0f.
def FloatConst1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  return &Val.getSemantics() == &llvm::APFloat::IEEEsingle() &&
         Val.convertToFloat() == 1.0f;
}]>;
// Constant 1.0 (double): IEEEdouble semantics and equal to 1.0.
def DoubleConst1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  return &Val.getSemantics() == &llvm::APFloat::IEEEdouble() &&
         Val.convertToDouble() == 1.0;
}]>;
// Constant -1.0 (double): IEEEdouble semantics and equal to -1.0.
def DoubleConstNeg1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  return &Val.getSemantics() == &llvm::APFloat::IEEEdouble() &&
         Val.convertToDouble() == -1.0;
}]>;


// Constant -X -> X (double): emits the negated value as an f64 target
// constant.
def NegDoubleConst : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstantFP(-(N->getValueAPF()), SDLoc(N),
                                     MVT::f64);
}]>;

// Loads FP16 constant into a register.
//
// ptxas does not have hex representation for fp16, so we can't use
// fp16 immediate values in .f16 instructions. Instead we have to load
// the constant into a register using mov.b16.
def LOAD_CONST_F16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
            "mov.b16 \t$dst, $a;",
            []>;
// Same mov.b16 form, taking a bf16 immediate operand.
def LOAD_CONST_BF16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
            "mov.b16 \t$dst, $a;",
            []>;

// add / sub / mul, instantiated through F3_fma_component.
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;

// min / max for fminnum / fmaxnum, instantiated through F3.
defm FMIN : F3<"min", fminnum>;
defm FMAX : F3<"max", fmaxnum>;
// Note: min.NaN.f64 and max.NaN.f64 do not actually exist.
defm FMINNAN : F3<"min.NaN", fminimum>;
defm FMAXNAN : F3<"max.NaN", fmaximum>;

// abs / neg through F2, plus the F2_Support_Half instantiations.
defm FABS   : F2<"abs", fabs>;
defm FNEG   : F2<"neg", fneg>;
defm FABS_H : F2_Support_Half<"abs", fabs>;
defm FNEG_H : F2_Support_Half<"neg", fneg>;

defm FSQRT : F2<"sqrt.rn", fsqrt>;

//
// F16 NEG
//
// One-operand fneg on value type T held in register class RC.  Gated on
// useFP16Math, hasPTX<60>, hasSM<53> and the per-variant predicate Pred.
class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC,
                     Predicate Pred>
  : NVPTXInst<(outs RC:$dst), (ins RC:$src),
              !strconcat(OpcStr, " \t$dst, $src;"),
              [(set RC:$dst, (fneg (T RC:$src)))]>,
    Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;

def FNEG16_ftz   : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>;
def FNEG16       : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>;
def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
def FNEG16x2     : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>;

//
// BF16 NEG
//

// Same shape as the F16 class, gated on hasBF16Math, hasPTX<70>, hasSM<80>
// and the per-variant predicate Pred.
class FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC,
                      Predicate Pred>
  : NVPTXInst<(outs RC:$dst), (ins RC:$src),
              !strconcat(OpcStr, " \t$dst, $src;"),
              [(set RC:$dst, (fneg (T RC:$src)))]>,
    Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;

def BFNEG16_ftz   : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
def BFNEG16       : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>;
def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
def BFNEG16x2     : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;

//
// F64 division
//
// (fdiv 1.0, b) is printed as rcp.rn.f64; $a is matched but does not appear
// in the asm string.
def FDIV641r :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins f64imm:$a, Float64Regs:$b),
            "rcp.rn.f64 \t$dst, $b;",
            [(set Float64Regs:$dst,
               (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
def FDIV64rr :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, Float64Regs:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst,
               (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, f64imm:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst,
               (fdiv Float64Regs:$a, fpimm:$b))]>;

// fdiv will be converted to rcp:
// (fdiv -1.0, X) => (FNEGf64 (FDIV641r 1.0, X)), with the 1.0 produced by
// negating the matched -1.0 constant through NegDoubleConst.
def : Pat<(fdiv DoubleConstNeg1:$a, Float64Regs:$b),
          (FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>;

//
// F32 Approximate reciprocal
//
def FDIV321r_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV321r :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
def FDIV32approxrr_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.approx.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxri_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.approx.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.approx.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
def FDIV32approxri :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.approx.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
def FDIV321r_approx_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
def FDIV32rr_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.full.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32ri_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.full.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32rr :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.full.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL]>;
def FDIV32ri :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.full.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
def FDIV321r_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.rn.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[doF32FTZ]>;
def FDIV321r_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.rn.f32 \t$dst, $b;",
            [(set Float32Regs:$dst,
               (fdiv FloatConst1:$a, Float32Regs:$b))]>;
//
// F32 Accurate division
//
// div.rn forms.  The _ftz variants require doF32FTZ; the plain variants carry
// no predicate.
def FDIV32rr_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.rn.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[doF32FTZ]>;
def FDIV32ri_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.rn.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[doF32FTZ]>;
def FDIV32rr_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.rn.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, Float32Regs:$b))]>;
def FDIV32ri_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.rn.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst,
               (fdiv Float32Regs:$a, fpimm:$b))]>;

//
// FMA
//

// Fused multiply-add over register class RC with immediate operand class
// ImmCls.  Emits four records (rrr, rri, rir, rii) whose suffix spells the
// operand kinds of $a, $b, $c; every record is gated on Pred.
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls,
               Predicate Pred> {
  def rrr :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
    Requires<[Pred]>;
  def rri :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, ImmCls:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
    Requires<[Pred]>;
  def rir :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
    Requires<[Pred]>;
  def rii :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, ImmCls:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
    Requires<[Pred]>;
}

// Register-only fma on value type T; gated on useFP16Math and Pred.
multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC,
                   Predicate Pred> {
  def rrr :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>,
    Requires<[useFP16Math, Pred]>;
}

// Register-only fma on value type T; gated on hasBF16Math and Pred.
multiclass FMA_BF16<string OpcStr, ValueType T, RegisterClass RC,
                    Predicate Pred> {
  def rrr :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>,
    Requires<[hasBF16Math, Pred]>;
}

// Instantiations; each _ftz variant precedes its plain counterpart.
defm FMA16_ftz    : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>;
defm FMA16        : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>;
defm FMA16x2_ftz  : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
defm FMA16x2      : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>;
defm BFMA16_ftz   : FMA_BF16<"fma.rn.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
defm BFMA16       : FMA_BF16<"fma.rn.bf16", bf16, Int16Regs, True>;
defm BFMA16x2_ftz : FMA_BF16<"fma.rn.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
defm BFMA16x2     : FMA_BF16<"fma.rn.bf16x2", v2bf16, Int32Regs, True>;
defm FMA32_ftz    : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32        : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
defm FMA64        : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;

// sin/cos
// Both use the .approx form and are only selected under allowUnsafeFPMath.
def SINF :
  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
            "sin.approx.f32 \t$dst, $src;",
            [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>,
  Requires<[allowUnsafeFPMath]>;
def COSF :
  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
            "cos.approx.f32 \t$dst, $src;",
            [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>,
  Requires<[allowUnsafeFPMath]>;

// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches the
// semantics of LLVM's frem.
//
// frem - f32 FTZ
//
// Under allowUnsafeFPMath the result is the bare sub/mul/cvt/div chain.
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz
            Float32Regs:$x,
            (FMULf32rr_ftz
              (CVT_f32_f32
                (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y),
                CvtRZI_FTZ),
              Float32Regs:$y))>,
      Requires<[doF32FTZ, allowUnsafeFPMath]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz
            Float32Regs:$x,
            (FMULf32ri_ftz
              (CVT_f32_f32
                (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y),
                CvtRZI_FTZ),
              fpimm:$y))>,
      Requires<[doF32FTZ, allowUnsafeFPMath]>;

// Under noUnsafeFPMath the same chain is wrapped in a SELP on $x, keyed on a
// TESTINF of $y.
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (SELP_f32rr
            Float32Regs:$x,
            (FSUBf32rr_ftz
              Float32Regs:$x,
              (FMULf32rr_ftz
                (CVT_f32_f32
                  (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y),
                  CvtRZI_FTZ),
                Float32Regs:$y)),
            (TESTINF_f32r Float32Regs:$y))>,
      Requires<[doF32FTZ, noUnsafeFPMath]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (SELP_f32rr
            Float32Regs:$x,
            (FSUBf32rr_ftz
              Float32Regs:$x,
              (FMULf32ri_ftz
                (CVT_f32_f32
                  (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y),
                  CvtRZI_FTZ),
                fpimm:$y)),
            (TESTINF_f32i fpimm:$y))>,
      Requires<[doF32FTZ, noUnsafeFPMath]>;

// frem - f32
// Same four shapes as above, using the non-FTZ instructions and CvtRZI.
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr
            Float32Regs:$x,
            (FMULf32rr
              (CVT_f32_f32
                (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y),
                CvtRZI),
              Float32Regs:$y))>,
      Requires<[allowUnsafeFPMath]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr
            Float32Regs:$x,
            (FMULf32ri
              (CVT_f32_f32
                (FDIV32ri_prec Float32Regs:$x, fpimm:$y),
                CvtRZI),
              fpimm:$y))>,
      Requires<[allowUnsafeFPMath]>;

def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (SELP_f32rr
            Float32Regs:$x,
            (FSUBf32rr
              Float32Regs:$x,
              (FMULf32rr
                (CVT_f32_f32
                  (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y),
                  CvtRZI),
                Float32Regs:$y)),
            (TESTINF_f32r Float32Regs:$y))>,
      Requires<[noUnsafeFPMath]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (SELP_f32rr
            Float32Regs:$x,
            (FSUBf32rr
              Float32Regs:$x,
              (FMULf32ri
                (CVT_f32_f32
                  (FDIV32ri_prec Float32Regs:$x, fpimm:$y),
                  CvtRZI),
                fpimm:$y)),
            (TESTINF_f32i fpimm:$y))>,
      Requires<[noUnsafeFPMath]>;

// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr
            Float64Regs:$x,
            (FMULf64rr
              (CVT_f64_f64
                (FDIV64rr Float64Regs:$x, Float64Regs:$y),
                CvtRZI),
              Float64Regs:$y))>,
      Requires<[allowUnsafeFPMath]>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr
            Float64Regs:$x,
            (FMULf64ri
              (CVT_f64_f64
                (FDIV64ri Float64Regs:$x, fpimm:$y),
                CvtRZI),
              fpimm:$y))>,
      Requires<[allowUnsafeFPMath]>;

def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (SELP_f64rr
            Float64Regs:$x,
            (FSUBf64rr
              Float64Regs:$x,
              (FMULf64rr
                (CVT_f64_f64
                  (FDIV64rr Float64Regs:$x, Float64Regs:$y),
                  CvtRZI),
                Float64Regs:$y)),
            (TESTINF_f64r Float64Regs:$y))>,
      Requires<[noUnsafeFPMath]>;
// NOTE(review): this immediate form tests $y through the register variant
// TESTINF_f64r, whereas the f32 immediate forms above use TESTINF_f32i with
// fpimm:$y -- confirm the asymmetry is intended.
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (SELP_f64rr
            Float64Regs:$x,
            (FSUBf64rr
              Float64Regs:$x,
              (FMULf64ri
                (CVT_f64_f64
                  (FDIV64ri Float64Regs:$x, fpimm:$y),
                  CvtRZI),
                fpimm:$y)),
            (TESTINF_f64r Float64Regs:$y))>,
      Requires<[noUnsafeFPMath]>;

//-----------------------------------
// Bitwise operations
//-----------------------------------

// Template for three-arg bitwise operations.  Takes three args, Creates .b16,
// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.
multiclass BITWISE<string OpcStr, SDNode OpNode> {
  // Predicate (i1) forms: register/register and register/immediate.
  def b1rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def b1ri :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
              !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;

  // 16-bit forms.
  def b16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def b16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;

  // 32-bit forms.  Source registers carry an explicit i32 type in the pattern.
  def b32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst,
                 (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def b32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;

  // 64-bit forms.
  def b64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
              !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def b64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
              !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}

defm OR  : BITWISE<"or", or>;
defm AND : BITWISE<"and", and>;
defm XOR : BITWISE<"xor", xor>;

// PTX does not support mul on predicates, convert to and instructions
def : Pat<(mul Int1Regs:$a, Int1Regs:$b),
          (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(mul Int1Regs:$a, (i1 imm:$b)),
          (ANDb1ri Int1Regs:$a, imm:$b)>;

// These transformations were once reliably performed by instcombine, but thanks
// to poison semantics they are no longer safe for LLVM IR, perform them here
// instead.
//   select a, b, 0  ->  and a, b
//   select a, 1, b  ->  or  a, b
def : Pat<(select Int1Regs:$a, Int1Regs:$b, 0),
          (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(select Int1Regs:$a, 1, Int1Regs:$b),
          (ORb1rr Int1Regs:$a, Int1Regs:$b)>;

// Lower logical v2i16/v4i8 ops as bitwise ops on b32.
foreach vt = [v2i16, v4i8] in {
  // Register/register forms map straight onto the b32 instructions.
  def : Pat<(or (vt Int32Regs:$a), (vt Int32Regs:$b)),
            (ORb32rr Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(xor (vt Int32Regs:$a), (vt Int32Regs:$b)),
            (XORb32rr Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(and (vt Int32Regs:$a), (vt Int32Regs:$b)),
            (ANDb32rr Int32Regs:$a, Int32Regs:$b)>;

  // The constants get legalized into a bitcast from i32, so that's what we need
  // to match here.
  def : Pat<(or Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
            (ORb32ri Int32Regs:$a, imm:$b)>;
  def : Pat<(xor Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
            (XORb32ri Int32Regs:$a, imm:$b)>;
  def : Pat<(and Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
            (ANDb32ri Int32Regs:$a, imm:$b)>;
}

// Bitwise complement, one instruction per register width.
def NOT1
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
                "not.pred \t$dst, $src;",
                [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT16
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                "not.b16 \t$dst, $src;",
                [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                "not.b32 \t$dst, $src;",
                [(set (i32 Int32Regs:$dst), (not (i32 Int32Regs:$src)))]>;
def NOT64
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                "not.b64 \t$dst, $src;",
                [(set Int64Regs:$dst, (not Int64Regs:$src))]>;

// Template for left/right shifts. Takes three operands,
//   [dest (reg), src (reg), shift (reg or imm)].
// dest and src may be int64, int32, or int16, but shift is always int32.
//
// This template also defines a 32-bit shift (imm, imm) instruction.
multiclass SHIFT<string OpcStr, SDNode OpNode> {
  // 64-bit value, 32-bit shift amount.
  def i64rr
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
                  OpcStr # "64 \t$dst, $a, $b;",
                  [(set Int64Regs:$dst,
                        (OpNode Int64Regs:$a, (i32 Int32Regs:$b)))]>;
  def i64ri
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
                  OpcStr # "64 \t$dst, $a, $b;",
                  [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit value; the only width that also has an (imm, imm) form.
  def i32rr
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                  OpcStr # "32 \t$dst, $a, $b;",
                  [(set Int32Regs:$dst,
                        (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def i32ri
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                  OpcStr # "32 \t$dst, $a, $b;",
                  [(set Int32Regs:$dst,
                        (OpNode (i32 Int32Regs:$a), (i32 imm:$b)))]>;
  def i32ii
      : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
                  OpcStr # "32 \t$dst, $a, $b;",
                  [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit value, 32-bit shift amount.
  def i16rr
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
                  OpcStr # "16 \t$dst, $a, $b;",
                  [(set Int16Regs:$dst,
                        (OpNode Int16Regs:$a, (i32 Int32Regs:$b)))]>;
  def i16ri
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
                  OpcStr # "16 \t$dst, $a, $b;",
                  [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}

defm SHL : SHIFT<"shl.b", shl>;
defm SRA : SHIFT<"shr.s", sra>;
defm SRL : SHIFT<"shr.u", srl>;

// Bit-reverse
def BREV32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
                "brev.b32 \t$dst, $a;",
                [(set Int32Regs:$dst, (bitreverse (i32 Int32Regs:$a)))]>;
def BREV64
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
                "brev.b64 \t$dst, $a;",
                [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>;

//
// Rotate: Use ptx shf instruction if available.
//

// 32 bit r2 = rotl r1, n
//    =>
//        r2 = shf.l r1, r1, n
// Both funnel-shift inputs are $src, so the wrap-mode shf acts as a rotate.
def ROTL32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
      Requires<[hasHWROT32]>;

def ROTL32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
      Requires<[hasHWROT32]>;

// 32 bit r2 = rotr r1, n
//    =>
//        r2 = shf.r r1, r1, n
def ROTR32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
      Requires<[hasHWROT32]>;

def ROTR32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
      Requires<[hasHWROT32]>;

// 32-bit software rotate by immediate.  $amt2 should equal 32 - $amt1.
// This instruction has no selection pattern of its own; the two Pats below
// pick it and compute the complementary amount with SUB_FRM_32.
def ROT32imm_sw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
                "{{\n\t"
                ".reg .b32 %lhs;\n\t"
                ".reg .b32 %rhs;\n\t"
                "shl.b32 \t%lhs, $src, $amt1;\n\t"
                "shr.b32 \t%rhs, $src, $amt2;\n\t"
                "add.u32 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                []>;

// Rewrites an immediate N as the i32 target constant (32 - N).
def SUB_FRM_32 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N),
                                   MVT::i32);
}]>;

// rotl: shift left by $amt, right by 32 - $amt.
def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;
// rotr: shift left by 32 - $amt, right by $amt.
def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// 32-bit software rotate left by register.
// The rotate amount is first reduced modulo 32 (and.b32 with 31), the same way
// ROTL64reg_sw/ROTR64reg_sw reduce theirs modulo 64. PTX clamps shl/shr amounts
// larger than the register width, so without the mask an $amt >= 32 makes both
// partial shifts produce 0 instead of a rotation by ($amt mod 32).
// After the mask %amt2 is in [0, 31], so 32 - %amt2 is in [1, 32]; a shift by
// 32 yields 0, which is the correct second term for a rotation by 0.
def ROTL32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b32 %lhs;\n\t"
            ".reg .b32 %rhs;\n\t"
            ".reg .b32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 31;\n\t"
            "shl.b32 \t%lhs, $src, %amt2;\n\t"
            "sub.u32 \t%amt2, 32, %amt2;\n\t"
            "shr.b32 \t%rhs, $src, %amt2;\n\t"
            "add.u32 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
           Requires<[noHWROT32]>;

// 32-bit software rotate right by register.
// Same sequence as ROTL32reg_sw with the two shift directions swapped; the
// amount is masked to [0, 31] for the same reason.
def ROTR32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b32 %lhs;\n\t"
            ".reg .b32 %rhs;\n\t"
            ".reg .b32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 31;\n\t"
            "shr.b32 \t%lhs, $src, %amt2;\n\t"
            "sub.u32 \t%amt2, 32, %amt2;\n\t"
            "shl.b32 \t%rhs, $src, %amt2;\n\t"
            "add.u32 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
           Requires<[noHWROT32]>;

// 64-bit software rotate by immediate.  $amt2 should equal 64 - $amt1.
// Pattern-less instruction; the two Pats below select it and supply the
// complementary shift amount through SUB_FRM_64.
def ROT64imm_sw
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                "shl.b64 \t%lhs, $src, $amt1;\n\t"
                "shr.b64 \t%rhs, $src, $amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                []>;

// Rewrites an immediate N as the i32 target constant (64 - N).
def SUB_FRM_64 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(64 - N->getZExtValue(), SDLoc(N),
                                   MVT::i32);
}]>;

// rotl: shift left by $amt, right by 64 - $amt.
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr: shift left by 64 - $amt, right by $amt.
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;

// 64-bit software rotate left by register.
// %amt2 first holds $amt masked to [0, 63] and is then reused for 64 minus
// that value, so the two partial shifts always add up to a 64-bit rotation.
def ROTL64reg_sw
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                ".reg .u32 %amt2;\n\t"
                "and.b32 \t%amt2, $amt, 63;\n\t"
                "shl.b64 \t%lhs, $src, %amt2;\n\t"
                "sub.u32 \t%amt2, 64, %amt2;\n\t"
                "shr.b64 \t%rhs, $src, %amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int64Regs:$dst,
                      (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>;

// 64-bit software rotate right by register: the same sequence with the two
// shift directions exchanged.
def ROTR64reg_sw
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                ".reg .u32 %amt2;\n\t"
                "and.b32 \t%amt2, $amt, 63;\n\t"
                "shr.b64 \t%lhs, $src, %amt2;\n\t"
                "sub.u32 \t%amt2, 64, %amt2;\n\t"
                "shl.b64 \t%rhs, $src, %amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int64Regs:$dst,
                      (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>;

//
// Funnel shift in clamp mode
//

// Create SDNodes so they can be used in the DAG code, e.g.
// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;

// shf.l.clamp.b32: selected for the FUN_SHFL_CLAMP node.
def FUNSHFLCLAMP
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
                [(set Int32Regs:$dst,
                      (FUN_SHFL_CLAMP (i32 Int32Regs:$lo),
                                      (i32 Int32Regs:$hi),
                                      (i32 Int32Regs:$amt)))]>;

// shf.r.clamp.b32: selected for the FUN_SHFR_CLAMP node.
def FUNSHFRCLAMP
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
                [(set Int32Regs:$dst,
                      (FUN_SHFR_CLAMP (i32 Int32Regs:$lo),
                                      (i32 Int32Regs:$hi),
                                      (i32 Int32Regs:$amt)))]>;

//
// BFE - bit-field extract
//

// Template for BFE/BFI instructions.
// Args: [dest (reg), src (reg), start (reg or imm), end (reg or imm)].
// Start may be an imm only if end is also an imm.  FIXME: Is this a
// restriction in PTX?
//
// dest and src may be int32 or int64, but start and end are always int32.
// bfe: one integer result with the same type as operand 1; operands 2 and 3
// are i32.
def SDTBFE
    : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                           SDTCisInt<0>,
                           SDTCisVT<2, i32>,
                           SDTCisVT<3, i32>]>;
def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;

// bfi: one integer result with the same type as operands 1 and 2; operands 3
// and 4 are i32.
def SDTBFI
    : SDTypeProfile<1, 4, [SDTCisInt<0>,
                           SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>,
                           SDTCisVT<3, i32>,
                           SDTCisVT<4, i32>]>;
def bfi : SDNode<"NVPTXISD::BFI", SDTBFI>;

// prmt: the result and all four operands are i32.
def SDTPRMT
    : SDTypeProfile<1, 4, [SDTCisVT<0, i32>,
                           SDTCisVT<1, i32>,
                           SDTCisVT<2, i32>,
                           SDTCisVT<3, i32>,
                           SDTCisVT<4, i32>]>;
def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;

// Emits the rrr / rri / rii variants of one bfe instruction. $b and $c are the
// two i32 operands; $b is an immediate only in the form where $c is too.
multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
  def rrr
      : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
                  Instr # " \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (bfe (T RC:$a), (i32 Int32Regs:$b),
                             (i32 Int32Regs:$c)))]>;
  def rri
      : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c),
                  Instr # " \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>;
  def rii
      : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c),
                  Instr # " \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>;
}

// Emits six bfi variants: $a is either a register (r...) or an ImmCls
// immediate (i...), combined with the rr / ri / ii choices for the two i32
// operands $c and $d.
multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
  // $a in a register.
  def rrrr
      : NVPTXInst<(outs RC:$f),
                  (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c),
                             (i32 Int32Regs:$d)))]>;
  def rrri
      : NVPTXInst<(outs RC:$f),
                  (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c),
                             (i32 imm:$d)))]>;
  def rrii
      : NVPTXInst<(outs RC:$f),
                  (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T RC:$a), (T RC:$b), (i32 imm:$c),
                             (i32 imm:$d)))]>;

  // $a as an immediate.
  def irrr
      : NVPTXInst<(outs RC:$f),
                  (ins ImmCls:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c),
                             (i32 Int32Regs:$d)))]>;
  def irri
      : NVPTXInst<(outs RC:$f),
                  (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c),
                             (i32 imm:$d)))]>;
  def irii
      : NVPTXInst<(outs RC:$f),
                  (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d),
                  Instr # " \t$f, $a, $b, $c, $d;",
                  [(set (T RC:$f),
                        (bfi (T imm:$a), (T RC:$b), (i32 imm:$c),
                             (i32 imm:$d)))]>;
}

// Emits the rrr / rri / rii variants of prmt.b32. The PrmtMode operand is
// printed as a suffix of the mnemonic.
multiclass PRMT<ValueType T, RegisterClass RC> {
  def rrr
      : NVPTXInst<(outs RC:$d),
                  (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode),
                  "prmt.b32${mode} \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c),
                              imm:$mode))]>;
  def rri
      : NVPTXInst<(outs RC:$d),
                  (ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode),
                  "prmt.b32${mode} \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (prmt (T RC:$a), (T RC:$b), (i32 imm:$c),
                              imm:$mode))]>;
  def rii
      : NVPTXInst<(outs RC:$d),
                  (ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode),
                  "prmt.b32${mode} \t$d, $a, $b, $c;",
                  [(set (T RC:$d),
                        (prmt (T RC:$a), (T imm:$b), (i32 imm:$c),
                              imm:$mode))]>;
}

let hasSideEffects = false in {
  // order is somewhat important here. signed/unsigned variants match
  // the same patterns, so the first one wins. Having unsigned byte extraction
  // has the benefit of always having zero in unused bits, which makes some
  // optimizations easier (e.g. no need to mask them).
  defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
  defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
  defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
  defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;

  defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
  defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;

  defm PRMT_B32 : PRMT<i32, Int32Regs>;
}


// byte extraction + signed/unsigned extension to i32.
def : Pat<(i32 (sext_inreg
                 (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), i8)),
          (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>;
def : Pat<(i32 (sext_inreg
                 (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), i8)),
          (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), 255)),
          (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>;
def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), 255)),
          (BFE_U32rii Int32Regs:$s, imm:$o, 8)>;

// byte extraction + signed extension to i16
def : Pat<(i16 (sext_inreg
                 (trunc (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8)), i8)),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;


// Byte extraction via shift/trunc/sext
def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)),
          (CVT_s8_s32 Int32Regs:$s, CvtNONE)>;
def : Pat<(i16 (sext_inreg
                 (trunc (srl (i32 Int32Regs:$s), (i32 imm:$o))), i8)),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl (i32 Int32Regs:$s), (i32 imm:$o)), i8),
          (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl (i64 Int64Regs:$s), (i32 imm:$o)), i8),
          (BFE_S64rii Int64Regs:$s, imm:$o, 8)>;
def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)),
          (CVT_s8_s64 Int64Regs:$s, CvtNONE)>;
def : Pat<(i16 (sext_inreg
                 (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)),
          (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>;

//-----------------------------------
// Comparison instructions (setp, set)
//-----------------------------------

// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.

let hasSideEffects = false in {
  // Emits the rr / ri / ir forms of setp for one operand type. None of them
  // carries a selection pattern. The comparison is the trailing CmpMode
  // operand, printed through the ${cmp:base} and ${cmp:ftz} modifiers.
  multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    // All three forms print identically; only the operand kinds differ.
    defvar Asm = "setp${cmp:base}${cmp:ftz}." # TypeStr # " \t$dst, $a, $b;";

    def rr : NVPTXInst<(outs Int1Regs:$dst),
                       (ins RC:$a, RC:$b, CmpMode:$cmp),
                       Asm, []>;
    def ri : NVPTXInst<(outs Int1Regs:$dst),
                       (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                       Asm, []>;
    def ir : NVPTXInst<(outs Int1Regs:$dst),
                       (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                       Asm, []>;
  }
}

defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;

// f16 compare; operands are taken from Int16Regs.
def SETP_f16rr
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
                []>,
      Requires<[useFP16Math]>;

// f16x2 compare; operands are taken from Int32Regs and two predicates are
// written, printed as $p|$q.
def SETP_f16x2rr
    : NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
                (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
                []>,
      Requires<[useFP16Math]>;

// bf16 counterparts of the two definitions above.
def SETP_bf16rr
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.bf16 \t$dst, $a, $b;",
                []>,
      Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;

def SETP_bf16x2rr
    : NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
                (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.bf16x2 \t$p|$q, $a, $b;",
                []>,
      Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;


// FIXME: This doesn't appear to be correct.  The "set" mnemonic has the form
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32.  Anyway these aren't used at the moment.

let hasSideEffects = false in {
  // Emits the rr / ri / ir forms of set for one operand type. The result goes
  // to an Int32Regs register and none of the forms has a selection pattern.
  multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
    // Shared by all three forms; only the operand kinds differ.
    defvar Asm = "set$cmp." # TypeStr # " \t$dst, $a, $b;";

    def rr : NVPTXInst<(outs Int32Regs:$dst),
                       (ins RC:$a, RC:$b, CmpMode:$cmp),
                       Asm, []>;
    def ri : NVPTXInst<(outs Int32Regs:$dst),
                       (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                       Asm, []>;
    def ir : NVPTXInst<(outs Int32Regs:$dst),
                       (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                       Asm, []>;
  }
}

defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>,
                Requires<[hasPTX<78>, hasSM<90>]>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------

// Addressing-mode selectors.  The string names the C++ selection routine that
// is invoked for the pattern.
def ADDRri :
  ComplexPattern<i32, 2, "SelectADDRri", [frameindex], [SDNPWantRoot]>;
def ADDRri64 :
  ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], [SDNPWantRoot]>;
def ADDRvar :
  ComplexPattern<iPTR, 1, "SelectDirectAddr", [], []>;

// Two-part (register, immediate) memory operands for 32- and 64-bit
// addresses.
let PrintMethod = "printMemOperand" in {
  def MEMri : Operand<i32> {
    let MIOperandInfo = (ops Int32Regs, i32imm);
  }
  def MEMri64 : Operand<i64> {
    let MIOperandInfo = (ops Int64Regs, i64imm);
  }
}

// Pointer-typed operands printed with the generic operand printer.
let PrintMethod = "printOperand" in {
  def imem    : Operand<iPTR>;
  def imemAny : Operand<iPTRAny>;
}

// i32 operands with dedicated printers.
def LdStCode : Operand<i32> { let PrintMethod = "printLdStCode"; }
def MmaCode  : Operand<i32> { let PrintMethod = "printMmaCode"; }

// NVPTXISD::Wrapper: one pointer-typed result whose type equals that of its
// single operand.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Materialise the address of a wrapped global symbol in a 32- or 64-bit
// register.
def MOV_ADDR :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins imem:$a),
            "mov.u32 \t$dst, $a;",
            [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins imem:$a),
            "mov.u64 \t$dst, $a;",
            [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack: $num selects which __local_depot<N> symbol is
// referenced.
let hasSideEffects = false in {
  def MOV_DEPOT_ADDR :
    NVPTXInst<(outs Int32Regs:$d),
              (ins i32imm:$num),
              "mov.u32 \t$d, __local_depot$num;", []>;
  def MOV_DEPOT_ADDR_64 :
    NVPTXInst<(outs Int64Regs:$d),
              (ins i32imm:$num),
              "mov.u64 \t$d, __local_depot$num;", []>;
}


// Register-to-register moves.  None of them carries a selection pattern;
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp.
let IsSimpleMove=1, hasSideEffects=0 in {
  // Predicate and typed-unsigned integer moves.
  def IMOV1rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
              "mov.pred \t$dst, $sss;", []>;
  def IMOV16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
              "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
              "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
              "mov.u64 \t$dst, $sss;", []>;
  def IMOV128rr :
    NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
              "mov.b128 \t$dst, $sss;", []>;

  // Untyped-bit integer moves.
  def IMOVB16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
              "mov.b16 \t$dst, $sss;", []>;
  def IMOVB32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
              "mov.b32 \t$dst, $sss;", []>;
  def IMOVB64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
              "mov.b64 \t$dst, $sss;", []>;

  // Floating-point moves.
  def FMOV16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              // We have to use .b16 here as there's no mov.f16.
              "mov.b16 \t$dst, $src;", []>;
  def FMOV32rr :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
              "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
              "mov.f64 \t$dst, $src;", []>;
}

// Integer immediate moves; these are selected for plain `imm` nodes.
def IMOV1ri :
  NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
            "mov.pred \t$dst, $src;",
            [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri :
  NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
            "mov.u16 \t$dst, $src;",
            [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri :
  NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
            "mov.u32 \t$dst, $src;",
            [(set (i32 Int32Regs:$dst), imm:$src)]>;
def IMOV64ri :
  NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
            "mov.u64 \t$dst, $src;",
            [(set Int64Regs:$dst, imm:$src)]>;

// Untyped-bit immediate moves; no selection pattern is attached.
def IMOVB16ri :
  NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
            "mov.b16 \t$dst, $src;", []>;
def IMOVB32ri :
  NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
            "mov.b32 \t$dst, $src;", []>;
def IMOVB64ri :
  NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
            "mov.b64 \t$dst, $src;", []>;

// Floating-point immediate moves; these are selected for `fpimm` nodes.
def FMOV32ri :
  NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
            "mov.f32 \t$dst, $src;",
            [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri :
  NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
            "mov.f64 \t$dst, $src;",
            [(set Float64Regs:$dst, fpimm:$src)]>;

// A wrapped external symbol is materialised with an immediate move of the
// matching width.
def : Pat<(i32 (Wrapper texternalsym:$dst)),
          (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)),
          (IMOV64ri texternalsym:$dst)>;

//---- Copy Frame Index ----
// The (register, immediate) address is printed with the "add" modifier as the
// source operands of an add.
def LEA_ADDRi :
  NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
            "add.u32 \t$dst, ${addr:add};",
            [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
            "add.u64 \t$dst, ${addr:add};",
            [(set Int64Regs:$dst, ADDRri64:$addr)]>;

//-----------------------------------
// Comparison and Selection
//-----------------------------------

// Selection patterns for one integer comparison node.
//
//   OpNode   - the comparison PatFrag to match (setgt, setult, ...).
//   Mode     - CmpMode leaf handed to the selected instruction.
//   setp_*   - predicate-producing (i1 result) instructions, one per operand
//              width (16/32/64) and operand form (rr/ri/ir).
//   set_*    - i32-producing instructions, same width/form grid.
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr, Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr, Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr, Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr, Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr, Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr, Instruction set_64ri,
                       Instruction set_64ir> {
  // ---- predicate (i1) results ----

  // i16 -> pred
  def : Pat<(i1 (OpNode i16:$a, i16:$b)),        (setp_16rr $a, $b, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),  (setp_16ri $a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),  (setp_16ir imm:$a, $b, Mode)>;
  // i32 -> pred
  def : Pat<(i1 (OpNode i32:$a, i32:$b)),
            (setp_32rr $a, $b, Mode)>;
  def : Pat<(i1 (OpNode (i32 Int32Regs:$a), imm:$b)),
            (setp_32ri $a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, (i32 Int32Regs:$b))),
            (setp_32ir imm:$a, $b, Mode)>;
  // i64 -> pred
  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (setp_64rr $a, $b, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
            (setp_64ri $a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
            (setp_64ir imm:$a, $b, Mode)>;

  // ---- i32 results ----

  // i16 -> i32
  def : Pat<(i32 (OpNode i16:$a, i16:$b)),        (set_16rr $a, $b, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),  (set_16ri $a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),  (set_16ir imm:$a, $b, Mode)>;
  // i32 -> i32
  def : Pat<(i32 (OpNode i32:$a, i32:$b)),
            (set_32rr $a, $b, Mode)>;
  def : Pat<(i32 (OpNode (i32 Int32Regs:$a), imm:$b)),
            (set_32ri $a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, (i32 Int32Regs:$b))),
            (set_32ir imm:$a, $b, Mode)>;
  // i64 -> i32
  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (set_64rr $a, $b, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
            (set_64ri $a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
            (set_64ir imm:$a, $b, Mode)>;
}

// ISET_FORMAT bound to the signed (.s16/.s32/.s64) instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_s16rr, SETP_s16ri, SETP_s16ir,
                SETP_s32rr, SETP_s32ri, SETP_s32ir,
                SETP_s64rr, SETP_s64ri, SETP_s64ir,
                SET_s16rr,  SET_s16ri,  SET_s16ir,
                SET_s32rr,  SET_s32ri,  SET_s32ir,
                SET_s64rr,  SET_s64ri,  SET_s64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// ISET_FORMAT bound to the unsigned (.u16/.u32/.u64) instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_u16rr, SETP_u16ri, SETP_u16ir,
                SETP_u32rr, SETP_u32ri, SETP_u32ir,
                SETP_u64rr, SETP_u64ri, SETP_u64ir,
                SET_u16rr,  SET_u16ri,  SET_u16ir,
                SET_u32rr,  SET_u32ri,  SET_u32ir,
                SET_u64rr,  SET_u64ri,  SET_u64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// Signed integer comparisons.
defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
// Unsigned integer comparisons.
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;

// i1 compares
// Inequality of two predicates is their XOR ...
def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
          (XORb1rr $a, $b)>;
def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
          (XORb1rr $a, $b)>;

// ... and equality is the negated XOR.
def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr $a, $b))>;
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr $a, $b))>;

// comparisons of i8 extracted with BFE as i32
// It's faster to do comparison directly on i32 extracted by BFE,
// instead of the long conversion and sign extending.
// Signed 8-bit fields: the sign-extended, truncated BFE results are compared
// by re-extracting each field with a signed BFE and comparing as s32.  Each
// comparison has a register-offset (rri) and an immediate-offset (rii) form.
def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;

// Unsigned 8-bit fields (masked with 255) and equality tests: the fields are
// re-extracted with an unsigned BFE and compared as u32.
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
                (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
                (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;

// i1 compare -> i32
// The XOR of the two predicates is true exactly when they differ, so the
// "not equal" result selects -1 on a set XOR and the "equal" result selects
// -1 on a clear XOR.  (The second pattern used to match setne as well, which
// duplicated the first pattern and left seteq without an i32 lowering.)
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;



// Selection patterns for one floating-point comparison node.
//
//   OpNode  - the comparison PatFrag to match (setogt, setult, seto, ...).
//   Mode    - CmpMode leaf used by default.
//   ModeFTZ - CmpMode leaf used by the patterns guarded with doF32FTZ.
//
// For every element type (f16, bf16, f32, f64) there is one group producing a
// predicate (SETP_*) and one producing an i32 (SET_*).  f16/bf16 values live
// in Int16Regs; an fpimm operand of those types is first materialised in a
// register with LOAD_CONST_F16 / LOAD_CONST_BF16, so the register/register
// instruction form is used on both sides.  f64 has no FTZ variants.
//
// NOTE(review): the bf16 groups pass ModeFTZ to SETP_bf16rr / SET_bf16rr under
// doF32FTZ -- confirm against the PTX ISA that a flush-to-zero modifier is
// accepted on bf16 comparisons.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  // f16 -> pred
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;

  // bf16 -> pred
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;

  // f32 -> pred
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> pred
  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;

  // f16 -> i32
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
        Requires<[useFP16Math]>;
  // The constant is loaded into a register first, so the rr form is required
  // here: the ir form expects an immediate as its first source operand.
  def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SET_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SET_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;

  // bf16 -> i32
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SET_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
        Requires<[hasBF16Math]>;
  // As for f16: the constant ends up in a register, so use the rr form.
  def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SET_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SET_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;

  // f32 -> i32
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> i32
  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
// Instantiate FSET_FORMAT (defined outside this excerpt) once per
// floating-point condition code, pairing the condition code with a compare
// mode and that mode's _FTZ counterpart.

// setoXX condition codes.
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;

// setuXX condition codes; these use the "U"-suffixed compare modes.
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

// Unprefixed condition codes; these reuse the same compare modes as the
// setoXX group above.
defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;

// seto / setuo map to the NUM / NAN compare modes.
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;

// Type profiles for the NVPTXISD nodes defined below.
// SDTypeProfile<NumResults, NumOperands, [constraints]>; a constraint index
// counts results first, then operands.
def SDTDeclareParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
def SDTCallValProfile : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;

// NVPTXISD node definitions. Most nodes are chained, glued on both sides and
// flagged as having side effects; the LoadParam* nodes carry SDNPMayLoad
// instead of SDNPSideEffect, the StoreRetval* and RETURN nodes are not glued,
// and MoveParam has no properties at all.

// Parameter / return-value declarations.
def DeclareParam :
  SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam :
  SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
  SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
  SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter loads (scalar, 2-element and 4-element result forms).
def LoadParam :
  SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2 :
  SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4 :
  SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Call printing (plain / convergent, each with a ".uni" flavour).
def PrintCall :
  SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCall :
  SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni :
  SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCallUni :
  SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallUniProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter stores.
def StoreParam :
  SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2 :
  SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4 :
  SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 :
  SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 :
  SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Call argument list, call target, and prototype.
def CallArgBegin :
  SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg :
  SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg :
  SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd :
  SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid :
  SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype :
  SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal :
  SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

def MoveParam :
  SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;

// Return-value stores and the return itself (chained, not glued).
def StoreRetval :
  SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2 :
  SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4 :
  SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
         [SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam :
  SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode :
  SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPSideEffect]>;
def ProxyReg :
  SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// ld.param from retval0 at offset $b, in scalar, v2 and v4 forms. These
// classes carry no selection pattern.
// NOTE(review): presumably selected from C++ instruction-selection code --
// confirm.
let mayLoad = true in {
  class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                  !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
                  []>;

  class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
                  !strconcat("ld.param.v2", opstr,
                             " \t{{$dst, $dst2}}, [retval0+$b];"), []>;

  class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins i32imm:$b),
                  !strconcat("ld.param.v4", opstr,
                             " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
                  []>;
}

// Register form of LoadParam: a mov from retval$b, matched when the first
// LoadParam operand is the constant 0.
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                !strconcat("mov", opstr, " \t$dst, retval$b;"),
                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

let mayStore = true in {

  // st.param to [param$a+$b]. Emits one record per operand kind, suffixed
  // "_r" for the register class and "_i" for the immediate operand type.
  // With support_imm = false only the "_r" record is emitted.
  multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
    foreach op = [IMMType, regclass] in
      if !or(support_imm, !isa<NVPTXRegClass>(op)) then
        def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
          : NVPTXInst<(outs),
                      (ins op:$val, i32imm:$a, i32imm:$b),
                      "st.param" # opstr # " \t[param$a+$b], $val;",
                      []>;
  }

  // Two-element st.param.v2. Every register/immediate combination of the two
  // value operands gets its own record; the suffix holds one 'r' or 'i' per
  // element, in operand order (_ii, _ir, _ri, _rr).
  multiclass StoreParamV2Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
    foreach op1 = [IMMType, regclass] in
      foreach op2 = [IMMType, regclass] in
        def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
              # !if(!isa<NVPTXRegClass>(op2), "r", "i")
          : NVPTXInst<(outs),
                      (ins op1:$val1, op2:$val2,
                           i32imm:$a, i32imm:$b),
                      "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
                      []>;
  }

  // Four-element st.param.v4, with the same per-element 'r'/'i' suffix scheme
  // as the v2 form (16 records per instantiation).
  multiclass StoreParamV4Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
    foreach op1 = [IMMType, regclass] in
      foreach op2 = [IMMType, regclass] in
        foreach op3 = [IMMType, regclass] in
          foreach op4 = [IMMType, regclass] in
            def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
                  # !if(!isa<NVPTXRegClass>(op2), "r", "i")
                  # !if(!isa<NVPTXRegClass>(op3), "r", "i")
                  # !if(!isa<NVPTXRegClass>(op4), "r", "i")

              : NVPTXInst<(outs),
                          (ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
                               i32imm:$a, i32imm:$b),
                          "st.param.v4" # opstr #
                          " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
                          []>;
  }

  // st.param to [func_retval0+$a], in scalar, v2 and v4 forms (register
  // operands only).
  class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
                  !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
                  []>;

  class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
                  !strconcat("st.param.v2", opstr,
                             " \t[func_retval0+$a], {{$val, $val2}};"),
                  []>;

  class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
        NVPTXInst<(outs),
                  (ins regclass:$val, regclass:$val2, regclass:$val3,
                       regclass:$val4, i32imm:$a),
                  !strconcat("st.param.v4", opstr,
                             " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
                  []>;
}

// Prints the opcode plus the return-value list of a call. One record exists
// per value 0..8 of OpNode's i32 operand; record N prints retval0 through
// retval(N-1).
let isCall=1 in {
  multiclass CALL<string OpcStr, SDNode OpNode> {
     def PrintCallNoRetInst : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " "), [(OpNode (i32 0))]>;
     def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0), "), [(OpNode (i32 1))]>;
     def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1), "), [(OpNode (i32 2))]>;
     def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2), "), [(OpNode (i32 3))]>;
     def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2, retval3), "),
       [(OpNode (i32 4))]>;
     def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4), "),
       [(OpNode (i32 5))]>;
     def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, "
                          "retval5), "),
       [(OpNode (i32 6))]>;
     def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, "
                          "retval5, retval6), "),
       [(OpNode (i32 7))]>;
     def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
       !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, "
                          "retval5, retval6, retval7), "),
       [(OpNode (i32 8))]>;
  }
}

defm Call : CALL<"call", PrintCall>;
defm CallUni : CALL<"call.uni", PrintCallUni>;

// Convergent call instructions.  These are identical to regular calls, except
// they have the isConvergent bit set.
let isConvergent=1 in {
  defm ConvergentCall : CALL<"call", PrintConvergentCall>;
  defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
}

// ld.param instantiations. The 8-bit variants use Int16Regs as their
// register class.
def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8     : LoadParamMemInst<Int16Regs, ".b8">;
def LoadParamMemV2I64  : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32  : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16  : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8   : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32  : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16  : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8   : LoadParamV4MemInst<Int16Regs, ".b8">;
def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32  : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64  : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32  : LoadParamV4MemInst<Float32Regs, ".f32">;

// st.param instantiations.
defm StoreParamI64    : StoreParamInst<Int64Regs, i64imm, ".b64">;
defm StoreParamI32    : StoreParamInst<Int32Regs, i32imm, ".b32">;
defm StoreParamI16    : StoreParamInst<Int16Regs, i16imm, ".b16">;
defm StoreParamI8     : StoreParamInst<Int16Regs, i8imm,  ".b8">;

// ".b8" stores from 32- and 64-bit registers; register form only.
defm StoreParamI8TruncI32 : StoreParamInst<Int32Regs, i8imm, ".b8", /* support_imm */ false>;
defm StoreParamI8TruncI64 : StoreParamInst<Int64Regs, i8imm, ".b8", /* support_imm */ false>;

defm StoreParamV2I64  : StoreParamV2Inst<Int64Regs, i64imm, ".b64">;
defm StoreParamV2I32  : StoreParamV2Inst<Int32Regs, i32imm, ".b32">;
defm StoreParamV2I16  : StoreParamV2Inst<Int16Regs, i16imm, ".b16">;
defm StoreParamV2I8   : StoreParamV2Inst<Int16Regs, i8imm,  ".b8">;

defm StoreParamV4I32  : StoreParamV4Inst<Int32Regs, i32imm, ".b32">;
defm StoreParamV4I16  : StoreParamV4Inst<Int16Regs, i16imm, ".b16">;
defm StoreParamV4I8   : StoreParamV4Inst<Int16Regs, i8imm,  ".b8">;

defm StoreParamF32    : StoreParamInst<Float32Regs, f32imm, ".f32">;
defm StoreParamF64    : StoreParamInst<Float64Regs, f64imm, ".f64">;

defm StoreParamV2F32  : StoreParamV2Inst<Float32Regs, f32imm, ".f32">;
defm StoreParamV2F64  : StoreParamV2Inst<Float64Regs, f64imm, ".f64">;

defm StoreParamV4F32  : StoreParamV4Inst<Float32Regs, f32imm, ".f32">;

def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8     : StoreRetvalInst<Int16Regs, ".b8">;
// ".b8" return-value stores whose source is a 32- or 64-bit register.
def StoreRetvalI8TruncI32 : StoreRetvalInst<Int32Regs, ".b8">;
def StoreRetvalI8TruncI64 : StoreRetvalInst<Int64Regs, ".b8">;
def StoreRetvalV2I64  : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32  : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16  : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8   : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32  : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16  : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8   : StoreRetvalV4Inst<Int16Regs, ".b8">;

def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64  : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32  : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32  : StoreRetvalV4Inst<Float32Regs, ".f32">;

// Argument-list delimiters and the return instruction. CallArgEnd prints
// ");" when its operand is 1 and a bare ")" when it is 0.
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;

// Register call arguments, matched when the first CallArg / LastCallArg
// operand is the constant 0. The CallArg forms print "$a, " and the
// LastCallArg forms print "$a" with no separator. The *VT variants match on
// an explicit value type instead of the register class.
class CallArgInst<NVPTXRegClass regclass> :
  NVPTXInst<(outs), (ins regclass:$a), "$a, ",
            [(CallArg (i32 0), regclass:$a)]>;

class CallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
  NVPTXInst<(outs), (ins regclass:$a), "$a, ",
            [(CallArg (i32 0), vt:$a)]>;

class LastCallArgInst<NVPTXRegClass regclass> :
  NVPTXInst<(outs), (ins regclass:$a), "$a",
            [(LastCallArg (i32 0), regclass:$a)]>;
class LastCallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
  NVPTXInst<(outs), (ins regclass:$a), "$a",
            [(LastCallArg (i32 0), vt:$a)]>;

def CallArgI64     : CallArgInst<Int64Regs>;
def CallArgI32     : CallArgInstVT<Int32Regs, i32>;
def CallArgI16     : CallArgInstVT<Int16Regs, i16>;
def CallArgF64     : CallArgInst<Float64Regs>;
def CallArgF32     : CallArgInst<Float32Regs>;

def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInstVT<Int32Regs, i32>;
def LastCallArgI16 : LastCallArgInstVT<Int16Regs, i16>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;

// Immediate i32 call arguments (first operand is still the constant 0).
def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
                              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
                                  [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Call arguments that name a declared parameter: matched when the first
// CallArg / LastCallArg operand is the constant 1, printed as "param$a".
def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
                             [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
                                 [(LastCallArg (i32 1), (i32 imm:$a))]>;

// Call target: a wrapped global address, or an address held in a 32-bit or
// 64-bit register.
def CallVoidInst :      NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
                                  [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg :   NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
                                  [(CallVoid i32:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
                                  [(CallVoid Int64Regs:$addr)]>;
def PrototypeInst :     NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
                                  [(Prototype (i32 imm:$val))]>;

// Return-value declarations. DeclareRet's first operand picks the form that
// is printed: 1 -> ".param .b$size", 2 -> ".reg .b$size".
def DeclareRetMemInst :
  NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
            ".param .align $align .b8 retval$num[$size];",
            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetScalarInst :
  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
            ".param .b$size retval$num;",
            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst :
  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
            ".reg .b$size retval$num;",
            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;

// Outgoing-parameter declarations. DeclareScalarParam's last operand picks
// the form that is printed: 0 -> ".param .b$size", 1 -> ".reg .b$size".
def DeclareParamInst :
  NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
            ".param .align $align .b8 param$a[$size];",
            [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
def DeclareScalarParamInst :
  NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
            ".param .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst :
  NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
            ".reg .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;

// MoveParam selected to a register-to-register mov of value type T.
class MoveParamInst<ValueType T, NVPTXRegClass regclass, string asmstr> :
  NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
            !strconcat("mov", asmstr, " \t$dst, $src;"),
            [(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>;

// MoveParam whose source is a target external symbol rather than a register.
class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty, ValueType vt,
                          string asmstr> :
  NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
            !strconcat("mov", asmstr, " \t$dst, $src;"),
            [(set vt:$dst, (MoveParam texternalsym:$src))]>;

def MoveParamI64 : MoveParamInst<i64, Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<i32, Int32Regs, ".b32">;

def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, i64, ".b64">;
def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, i32, ".b32">;

// The i16 MoveParam is written out by hand because it prints a cvt rather
// than the mov used by the other widths.
def MoveParamI16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ?
            [(set i16:$dst, (MoveParam i16:$src))]>;
def MoveParamF64 : MoveParamInst<f64, Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<f32, Float32Regs, ".f32">;

// PseudoUseParam prints only a "//" comment line naming the register it
// uses; no PTX instruction text is produced.
class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
  NVPTXInst<(outs), (ins regclass:$src),
            "// Pseudo use of $src",
            [(PseudoUseParam vt:$src)]>;

def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs, i64>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs, i32>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs, i16>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs, f64>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs, f32>;

// ProxyReg selected to a same-class "mov.<SzStr>" register copy.
class ProxyRegInst<string SzStr, ValueType T, NVPTXRegClass regclass> :
  NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
            !strconcat("mov.", SzStr, " \t$dst, $src;"),
            [(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>;

def ProxyRegI1    : ProxyRegInst<"pred", i1, Int1Regs>;
def ProxyRegI16   : ProxyRegInst<"b16",  i16, Int16Regs>;
def ProxyRegI32   : ProxyRegInst<"b32",  i32, Int32Regs>;
def ProxyRegI64   : ProxyRegInst<"b64",  i64, Int64Regs>;
def ProxyRegF32   : ProxyRegInst<"f32",  f32, Float32Regs>;
def ProxyRegF64   : ProxyRegInst<"f64",  f64, Float64Regs>;

// f16 / bf16 values go through the 16-bit integer proxy instruction.
foreach vt = [f16, bf16] in {
  def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>;
}

// These vector types go through the 32-bit integer proxy instruction.
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
  def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>;
}

//
// Load / Store Handling
//
// One load record per addressing form:
//   _avar    - imem address
//   _areg    - address in a 32-bit register
//   _areg_64 - address in a 64-bit register
//   _ari     - 32-bit register plus i32 immediate offset
//   _ari_64  - 64-bit register plus i32 immediate offset
//   _asi     - imem address plus i32 immediate offset
// The LdStCode operands are printed through the volatile / addsp / vec / sign
// modifiers. None of the records has a selection pattern.
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr];", []>;
  def _areg : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr];", []>;
  def _areg_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr];", []>;
  def _ari : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr+$offset];", []>;
  def _ari_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr+$offset];", []>;
  def _asi : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
    "\t$dst, [$addr+$offset];", []>;
}

// LD_i8 and LD_i16 both use Int16Regs as their register class.
let mayLoad=1, hasSideEffects=0 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}

// Store counterpart of LD: the same six addressing forms, with the value to
// store ($src) as the first input operand and no outputs.
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr], $src;", []>;
  def _areg : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr], $src;", []>;
  def _areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr], $src;", []>;
  def _ari : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr+$offset], $src;", []>;
  def _ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr+$offset], $src;", []>;
  def _asi : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
    " \t[$addr+$offset], $src;", []>;
}

// ST_i8 and ST_i16 both use Int16Regs as their register class.
let mayStore=1, hasSideEffects=0 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}

// The following is used only in and after vector elementizations.  Vector
// elementization happens at the machine instruction level, so the following
// instructions never appear in the DAG.
multiclass LD_VEC<NVPTXRegClass regclass> {
  // Vector load variants returning two (_v2_*) or four (_v4_*) registers of
  // `regclass`.  The name suffix encodes the address operands:
  //   _avar    : imem address
  //   _areg    : Int32Regs address          _areg_64 : Int64Regs address
  //   _ari     : Int32Regs address + i32imm offset
  //   _ari_64  : Int64Regs address + i32imm offset
  //   _asi     : imem address + i32imm offset
  // None of the variants carries an isel pattern.

  // Assembly-string pieces shared by all variants.
  defvar LdVecOp =
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth \t";
  defvar LdVecDst2 = "{{$dst1, $dst2}}";
  defvar LdVecDst4 = "{{$dst1, $dst2, $dst3, $dst4}}";
  defvar LdVecAddr = ", [$addr];";
  defvar LdVecAddrOff = ", [$addr+$offset];";

  // Two-element loads.
  def _v2_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddr), []>;
  def _v2_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddr), []>;
  def _v2_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddr), []>;
  def _v2_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddrOff), []>;
  def _v2_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddrOff), []>;
  def _v2_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst2, LdVecAddrOff), []>;

  // Four-element loads.
  def _v4_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddr), []>;
  def _v4_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddr), []>;
  def _v4_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddr), []>;
  def _v4_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddrOff), []>;
  def _v4_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddrOff), []>;
  def _v4_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(LdVecOp, LdVecDst4, LdVecAddrOff), []>;
}

// One LD_VEC family per element type; i8 elements use Int16Regs.
let mayLoad=1, hasSideEffects=0 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}

multiclass ST_VEC<NVPTXRegClass regclass> {
  // Vector store variants taking two (_v2_*) or four (_v4_*) source registers
  // of `regclass`.  Address-mode suffixes follow the same scheme as LD_VEC:
  // _avar/_asi use an imem address, _areg/_ari an Int32Regs address,
  // _areg_64/_ari_64 an Int64Regs address; _ari, _ari_64 and _asi additionally
  // take an i32imm offset.  None of the variants carries an isel pattern.
  // Note that the width operand is named $fromWidth here as well.

  // Assembly-string pieces shared by all variants.
  defvar StVecOp =
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth \t";
  defvar StVecAddr = "[$addr], ";
  defvar StVecAddrOff = "[$addr+$offset], ";
  defvar StVecSrc2 = "{{$src1, $src2}};";
  defvar StVecSrc4 = "{{$src1, $src2, $src3, $src4}};";

  // Two-element stores.
  def _v2_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc2), []>;
  def _v2_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc2), []>;
  def _v2_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc2), []>;
  def _v2_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc2), []>;
  def _v2_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc2), []>;
  def _v2_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc2), []>;

  // Four-element stores.
  def _v4_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc4), []>;
  def _v4_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc4), []>;
  def _v4_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(StVecOp, StVecAddr, StVecSrc4), []>;
  def _v4_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc4), []>;
  def _v4_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc4), []>;
  def _v4_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(StVecOp, StVecAddrOff, StVecSrc4), []>;
}

// One ST_VEC family per element type; i8 elements use Int16Regs.
let mayStore=1, hasSideEffects=0 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}

//---- Conversion ----

// Same-width bitconvert emitted as "mov.b<SzStr> $d, $a;".
//   SzStr       - bit width appended to the "mov.b" mnemonic.
//   TIn / TOut  - source and result value types of the bitconvert.
//   regclassIn / regclassOut - register classes of $a and $d; by default
//                 taken from ValueToRegClass for the respective value type.
class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
                   NVPTXRegClass regclassIn = ValueToRegClass<TIn>.ret,
                   NVPTXRegClass regclassOut = ValueToRegClass<TOut>.ret>
  : NVPTXInst<(outs regclassOut:$d),
              (ins regclassIn:$a),
              "mov.b" # SzStr # " \t$d, $a;",
              [(set (TOut regclassOut:$d),
                    (bitconvert (TIn regclassIn:$a)))]>;

def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;

// f32 <-> packed 32-bit vector types: reuse the 32-bit mov-based bitconverts.
foreach packedTy = [v2f16, v2bf16, v2i16, v4i8] in {
  def : Pat<(packedTy (bitconvert (f32 Float32Regs:$a))),
            (BITCONVERT_32_F2I Float32Regs:$a)>;
  def : Pat<(f32 (bitconvert (packedTy Int32Regs:$a))),
            (BITCONVERT_32_I2F Int32Regs:$a)>;
}

// i16 <-> 16-bit floating-point types.  A constant is materialized with
// IMOVB16ri; a register value goes through ProxyRegI16.
foreach halfTy = [f16, bf16] in {
  def : Pat<(halfTy (bitconvert (i16 UInt16Const:$a))),
            (IMOVB16ri UInt16Const:$a)>;
  def : Pat<(halfTy (bitconvert (i16 Int16Regs:$a))),
            (ProxyRegI16 Int16Regs:$a)>;
  def : Pat<(i16 (bitconvert (halfTy Int16Regs:$a))),
            (ProxyRegI16 Int16Regs:$a)>;
}

// Bitconverts between distinct value types held in Int32Regs.  A constant is
// materialized with IMOVB32ri; every ordered pair of different types goes
// through ProxyRegI32.
foreach dstTy = [v2f16, v2bf16, v2i16, v4i8, i32] in {
  def : Pat<(dstTy (bitconvert (i32 UInt32Const:$a))),
            (IMOVB32ri UInt32Const:$a)>;
  foreach srcTy = [v2f16, v2bf16, v2i16, v4i8, i32] in {
    // Skip the identity conversion.
    if !ne(dstTy, srcTy) then {
      def : Pat<(dstTy (bitconvert (srcTy Int32Regs:$a))),
                (ProxyRegI32 Int32Regs:$a)>;
    }
  }
}

// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns.  Therefore, we
// use an integer selp to materialize the predicate as an integer and then cvt
// that integer to floating-point.
// Integer -> floating-point conversions.
//
// An i1 source is first widened to a 32-bit integer with selp and then
// converted.  For the signed conversions a set i1 represents -1, so the selp
// must produce -1 (giving -1.0); for the unsigned conversions it produces 1.

// sint -> f16
def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
          (CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
          (CVT_f16_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
          (CVT_f16_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f16 (sint_to_fp Int64Regs:$a)),
          (CVT_f16_s64 Int64Regs:$a, CvtRN)>;

// uint -> f16
def : Pat<(f16 (uint_to_fp Int1Regs:$a)),
          (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
          (CVT_f16_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f16 (uint_to_fp Int32Regs:$a)),
          (CVT_f16_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f16 (uint_to_fp Int64Regs:$a)),
          (CVT_f16_u64 Int64Regs:$a, CvtRN)>;

// sint -> bf16
def : Pat<(bf16 (sint_to_fp Int1Regs:$a)),
          (CVT_bf16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp Int16Regs:$a)),
          (CVT_bf16_s16 Int16Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp Int32Regs:$a)),
          (CVT_bf16_s32 Int32Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp Int64Regs:$a)),
          (CVT_bf16_s64 Int64Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;

// uint -> bf16
def : Pat<(bf16 (uint_to_fp Int1Regs:$a)),
          (CVT_bf16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp Int16Regs:$a)),
          (CVT_bf16_u16 Int16Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp Int32Regs:$a)),
          (CVT_bf16_u32 Int32Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp Int64Regs:$a)),
          (CVT_bf16_u64 Int64Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;

// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;

// uint -> f32
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;

// sint -> f64
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;

// uint -> f64
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;


// f16 -> sint
// The i1 result is the low bit of the CvtRZI conversion to a 32-bit integer
// (the same and/setp sequence used for truncating an i32 to i1).  Comparing
// the raw f16 bits against zero would yield true only for an input of +0.0,
// which is the opposite of the required result.
def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))),
          (SETP_b32ri (ANDb32ri (CVT_s32_f16 Int16Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s64_f16 Int16Regs:$a, CvtRZI)>;
// Floating-point -> integer conversions.
//
// For an i1 result the value is first converted (CvtRZI) to a 32-bit integer
// and the predicate is then derived from its low bit, using the same
// and/setp sequence as the i32 -> i1 truncation.  Comparing the raw
// floating-point bits against zero would be true only for an input of +0.0,
// i.e. exactly when the converted value is 0 and the result must be false.

// f16 -> uint
def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))),
          (SETP_b32ri (ANDb32ri (CVT_u32_f16 Int16Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u16_f16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u32_f16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u64_f16 Int16Regs:$a, CvtRZI)>;

// bf16 -> sint
def : Pat<(i1 (fp_to_sint (bf16 Int16Regs:$a))),
          (SETP_b32ri (ANDb32ri (CVT_s32_bf16 Int16Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s16_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i32 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s32_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i64 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s64_bf16 Int16Regs:$a, CvtRZI)>;

// bf16 -> uint
def : Pat<(i1 (fp_to_uint (bf16 Int16Regs:$a))),
          (SETP_b32ri (ANDb32ri (CVT_u32_bf16 Int16Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u16_bf16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u32_bf16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u64_bf16 Int16Regs:$a, CvtRZI)>;

// f32 -> sint
// For i16/i32/i64 results the doF32FTZ-guarded pattern is listed ahead of the
// unguarded fallback.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;

// f32 -> uint
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;

// f64 -> sint
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;

// f64 -> uint
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;

// sext i1
// A set predicate sign-extends to all ones (-1), a clear one to 0.
def : Pat<(i16 (sext Int1Regs:$a)),
          (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)),
          (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)),
          (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1
// A set predicate selects the immediate 1, a clear predicate selects 0.
def : Pat<(i16 (zext Int1Regs:$a)), (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)), (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)), (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1
// Uses the same SELP instructions as zext, but with -1 as the "true" value.
def : Pat<(i16 (anyext Int1Regs:$a)), (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)), (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)), (SELP_u64ii -1, 0, Int1Regs:$a)>;

// sext i16
def : Pat<(i32 (sext Int16Regs:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16
def : Pat<(i32 (zext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16 (selected exactly like zext i16)
def : Pat<(i32 (anyext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// sext i32
def : Pat<(i64 (sext Int32Regs:$a)), (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;

// zext i32
def : Pat<(i64 (zext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

// anyext i32 (selected exactly like zext i32)
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;


// truncate i64
// Truncation to i1 keeps only bit 0: the input is ANDed with 1 and the result
// compared for equality with 1.  The i32 and i16 sources below do the same.
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;

// truncate i32
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;

// truncate i16
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;

// sext_inreg
def : Pat<(sext_inreg Int16Regs:$a, i8),  (CVT_INREG_s16_s8 Int16Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i8),  (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i8),  (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;


// Select instructions with 32-bit predicates
// The i32 predicate is reduced to an i1 first: its low bit is isolated with
// ANDb32ri and compared for equality with 1, and that result drives the SELP.
def : Pat<(select (i32 Int32Regs:$pred), i16:$a, i16:$b),
          (SELP_b16rr Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), i32:$a, i32:$b),
          (SELP_b32rr Int32Regs:$a, Int32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), Int64Regs:$a, Int64Regs:$b),
          (SELP_b64rr Int64Regs:$a, Int64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred),
                  (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
          (SELP_f16rr Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred),
                  (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
          (SELP_bf16rr Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), Float32Regs:$a, Float32Regs:$b),
          (SELP_f32rr Float32Regs:$a, Float32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), Float64Regs:$a, Float64Regs:$b),
          (SELP_f64rr Float64Regs:$a, Float64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;


// Register pack/unpack moves.  None of these carry selection patterns of
// their own ([] pattern lists); they are referenced from Pat<> definitions.
let hasSideEffects = false in {
  // pack a set of smaller int registers to a larger int register
  def V4I16toI64 :
    NVPTXInst<(outs Int64Regs:$d),
              (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4),
              "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>;
  def V2I16toI32 :
    NVPTXInst<(outs Int32Regs:$d),
              (ins Int16Regs:$s1, Int16Regs:$s2),
              "mov.b32 \t$d, {{$s1, $s2}};", []>;
  def V2I32toI64 :
    NVPTXInst<(outs Int64Regs:$d),
              (ins Int32Regs:$s1, Int32Regs:$s2),
              "mov.b64 \t$d, {{$s1, $s2}};", []>;
  def V2I64toI128 :
    NVPTXInst<(outs Int128Regs:$d),
              (ins Int64Regs:$s1, Int64Regs:$s2),
              "mov.b128 \t$d, {{$s1, $s2}};", []>;
  def V2F32toF64 :
    NVPTXInst<(outs Float64Regs:$d),
              (ins Float32Regs:$s1, Float32Regs:$s2),
              "mov.b64 \t$d, {{$s1, $s2}};", []>;

  // unpack a larger int register to a set of smaller int registers
  def I64toV4I16 :
    NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4),
              (ins Int64Regs:$s),
              "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>;
  def I32toV2I16 :
    NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
              (ins Int32Regs:$s),
              "mov.b32 \t{{$d1, $d2}}, $s;", []>;
  def I64toV2I32 :
    NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
              (ins Int64Regs:$s),
              "mov.b64 \t{{$d1, $d2}}, $s;", []>;
  def I128toV2I64 :
    NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2),
              (ins Int128Regs:$s),
              "mov.b128 \t{{$d1, $d2}}, $s;", []>;
  def F64toV2F32 :
    NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
              (ins Float64Regs:$s),
              "mov.b64 \t{{$d1, $d2}}, $s;", []>;

  // Single-half extraction: the unwanted half is written to a scratch
  // register declared inside the emitted PTX block.
  def I32toI16H :
    NVPTXInst<(outs Int16Regs:$high), (ins Int32Regs:$s),
              "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}", []>;
  def I32toI16L :
    NVPTXInst<(outs Int16Regs:$low), (ins Int32Regs:$s),
              "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}", []>;
  def I64toI32H :
    NVPTXInst<(outs Int32Regs:$high), (ins Int64Regs:$s),
              "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}", []>;
  def I64toI32L :
    NVPTXInst<(outs Int32Regs:$low), (ins Int64Regs:$s),
              "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}", []>;

}

// Using partial vectorized move produces better SASS code for extraction of
// upper/lower parts of an integer.
def : Pat<(i16 (trunc (srl Int32Regs:$s, (i32 16)))),
          (I32toI16H Int32Regs:$s)>;
def : Pat<(i16 (trunc (sra Int32Regs:$s, (i32 16)))),
          (I32toI16H Int32Regs:$s)>;
def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
          (I64toI32H Int64Regs:$s)>;
def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
          (I64toI32H Int64Regs:$s)>;

// Sign-extending element 0 of a v2i16 is done in place on the 32-bit register.
def : Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))),
          (CVT_INREG_s32_s16 Int32Regs:$src)>;

// Two-element 16-bit vectors live in Int32Regs: element 0 is read with
// I32toI16L and element 1 with I32toI16H.
foreach vt = [v2f16, v2bf16, v2i16] in {
  def : Pat<(extractelt (vt Int32Regs:$src), 0), (I32toI16L Int32Regs:$src)>;
  def : Pat<(extractelt (vt Int32Regs:$src), 1), (I32toI16H Int32Regs:$src)>;
}

// Building such a vector from two 16-bit scalars is a single packing move.
def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))),
          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;

def : Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;

// Count leading zeros
// Both widths write their result to a 32-bit register.
let hasSideEffects = false in {
  def CLZr32 :
    NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "clz.b32 \t$d, $a;", []>;
  def CLZr64 :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "clz.b64 \t$d, $a;", []>;
}

// 32-bit has a direct PTX instruction
def : Pat<(i32 (ctlz (i32 Int32Regs:$a))), (CLZr32 Int32Regs:$a)>;

// The return type of the ctlz ISD node is the same as its input, but the PTX
// clz instruction always returns a 32-bit value.  For ctlz.i64, convert the
// PTX value to 64 bits to match the ISD node's semantics, unless we know we're
// truncating back down to 32 bits.
def : Pat<(i64 (ctlz Int64Regs:$a)),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(i32 (trunc (i64 (ctlz Int64Regs:$a)))),
          (CLZr64 Int64Regs:$a)>;

// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the
// result back to 16-bits if necessary.  We also need to subtract 16 because
// the high-order 16 zeros were counted.
//
// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could
// use to save one SASS instruction (on sm_35 anyway):
//
//   mov.b32 $tmp, {0xffff, $a}
//   clz.b32 $result, $tmp
//
// That is, instead of zero-extending the input to 32 bits, we'd "one-extend"
// and then count the leading zeros of that value.  This way we don't have to
// subtract 16 from the result.  Unfortunately today we don't have a way to
// generate "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
def : Pat<(i16 (ctlz Int16Regs:$a)),
          (SUBi16ri
            (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE),
            16)>;
def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
          (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;

// Population count
// As with clz, both widths write their result to a 32-bit register.
let hasSideEffects = false in {
  def POPCr32 :
    NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "popc.b32 \t$d, $a;", []>;
  def POPCr64 :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "popc.b64 \t$d, $a;", []>;
}

// 32-bit has a direct PTX instruction
def : Pat<(i32 (ctpop (i32 Int32Regs:$a))), (POPCr32 Int32Regs:$a)>;

// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
// to match the LLVM semantics.  Just as with ctlz.i64, we provide a second
// pattern that avoids the type conversion if we're truncating the result to
// i32 anyway.
def : Pat<(ctpop Int64Regs:$a),
          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(i32 (trunc (i64 (ctpop Int64Regs:$a)))),
          (POPCr64 Int64Regs:$a)>;

// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits.
// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop Int16Regs:$a),
          (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
          (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;

// Narrowing FP conversions all use mode CvtRN; the f64 -> f32 case has an
// additional CvtRN_FTZ form that is only enabled under doF32FTZ.  The bf16
// destinations are gated on the PTX/SM versions listed in their Requires<>.

// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

// fpround f32 -> bf16
def : Pat<(bf16 (fpround Float32Regs:$a)),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
          (CVT_f16_f64 Float64Regs:$a, CvtRN)>;

// fpround f64 -> bf16
def : Pat<(bf16 (fpround Float64Regs:$a)),
          (CVT_bf16_f64 Float64Regs:$a, CvtRN)>,
      Requires<[hasPTX<78>, hasSM<90>]>;

// fpround f64 -> f32
def : Pat<(f32 (fpround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fpround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;

// Widening FP conversions use CvtNONE, or CvtNONE_FTZ under doF32FTZ where an
// FTZ form is listed.

// fpextend f16 -> f32
def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
          (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;

// fpextend bf16 -> f32
def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
          (CVT_f32_bf16 Int16Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
          (CVT_f32_bf16 Int16Regs:$a, CvtNONE)>,
      Requires<[hasPTX<71>, hasSM<80>]>;

// fpextend f16 -> f64
def : Pat<(f64 (fpextend (f16 Int16Regs:$a))),
          (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;

// fpextend bf16 -> f64
def : Pat<(f64 (fpextend (bf16 Int16Regs:$a))),
          (CVT_f64_bf16 Int16Regs:$a, CvtNONE)>,
      Requires<[hasPTX<78>, hasSM<90>]>;

// fpextend f32 -> f64
def : Pat<(f64 (fpextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f64 (fpextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;

// Return node; it is the selection pattern of the Return instruction.
def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
                     [SDNPHasChain, SDNPOptInGlue]>;

// fceil, ffloor, froundeven, ftrunc.
// CVT_ROUND emits the selection patterns that lower one rounding node to a
// same-type CVT instruction.
//   OpNode  - the SDNode being matched.
//   Mode    - conversion mode used for f16, bf16, f64 and for f32 under
//             doNoF32FTZ.
//   ModeFTZ - conversion mode used for f32 under doF32FTZ.
multiclass CVT_ROUND<SDNode OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  def : Pat<(OpNode (f16 Int16Regs:$a)),
            (CVT_f16_f16 Int16Regs:$a, Mode)>;
  def : Pat<(OpNode (bf16 Int16Regs:$a)),
            (CVT_bf16_bf16 Int16Regs:$a, Mode)>;
  // The two f32 forms are mutually exclusive through their predicates.
  def : Pat<(OpNode Float32Regs:$a),
            (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(OpNode Float32Regs:$a),
            (CVT_f32_f32 Float32Regs:$a, Mode)>,
        Requires<[doNoF32FTZ]>;
  def : Pat<(OpNode Float64Regs:$a),
            (CVT_f64_f64 Float64Regs:$a, Mode)>;
}

defm : CVT_ROUND<fceil,      CvtRPI, CvtRPI_FTZ>;
defm : CVT_ROUND<ffloor,     CvtRMI, CvtRMI_FTZ>;
defm : CVT_ROUND<froundeven, CvtRNI, CvtRNI_FTZ>;
defm : CVT_ROUND<ftrunc,     CvtRZI, CvtRZI_FTZ>;

// nearbyint and rint are implemented as rounding to nearest even.  This isn't
// strictly correct, because it causes us to ignore the rounding mode.  But it
// matches what CUDA's "libm" does.
defm : CVT_ROUND<fnearbyint, CvtRNI, CvtRNI_FTZ>;
defm : CVT_ROUND<frint,      CvtRNI, CvtRNI_FTZ>;

//-----------------------------------
// Control-flow
//-----------------------------------

let isTerminator = 1 in {
  let isReturn = 1, isBarrier = 1 in
  def Return : NVPTXInst<(outs), (ins), "ret;", [(retglue)]>;

  // Conditional branches.  CBranch is taken when the predicate is set;
  // CBranchOther prints the negated predicate ("@!") and has no pattern of
  // its own.
  let isBranch = 1 in {
    def CBranch :
      NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                "@$a bra \t$target;",
                [(brcond Int1Regs:$a, bb:$target)]>;
    def CBranchOther :
      NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                "@!$a bra \t$target;", []>;
  }

  // Unconditional branch.
  let isBranch = 1, isBarrier = 1 in
  def GOTO :
    NVPTXInst<(outs), (ins brtarget:$target),
              "bra.uni \t$target;", [(br bb:$target)]>;
}

// A branch on an i32 condition first compares the value against 0 (CmpNE) to
// obtain the predicate that CBranch expects.
def : Pat<(brcond (i32 Int32Regs:$a), bb:$target),
          (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;

// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if the target block is the next block so that the code
// can fall through to the target block.  The inversion is done by 'xor
// condition, 1', which will be translated to (setne condition, -1).  Since ptx
// supports '@!pred bra target', we should use it.
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
          (CBranchOther Int1Regs:$a, bb:$target)>;

// Call
def SDT_NVPTXCallSeqStart :
  SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def SDT_NVPTXCallSeqEnd :
  SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;

def callseq_start :
  SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
         [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end :
  SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>;

def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call :
  SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def calltarget : Operand<i32>;

let isCall = 1 in
def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;

// The call node is selected to CALL for both global-address and
// external-symbol targets.
def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;

// Pseudo instructions.
// Pseudo forwards its four arguments unchanged to NVPTXInst.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs, ins, asmstr, pattern>;

// Call-sequence markers.  Each prints one brace followed by a "// callseq"
// comment carrying the immediate operand(s).
def Callseq_Start :
  NVPTXInst<(outs),
            (ins i32imm:$amt1, i32imm:$amt2),
            "\\{ // callseq $amt1, $amt2",
            [(callseq_start timm:$amt1, timm:$amt2)]>;
def Callseq_End :
  NVPTXInst<(outs),
            (ins i32imm:$amt1, i32imm:$amt2),
            "\\} // callseq $amt1",
            [(callseq_end timm:$amt1, timm:$amt2)]>;

// trap instruction
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
// This won't be necessary in a future version of ptxas.
def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;

// Call prototype wrapper
// The single operand is printed through printProtoIdent.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
  SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}
def CALL_PROTOTYPE :
  NVPTXInst<(outs),
            (ins ProtoIdent:$ident),
            "$ident",
            [(CallPrototype (i32 texternalsym:$ident))]>;

// Type profile for dynamic stack allocation: one result and two integer
// operands, with the result constrained to the type of operand 1.
def SDTDynAllocaOp :
  SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisInt<2>]>;
def dyn_alloca :
  SDNode<"NVPTXISD::DYNAMIC_STACKALLOC", SDTDynAllocaOp,
         [SDNPHasChain, SDNPSideEffect]>;

// Dynamic stack allocation.  Each form prints an `alloca` followed by a
// `cvta.local` applied in place to the returned pointer.  The 32- and 64-bit
// variants differ only in the register class and the .u32/.u64 suffix; the
// alignment operand is an i32 immediate in both.
def DYNAMIC_STACKALLOC32 :
  NVPTXInst<(outs Int32Regs:$ptr),
            (ins Int32Regs:$size, i32imm:$align),
            "alloca.u32 \t$ptr, $size, $align;\n\t"
            "cvta.local.u32 \t$ptr, $ptr;",
            [(set (i32 Int32Regs:$ptr),
                  (dyn_alloca Int32Regs:$size, (i32 timm:$align)))]>,
  Requires<[hasPTX<73>, hasSM<52>]>;

def DYNAMIC_STACKALLOC64 :
  NVPTXInst<(outs Int64Regs:$ptr),
            (ins Int64Regs:$size, i32imm:$align),
            "alloca.u64 \t$ptr, $size, $align;\n\t"
            "cvta.local.u64 \t$ptr, $ptr;",
            [(set Int64Regs:$ptr,
                  (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>,
  Requires<[hasPTX<73>, hasSM<52>]>;

include "NVPTXIntrinsics.td"

//-----------------------------------
// Notes
//-----------------------------------
// This note was written when BSWAP was still expanded, and lists the more
// efficient lowerings that were considered at the time:
// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
//   registers
// - for sm_20, use prmt (use vector scalar mov to get the pack and
//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
//   registers.
// The bswap patterns that follow this note select PRMT directly.
//-----------------------------------
// Byte swap (bswap) through PRMT
//-----------------------------------
// Every pattern passes (i32 0) as PRMT's second source; only the selector
// differs: 0x0123 for a full 32-bit byte swap, 0x2301 for the per-element
// swap of a v2i16.

// i32: one PRMT over the whole register.
def : Pat<(i32 (bswap i32:$a)),
          (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x0123))>;

// v2i16: one PRMT, swapping the bytes of each 16-bit element.
def : Pat<(v2i16 (bswap v2i16:$a)),
          (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x2301))>;

// i64: each 32-bit half is byte-swapped on its own, then the halves are
// re-packed with the swapped high half passed first and the swapped low half
// second.
def : Pat<(i64 (bswap i64:$a)),
          (V2I32toI64
            (INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
            (INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;