xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andricinclude "NVPTXInstrFormats.td"
140b57cec5SDimitry Andric
// Operands for the two 16-bit floating-point value types (f16 and bf16).
// Each record sets OperandType to "OPERAND_IMMEDIATE".
def f16imm : Operand<f16> {
  let OperandType = "OPERAND_IMMEDIATE";
}
def bf16imm : Operand<bf16> {
  let OperandType = "OPERAND_IMMEDIATE";
}
200b57cec5SDimitry Andric
// Vector-specific instruction properties.
// One VecInstTypeEnum record per property; values 1..7 are assigned in order
// and isVecOther uses 15.
def isVecLD : VecInstTypeEnum<1>;
def isVecST : VecInstTypeEnum<2>;
def isVecBuild : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert : VecInstTypeEnum<6>;
def isVecDest : VecInstTypeEnum<7>;
def isVecOther : VecInstTypeEnum<15>;
300b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

// Operand of type OtherVT (by its name, presumably a branch destination).
def brtarget : Operand<OtherVT>;

// CVT conversion modes.
// Every leaf matches a single i32 constant. The constants must match the enum
// in NVPTX.h.

// Base modes: 0x0 through 0x9.
def CvtNONE : PatLeaf<(i32 0x0)>;
def CvtRNI : PatLeaf<(i32 0x1)>;
def CvtRZI : PatLeaf<(i32 0x2)>;
def CvtRMI : PatLeaf<(i32 0x3)>;
def CvtRPI : PatLeaf<(i32 0x4)>;
def CvtRN : PatLeaf<(i32 0x5)>;
def CvtRZ : PatLeaf<(i32 0x6)>;
def CvtRM : PatLeaf<(i32 0x7)>;
def CvtRP : PatLeaf<(i32 0x8)>;
def CvtRNA : PatLeaf<(i32 0x9)>;

// _FTZ variants: the base value with 0x10 added.
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ : PatLeaf<(i32 0x18)>;

// SAT modes: 0x20, and 0x30 for the _FTZ form.
def CvtSAT : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;

// _RELU variants: the base value with 0x40 added.
def CvtNONE_RELU : PatLeaf<(i32 0x40)>;
def CvtRN_RELU : PatLeaf<(i32 0x45)>;
def CvtRZ_RELU : PatLeaf<(i32 0x46)>;

// i32 operand holding one of the conversion modes above; it is printed with
// the "printCvtMode" print method.
def CvtMode : Operand<i32> {
  let PrintMethod = "printCvtMode";
}
700b57cec5SDimitry Andric
// Compare modes.
// Every leaf matches a single i32 constant. The constants must match the enum
// in NVPTX.h.

// Base modes: 0 through 17.
def CmpEQ : PatLeaf<(i32 0)>;
def CmpNE : PatLeaf<(i32 1)>;
def CmpLT : PatLeaf<(i32 2)>;
def CmpLE : PatLeaf<(i32 3)>;
def CmpGT : PatLeaf<(i32 4)>;
def CmpGE : PatLeaf<(i32 5)>;
def CmpLO : PatLeaf<(i32 6)>;
def CmpLS : PatLeaf<(i32 7)>;
def CmpHI : PatLeaf<(i32 8)>;
def CmpHS : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
def CmpLEU : PatLeaf<(i32 13)>;
def CmpGTU : PatLeaf<(i32 14)>;
def CmpGEU : PatLeaf<(i32 15)>;
def CmpNUM : PatLeaf<(i32 16)>;
def CmpNAN : PatLeaf<(i32 17)>;

// _FTZ variants: the base value with 0x100 added. There are no _FTZ records
// for LO, LS, HI and HS (values 6 through 9).
def CmpEQ_FTZ : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// i32 operand printed with the "printCmpMode" print method.
def CmpMode : Operand<i32> {
  let PrintMethod = "printCmpMode";
}

// i32 operand printed with the "printVecElement" print method.
def VecElement : Operand<i32> {
  let PrintMethod = "printVecElement";
}
1130b57cec5SDimitry Andric
// PRMT modes.
// Every leaf matches a single i32 constant (0x0 through 0x6). The constants
// must match the enum in NVPTX.h.
def PrmtNONE : PatLeaf<(i32 0x0)>;
def PrmtF4E : PatLeaf<(i32 0x1)>;
def PrmtB4E : PatLeaf<(i32 0x2)>;
def PrmtRC8 : PatLeaf<(i32 0x3)>;
def PrmtECL : PatLeaf<(i32 0x4)>;
def PrmtECR : PatLeaf<(i32 0x5)>;
def PrmtRC16 : PatLeaf<(i32 0x6)>;

// i32 operand printed with the "printPrmtMode" print method.
def PrmtMode : Operand<i32> {
  let PrintMethod = "printPrmtMode";
}
1275f757f3fSDimitry Andric
1285f757f3fSDimitry Andric
//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//

// The string given to each Predicate is the condition text that the predicate
// stands for; it is reproduced verbatim wherever the predicate is required.

// Feature queries forwarded to the Subtarget object.
def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;

// f32 flush-to-zero on/off pair, plus the rsqrt optimization switch.
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
def doRsqrtOpt : Predicate<"doRsqrtOpt()">;

// Note: this condition is the bare name "doMulWide", not a call expression.
def doMulWide : Predicate<"doMulWide">;

// FMA and unsafe-FP-math switches, each with its negation.
def allowFMA : Predicate<"allowFMA()">;
def noFMA : Predicate<"!allowFMA()">;
def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def noUnsafeFPMath : Predicate<"!allowUnsafeFPMath()">;

// f32 division level: 0 selects the APPROX predicate, 1 the FULL predicate.
def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;

// f32 square root: RN when usePrecSqrtF32() holds, APPROX otherwise.
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;

// Subtarget hasHWROT32() query and its negation.
def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;

// Constant predicates.
def True : Predicate<"true">;
def False : Predicate<"false">;

// Minimum-version predicates: hasPTX<N> / hasSM<N> hold when the subtarget's
// PTX / SM version is at least N.
class hasPTX<int version>
  : Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>
  : Predicate<"Subtarget->getSmVersion() >= " # version>;

// Explicit records for arch-accelerated SM versions.
// This is an exact match on the full SM version (901), not a lower bound.
def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;

// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
// (the two adjacent string literals form a single condition).
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
                          "&& Subtarget->getPTXVersion() >= 64)">;

// Short-pointer predicates: a 64-bit target machine whose pointer size for the
// named address space is 32 bits.
def useShortPtrLocal
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">;
def useShortPtrShared
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">;
def useShortPtrConst
  : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">;

// Half-precision math availability, as reported by the subtarget.
def useFP16Math : Predicate<"Subtarget->allowFP16Math()">;
def hasBF16Math : Predicate<"Subtarget->hasBF16Math()">;
1820b57cec5SDimitry Andric
// Helper class to aid conversion between ValueType and a matching RegisterClass.
//
// The lookup is keyed on the record name of T (obtained via !cast<string>) and
// the chosen register class is exposed through the `ret` field, e.g.
// ValueToRegClass<f32>.ret.
//
// As the table shows, the 16-bit types (i16, f16, bf16) all map to Int16Regs
// and the packed two-element types (v2i16, v2f16, v2bf16) map to Int32Regs.
// There is no default arm, so !cond finds no true condition for any name that
// is not listed below.
class ValueToRegClass<ValueType T> {
  string name = !cast<string>(T);
  NVPTXRegClass ret = !cond(
    // Integer types.
    !eq(name, "i1"): Int1Regs,
    !eq(name, "i16"): Int16Regs,
    !eq(name, "v2i16"): Int32Regs,
    !eq(name, "i32"): Int32Regs,
    !eq(name, "i64"): Int64Regs,
    // Half-precision types share the integer register classes.
    !eq(name, "f16"): Int16Regs,
    !eq(name, "v2f16"): Int32Regs,
    !eq(name, "bf16"): Int16Regs,
    !eq(name, "v2bf16"): Int32Regs,
    // Single and double precision.
    !eq(name, "f32"): Float32Regs,
    !eq(name, "f64"): Float64Regs,
    // Argument register classes, keyed "a<type>".
    !eq(name, "ai32"): Int32ArgRegs,
    !eq(name, "ai64"): Int64ArgRegs,
    !eq(name, "af32"): Float32ArgRegs,
    // The key for Float64ArgRegs was spelled "if64", which breaks the
    // "a<type>" pattern of the three keys above. Accept the consistent
    // spelling "af64" and keep the historical one so nothing that relied on
    // it changes behavior.
    !or(!eq(name, "af64"), !eq(name, "if64")): Float64ArgRegs
  );
}
205bdd1243dSDimitry Andric
206bdd1243dSDimitry Andric
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Integer template with one destination and two sources, instantiated for
// 64-, 32- and 16-bit registers. Each width gets a register/register ("rr")
// and a register/immediate ("ri") record. The width is appended directly to
// OpcStr, so the instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string OpcStr, SDNode OpNode> {
  // 64-bit forms.
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;

  // 32-bit forms; the register sources are typed i32 explicitly in the pattern.
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;

  // 16-bit forms.
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, i16imm:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
2390b57cec5SDimitry Andric
// Packed 16x2 integer instruction: both sources and the result are v2i16
// values held in Int32Regs, and the mnemonic is "<OpcStr>16x2".
// Selected only when PTX version >= 80 and SM version >= 90.
class I16x2<string OpcStr, SDNode OpNode>
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # "16x2 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)))]>,
    Requires<[hasPTX<80>, hasSM<90>]>;
2455f757f3fSDimitry Andric
// Template for the carry-style integer add/sub instructions (one destination,
// two sources). The instructions are named "<OpcStr>.s32" (e.g. "addc.cc.s32")
// and "<OpcStr>.s64". Every record is marked hasSideEffects = 1, and the
// 64-bit records additionally require PTX version >= 43.
multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
  let hasSideEffects = 1 in {
    // 32-bit forms.
    def i32rr :
      NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
                OpcStr # ".s32 \t$dst, $a, $b;",
                [(set Int32Regs:$dst,
                      (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
    def i32ri :
      NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, i32imm:$b),
                OpcStr # ".s32 \t$dst, $a, $b;",
                [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;

    // 64-bit forms.
    def i64rr :
      NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, Int64Regs:$b),
                OpcStr # ".s64 \t$dst, $a, $b;",
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
      Requires<[hasPTX<43>]>;
    def i64ri :
      NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, i64imm:$b),
                OpcStr # ".s64 \t$dst, $a, $b;",
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
      Requires<[hasPTX<43>]>;
  }
}
2700b57cec5SDimitry Andric
// Floating-point template with one destination and two sources. The
// instructions are named "<OpcStr>.<type>" (e.g. "min.f64"), and records are
// produced for f64, f32, f16, f16x2, bf16 and bf16x2.
//
// Apart from f64, every type also gets an "_ftz" record (flush subnormal
// inputs and results to sign-preserving zero), guarded by doF32FTZ.
// Register/immediate ("ri") records exist only for f64 and f32. The f16 and
// bf16 records require useFP16Math and hasBF16Math respectively; scalar
// half values live in Int16Regs and the packed x2 values in Int32Regs.
//
// This multiclass should be used for nodes that cannot be folded into FMAs.
// For nodes that can be folded into FMAs (i.e. adds and muls), use
// F3_fma_component.
multiclass F3<string OpcStr, SDNode OpNode> {
  // f64.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b),
              OpcStr # ".f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst,
                    (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b),
              OpcStr # ".f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;

  // f32, flush-to-zero forms first.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[doF32FTZ]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[doF32FTZ]>;
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;

  // f16 scalar.
  def f16rr_ftz :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # ".ftz.f16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst,
                    (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
    Requires<[useFP16Math, doF32FTZ]>;
  def f16rr :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # ".f16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst,
                    (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
    Requires<[useFP16Math]>;

  // f16 packed pair.
  def f16x2rr_ftz :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".ftz.f16x2 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
    Requires<[useFP16Math, doF32FTZ]>;
  def f16x2rr :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".f16x2 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
    Requires<[useFP16Math]>;

  // bf16 scalar.
  def bf16rr_ftz :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # ".ftz.bf16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst,
                    (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
    Requires<[hasBF16Math, doF32FTZ]>;
  def bf16rr :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # ".bf16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst,
                    (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
    Requires<[hasBF16Math]>;

  // bf16 packed pair.
  def bf16x2rr_ftz :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".ftz.bf16x2 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
    Requires<[hasBF16Math, doF32FTZ]>;
  def bf16x2rr :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".bf16x2 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
    Requires<[hasBF16Math]>;
}
3650b57cec5SDimitry Andric
3660b57cec5SDimitry Andric// Template for instructions which take three FP args.  The
3670b57cec5SDimitry Andric// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
3680b57cec5SDimitry Andric//
3690b57cec5SDimitry Andric// Also defines ftz (flush subnormal inputs and results to sign-preserving
3700b57cec5SDimitry Andric// zero) variants for fp32/fp16 functions.
3710b57cec5SDimitry Andric//
3720b57cec5SDimitry Andric// This multiclass should be used for nodes that can be folded to make fma ops.
3730b57cec5SDimitry Andric// In this case, we use the ".rn" variant when FMA is disabled, as this behaves
3740b57cec5SDimitry Andric// just like the non ".rn" op, but prevents ptxas from creating FMAs.
3750b57cec5SDimitry Andricmulticlass F3_fma_component<string OpcStr, SDNode OpNode> {
3760b57cec5SDimitry Andric   def f64rr :
3770b57cec5SDimitry Andric     NVPTXInst<(outs Float64Regs:$dst),
3780b57cec5SDimitry Andric               (ins Float64Regs:$a, Float64Regs:$b),
3790b57cec5SDimitry Andric               !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
3800b57cec5SDimitry Andric               [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
3810b57cec5SDimitry Andric               Requires<[allowFMA]>;
3820b57cec5SDimitry Andric   def f64ri :
3830b57cec5SDimitry Andric     NVPTXInst<(outs Float64Regs:$dst),
3840b57cec5SDimitry Andric               (ins Float64Regs:$a, f64imm:$b),
3850b57cec5SDimitry Andric               !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
3860b57cec5SDimitry Andric               [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
3870b57cec5SDimitry Andric               Requires<[allowFMA]>;
3880b57cec5SDimitry Andric   def f32rr_ftz :
3890b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
3900b57cec5SDimitry Andric               (ins Float32Regs:$a, Float32Regs:$b),
3910b57cec5SDimitry Andric               !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
3920b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
3930b57cec5SDimitry Andric               Requires<[allowFMA, doF32FTZ]>;
3940b57cec5SDimitry Andric   def f32ri_ftz :
3950b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
3960b57cec5SDimitry Andric               (ins Float32Regs:$a, f32imm:$b),
3970b57cec5SDimitry Andric               !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
3980b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
3990b57cec5SDimitry Andric               Requires<[allowFMA, doF32FTZ]>;
4000b57cec5SDimitry Andric   def f32rr :
4010b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4020b57cec5SDimitry Andric               (ins Float32Regs:$a, Float32Regs:$b),
4030b57cec5SDimitry Andric               !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
4040b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
4050b57cec5SDimitry Andric               Requires<[allowFMA]>;
4060b57cec5SDimitry Andric   def f32ri :
4070b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4080b57cec5SDimitry Andric               (ins Float32Regs:$a, f32imm:$b),
4090b57cec5SDimitry Andric               !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
4100b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
4110b57cec5SDimitry Andric               Requires<[allowFMA]>;
4120b57cec5SDimitry Andric
4130b57cec5SDimitry Andric   def f16rr_ftz :
41406c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
41506c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
4160b57cec5SDimitry Andric               !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
41706c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
4180b57cec5SDimitry Andric               Requires<[useFP16Math, allowFMA, doF32FTZ]>;
4190b57cec5SDimitry Andric   def f16rr :
42006c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
42106c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
4220b57cec5SDimitry Andric               !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
42306c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
4240b57cec5SDimitry Andric               Requires<[useFP16Math, allowFMA]>;
4250b57cec5SDimitry Andric
4260b57cec5SDimitry Andric   def f16x2rr_ftz :
42706c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
42806c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
4290b57cec5SDimitry Andric               !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
43006c3fb27SDimitry Andric               [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
4310b57cec5SDimitry Andric               Requires<[useFP16Math, allowFMA, doF32FTZ]>;
4320b57cec5SDimitry Andric   def f16x2rr :
43306c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
43406c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
4350b57cec5SDimitry Andric               !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
43606c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
4370b57cec5SDimitry Andric               Requires<[useFP16Math, allowFMA]>;
43806c3fb27SDimitry Andric   def bf16rr_ftz :
43906c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
44006c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
44106c3fb27SDimitry Andric               !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"),
44206c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
44306c3fb27SDimitry Andric               Requires<[hasBF16Math, allowFMA, doF32FTZ]>;
44406c3fb27SDimitry Andric   def bf16rr :
44506c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
44606c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
44706c3fb27SDimitry Andric               !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
44806c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
44906c3fb27SDimitry Andric               Requires<[hasBF16Math, allowFMA]>;
4500b57cec5SDimitry Andric
45106c3fb27SDimitry Andric   def bf16x2rr_ftz :
45206c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
45306c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
45406c3fb27SDimitry Andric               !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, $b;"),
45506c3fb27SDimitry Andric               [(set (v2bf16 Int32Regs:$dst), (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
45606c3fb27SDimitry Andric               Requires<[hasBF16Math, allowFMA, doF32FTZ]>;
45706c3fb27SDimitry Andric   def bf16x2rr :
45806c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
45906c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
46006c3fb27SDimitry Andric               !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
46106c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
46206c3fb27SDimitry Andric               Requires<[hasBF16Math, allowFMA]>;
4630b57cec5SDimitry Andric   // These have strange names so we don't perturb existing mir tests.
4640b57cec5SDimitry Andric   def _rnf64rr :
4650b57cec5SDimitry Andric     NVPTXInst<(outs Float64Regs:$dst),
4660b57cec5SDimitry Andric               (ins Float64Regs:$a, Float64Regs:$b),
4670b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
4680b57cec5SDimitry Andric               [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
4690b57cec5SDimitry Andric               Requires<[noFMA]>;
4700b57cec5SDimitry Andric   def _rnf64ri :
4710b57cec5SDimitry Andric     NVPTXInst<(outs Float64Regs:$dst),
4720b57cec5SDimitry Andric               (ins Float64Regs:$a, f64imm:$b),
4730b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
4740b57cec5SDimitry Andric               [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
4750b57cec5SDimitry Andric               Requires<[noFMA]>;
4760b57cec5SDimitry Andric   def _rnf32rr_ftz :
4770b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4780b57cec5SDimitry Andric               (ins Float32Regs:$a, Float32Regs:$b),
4790b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
4800b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
4810b57cec5SDimitry Andric               Requires<[noFMA, doF32FTZ]>;
4820b57cec5SDimitry Andric   def _rnf32ri_ftz :
4830b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4840b57cec5SDimitry Andric               (ins Float32Regs:$a, f32imm:$b),
4850b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
4860b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
4870b57cec5SDimitry Andric               Requires<[noFMA, doF32FTZ]>;
4880b57cec5SDimitry Andric   def _rnf32rr :
4890b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4900b57cec5SDimitry Andric               (ins Float32Regs:$a, Float32Regs:$b),
4910b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
4920b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
4930b57cec5SDimitry Andric               Requires<[noFMA]>;
4940b57cec5SDimitry Andric   def _rnf32ri :
4950b57cec5SDimitry Andric     NVPTXInst<(outs Float32Regs:$dst),
4960b57cec5SDimitry Andric               (ins Float32Regs:$a, f32imm:$b),
4970b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
4980b57cec5SDimitry Andric               [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
4990b57cec5SDimitry Andric               Requires<[noFMA]>;
5000b57cec5SDimitry Andric   def _rnf16rr_ftz :
50106c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
50206c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
5030b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
50406c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
5050b57cec5SDimitry Andric               Requires<[useFP16Math, noFMA, doF32FTZ]>;
5060b57cec5SDimitry Andric   def _rnf16rr :
50706c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
50806c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
5090b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
51006c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
5110b57cec5SDimitry Andric               Requires<[useFP16Math, noFMA]>;
5120b57cec5SDimitry Andric   def _rnf16x2rr_ftz :
51306c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
51406c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
5150b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
51606c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
5170b57cec5SDimitry Andric               Requires<[useFP16Math, noFMA, doF32FTZ]>;
5180b57cec5SDimitry Andric   def _rnf16x2rr :
51906c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
52006c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
5210b57cec5SDimitry Andric               !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
52206c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
5230b57cec5SDimitry Andric               Requires<[useFP16Math, noFMA]>;
52406c3fb27SDimitry Andric  def _rnbf16rr_ftz :
52506c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
52606c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
52706c3fb27SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.bf16 \t$dst, $a, $b;"),
52806c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
52906c3fb27SDimitry Andric               Requires<[hasBF16Math, noFMA, doF32FTZ]>;
53006c3fb27SDimitry Andric   def _rnbf16rr :
53106c3fb27SDimitry Andric     NVPTXInst<(outs Int16Regs:$dst),
53206c3fb27SDimitry Andric               (ins Int16Regs:$a, Int16Regs:$b),
53306c3fb27SDimitry Andric               !strconcat(OpcStr, ".rn.bf16 \t$dst, $a, $b;"),
53406c3fb27SDimitry Andric               [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
53506c3fb27SDimitry Andric               Requires<[hasBF16Math, noFMA]>;
53606c3fb27SDimitry Andric   def _rnbf16x2rr_ftz :
53706c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
53806c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
53906c3fb27SDimitry Andric               !strconcat(OpcStr, ".rn.ftz.bf16x2 \t$dst, $a, $b;"),
54006c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
54106c3fb27SDimitry Andric               Requires<[hasBF16Math, noFMA, doF32FTZ]>;
54206c3fb27SDimitry Andric   def _rnbf16x2rr :
54306c3fb27SDimitry Andric     NVPTXInst<(outs Int32Regs:$dst),
54406c3fb27SDimitry Andric               (ins Int32Regs:$a, Int32Regs:$b),
54506c3fb27SDimitry Andric               !strconcat(OpcStr, ".rn.bf16x2 \t$dst, $a, $b;"),
54606c3fb27SDimitry Andric               [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
54706c3fb27SDimitry Andric               Requires<[hasBF16Math, noFMA]>;
5480b57cec5SDimitry Andric}
5490b57cec5SDimitry Andric
5500b57cec5SDimitry Andric// Template for operations which take two f32 or f64 operands.  Provides three
5510b57cec5SDimitry Andric// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
5520b57cec5SDimitry Andric// subnormal inputs and results to zero).
// The two operands are the result ($dst) and a single source ($a); every
// variant selects (OpNode $a) on the register class of the same width.
//   OpcStr - mnemonic prefix; the type suffix is appended by each def.
//   OpNode - the unary SelectionDAG node matched by the patterns.
5530b57cec5SDimitry Andricmulticlass F2<string OpcStr, SDNode OpNode> {
5540b57cec5SDimitry Andric   def f64 :     NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
5550b57cec5SDimitry Andric                           !strconcat(OpcStr, ".f64 \t$dst, $a;"),
5560b57cec5SDimitry Andric                           [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
   // Same pattern as f32 below, but only available under the doF32FTZ
   // predicate and printed with the .ftz modifier.
5570b57cec5SDimitry Andric   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
5580b57cec5SDimitry Andric                           !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
5590b57cec5SDimitry Andric                           [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
5600b57cec5SDimitry Andric                           Requires<[doF32FTZ]>;
5610b57cec5SDimitry Andric   def f32 :     NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
5620b57cec5SDimitry Andric                           !strconcat(OpcStr, ".f32 \t$dst, $a;"),
5630b57cec5SDimitry Andric                           [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
5640b57cec5SDimitry Andric}
5650b57cec5SDimitry Andric
// Half-precision counterpart of F2: one result ($dst) and one source ($a) for
// bf16, bf16x2, f16 and f16x2.  Scalar halves are held in Int16Regs and packed
// pairs in Int32Regs, so each pattern names the value type explicitly with a
// (bf16 ...), (v2bf16 ...), (f16 ...) or (v2f16 ...) cast on the source.
//   OpcStr - mnemonic prefix; the type suffix is appended by each def.
//   OpNode - the unary SelectionDAG node matched by the patterns.
// Only the f16/f16x2 variants have a .ftz form (guarded by doF32FTZ); the bf16
// variants are defined without one.
5665f757f3fSDimitry Andricmulticlass F2_Support_Half<string OpcStr, SDNode OpNode> {
   // bf16 forms require hasSM<80> and hasPTX<70>.
5675f757f3fSDimitry Andric   def bf16 :      NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
5685f757f3fSDimitry Andric                           !strconcat(OpcStr, ".bf16 \t$dst, $a;"),
5695f757f3fSDimitry Andric                           [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a)))]>,
5705f757f3fSDimitry Andric                           Requires<[hasSM<80>, hasPTX<70>]>;
5715f757f3fSDimitry Andric   def bf16x2 :    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
5725f757f3fSDimitry Andric                           !strconcat(OpcStr, ".bf16x2 \t$dst, $a;"),
5735f757f3fSDimitry Andric                           [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a)))]>,
5745f757f3fSDimitry Andric                           Requires<[hasSM<80>, hasPTX<70>]>;
   // f16 forms require hasSM<53> and hasPTX<65>; the _ftz defs additionally
   // require doF32FTZ.
5755f757f3fSDimitry Andric   def f16_ftz :   NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
5765f757f3fSDimitry Andric                           !strconcat(OpcStr, ".ftz.f16 \t$dst, $a;"),
5775f757f3fSDimitry Andric                           [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
5785f757f3fSDimitry Andric                           Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
5795f757f3fSDimitry Andric   def f16x2_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
5805f757f3fSDimitry Andric                           !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a;"),
5815f757f3fSDimitry Andric                           [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
5825f757f3fSDimitry Andric                           Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
5835f757f3fSDimitry Andric   def f16 :       NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
5845f757f3fSDimitry Andric                           !strconcat(OpcStr, ".f16 \t$dst, $a;"),
5855f757f3fSDimitry Andric                           [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
5865f757f3fSDimitry Andric                           Requires<[hasSM<53>, hasPTX<65>]>;
5875f757f3fSDimitry Andric   def f16x2 :     NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
5885f757f3fSDimitry Andric                           !strconcat(OpcStr, ".f16x2 \t$dst, $a;"),
5895f757f3fSDimitry Andric                           [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
5905f757f3fSDimitry Andric                           Requires<[hasSM<53>, hasPTX<65>]>;
5915f757f3fSDimitry Andric
5925f757f3fSDimitry Andric}
5935f757f3fSDimitry Andric
5940b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5950b57cec5SDimitry Andric// NVPTX Instructions.
5960b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5970b57cec5SDimitry Andric
5980b57cec5SDimitry Andric//-----------------------------------
5990b57cec5SDimitry Andric// Type Conversion
6000b57cec5SDimitry Andric//-----------------------------------
6010b57cec5SDimitry Andric
602e8d8bef9SDimitry Andriclet hasSideEffects = false in {
6030b57cec5SDimitry Andric  // Generate a cvt to the given type from all possible types.  Each instance
6040b57cec5SDimitry Andric  // takes a CvtMode immediate that defines the conversion mode to use.  It can
6050b57cec5SDimitry Andric  // be CvtNONE to omit a conversion mode.
  //
  //   ToType - destination type suffix printed in the mnemonic.
  //   RC     - register class of the result.
  //   Preds  - predicates attached to the generated defs (default: none).
  //
  // One def is produced per source type; its name suffix (_s8, _u8, ...) is
  // the source type.  The 8-bit, 16-bit, f16 and bf16 sources are all read
  // from Int16Regs.  None of the defs carries a selection pattern (the
  // pattern list is []).
  //
  // Preds is applied to every def except:
  //   * _bf16, which ignores Preds and picks its predicates from ToType;
  //   * _f32, which replaces Preds when ToType is "bf16".
  // Those two defs also print ${mode:relu} in addition to the base/ftz/sat
  // mode fields used by the others.
60606c3fb27SDimitry Andric  multiclass CVT_FROM_ALL<string ToType, RegisterClass RC, list<Predicate> Preds = []> {
6070b57cec5SDimitry Andric    def _s8 :
6080b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6090b57cec5SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
6100b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
61106c3fb27SDimitry Andric                ToType, ".s8 \t$dst, $src;"), []>,
61206c3fb27SDimitry Andric      Requires<Preds>;
6130b57cec5SDimitry Andric    def _u8 :
6140b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6150b57cec5SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
6160b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
61706c3fb27SDimitry Andric                ToType, ".u8 \t$dst, $src;"), []>,
61806c3fb27SDimitry Andric      Requires<Preds>;
6190b57cec5SDimitry Andric    def _s16 :
6200b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6210b57cec5SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
6220b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
62306c3fb27SDimitry Andric                ToType, ".s16 \t$dst, $src;"), []>,
62406c3fb27SDimitry Andric      Requires<Preds>;
6250b57cec5SDimitry Andric    def _u16 :
6260b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6270b57cec5SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
6280b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
62906c3fb27SDimitry Andric                ToType, ".u16 \t$dst, $src;"), []>,
63006c3fb27SDimitry Andric      Requires<Preds>;
6310b57cec5SDimitry Andric    def _s32 :
6320b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6330b57cec5SDimitry Andric                (ins Int32Regs:$src, CvtMode:$mode),
6340b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
63506c3fb27SDimitry Andric                ToType, ".s32 \t$dst, $src;"), []>,
63606c3fb27SDimitry Andric      Requires<Preds>;
6370b57cec5SDimitry Andric    def _u32 :
6380b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6390b57cec5SDimitry Andric                (ins Int32Regs:$src, CvtMode:$mode),
6400b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
64106c3fb27SDimitry Andric                ToType, ".u32 \t$dst, $src;"), []>,
64206c3fb27SDimitry Andric      Requires<Preds>;
6430b57cec5SDimitry Andric    def _s64 :
6440b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6450b57cec5SDimitry Andric                (ins Int64Regs:$src, CvtMode:$mode),
6460b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
64706c3fb27SDimitry Andric                ToType, ".s64 \t$dst, $src;"), []>,
64806c3fb27SDimitry Andric      Requires<Preds>;
6490b57cec5SDimitry Andric    def _u64 :
6500b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6510b57cec5SDimitry Andric                (ins Int64Regs:$src, CvtMode:$mode),
6520b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
65306c3fb27SDimitry Andric                ToType, ".u64 \t$dst, $src;"), []>,
65406c3fb27SDimitry Andric      Requires<Preds>;
6550b57cec5SDimitry Andric    def _f16 :
6560b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
65706c3fb27SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
6580b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
65906c3fb27SDimitry Andric                ToType, ".f16 \t$dst, $src;"), []>,
66006c3fb27SDimitry Andric      Requires<Preds>;
    // Source is bf16.  Preds is NOT used here: the predicate list depends
    // only on whether the destination is f32.
66106c3fb27SDimitry Andric    def _bf16 :
66206c3fb27SDimitry Andric      NVPTXInst<(outs RC:$dst),
66306c3fb27SDimitry Andric                (ins Int16Regs:$src, CvtMode:$mode),
66406c3fb27SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
66506c3fb27SDimitry Andric                ToType, ".bf16 \t$dst, $src;"), []>,
66606c3fb27SDimitry Andric      Requires<!if(!eq(ToType, "f32"),
66706c3fb27SDimitry Andric                   // bf16->f32 was introduced early.
66806c3fb27SDimitry Andric                   [hasPTX<71>, hasSM<80>],
66906c3fb27SDimitry Andric                   // bf16->everything else needs sm90/ptx78
67006c3fb27SDimitry Andric                   [hasPTX<78>, hasSM<90>])>;
    // Source is f32.  When the destination is bf16 the caller's Preds are
    // replaced by [hasPTX<70>, hasSM<80>]; otherwise Preds is used as given.
6710b57cec5SDimitry Andric    def _f32 :
6720b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6730b57cec5SDimitry Andric                (ins Float32Regs:$src, CvtMode:$mode),
67406c3fb27SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
67506c3fb27SDimitry Andric                ToType, ".f32 \t$dst, $src;"), []>,
67606c3fb27SDimitry Andric      Requires<!if(!eq(ToType, "bf16"),
67706c3fb27SDimitry Andric                   // f32->bf16 was introduced early.
67806c3fb27SDimitry Andric                   [hasPTX<70>, hasSM<80>],
67906c3fb27SDimitry Andric                   Preds)>;
6800b57cec5SDimitry Andric    def _f64 :
6810b57cec5SDimitry Andric      NVPTXInst<(outs RC:$dst),
6820b57cec5SDimitry Andric                (ins Float64Regs:$src, CvtMode:$mode),
6830b57cec5SDimitry Andric                !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
68406c3fb27SDimitry Andric                ToType, ".f64 \t$dst, $src;"), []>,
68506c3fb27SDimitry Andric      Requires<Preds>;
6860b57cec5SDimitry Andric  }
6870b57cec5SDimitry Andric
6880b57cec5SDimitry Andric  // Generate cvts from all types to all types.
  // Each defm yields CVT_<to>_<from> for every source type in CVT_FROM_ALL.
  // s8/u8 results share Int16Regs with s16/u16, and f16/bf16 results are
  // also placed in Int16Regs.  Only CVT_bf16 passes a predicate list; all
  // other instantiations use the default empty Preds.
6890b57cec5SDimitry Andric  defm CVT_s8  : CVT_FROM_ALL<"s8",  Int16Regs>;
6900b57cec5SDimitry Andric  defm CVT_u8  : CVT_FROM_ALL<"u8",  Int16Regs>;
6910b57cec5SDimitry Andric  defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
6920b57cec5SDimitry Andric  defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
6930b57cec5SDimitry Andric  defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
6940b57cec5SDimitry Andric  defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
6950b57cec5SDimitry Andric  defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
6960b57cec5SDimitry Andric  defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
69706c3fb27SDimitry Andric  defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
69806c3fb27SDimitry Andric  defm CVT_bf16 : CVT_FROM_ALL<"bf16", Int16Regs, [hasPTX<78>, hasSM<90>]>;
6990b57cec5SDimitry Andric  defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
7000b57cec5SDimitry Andric  defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
7010b57cec5SDimitry Andric
7020b57cec5SDimitry Andric  // These cvts are different from those above: The source and dest registers
7030b57cec5SDimitry Andric  // are of the same type.
  // Naming is CVT_INREG_<dst>_<src>: a signed conversion from the narrower
  // <src> type to <dst>, with both operands in the register class of <dst>.
  // They take no CvtMode operand (the asm string is fixed) and have no
  // selection pattern.
7040b57cec5SDimitry Andric  def CVT_INREG_s16_s8 :  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
7050b57cec5SDimitry Andric                                    "cvt.s16.s8 \t$dst, $src;", []>;
7060b57cec5SDimitry Andric  def CVT_INREG_s32_s8 :  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
7070b57cec5SDimitry Andric                                    "cvt.s32.s8 \t$dst, $src;", []>;
7080b57cec5SDimitry Andric  def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
7090b57cec5SDimitry Andric                                    "cvt.s32.s16 \t$dst, $src;", []>;
7100b57cec5SDimitry Andric  def CVT_INREG_s64_s8 :  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
7110b57cec5SDimitry Andric                                    "cvt.s64.s8 \t$dst, $src;", []>;
7120b57cec5SDimitry Andric  def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
7130b57cec5SDimitry Andric                                    "cvt.s64.s16 \t$dst, $src;", []>;
7140b57cec5SDimitry Andric  def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
7150b57cec5SDimitry Andric                                    "cvt.s64.s32 \t$dst, $src;", []>;
71604eeddc0SDimitry Andric
  // Two-source cvt that takes a pair of f32 registers ($src1, $src2) and
  // writes one RC register; used below for the packed f16x2 and bf16x2
  // results.  Only the base and relu fields of CvtMode are printed.  Requires
  // hasPTX<70> and hasSM<80>; no selection pattern is attached.
  // NOTE(review): despite its name, FromName is printed as the DESTINATION
  // type (it precedes the fixed ".f32" source suffix, in the same position as
  // ToType in CVT_FROM_ALL).
71704eeddc0SDimitry Andric  multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
71804eeddc0SDimitry Andric    def _f32 :
71904eeddc0SDimitry Andric      NVPTXInst<(outs RC:$dst),
72004eeddc0SDimitry Andric                (ins Float32Regs:$src1, Float32Regs:$src2,  CvtMode:$mode),
72104eeddc0SDimitry Andric                !strconcat("cvt${mode:base}${mode:relu}.",
72204eeddc0SDimitry Andric                FromName, ".f32 \t$dst, $src1, $src2;"), []>,
72306c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
72404eeddc0SDimitry Andric  }
72504eeddc0SDimitry Andric
  // Produces CVT_f16x2_f32 and CVT_bf16x2_f32; both results are in Int32Regs.
72606c3fb27SDimitry Andric  defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>;
72704eeddc0SDimitry Andric  defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
7280b57cec5SDimitry Andric}
7290b57cec5SDimitry Andric
7300b57cec5SDimitry Andric//-----------------------------------
731bdd1243dSDimitry Andric// Selection instructions (selp)
732bdd1243dSDimitry Andric//-----------------------------------
733bdd1243dSDimitry Andric
734bdd1243dSDimitry Andric// TODO: Missing slct
735bdd1243dSDimitry Andric
736bdd1243dSDimitry Andric// selp instructions that don't have any pattern matches; we explicitly use
737bdd1243dSDimitry Andric// them within this file.
738bdd1243dSDimitry Andriclet hasSideEffects = false in {
739bdd1243dSDimitry Andric  multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    // Pattern-less selp.<TypeStr> in all four operand combinations.  The def
    // suffix gives the kind of $a then $b: r = register of class RC,
    // i = immediate of operand class ImmCls.  $p is always an Int1Regs
    // predicate and the result is always in RC.  All four forms print the
    // same asm string.
740bdd1243dSDimitry Andric    def rr : NVPTXInst<(outs RC:$dst),
741bdd1243dSDimitry Andric                       (ins RC:$a, RC:$b, Int1Regs:$p),
742bdd1243dSDimitry Andric                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
743bdd1243dSDimitry Andric    def ri : NVPTXInst<(outs RC:$dst),
744bdd1243dSDimitry Andric                       (ins RC:$a, ImmCls:$b, Int1Regs:$p),
745bdd1243dSDimitry Andric                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
746bdd1243dSDimitry Andric    def ir : NVPTXInst<(outs RC:$dst),
747bdd1243dSDimitry Andric                       (ins ImmCls:$a, RC:$b, Int1Regs:$p),
748bdd1243dSDimitry Andric                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
749bdd1243dSDimitry Andric    def ii : NVPTXInst<(outs RC:$dst),
750bdd1243dSDimitry Andric                       (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
751bdd1243dSDimitry Andric                       !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
752bdd1243dSDimitry Andric  }
753bdd1243dSDimitry Andric
754bdd1243dSDimitry Andric  multiclass SELP_PATTERN<string TypeStr, ValueType T, RegisterClass RC,
755bdd1243dSDimitry Andric                          Operand ImmCls, SDNode ImmNode> {
    // Same four rr/ri/ir/ii forms as SELP, but each one also selects
    // (select $p, $a, $b) producing value type T.
    //   TypeStr - type suffix printed after "selp.".
    //   T       - value type of the selected result and register operands.
    //   RC      - register class holding T.
    //   ImmCls  - operand class used for immediate operands.
    //   ImmNode - DAG node matched for immediate operands.
756bdd1243dSDimitry Andric    def rr :
757bdd1243dSDimitry Andric      NVPTXInst<(outs RC:$dst),
758bdd1243dSDimitry Andric                (ins RC:$a, RC:$b, Int1Regs:$p),
759bdd1243dSDimitry Andric                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
760bdd1243dSDimitry Andric                [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T RC:$b)))]>;
    // Here the immediate is wrapped in (T ...); the ir and ii forms below
    // use a bare ImmNode without the type cast.
761bdd1243dSDimitry Andric    def ri :
762bdd1243dSDimitry Andric      NVPTXInst<(outs RC:$dst),
763bdd1243dSDimitry Andric                (ins RC:$a, ImmCls:$b, Int1Regs:$p),
764bdd1243dSDimitry Andric                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
765bdd1243dSDimitry Andric                [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T ImmNode:$b)))]>;
766bdd1243dSDimitry Andric    def ir :
767bdd1243dSDimitry Andric      NVPTXInst<(outs RC:$dst),
768bdd1243dSDimitry Andric                (ins ImmCls:$a, RC:$b, Int1Regs:$p),
769bdd1243dSDimitry Andric                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
770bdd1243dSDimitry Andric                [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, (T RC:$b)))]>;
771bdd1243dSDimitry Andric    def ii :
772bdd1243dSDimitry Andric      NVPTXInst<(outs RC:$dst),
773bdd1243dSDimitry Andric                (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
774bdd1243dSDimitry Andric                !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
775bdd1243dSDimitry Andric                [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
776bdd1243dSDimitry Andric  }
777bdd1243dSDimitry Andric}
778bdd1243dSDimitry Andric
779bdd1243dSDimitry Andric// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
780bdd1243dSDimitry Andric// good.
// Hence only the b-typed integer variants use SELP_PATTERN; the s/u variants
// use the pattern-less SELP multiclass.
781bdd1243dSDimitry Andricdefm SELP_b16 : SELP_PATTERN<"b16", i16, Int16Regs, i16imm, imm>;
782bdd1243dSDimitry Andricdefm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
783bdd1243dSDimitry Andricdefm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
784bdd1243dSDimitry Andricdefm SELP_b32 : SELP_PATTERN<"b32", i32, Int32Regs, i32imm, imm>;
785bdd1243dSDimitry Andricdefm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
786bdd1243dSDimitry Andricdefm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
787bdd1243dSDimitry Andricdefm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
788bdd1243dSDimitry Andricdefm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
789bdd1243dSDimitry Andricdefm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
// f16 and bf16 values live in Int16Regs, so their selects are printed as
// selp.b16 (TypeStr is "b16") while matching f16/bf16 typed selects.
79006c3fb27SDimitry Andricdefm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
79106c3fb27SDimitry Andricdefm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;
792bdd1243dSDimitry Andric
793bdd1243dSDimitry Andricdefm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
794bdd1243dSDimitry Andricdefm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
795bdd1243dSDimitry Andric
796bdd1243dSDimitry Andric// This does not work as tablegen fails to infer the type of 'imm'.
797bdd1243dSDimitry Andric// def v2f16imm : Operand<v2f16>;
79806c3fb27SDimitry Andric// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;
799bdd1243dSDimitry Andric
// Register/register select for the vector types held in a single Int32Regs
// register: each is mapped onto SELP_b32rr with the same operand order
// ($a, $b, $p).  No immediate forms are provided for these types here.
8005f757f3fSDimitry Andricforeach vt = [v2f16, v2bf16, v2i16, v4i8] in {
8015f757f3fSDimitry Andricdef : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))),
8025f757f3fSDimitry Andric          (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>;
8035f757f3fSDimitry Andric}
804bdd1243dSDimitry Andric
805bdd1243dSDimitry Andric//-----------------------------------
806bdd1243dSDimitry Andric// Test Instructions
807bdd1243dSDimitry Andric//-----------------------------------
808bdd1243dSDimitry Andric
// testp.infinite for f32 and f64, writing an Int1Regs predicate $p.  The
// trailing r/i in the def name says whether $a is a register or an immediate.
// None of these has a selection pattern (the pattern list is []).
809bdd1243dSDimitry Andricdef TESTINF_f32r : NVPTXInst<(outs Int1Regs:$p), (ins Float32Regs:$a),
810bdd1243dSDimitry Andric                             "testp.infinite.f32 \t$p, $a;",
811bdd1243dSDimitry Andric                             []>;
812bdd1243dSDimitry Andricdef TESTINF_f32i : NVPTXInst<(outs Int1Regs:$p), (ins f32imm:$a),
813bdd1243dSDimitry Andric                             "testp.infinite.f32 \t$p, $a;",
814bdd1243dSDimitry Andric                             []>;
815bdd1243dSDimitry Andricdef TESTINF_f64r : NVPTXInst<(outs Int1Regs:$p), (ins Float64Regs:$a),
816bdd1243dSDimitry Andric                             "testp.infinite.f64 \t$p, $a;",
817bdd1243dSDimitry Andric                             []>;
818bdd1243dSDimitry Andricdef TESTINF_f64i : NVPTXInst<(outs Int1Regs:$p), (ins f64imm:$a),
819bdd1243dSDimitry Andric                             "testp.infinite.f64 \t$p, $a;",
820bdd1243dSDimitry Andric                             []>;
821bdd1243dSDimitry Andric
822bdd1243dSDimitry Andric//-----------------------------------
8230b57cec5SDimitry Andric// Integer Arithmetic
8240b57cec5SDimitry Andric//-----------------------------------
8250b57cec5SDimitry Andric
8260b57cec5SDimitry Andric// Template for xor masquerading as int1 arithmetic.
// OpNode is the i1 DAG node to match; whatever node is given, the emitted
// instruction is always "xor.pred".  _rr takes two predicate registers, _ri a
// predicate register and an i1 immediate.
8270b57cec5SDimitry Andricmulticlass ADD_SUB_i1<SDNode OpNode> {
8280b57cec5SDimitry Andric   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
8290b57cec5SDimitry Andric                      "xor.pred \t$dst, $a, $b;",
8300b57cec5SDimitry Andric                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
8310b57cec5SDimitry Andric   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
8320b57cec5SDimitry Andric                      "xor.pred \t$dst, $a, $b;",
8330b57cec5SDimitry Andric                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
8340b57cec5SDimitry Andric}
8350b57cec5SDimitry Andric
8360b57cec5SDimitry Andric// int1 addition and subtraction are both just xor.
// (In one-bit arithmetic a+b and a-b are both equal to a xor b.)
8370b57cec5SDimitry Andricdefm ADD_i1 : ADD_SUB_i1<add>;
8380b57cec5SDimitry Andricdefm SUB_i1 : ADD_SUB_i1<sub>;
8390b57cec5SDimitry Andric
8400b57cec5SDimitry Andric// int16, int32, and int64 signed addition.  Since nvptx is 2's complement, we
8410b57cec5SDimitry Andric// also use these for unsigned arithmetic.
// I3 and I16x2 are defined outside this part of the file; the string passed
// is only the mnemonic prefix ("add.s" / "sub.s").
8420b57cec5SDimitry Andricdefm ADD : I3<"add.s", add>;
8430b57cec5SDimitry Andricdefm SUB : I3<"sub.s", sub>;
8440b57cec5SDimitry Andric
// Packed 16x2 add.  Note there is no matching SUB16x2 def in this group.
8455f757f3fSDimitry Andricdef ADD16x2 : I16x2<"add.s", add>;
8465f757f3fSDimitry Andric
84781ad6265SDimitry Andric// int32 and int64 addition and subtraction with carry-out.
// add.cc / sub.cc are matched from the addc / subc nodes.
// ADD_SUB_INT_CARRY is defined outside this part of the file.
84881ad6265SDimitry Andricdefm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
84981ad6265SDimitry Andricdefm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
8500b57cec5SDimitry Andric
85181ad6265SDimitry Andric// int32 and int64 addition and subtraction with carry-in and carry-out.
// addc.cc / subc.cc are matched from the adde / sube nodes.
85281ad6265SDimitry Andricdefm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
85381ad6265SDimitry Andricdefm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
8540b57cec5SDimitry Andric
// Low half of the product, matched from the generic mul node.
8550b57cec5SDimitry Andricdefm MULT : I3<"mul.lo.s", mul>;
8560b57cec5SDimitry Andric
// High half of the product; signed and unsigned need distinct instructions.
8570b57cec5SDimitry Andricdefm MULTHS : I3<"mul.hi.s", mulhs>;
8580b57cec5SDimitry Andricdefm MULTHU : I3<"mul.hi.u", mulhu>;
8590b57cec5SDimitry Andric
// Signed and unsigned division.
8600b57cec5SDimitry Andricdefm SDIV : I3<"div.s", sdiv>;
8610b57cec5SDimitry Andricdefm UDIV : I3<"div.u", udiv>;
8620b57cec5SDimitry Andric
8630b57cec5SDimitry Andric// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
8640b57cec5SDimitry Andric// will lower it.
8650b57cec5SDimitry Andricdefm SREM : I3<"rem.s", srem>;
8660b57cec5SDimitry Andricdefm UREM : I3<"rem.u", urem>;
8670b57cec5SDimitry Andric
8680b57cec5SDimitry Andric// Integer absolute value.  Emits "abs<SizeName>" on register class RC and
8690b57cec5SDimitry Andric// selects it for the generic abs node of value type T.  The instruction is
8700b57cec5SDimitry Andric// used directly; there is no NumBits parameter or shift/xor expansion here.
//   T        - integer value type matched by the pattern.
//   RC       - register class of both $dst and $a.
//   SizeName - type suffix appended to "abs", including the leading dot.
8715f757f3fSDimitry Andricmulticlass ABS<ValueType T, RegisterClass RC, string SizeName> {
  // Anonymous def: the record name is generated from the defm name.
8720b57cec5SDimitry Andric  def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
8730b57cec5SDimitry Andric                  !strconcat("abs", SizeName, " \t$dst, $a;"),
8745f757f3fSDimitry Andric                  [(set (T RC:$dst), (abs (T RC:$a)))]>;
8750b57cec5SDimitry Andric}
8765f757f3fSDimitry Andricdefm ABS_16 : ABS<i16, Int16Regs, ".s16">;
8775f757f3fSDimitry Andricdefm ABS_32 : ABS<i32, Int32Regs, ".s32">;
8785f757f3fSDimitry Andricdefm ABS_64 : ABS<i64, Int64Regs, ".s64">;
8790b57cec5SDimitry Andric
8800b57cec5SDimitry Andric// Integer min/max.
// Signed and unsigned variants use distinct mnemonics (.s / .u) and distinct
// DAG nodes (smax/umax/smin/umin).
8810b57cec5SDimitry Andricdefm SMAX : I3<"max.s", smax>;
8820b57cec5SDimitry Andricdefm UMAX : I3<"max.u", umax>;
8830b57cec5SDimitry Andricdefm SMIN : I3<"min.s", smin>;
8840b57cec5SDimitry Andricdefm UMIN : I3<"min.u", umin>;
8850b57cec5SDimitry Andric
// Packed 16x2 forms of the same four operations, via I16x2 (defined outside
// this part of the file).
8865f757f3fSDimitry Andricdef SMAX16x2 : I16x2<"max.s", smax>;
8875f757f3fSDimitry Andricdef UMAX16x2 : I16x2<"max.u", umax>;
8885f757f3fSDimitry Andricdef SMIN16x2 : I16x2<"min.s", smin>;
8895f757f3fSDimitry Andricdef UMIN16x2 : I16x2<"min.u", umin>;
8905f757f3fSDimitry Andric
8915f757f3fSDimitry Andric
8920b57cec5SDimitry Andric//
8930b57cec5SDimitry Andric// Wide multiplication
8940b57cec5SDimitry Andric//
8950b57cec5SDimitry Andricdef MULWIDES64 :
8960b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
8970b57cec5SDimitry Andric            "mul.wide.s32 \t$dst, $a, $b;", []>;
8980b57cec5SDimitry Andricdef MULWIDES64Imm :
8990b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
9000b57cec5SDimitry Andric            "mul.wide.s32 \t$dst, $a, $b;", []>;
9010b57cec5SDimitry Andricdef MULWIDES64Imm64 :
9020b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
9030b57cec5SDimitry Andric            "mul.wide.s32 \t$dst, $a, $b;", []>;
9040b57cec5SDimitry Andric
9050b57cec5SDimitry Andricdef MULWIDEU64 :
9060b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
9070b57cec5SDimitry Andric            "mul.wide.u32 \t$dst, $a, $b;", []>;
9080b57cec5SDimitry Andricdef MULWIDEU64Imm :
9090b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
9100b57cec5SDimitry Andric            "mul.wide.u32 \t$dst, $a, $b;", []>;
9110b57cec5SDimitry Andricdef MULWIDEU64Imm64 :
9120b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
9130b57cec5SDimitry Andric            "mul.wide.u32 \t$dst, $a, $b;", []>;
9140b57cec5SDimitry Andric
// mul.wide.s16: sources from Int16Regs, result in Int32Regs.
//   MULWIDES32      - register x register
//   MULWIDES32Imm   - register x i16 immediate
//   MULWIDES32Imm32 - register x i32-typed immediate (same mnemonic)
// The pattern lists are empty; selection happens through separate Pat<>
// records.
9150b57cec5SDimitry Andricdef MULWIDES32 :
9160b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
9170b57cec5SDimitry Andric            "mul.wide.s16 \t$dst, $a, $b;", []>;
9180b57cec5SDimitry Andricdef MULWIDES32Imm :
9190b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
9200b57cec5SDimitry Andric            "mul.wide.s16 \t$dst, $a, $b;", []>;
9210b57cec5SDimitry Andricdef MULWIDES32Imm32 :
9220b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
9230b57cec5SDimitry Andric            "mul.wide.s16 \t$dst, $a, $b;", []>;
9240b57cec5SDimitry Andric
// mul.wide.u16: unsigned counterpart of the MULWIDES32 family. Sources come
// from Int16Regs, the result goes to Int32Regs, and the second operand is a
// register, an i16 immediate, or an i32-typed immediate. The pattern lists
// are empty; selection happens through separate Pat<> records.
9250b57cec5SDimitry Andricdef MULWIDEU32 :
9260b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
9270b57cec5SDimitry Andric            "mul.wide.u16 \t$dst, $a, $b;", []>;
9280b57cec5SDimitry Andricdef MULWIDEU32Imm :
9290b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
9300b57cec5SDimitry Andric            "mul.wide.u16 \t$dst, $a, $b;", []>;
9310b57cec5SDimitry Andricdef MULWIDEU32Imm32 :
9320b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
9330b57cec5SDimitry Andric            "mul.wide.u16 \t$dst, $a, $b;", []>;
9340b57cec5SDimitry Andric
// Type profile shared by both mul.wide nodes: one result and two operands,
// with the two operands constrained to the same type. The profile places no
// constraint on the result type; the patterns that use these nodes spell it
// out explicitly.
9350b57cec5SDimitry Andricdef SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
// Target-specific DAG nodes NVPTXISD::MUL_WIDE_SIGNED / MUL_WIDE_UNSIGNED.
9360b57cec5SDimitry Andricdef mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
9370b57cec5SDimitry Andricdef mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
9380b57cec5SDimitry Andric
9390b57cec5SDimitry Andric// Matchers for signed, unsigned mul.wide ISD nodes.
// i32 results from 16-bit sources. Register/register nodes select
// MULWIDES32 / MULWIDEU32; register/immediate nodes select the *Imm
// variants. Every pattern is gated on the doMulWide predicate.
// Operands are written either by value type (i16:$a) or by register class
// (Int16Regs:$a) in these patterns.
94006c3fb27SDimitry Andricdef : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
94106c3fb27SDimitry Andric          (MULWIDES32 i16:$a, i16:$b)>,
9420b57cec5SDimitry Andric      Requires<[doMulWide]>;
9430b57cec5SDimitry Andricdef : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
9440b57cec5SDimitry Andric          (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
9450b57cec5SDimitry Andric      Requires<[doMulWide]>;
94606c3fb27SDimitry Andricdef : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
9470b57cec5SDimitry Andric          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
9480b57cec5SDimitry Andric      Requires<[doMulWide]>;
9490b57cec5SDimitry Andricdef : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
9500b57cec5SDimitry Andric          (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
9510b57cec5SDimitry Andric      Requires<[doMulWide]>;
9520b57cec5SDimitry Andric
// mul.wide ISD node matchers with i64 results from 32-bit sources.
// Register/register nodes select MULWIDES64 / MULWIDEU64; register/immediate
// nodes select the *Imm variants. Every pattern is gated on doMulWide.
95306c3fb27SDimitry Andricdef : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
9540b57cec5SDimitry Andric          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
9550b57cec5SDimitry Andric      Requires<[doMulWide]>;
9565f757f3fSDimitry Andricdef : Pat<(i64 (mul_wide_signed (i32 Int32Regs:$a), imm:$b)),
9570b57cec5SDimitry Andric          (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
9580b57cec5SDimitry Andric      Requires<[doMulWide]>;
95906c3fb27SDimitry Andricdef : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
9600b57cec5SDimitry Andric          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
9610b57cec5SDimitry Andric      Requires<[doMulWide]>;
9625f757f3fSDimitry Andricdef : Pat<(i64 (mul_wide_unsigned (i32 Int32Regs:$a), imm:$b)),
9630b57cec5SDimitry Andric          (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
9640b57cec5SDimitry Andric      Requires<[doMulWide]>;
9650b57cec5SDimitry Andric
9660b57cec5SDimitry Andric// Predicates used for converting some patterns to mul.wide.
// Each leaf matches an integer immediate whose value fits in a narrower
// width than the type it is carried in, so that it can be used as the
// narrow operand of a mul.wide instruction.

// Matches an immediate representable as a signed 32-bit integer.
9670b57cec5SDimitry Andricdef SInt32Const : PatLeaf<(imm), [{
9680b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
9690b57cec5SDimitry Andric  return v.isSignedIntN(32);
9700b57cec5SDimitry Andric}]>;
9710b57cec5SDimitry Andric
// Matches an immediate representable as an unsigned 32-bit integer.
9720b57cec5SDimitry Andricdef UInt32Const : PatLeaf<(imm), [{
9730b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
9740b57cec5SDimitry Andric  return v.isIntN(32);
9750b57cec5SDimitry Andric}]>;
9760b57cec5SDimitry Andric
// Matches an immediate representable as a signed 16-bit integer.
9770b57cec5SDimitry Andricdef SInt16Const : PatLeaf<(imm), [{
9780b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
9790b57cec5SDimitry Andric  return v.isSignedIntN(16);
9800b57cec5SDimitry Andric}]>;
9810b57cec5SDimitry Andric
// Matches an immediate representable as an unsigned 16-bit integer.
9820b57cec5SDimitry Andricdef UInt16Const : PatLeaf<(imm), [{
9830b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
9840b57cec5SDimitry Andric  return v.isIntN(16);
9850b57cec5SDimitry Andric}]>;
9860b57cec5SDimitry Andric
// Shift-amount predicates for the "extend, then shift left" -> mul.wide
// patterns below. The accepted ranges are inclusive: [0, 30] and [0, 14].
// The comparisons are signed, so a negative immediate is rejected.
987bdd1243dSDimitry Andricdef IntConst_0_30 : PatLeaf<(imm), [{
988bdd1243dSDimitry Andric  // Check if 0 <= v < 31; only then will the result of (x << v) be an int32.
9890b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
990bdd1243dSDimitry Andric  return v.sge(0) && v.slt(31);
9910b57cec5SDimitry Andric}]>;
9920b57cec5SDimitry Andric
993bdd1243dSDimitry Andricdef IntConst_0_14 : PatLeaf<(imm), [{
994bdd1243dSDimitry Andric  // Check if 0 <= v < 15; only then will the result of (x << v) be an int16.
9950b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
996bdd1243dSDimitry Andric  return v.sge(0) && v.slt(15);
9970b57cec5SDimitry Andric}]>;
9980b57cec5SDimitry Andric
// Node transforms that turn an immediate shift amount v into the equivalent
// multiplier (1 << v). The value 1 is created at the target width (32 or
// 16 bits), shifted left by v, and returned as a target constant of type
// i32 or i16 respectively. The patterns that use these transforms restrict
// v with IntConst_0_30 / IntConst_0_14.
9990b57cec5SDimitry Andricdef SHL2MUL32 : SDNodeXForm<imm, [{
10000b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
10010b57cec5SDimitry Andric  APInt temp(32, 1);
10020b57cec5SDimitry Andric  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
10030b57cec5SDimitry Andric}]>;
10040b57cec5SDimitry Andric
10050b57cec5SDimitry Andricdef SHL2MUL16 : SDNodeXForm<imm, [{
10060b57cec5SDimitry Andric  const APInt &v = N->getAPIntValue();
10070b57cec5SDimitry Andric  APInt temp(16, 1);
10080b57cec5SDimitry Andric  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
10090b57cec5SDimitry Andric}]>;
10100b57cec5SDimitry Andric
10110b57cec5SDimitry Andric// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
// The shift amount $b is rewritten to the multiplier (1 << $b) by
// SHL2MUL32 / SHL2MUL16 and passed as the immediate operand of the
// MULWIDE*Imm instruction. sext selects the signed form, zext the unsigned
// form. All four patterns are gated on doMulWide.
1012bdd1243dSDimitry Andricdef : Pat<(shl (sext Int32Regs:$a), (i32 IntConst_0_30:$b)),
10130b57cec5SDimitry Andric          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
10140b57cec5SDimitry Andric      Requires<[doMulWide]>;
1015bdd1243dSDimitry Andricdef : Pat<(shl (zext Int32Regs:$a), (i32 IntConst_0_30:$b)),
10160b57cec5SDimitry Andric          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
10170b57cec5SDimitry Andric      Requires<[doMulWide]>;
10180b57cec5SDimitry Andric
1019bdd1243dSDimitry Andricdef : Pat<(shl (sext Int16Regs:$a), (i16 IntConst_0_14:$b)),
10200b57cec5SDimitry Andric          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
10210b57cec5SDimitry Andric      Requires<[doMulWide]>;
1022bdd1243dSDimitry Andricdef : Pat<(shl (zext Int16Regs:$a), (i16 IntConst_0_14:$b)),
10230b57cec5SDimitry Andric          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
10240b57cec5SDimitry Andric      Requires<[doMulWide]>;
10250b57cec5SDimitry Andric
10260b57cec5SDimitry Andric// Convert "sign/zero-extend then multiply" to mul.wide.
// 64-bit results. Both operands extended the same way select the
// register/register form. An extended operand multiplied by an i64 constant
// that fits in 32 bits (SInt32Const for sext, UInt32Const for zext) selects
// the *Imm64 form, which takes the constant as an i64-typed immediate.
// All patterns are gated on doMulWide.
10270b57cec5SDimitry Andricdef : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
10280b57cec5SDimitry Andric          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
10290b57cec5SDimitry Andric      Requires<[doMulWide]>;
10300b57cec5SDimitry Andricdef : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
10310b57cec5SDimitry Andric          (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
10320b57cec5SDimitry Andric      Requires<[doMulWide]>;
10330b57cec5SDimitry Andric
10340b57cec5SDimitry Andricdef : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
10350b57cec5SDimitry Andric          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
10360b57cec5SDimitry Andric      Requires<[doMulWide]>;
10370b57cec5SDimitry Andricdef : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
10380b57cec5SDimitry Andric          (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
10390b57cec5SDimitry Andric      Requires<[doMulWide]>;
10400b57cec5SDimitry Andric
// "Extend then multiply" -> mul.wide, 32-bit results from 16-bit sources.
// Both operands extended the same way select the register/register form.
// An extended operand multiplied by an i32 constant that fits in 16 bits
// (SInt16Const for sext, UInt16Const for zext) selects the *Imm32 form.
// All patterns are gated on doMulWide.
10410b57cec5SDimitry Andricdef : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
10420b57cec5SDimitry Andric          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
10430b57cec5SDimitry Andric      Requires<[doMulWide]>;
10440b57cec5SDimitry Andricdef : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
10450b57cec5SDimitry Andric          (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
10460b57cec5SDimitry Andric      Requires<[doMulWide]>;
10470b57cec5SDimitry Andric
10480b57cec5SDimitry Andricdef : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
10490b57cec5SDimitry Andric          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
10500b57cec5SDimitry Andric      Requires<[doMulWide]>;
10510b57cec5SDimitry Andricdef : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
10520b57cec5SDimitry Andric          (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
10530b57cec5SDimitry Andric      Requires<[doMulWide]>;
10540b57cec5SDimitry Andric
10550b57cec5SDimitry Andric//
10560b57cec5SDimitry Andric// Integer multiply-add
10570b57cec5SDimitry Andric//
// Type profile for the multiply-add node: one result and three operands.
// The constraints tie the result and all three operands to one common
// integer type.
10580b57cec5SDimitry Andricdef SDTIMAD :
10590b57cec5SDimitry Andric  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>,
10600b57cec5SDimitry Andric                       SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
// Target-specific DAG node NVPTXISD::IMAD, matched by the MAD* records.
10610b57cec5SDimitry Andricdef imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
10620b57cec5SDimitry Andric
// 16-bit multiply-add: selects (imad $a, $b, $c) to "mad.lo.s16".
// The three-letter suffix describes operands $a, $b, $c in order:
// 'r' = Int16Regs register, 'i' = i16 immediate. $a is always a register.
10630b57cec5SDimitry Andricdef MAD16rrr :
10640b57cec5SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst),
10650b57cec5SDimitry Andric            (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
10660b57cec5SDimitry Andric            "mad.lo.s16 \t$dst, $a, $b, $c;",
10670b57cec5SDimitry Andric            [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
10680b57cec5SDimitry Andricdef MAD16rri :
10690b57cec5SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst),
10700b57cec5SDimitry Andric            (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
10710b57cec5SDimitry Andric            "mad.lo.s16 \t$dst, $a, $b, $c;",
10720b57cec5SDimitry Andric            [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
10730b57cec5SDimitry Andricdef MAD16rir :
10740b57cec5SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst),
10750b57cec5SDimitry Andric            (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
10760b57cec5SDimitry Andric            "mad.lo.s16 \t$dst, $a, $b, $c;",
10770b57cec5SDimitry Andric            [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
10780b57cec5SDimitry Andricdef MAD16rii :
10790b57cec5SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst),
10800b57cec5SDimitry Andric            (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
10810b57cec5SDimitry Andric            "mad.lo.s16 \t$dst, $a, $b, $c;",
10820b57cec5SDimitry Andric            [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>;
10830b57cec5SDimitry Andric
// 32-bit multiply-add: selects (imad $a, $b, $c) to "mad.lo.s32".
// Suffix letters describe $a, $b, $c in order: 'r' = Int32Regs register,
// 'i' = i32 immediate. Register operands in the patterns carry an explicit
// i32 value type.
10840b57cec5SDimitry Andricdef MAD32rrr :
10850b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst),
10860b57cec5SDimitry Andric            (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
10870b57cec5SDimitry Andric            "mad.lo.s32 \t$dst, $a, $b, $c;",
10885f757f3fSDimitry Andric            [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>;
10890b57cec5SDimitry Andricdef MAD32rri :
10900b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst),
10910b57cec5SDimitry Andric            (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
10920b57cec5SDimitry Andric            "mad.lo.s32 \t$dst, $a, $b, $c;",
10935f757f3fSDimitry Andric            [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), imm:$c))]>;
10940b57cec5SDimitry Andricdef MAD32rir :
10950b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst),
10960b57cec5SDimitry Andric            (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
10970b57cec5SDimitry Andric            "mad.lo.s32 \t$dst, $a, $b, $c;",
10985f757f3fSDimitry Andric            [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, (i32 Int32Regs:$c)))]>;
10990b57cec5SDimitry Andricdef MAD32rii :
11000b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst),
11010b57cec5SDimitry Andric            (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
11020b57cec5SDimitry Andric            "mad.lo.s32 \t$dst, $a, $b, $c;",
11035f757f3fSDimitry Andric            [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, imm:$c))]>;
11040b57cec5SDimitry Andric
// 64-bit multiply-add: selects (imad $a, $b, $c) to "mad.lo.s64".
// Suffix letters describe $a, $b, $c in order: 'r' = Int64Regs register,
// 'i' = i64 immediate. $a is always a register.
11050b57cec5SDimitry Andricdef MAD64rrr :
11060b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst),
11070b57cec5SDimitry Andric            (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
11080b57cec5SDimitry Andric            "mad.lo.s64 \t$dst, $a, $b, $c;",
11090b57cec5SDimitry Andric            [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
11100b57cec5SDimitry Andricdef MAD64rri :
11110b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst),
11120b57cec5SDimitry Andric            (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
11130b57cec5SDimitry Andric            "mad.lo.s64 \t$dst, $a, $b, $c;",
11140b57cec5SDimitry Andric            [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
11150b57cec5SDimitry Andricdef MAD64rir :
11160b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst),
11170b57cec5SDimitry Andric            (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
11180b57cec5SDimitry Andric            "mad.lo.s64 \t$dst, $a, $b, $c;",
11190b57cec5SDimitry Andric            [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
11200b57cec5SDimitry Andricdef MAD64rii :
11210b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst),
11220b57cec5SDimitry Andric            (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
11230b57cec5SDimitry Andric            "mad.lo.s64 \t$dst, $a, $b, $c;",
11240b57cec5SDimitry Andric            [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>;
11250b57cec5SDimitry Andric
// Integer negation: selects the ineg pattern fragment to neg.s16 / neg.s32 /
// neg.s64, register to register, one record per register width.
11260b57cec5SDimitry Andricdef INEG16 :
11270b57cec5SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
11280b57cec5SDimitry Andric            "neg.s16 \t$dst, $src;",
11290b57cec5SDimitry Andric            [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
11300b57cec5SDimitry Andricdef INEG32 :
11310b57cec5SDimitry Andric  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
11320b57cec5SDimitry Andric            "neg.s32 \t$dst, $src;",
11335f757f3fSDimitry Andric            [(set (i32 Int32Regs:$dst), (ineg (i32 Int32Regs:$src)))]>;
11340b57cec5SDimitry Andricdef INEG64 :
11350b57cec5SDimitry Andric  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
11360b57cec5SDimitry Andric            "neg.s64 \t$dst, $src;",
11370b57cec5SDimitry Andric            [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
11380b57cec5SDimitry Andric
11390b57cec5SDimitry Andric//-----------------------------------
11400b57cec5SDimitry Andric// Floating Point Arithmetic
11410b57cec5SDimitry Andric//-----------------------------------
11420b57cec5SDimitry Andric
// The three leaves below match a floating-point immediate only when both
// its semantics (IEEE single or double) and its exact value agree, so a
// double 1.0 never matches FloatConst1 and vice versa.
11430b57cec5SDimitry Andric// Constant 1.0f
11440b57cec5SDimitry Andricdef FloatConst1 : PatLeaf<(fpimm), [{
11450b57cec5SDimitry Andric  return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle() &&
11460b57cec5SDimitry Andric         N->getValueAPF().convertToFloat() == 1.0f;
11470b57cec5SDimitry Andric}]>;
11480b57cec5SDimitry Andric// Constant 1.0 (double)
11490b57cec5SDimitry Andricdef DoubleConst1 : PatLeaf<(fpimm), [{
11500b57cec5SDimitry Andric  return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
11510b57cec5SDimitry Andric         N->getValueAPF().convertToDouble() == 1.0;
11520b57cec5SDimitry Andric}]>;
1153*0fca6ea1SDimitry Andric// Constant -1.0 (double)
1154*0fca6ea1SDimitry Andricdef DoubleConstNeg1 : PatLeaf<(fpimm), [{
1155*0fca6ea1SDimitry Andric  return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1156*0fca6ea1SDimitry Andric         N->getValueAPF().convertToDouble() == -1.0;
1157*0fca6ea1SDimitry Andric}]>;
1158*0fca6ea1SDimitry Andric
1159*0fca6ea1SDimitry Andric
1160*0fca6ea1SDimitry Andric// Node transform: negate an FP immediate (X -> -X, so -1.0 becomes 1.0).
// The result is always built as an f64 target constant.
1161*0fca6ea1SDimitry Andricdef NegDoubleConst : SDNodeXForm<fpimm, [{
1162*0fca6ea1SDimitry Andric  return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1163*0fca6ea1SDimitry Andric                                     SDLoc(N), MVT::f64);
1164*0fca6ea1SDimitry Andric}]>;
11650b57cec5SDimitry Andric
11660b57cec5SDimitry Andric// Loads FP16 constant into a register.
11670b57cec5SDimitry Andric//
11680b57cec5SDimitry Andric// ptxas does not have hex representation for fp16, so we can't use
11690b57cec5SDimitry Andric// fp16 immediate values in .f16 instructions. Instead we have to load
11700b57cec5SDimitry Andric// the constant into a register using mov.b16.
// Both records place the constant in an Int16Regs register and have an
// empty pattern list, so they are not selected by a pattern attached to
// the record itself.
11710b57cec5SDimitry Andricdef LOAD_CONST_F16 :
117206c3fb27SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
11730b57cec5SDimitry Andric            "mov.b16 \t$dst, $a;", []>;
// Same as LOAD_CONST_F16, but takes a bf16 immediate operand.
117406c3fb27SDimitry Andricdef LOAD_CONST_BF16 :
117506c3fb27SDimitry Andric  NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
117606c3fb27SDimitry Andric            "mov.b16 \t$dst, $a;", []>;
// Floating-point arithmetic instantiated from multiclasses declared outside
// this section. The string is the mnemonic stem and the second argument is
// the DAG node being selected.
// add / sub / mul use F3_fma_component rather than plain F3.
11770b57cec5SDimitry Andricdefm FADD : F3_fma_component<"add", fadd>;
11780b57cec5SDimitry Andricdefm FSUB : F3_fma_component<"sub", fsub>;
11790b57cec5SDimitry Andricdefm FMUL : F3_fma_component<"mul", fmul>;
11800b57cec5SDimitry Andric
// min / max for fminnum / fmaxnum; the ".NaN" mnemonics below are used for
// fminimum / fmaximum.
11810b57cec5SDimitry Andricdefm FMIN : F3<"min", fminnum>;
11820b57cec5SDimitry Andricdefm FMAX : F3<"max", fmaxnum>;
11831fd87a68SDimitry Andric// Note: min.NaN.f64 and max.NaN.f64 do not actually exist.
118404eeddc0SDimitry Andricdefm FMINNAN : F3<"min.NaN", fminimum>;
118504eeddc0SDimitry Andricdefm FMAXNAN : F3<"max.NaN", fmaximum>;
11860b57cec5SDimitry Andric
// Unary operations. The *_H records instantiate F2_Support_Half for the
// same mnemonics and nodes as FABS / FNEG.
11870b57cec5SDimitry Andricdefm FABS  : F2<"abs", fabs>;
11880b57cec5SDimitry Andricdefm FNEG  : F2<"neg", fneg>;
11895f757f3fSDimitry Andricdefm FABS_H: F2_Support_Half<"abs", fabs>;
11905f757f3fSDimitry Andricdefm FNEG_H: F2_Support_Half<"neg", fneg>;
11915f757f3fSDimitry Andric
11920b57cec5SDimitry Andricdefm FSQRT : F2<"sqrt.rn", fsqrt>;
11930b57cec5SDimitry Andric
11940b57cec5SDimitry Andric//
1195bdd1243dSDimitry Andric// F16 NEG
1196bdd1243dSDimitry Andric//
// Register-to-register fneg for value type T held in register class RC.
//   OpcStr - full mnemonic, printed before "$dst, $src"
//   T      - value type matched by the fneg pattern (f16 or v2f16 below)
//   RC     - register class of both source and destination
//   Pred   - extra predicate added to useFP16Math, hasPTX<60>, hasSM<53>
1197bdd1243dSDimitry Andricclass FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
1198bdd1243dSDimitry Andric      NVPTXInst<(outs RC:$dst), (ins RC:$src),
1199bdd1243dSDimitry Andric                !strconcat(OpcStr, " \t$dst, $src;"),
1200bdd1243dSDimitry Andric                [(set RC:$dst, (fneg (T RC:$src)))]>,
120106c3fb27SDimitry Andric                Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
// Scalar f16 lives in Int16Regs, v2f16 in Int32Regs. The _ftz records carry
// the doF32FTZ predicate; the plain records pass True and are therefore
// constrained only by the predicates fixed in the class.
120206c3fb27SDimitry Andricdef FNEG16_ftz   : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>;
120306c3fb27SDimitry Andricdef FNEG16       : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>;
120406c3fb27SDimitry Andricdef FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
120506c3fb27SDimitry Andricdef FNEG16x2     : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>;
120606c3fb27SDimitry Andric
120706c3fb27SDimitry Andric//
120806c3fb27SDimitry Andric// BF16 NEG
120906c3fb27SDimitry Andric//
121006c3fb27SDimitry Andric
// Register-to-register fneg for bf16 value type T held in register class RC.
//   OpcStr - full mnemonic, printed before "$dst, $src"
//   T      - value type matched by the fneg pattern (bf16 or v2bf16 below)
//   RC     - register class of both source and destination
//   Pred   - extra predicate added to hasBF16Math, hasPTX<70>, hasSM<80>
121106c3fb27SDimitry Andricclass FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
121206c3fb27SDimitry Andric      NVPTXInst<(outs RC:$dst), (ins RC:$src),
121306c3fb27SDimitry Andric                !strconcat(OpcStr, " \t$dst, $src;"),
121406c3fb27SDimitry Andric                [(set RC:$dst, (fneg (T RC:$src)))]>,
121506c3fb27SDimitry Andric                Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;
// Scalar bf16 lives in Int16Regs, v2bf16 in Int32Regs.
// NOTE(review): the _ftz records print "neg.ftz.bf16" / "neg.ftz.bf16x2".
// Confirm against the PTX ISA that the .ftz modifier is accepted on bf16
// neg -- TODO verify.
121606c3fb27SDimitry Andricdef BFNEG16_ftz   : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
121706c3fb27SDimitry Andricdef BFNEG16       : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>;
121806c3fb27SDimitry Andricdef BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
121906c3fb27SDimitry Andricdef BFNEG16x2     : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
1220bdd1243dSDimitry Andric
1221bdd1243dSDimitry Andric//
12220b57cec5SDimitry Andric// F64 division
12230b57cec5SDimitry Andric//
// 1.0 / $b: matched only when the numerator is the double constant 1.0
// (DoubleConst1). The immediate $a is an operand of the record but is not
// printed; the emitted instruction is the reciprocal "rcp.rn.f64 $dst, $b".
12240b57cec5SDimitry Andricdef FDIV641r :
12250b57cec5SDimitry Andric  NVPTXInst<(outs Float64Regs:$dst),
12260b57cec5SDimitry Andric            (ins f64imm:$a, Float64Regs:$b),
12270b57cec5SDimitry Andric            "rcp.rn.f64 \t$dst, $b;",
12280b57cec5SDimitry Andric            [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General f64 division, register / register.
12290b57cec5SDimitry Andricdef FDIV64rr :
12300b57cec5SDimitry Andric  NVPTXInst<(outs Float64Regs:$dst),
12310b57cec5SDimitry Andric            (ins Float64Regs:$a, Float64Regs:$b),
12320b57cec5SDimitry Andric            "div.rn.f64 \t$dst, $a, $b;",
12330b57cec5SDimitry Andric            [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>;
// General f64 division, register / immediate.
12340b57cec5SDimitry Andricdef FDIV64ri :
12350b57cec5SDimitry Andric  NVPTXInst<(outs Float64Regs:$dst),
12360b57cec5SDimitry Andric            (ins Float64Regs:$a, f64imm:$b),
12370b57cec5SDimitry Andric            "div.rn.f64 \t$dst, $a, $b;",
12380b57cec5SDimitry Andric            [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;
12390b57cec5SDimitry Andric
1240*0fca6ea1SDimitry Andric// fdiv will be converted to rcp
1241*0fca6ea1SDimitry Andric// fdiv -1.0, X => fneg (rcp.rn X)
// The matched numerator is the double constant -1.0 (DoubleConstNeg1).
// NegDoubleConst flips it to +1.0 for the immediate operand of FDIV641r,
// and FNEGf64 is applied to the FDIV641r result.
// NOTE(review): FNEGf64 is not defined in this section; presumably it is
// produced by "defm FNEG : F2<...>" -- confirm.
1242*0fca6ea1SDimitry Andricdef : Pat<(fdiv DoubleConstNeg1:$a, Float64Regs:$b),
1243*0fca6ea1SDimitry Andric          (FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>;
1244*0fca6ea1SDimitry Andric
12450b57cec5SDimitry Andric//
12460b57cec5SDimitry Andric// F32 Approximate reciprocal
12470b57cec5SDimitry Andric//
// 1.0f / $b under the do_DIVF32_APPROX predicate. The numerator must be the
// single-precision constant 1.0f (FloatConst1); it is an operand of the
// record but is not printed. The _ftz record additionally requires doF32FTZ
// and prints the ".ftz" mnemonic.
12480b57cec5SDimitry Andricdef FDIV321r_ftz :
12490b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12500b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
12510b57cec5SDimitry Andric            "rcp.approx.ftz.f32 \t$dst, $b;",
12520b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
12530b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX, doF32FTZ]>;
12540b57cec5SDimitry Andricdef FDIV321r :
12550b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12560b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
12570b57cec5SDimitry Andric            "rcp.approx.f32 \t$dst, $b;",
12580b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
12590b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX]>;
12600b57cec5SDimitry Andric//
12610b57cec5SDimitry Andric// F32 Approximate division
12620b57cec5SDimitry Andric//
// General f32 division under the do_DIVF32_APPROX predicate, printed as
// div.approx[.ftz].f32. 'rr' divides register by register, 'ri' divides
// register by an f32 immediate. The _ftz records additionally require
// doF32FTZ.
12630b57cec5SDimitry Andricdef FDIV32approxrr_ftz :
12640b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12650b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
12660b57cec5SDimitry Andric            "div.approx.ftz.f32 \t$dst, $a, $b;",
12670b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
12680b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX, doF32FTZ]>;
12690b57cec5SDimitry Andricdef FDIV32approxri_ftz :
12700b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12710b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
12720b57cec5SDimitry Andric            "div.approx.ftz.f32 \t$dst, $a, $b;",
12730b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
12740b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX, doF32FTZ]>;
12750b57cec5SDimitry Andricdef FDIV32approxrr :
12760b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12770b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
12780b57cec5SDimitry Andric            "div.approx.f32 \t$dst, $a, $b;",
12790b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
12800b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX]>;
12810b57cec5SDimitry Andricdef FDIV32approxri :
12820b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12830b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
12840b57cec5SDimitry Andric            "div.approx.f32 \t$dst, $a, $b;",
12850b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
12860b57cec5SDimitry Andric            Requires<[do_DIVF32_APPROX]>;
12870b57cec5SDimitry Andric//
12880b57cec5SDimitry Andric// F32 Semi-accurate reciprocal
12890b57cec5SDimitry Andric//
12900b57cec5SDimitry Andric// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
12910b57cec5SDimitry Andric//
// 1.0f / $b under the do_DIVF32_FULL predicate. These records print the
// same rcp.approx[.ftz].f32 mnemonics as FDIV321r / FDIV321r_ftz; only the
// predicate differs. The FloatConst1 numerator is an operand but is not
// printed.
12920b57cec5SDimitry Andricdef FDIV321r_approx_ftz :
12930b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
12940b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
12950b57cec5SDimitry Andric            "rcp.approx.ftz.f32 \t$dst, $b;",
12960b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
12970b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL, doF32FTZ]>;
12980b57cec5SDimitry Andricdef FDIV321r_approx :
12990b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13000b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
13010b57cec5SDimitry Andric            "rcp.approx.f32 \t$dst, $b;",
13020b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
13030b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL]>;
13040b57cec5SDimitry Andric//
13050b57cec5SDimitry Andric// F32 Semi-accurate division
13060b57cec5SDimitry Andric//
// General f32 division under the do_DIVF32_FULL predicate, printed as
// div.full[.ftz].f32. 'rr' divides register by register, 'ri' divides
// register by an f32 immediate. The _ftz records additionally require
// doF32FTZ.
13070b57cec5SDimitry Andricdef FDIV32rr_ftz :
13080b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13090b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
13100b57cec5SDimitry Andric            "div.full.ftz.f32 \t$dst, $a, $b;",
13110b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
13120b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL, doF32FTZ]>;
13130b57cec5SDimitry Andricdef FDIV32ri_ftz :
13140b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13150b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
13160b57cec5SDimitry Andric            "div.full.ftz.f32 \t$dst, $a, $b;",
13170b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
13180b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL, doF32FTZ]>;
13190b57cec5SDimitry Andricdef FDIV32rr :
13200b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13210b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
13220b57cec5SDimitry Andric            "div.full.f32 \t$dst, $a, $b;",
13230b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
13240b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL]>;
13250b57cec5SDimitry Andricdef FDIV32ri :
13260b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13270b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
13280b57cec5SDimitry Andric            "div.full.f32 \t$dst, $a, $b;",
13290b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
13300b57cec5SDimitry Andric            Requires<[do_DIVF32_FULL]>;
13310b57cec5SDimitry Andric//
13320b57cec5SDimitry Andric// F32 Accurate reciprocal
13330b57cec5SDimitry Andric//
// 1.0f / $b printed as rcp.rn[.ftz].f32. Neither record names a
// do_DIVF32_* predicate: the _ftz record requires only doF32FTZ and the
// plain record has no Requires<> clause at all. The FloatConst1 numerator
// is an operand but is not printed.
13340b57cec5SDimitry Andricdef FDIV321r_prec_ftz :
13350b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13360b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
13370b57cec5SDimitry Andric            "rcp.rn.ftz.f32 \t$dst, $b;",
13380b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
13390b57cec5SDimitry Andric            Requires<[doF32FTZ]>;
13400b57cec5SDimitry Andricdef FDIV321r_prec :
13410b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13420b57cec5SDimitry Andric            (ins f32imm:$a, Float32Regs:$b),
13430b57cec5SDimitry Andric            "rcp.rn.f32 \t$dst, $b;",
13440b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>;
13450b57cec5SDimitry Andric//
13460b57cec5SDimitry Andric// F32 Accurate division
13470b57cec5SDimitry Andric//
// General f32 division printed as div.rn[.ftz].f32. 'rr' divides register
// by register, 'ri' divides register by an f32 immediate. The _ftz records
// require only doF32FTZ; the plain records have no Requires<> clause.
13480b57cec5SDimitry Andricdef FDIV32rr_prec_ftz :
13490b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13500b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
13510b57cec5SDimitry Andric            "div.rn.ftz.f32 \t$dst, $a, $b;",
13520b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
13530b57cec5SDimitry Andric            Requires<[doF32FTZ]>;
13540b57cec5SDimitry Andricdef FDIV32ri_prec_ftz :
13550b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13560b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
13570b57cec5SDimitry Andric            "div.rn.ftz.f32 \t$dst, $a, $b;",
13580b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
13590b57cec5SDimitry Andric            Requires<[doF32FTZ]>;
13600b57cec5SDimitry Andricdef FDIV32rr_prec :
13610b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13620b57cec5SDimitry Andric            (ins Float32Regs:$a, Float32Regs:$b),
13630b57cec5SDimitry Andric            "div.rn.f32 \t$dst, $a, $b;",
13640b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>;
13650b57cec5SDimitry Andricdef FDIV32ri_prec :
13660b57cec5SDimitry Andric  NVPTXInst<(outs Float32Regs:$dst),
13670b57cec5SDimitry Andric            (ins Float32Regs:$a, f32imm:$b),
13680b57cec5SDimitry Andric            "div.rn.f32 \t$dst, $a, $b;",
13690b57cec5SDimitry Andric            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>;
13700b57cec5SDimitry Andric
13710b57cec5SDimitry Andric//
13720b57cec5SDimitry Andric// FMA
13730b57cec5SDimitry Andric//
13740b57cec5SDimitry Andric
// Fused multiply-add with register and immediate operand combinations.
//   OpcStr - full mnemonic, printed before "$dst, $a, $b, $c"
//   RC     - register class of the result and of every register operand
//   ImmCls - operand class used for the immediate operands
//   Pred   - predicate required by all four records
// Record suffix letters describe $a, $b, $c in order ('r' = register,
// 'i' = immediate); $a is always a register.
13750b57cec5SDimitry Andricmulticlass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
13760b57cec5SDimitry Andric   def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
13770b57cec5SDimitry Andric                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
13780b57cec5SDimitry Andric                       [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
13790b57cec5SDimitry Andric                       Requires<[Pred]>;
13800b57cec5SDimitry Andric   def rri : NVPTXInst<(outs RC:$dst),
13810b57cec5SDimitry Andric                       (ins RC:$a, RC:$b, ImmCls:$c),
13820b57cec5SDimitry Andric                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
13830b57cec5SDimitry Andric                       [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
13840b57cec5SDimitry Andric                       Requires<[Pred]>;
13850b57cec5SDimitry Andric   def rir : NVPTXInst<(outs RC:$dst),
13860b57cec5SDimitry Andric                       (ins RC:$a, ImmCls:$b, RC:$c),
13870b57cec5SDimitry Andric                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
13880b57cec5SDimitry Andric                       [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
13890b57cec5SDimitry Andric                       Requires<[Pred]>;
13900b57cec5SDimitry Andric   def rii : NVPTXInst<(outs RC:$dst),
13910b57cec5SDimitry Andric                       (ins RC:$a, ImmCls:$b, ImmCls:$c),
13920b57cec5SDimitry Andric                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
13930b57cec5SDimitry Andric                       [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
13940b57cec5SDimitry Andric                       Requires<[Pred]>;
13950b57cec5SDimitry Andric}
13960b57cec5SDimitry Andric
// Half-precision fused multiply-add, register-only form.
//   OpcStr - full mnemonic, printed before "$dst, $a, $b, $c"
//   T      - value type given to each source operand in the fma pattern
//   RC     - register class of the result and of all three sources
//   Pred   - extra predicate required in addition to useFP16Math
// Unlike the FMA multiclass, only the 'rrr' record is defined here.
1397bdd1243dSDimitry Andricmulticlass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
13980b57cec5SDimitry Andric   def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
13990b57cec5SDimitry Andric                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1400bdd1243dSDimitry Andric                       [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>,
14010b57cec5SDimitry Andric                       Requires<[useFP16Math, Pred]>;
14020b57cec5SDimitry Andric}
14030b57cec5SDimitry Andric
// Register-only `fma` for bfloat16 types. Same shape as FMA_F16, but gated on
// hasBF16Math (plus Pred) instead of useFP16Math.
multiclass FMA_BF16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
  def rrr :
    NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
              OpcStr # " \t$dst, $a, $b, $c;",
              [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>,
    Requires<[hasBF16Math, Pred]>;
}
141006c3fb27SDimitry Andric
// f16 and packed f16x2: scalars use Int16Regs, packed pairs use Int32Regs.
defm FMA16_ftz    : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>;
defm FMA16        : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>;
defm FMA16x2_ftz  : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
defm FMA16x2      : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>;

// bf16 and packed bf16x2, same register-class split as above.
defm BFMA16_ftz   : FMA_BF16<"fma.rn.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
defm BFMA16       : FMA_BF16<"fma.rn.bf16", bf16, Int16Regs, True>;
defm BFMA16x2_ftz : FMA_BF16<"fma.rn.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
defm BFMA16x2     : FMA_BF16<"fma.rn.bf16x2", v2bf16, Int32Regs, True>;

// f32 / f64, with register and immediate operand variants.
defm FMA32_ftz    : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32        : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
defm FMA64        : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
14220b57cec5SDimitry Andric
// sin/cos
// Both select the PTX ".approx" f32 forms and are only available when
// allowUnsafeFPMath holds.
let Predicates = [allowUnsafeFPMath] in {
  def SINF :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
              "sin.approx.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;

  def COSF :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
              "cos.approx.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
}
14320b57cec5SDimitry Andric
1433fe6060f1SDimitry Andric// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
1434bdd1243dSDimitry Andric// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches the
1435bdd1243dSDimitry Andric// semantics of LLVM's frem.
14360b57cec5SDimitry Andric
// frem - f32 FTZ

// Unsafe FP math: plain x - trunc(x / y) * y, with no check for an infinite
// divisor.
let Predicates = [doF32FTZ, allowUnsafeFPMath] in {
  def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
            (FSUBf32rr_ftz Float32Regs:$x,
              (FMULf32rr_ftz
                (CVT_f32_f32
                  (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y),
                  CvtRZI_FTZ),
                Float32Regs:$y))>;

  def : Pat<(frem Float32Regs:$x, fpimm:$y),
            (FSUBf32rr_ftz Float32Regs:$x,
              (FMULf32ri_ftz
                (CVT_f32_f32
                  (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y),
                  CvtRZI_FTZ),
                fpimm:$y))>;
}

// Safe FP math: the same expansion, wrapped in a select keyed on an
// "is $y infinite" test with $x as the other select input.
let Predicates = [doF32FTZ, noUnsafeFPMath] in {
  def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
            (SELP_f32rr Float32Regs:$x,
              (FSUBf32rr_ftz Float32Regs:$x,
                (FMULf32rr_ftz
                  (CVT_f32_f32
                    (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y),
                    CvtRZI_FTZ),
                  Float32Regs:$y)),
              (TESTINF_f32r Float32Regs:$y))>;

  def : Pat<(frem Float32Regs:$x, fpimm:$y),
            (SELP_f32rr Float32Regs:$x,
              (FSUBf32rr_ftz Float32Regs:$x,
                (FMULf32ri_ftz
                  (CVT_f32_f32
                    (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y),
                    CvtRZI_FTZ),
                  fpimm:$y)),
              (TESTINF_f32i fpimm:$y))>;
}
14630b57cec5SDimitry Andric
// frem - f32

// Unsafe FP math: plain x - trunc(x / y) * y, with no check for an infinite
// divisor.
let Predicates = [allowUnsafeFPMath] in {
  def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
            (FSUBf32rr Float32Regs:$x,
              (FMULf32rr
                (CVT_f32_f32
                  (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y),
                  CvtRZI),
                Float32Regs:$y))>;

  def : Pat<(frem Float32Regs:$x, fpimm:$y),
            (FSUBf32rr Float32Regs:$x,
              (FMULf32ri
                (CVT_f32_f32
                  (FDIV32ri_prec Float32Regs:$x, fpimm:$y),
                  CvtRZI),
                fpimm:$y))>;
}

// Safe FP math: the same expansion, wrapped in a select keyed on an
// "is $y infinite" test with $x as the other select input.
let Predicates = [noUnsafeFPMath] in {
  def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
            (SELP_f32rr Float32Regs:$x,
              (FSUBf32rr Float32Regs:$x,
                (FMULf32rr
                  (CVT_f32_f32
                    (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y),
                    CvtRZI),
                  Float32Regs:$y)),
              (TESTINF_f32r Float32Regs:$y))>;

  def : Pat<(frem Float32Regs:$x, fpimm:$y),
            (SELP_f32rr Float32Regs:$x,
              (FSUBf32rr Float32Regs:$x,
                (FMULf32ri
                  (CVT_f32_f32
                    (FDIV32ri_prec Float32Regs:$x, fpimm:$y),
                    CvtRZI),
                  fpimm:$y)),
              (TESTINF_f32i fpimm:$y))>;
}
14900b57cec5SDimitry Andric
// frem - f64

// Unsafe FP math: plain x - trunc(x / y) * y, with no check for an infinite
// divisor.
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x,
            (FMULf64rr
              (CVT_f64_f64
                (FDIV64rr Float64Regs:$x, Float64Regs:$y),
                CvtRZI),
              Float64Regs:$y))>,
      Requires<[allowUnsafeFPMath]>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr Float64Regs:$x,
            (FMULf64ri
              (CVT_f64_f64
                (FDIV64ri Float64Regs:$x, fpimm:$y),
                CvtRZI),
              fpimm:$y))>,
      Requires<[allowUnsafeFPMath]>;

// Safe FP math: the same expansion, wrapped in a select keyed on an
// "is $y infinite" test with $x as the other select input.
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (SELP_f64rr Float64Regs:$x,
            (FSUBf64rr Float64Regs:$x,
              (FMULf64rr
                (CVT_f64_f64
                  (FDIV64rr Float64Regs:$x, Float64Regs:$y),
                  CvtRZI),
                Float64Regs:$y)),
            (TESTINF_f64r Float64Regs:$y))>,
      Requires<[noUnsafeFPMath]>;
// $y is an immediate in this pattern, so the infinity test must use the
// immediate form of the test instruction (as the f32 patterns do with
// TESTINF_f32i) rather than treating $y as a Float64Regs register.
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (SELP_f64rr Float64Regs:$x,
            (FSUBf64rr Float64Regs:$x,
              (FMULf64ri
                (CVT_f64_f64
                  (FDIV64ri Float64Regs:$x, fpimm:$y),
                  CvtRZI),
                fpimm:$y)),
            (TESTINF_f64i fpimm:$y))>,
      Requires<[noUnsafeFPMath]>;
15170b57cec5SDimitry Andric
15180b57cec5SDimitry Andric//-----------------------------------
15190b57cec5SDimitry Andric// Bitwise operations
15200b57cec5SDimitry Andric//-----------------------------------
15210b57cec5SDimitry Andric
// Template for two-input bitwise operations (three operands counting the
// destination).  Creates .pred (predicate registers -- i.e., i1), .b16, .b32,
// and .b64 versions of OpcStr, each in a register-register (rr) and a
// register-immediate (ri) form.
multiclass BITWISE<string OpcStr, SDNode OpNode> {
  // i1 / predicate registers.
  def b1rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              OpcStr # ".pred  \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def b1ri :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
              OpcStr # ".pred  \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;

  // 16-bit.
  def b16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # ".b16  \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def b16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              OpcStr # ".b16  \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;

  // 32-bit.  The register operands are explicitly typed as i32 here.
  def b32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".b32  \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def b32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              OpcStr # ".b32  \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;

  // 64-bit.
  def b64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
              OpcStr # ".b64  \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def b64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
              OpcStr # ".b64  \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}
15580b57cec5SDimitry Andric
defm OR  : BITWISE<"or", or>;
defm AND : BITWISE<"and", and>;
defm XOR : BITWISE<"xor", xor>;

// PTX has no mul on predicates; an i1 multiply is selected as an `and`.
def : Pat<(mul Int1Regs:$a, Int1Regs:$b),
          (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(mul Int1Regs:$a, (i1 imm:$b)),
          (ANDb1ri Int1Regs:$a, imm:$b)>;

// select-to-logic folds on i1.  Instcombine used to perform these reliably,
// but poison semantics make them unsafe on LLVM IR, so they are done during
// instruction selection instead:
//   select a, b, 0  ->  and a, b
//   select a, 1, b  ->  or  a, b
def : Pat<(select Int1Regs:$a, Int1Regs:$b, 0),
          (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(select Int1Regs:$a, 1, Int1Regs:$b),
          (ORb1rr Int1Regs:$a, Int1Regs:$b)>;
1572*0fca6ea1SDimitry Andric
// Logical ops on v2i16 / v4i8 are selected as the corresponding b32 bitwise
// instructions; both vector types are matched in Int32Regs.
foreach VecTy = [v2i16, v4i8] in {
  // Register-register forms.
  def : Pat<(or (VecTy Int32Regs:$a), (VecTy Int32Regs:$b)),
            (ORb32rr Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(xor (VecTy Int32Regs:$a), (VecTy Int32Regs:$b)),
            (XORb32rr Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(and (VecTy Int32Regs:$a), (VecTy Int32Regs:$b)),
            (ANDb32rr Int32Regs:$a, Int32Regs:$b)>;

  // Register-immediate forms.  Vector constants are legalized into a bitcast
  // from an i32 immediate, so that is the shape matched here.
  def : Pat<(or Int32Regs:$a, (VecTy (bitconvert (i32 imm:$b)))),
            (ORb32ri Int32Regs:$a, imm:$b)>;
  def : Pat<(xor Int32Regs:$a, (VecTy (bitconvert (i32 imm:$b)))),
            (XORb32ri Int32Regs:$a, imm:$b)>;
  def : Pat<(and Int32Regs:$a, (VecTy (bitconvert (i32 imm:$b)))),
            (ANDb32ri Int32Regs:$a, imm:$b)>;
}
15915f757f3fSDimitry Andric
// Bitwise NOT for predicate, 16-, 32- and 64-bit registers.
def NOT1 :
  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
            "not.pred \t$dst, $src;",
            [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "not.b16 \t$dst, $src;",
            [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "not.b32 \t$dst, $src;",
            [(set (i32 Int32Regs:$dst), (not (i32 Int32Regs:$src)))]>;
def NOT64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "not.b64 \t$dst, $src;",
            [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
16040b57cec5SDimitry Andric
// Template for left/right shifts.  Each instruction takes three operands:
//   [dest (reg), src (reg), shift (reg or imm)].
// dest and src may be int64, int32, or int16; the shift amount is always
// int32.  OpcStr is the mnemonic without its width suffix ("64"/"32"/"16" is
// appended here).
//
// A 32-bit (imm, imm) form is defined as well.
multiclass SHIFT<string OpcStr, SDNode OpNode> {
  // 64-bit source/destination.
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst,
                    (OpNode Int64Regs:$a, (i32 Int32Regs:$b)))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit source/destination.
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst,
                    (OpNode (i32 Int32Regs:$a), (i32 imm:$b)))]>;
  def i32ii :
    NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit source/destination.
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst,
                    (OpNode Int16Regs:$a, (i32 Int32Regs:$b)))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}
16400b57cec5SDimitry Andric
// Shift instantiations: SHL uses the untyped-bits mnemonic, SRA the signed
// right shift, and SRL the unsigned right shift.
defm SHL : SHIFT<"shl.b", shl>;
defm SRA : SHIFT<"shr.s", sra>;
defm SRL : SHIFT<"shr.u", srl>;
16440b57cec5SDimitry Andric
// Bit-reverse (32- and 64-bit).
def BREV32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
            "brev.b32 \t$dst, $a;",
            [(set Int32Regs:$dst, (bitreverse (i32 Int32Regs:$a)))]>;

def BREV64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
            "brev.b64 \t$dst, $a;",
            [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>;
16540b57cec5SDimitry Andric
16550b57cec5SDimitry Andric//
16560b57cec5SDimitry Andric// Rotate: Use ptx shf instruction if available.
16570b57cec5SDimitry Andric//
16580b57cec5SDimitry Andric
// 32-bit hardware rotates, available when hasHWROT32 holds.  A rotate is
// expressed as a wrapping funnel shift whose two data inputs are the same
// register:
//   r2 = rotl r1, n   =>   r2 = shf.l r1, r1, n
//   r2 = rotr r1, n   =>   r2 = shf.r r1, r1, n
let Predicates = [hasHWROT32] in {
  // Rotate left by immediate / by register.
  def ROTL32imm_hw :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>;

  def ROTL32reg_hw :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>;

  // Rotate right by immediate / by register.
  def ROTR32imm_hw :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>;

  def ROTR32reg_hw :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>;
}
16880b57cec5SDimitry Andric
// 32-bit software rotate by immediate, emitted as a braced PTX block:
//   dst = (src << amt1) + (src >> amt2)
// The caller must supply $amt2 == 32 - $amt1.  There is no selection pattern
// on the instruction itself; it is only reached through explicit Pats.
def ROT32imm_sw :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
            "{{\n\t"
            ".reg .b32 %lhs;\n\t"
            ".reg .b32 %rhs;\n\t"
            "shl.b32 \t%lhs, $src, $amt1;\n\t"
            "shr.b32 \t%rhs, $src, $amt2;\n\t"
            "add.u32 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            []>;
17010b57cec5SDimitry Andric
// Immediate transform: maps a constant N to the i32 target constant 32 - N.
def SUB_FRM_32 : SDNodeXForm<imm, [{
  auto Complement = 32 - N->getZExtValue();
  return CurDAG->getTargetConstant(Complement, SDLoc(N), MVT::i32);
}]>;

// Without hardware rotate support, constant-amount rotates go through
// ROT32imm_sw.  rotl shifts left by amt (right by 32 - amt); rotr swaps the
// two amounts.
let Predicates = [noHWROT32] in {
  def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
  def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
}
17120b57cec5SDimitry Andric
// 32-bit software rotate left by register.
//
// The rotate amount is first reduced modulo 32 (and with 31), matching what
// the 64-bit software rotates do with 63.  Without the mask, an amount
// above 32 makes "32 - $amt" wrap to a huge unsigned shift count, and both
// partial shifts would then produce 0 instead of the rotated value.
def ROTL32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b32 %lhs;\n\t"
            ".reg .b32 %rhs;\n\t"
            ".reg .b32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 31;\n\t"
            "shl.b32 \t%lhs, $src, %amt2;\n\t"
            "sub.s32 \t%amt2, 32, %amt2;\n\t"
            "shr.b32 \t%rhs, $src, %amt2;\n\t"
            "add.u32 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
           Requires<[noHWROT32]>;
17270b57cec5SDimitry Andric
// 32-bit software rotate right by register.
//
// The rotate amount is first reduced modulo 32 (and with 31), matching what
// the 64-bit software rotates do with 63.  Without the mask, an amount
// above 32 makes "32 - $amt" wrap to a huge unsigned shift count, and both
// partial shifts would then produce 0 instead of the rotated value.
def ROTR32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b32 %lhs;\n\t"
            ".reg .b32 %rhs;\n\t"
            ".reg .b32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 31;\n\t"
            "shr.b32 \t%lhs, $src, %amt2;\n\t"
            "sub.s32 \t%amt2, 32, %amt2;\n\t"
            "shl.b32 \t%rhs, $src, %amt2;\n\t"
            "add.u32 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
           Requires<[noHWROT32]>;
17420b57cec5SDimitry Andric
// 64-bit software rotate by immediate, emitted as a braced PTX block:
//   dst = (src << amt1) + (src >> amt2)
// The caller must supply $amt2 == 64 - $amt1.  There is no selection pattern
// on the instruction itself; it is only reached through explicit Pats.
def ROT64imm_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
            "{{\n\t"
            ".reg .b64 %lhs;\n\t"
            ".reg .b64 %rhs;\n\t"
            "shl.b64 \t%lhs, $src, $amt1;\n\t"
            "shr.b64 \t%rhs, $src, $amt2;\n\t"
            "add.u64 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            []>;
17550b57cec5SDimitry Andric
// Immediate transform: maps a constant N to the i32 target constant 64 - N.
def SUB_FRM_64 : SDNodeXForm<imm, [{
  auto Complement = 64 - N->getZExtValue();
  return CurDAG->getTargetConstant(Complement, SDLoc(N), MVT::i32);
}]>;

// Constant-amount 64-bit rotates go through ROT64imm_sw.  rotl shifts left by
// amt (right by 64 - amt); rotr swaps the two amounts.
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       imm:$amt,
                       (SUB_FRM_64 node:$amt))>;
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       (SUB_FRM_64 node:$amt),
                       imm:$amt)>;
17640b57cec5SDimitry Andric
// 64-bit software rotate left by register.
//
// Emitted sequence: mask the amount to its low 6 bits, shift left by it,
// shift right by (64 - masked amount), then add the two halves.
def ROTL64reg_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b64 %lhs;\n\t"
            ".reg .b64 %rhs;\n\t"
            ".reg .u32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 63;\n\t"
            "shl.b64 \t%lhs, $src, %amt2;\n\t"
            "sub.u32 \t%amt2, 64, %amt2;\n\t"
            "shr.b64 \t%rhs, $src, %amt2;\n\t"
            "add.u64 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int64Regs:$dst,
                  (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
17790b57cec5SDimitry Andric
// 64-bit software rotate right by register.
//
// Emitted sequence: mask the amount to its low 6 bits, shift right by it,
// shift left by (64 - masked amount), then add the two halves.
def ROTR64reg_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, Int32Regs:$amt),
            "{{\n\t"
            ".reg .b64 %lhs;\n\t"
            ".reg .b64 %rhs;\n\t"
            ".reg .u32 %amt2;\n\t"
            "and.b32 \t%amt2, $amt, 63;\n\t"
            "shr.b64 \t%lhs, $src, %amt2;\n\t"
            "sub.u32 \t%amt2, 64, %amt2;\n\t"
            "shl.b64 \t%rhs, $src, %amt2;\n\t"
            "add.u64 \t$dst, %lhs, %rhs;\n\t"
            "}}",
            [(set Int64Regs:$dst,
                  (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
17930b57cec5SDimitry Andric
17940b57cec5SDimitry Andric//
// Funnel shift in clamp mode
17960b57cec5SDimitry Andric//
17970b57cec5SDimitry Andric
// SDNodes for the clamped funnel shifts, so that DAG code can create them
// (e.g. LowerShiftLeftParts and LowerShiftRightParts in NVPTXISelLowering).
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;

// shf.l.clamp.b32: selected for FUN_SHFL_CLAMP on three i32 registers.
def FUNSHFLCLAMP :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
            "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
            [(set Int32Regs:$dst,
                  (FUN_SHFL_CLAMP (i32 Int32Regs:$lo),
                                  (i32 Int32Regs:$hi),
                                  (i32 Int32Regs:$amt)))]>;

// shf.r.clamp.b32: selected for FUN_SHFR_CLAMP on three i32 registers.
def FUNSHFRCLAMP :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
            "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
            [(set Int32Regs:$dst,
                  (FUN_SHFR_CLAMP (i32 Int32Regs:$lo),
                                  (i32 Int32Regs:$hi),
                                  (i32 Int32Regs:$amt)))]>;
18160b57cec5SDimitry Andric
18170b57cec5SDimitry Andric//
18180b57cec5SDimitry Andric// BFE - bit-field extract
18190b57cec5SDimitry Andric//
18200b57cec5SDimitry Andric
18215f757f3fSDimitry Andric// Template for BFE/BFI instructions.
18225f757f3fSDimitry Andric// Args: [dest (reg), src (reg), start (reg or imm), end (reg or imm)].
18230b57cec5SDimitry Andric// Start may be an imm only if end is also an imm.  FIXME: Is this a
18240b57cec5SDimitry Andric// restriction in PTX?
18250b57cec5SDimitry Andric//
18260b57cec5SDimitry Andric// dest and src may be int32 or int64, but start and end are always int32.
// bfe: one result, three operands.  The result and operand 1 share the same
// integer type; operands 2 and 3 are i32.
def SDTBFE : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisInt<0>,
  SDTCisVT<2, i32>, SDTCisVT<3, i32>
]>;
def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;

// bfi: one result, four operands.  The result and operands 1 and 2 share the
// same integer type; operands 3 and 4 are i32.
def SDTBFI : SDTypeProfile<1, 4, [
  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
  SDTCisVT<3, i32>, SDTCisVT<4, i32>
]>;
def bfi : SDNode<"NVPTXISD::BFI", SDTBFI>;

// prmt: one result, four operands, all i32.
def SDTPRMT : SDTypeProfile<1, 4, [
  SDTCisVT<0, i32>, SDTCisVT<1, i32>,
  SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>
]>;
def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
18415f757f3fSDimitry Andric
// Bit-field extract for value type T held in register class RC.  The suffix
// names the kinds of $a, $b and $c in order (r = register, i = immediate);
// $b and $c are always i32.
multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
  // All variants print their operands identically.
  defvar AsmStr = Instr # " \t$d, $a, $b, $c;";

  def rrr :
    NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
              AsmStr,
              [(set (T RC:$d),
                    (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>;

  def rri :
    NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c),
              AsmStr,
              [(set (T RC:$d),
                    (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>;

  def rii :
    NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c),
              AsmStr,
              [(set (T RC:$d),
                    (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>;
}
18595f757f3fSDimitry Andric
// Bit-field insert instructions selected from the `bfi` node.
//
//   Instr  - mnemonic placed at the front of the asm string.
//   T      - value type of $a, $b and the result $f.
//   RC     - register class holding the register forms of $a, $b and $f.
//   ImmCls - immediate operand class used when $a is an immediate.
//
// The four-letter suffix spells the kinds of $a, $b, $c and $d: `r` is a
// register, `i` is an immediate.  $b is always a register; $c and $d are
// always i32.
multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
  // --- $a in a register ---
  def rrrr
    : NVPTXInst<(outs RC:$f),
                (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>;
  def rrri
    : NVPTXInst<(outs RC:$f),
                (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>;
  def rrii
    : NVPTXInst<(outs RC:$f),
                (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
  // --- $a immediate ---
  def irrr
    : NVPTXInst<(outs RC:$f),
                (ins ImmCls:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>;
  def irri
    : NVPTXInst<(outs RC:$f),
                (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>;
  def irii
    : NVPTXInst<(outs RC:$f),
                (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d),
                !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
                [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
}

// `prmt.b32` instructions selected from the `prmt` node.
//
//   T  - value type of $a, $b and the result $d.
//   RC - register class holding $a and $d.
//
// The suffix spells the kinds of $a, $b and $c (`r` register, `i` immediate).
// $mode is a PrmtMode operand printed directly after the mnemonic.
//
// NOTE(review): in the register forms $b is declared as Int32Regs in the
// operand list but typed through T/RC in the pattern.  The two agree for the
// PRMT<i32, Int32Regs> instantiation; confirm before adding any other.
multiclass PRMT<ValueType T, RegisterClass RC> {
  def rrr
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode),
                !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
                [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>;
  def rri
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode),
                !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
                [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>;
  def rii
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode),
                !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
                [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>;
}

// Concrete bit-field extract / insert and byte-permute instructions.
let hasSideEffects = false in {
  // Order is somewhat important here.  The signed and unsigned variants match
  // the same patterns, so the first one wins.  Having unsigned byte extraction
  // has the benefit of always having zero in unused bits, which makes some
  // optimizations easier (e.g. no need to mask them).
  defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
  defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
  defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
  defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;

  defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
  defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;

  defm PRMT_B32 : PRMT<i32, Int32Regs>;
}


// Byte extraction + signed/unsigned extension to i32.
// An 8-bit `bfe` wrapped in sext_inreg-from-i8 selects bfe.s32; the same
// `bfe` masked with 255 selects bfe.u32.  Both register and immediate offsets
// are covered.
def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s),  (i32 Int32Regs:$o), 8), i8)),
          (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>;
def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s),  (i32 imm:$o), 8), i8)),
          (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
def : Pat<(i32 (and (bfe (i32 Int32Regs:$s),  (i32 Int32Regs:$o), 8), 255)),
          (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>;
def : Pat<(i32 (and (bfe (i32 Int32Regs:$s),  (i32 imm:$o), 8), 255)),
          (BFE_U32rii Int32Regs:$s, imm:$o, 8)>;

// Byte extraction + signed extension to i16: extract with bfe.s32 at an
// immediate offset, then convert the i32 result with CVT_s8_s32.
def : Pat<(i16 (sext_inreg (trunc (bfe (i32 Int32Regs:$s),  (i32 imm:$o), 8)), i8)),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;


// Byte extraction via shift/trunc/sext.
// These patterns recognise a signed byte read written as srl/trunc/sext_inreg
// (or an i16 sra by 8) and select CVT_s8_* and/or an 8-bit BFE_S* instead.
def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)),
          (CVT_s8_s32 Int32Regs:$s, CvtNONE)>;
def : Pat<(i16 (sext_inreg (trunc (srl (i32 Int32Regs:$s),  (i32 imm:$o))), i8)),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl (i32 Int32Regs:$s),  (i32 imm:$o)), i8),
          (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))),
          (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl (i64 Int64Regs:$s),  (i32 imm:$o)), i8),
          (BFE_S64rii Int64Regs:$s, imm:$o, 8)>;
def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)),
          (CVT_s8_s64 Int64Regs:$s, CvtNONE)>;
def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s),  (i32 imm:$o))), i8)),
          (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>;

//-----------------------------------
// Comparison instructions (setp, set)
//-----------------------------------

// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.

// `setp` instructions writing an Int1Regs predicate.
//
//   TypeStr - type suffix appended to the mnemonic.
//   RC      - register class of the register operand(s).
//   ImmCls  - operand class of the immediate operand.
//
// rr/ri/ir name the kinds of $a and $b.  No selection patterns are attached
// here (the pattern lists are empty).
let hasSideEffects = false in {
  multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr :
      NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           " \t$dst, $a, $b;"), []>;
    def ri :
      NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           " \t$dst, $a, $b;"), []>;
    def ir :
      NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           " \t$dst, $a, $b;"), []>;
  }
}

// SETP instantiations for the 16/32/64-bit integer types (b/s/u suffixes)
// and for f32/f64.
defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
// setp on f16 values held in Int16Regs; only available with useFP16Math.
def SETP_f16rr :
      NVPTXInst<(outs Int1Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
                []>, Requires<[useFP16Math]>;

// setp on f16x2 values held in Int32Regs; writes two predicates, $p and $q.
def SETP_f16x2rr :
      NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
                (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
                []>,
                Requires<[useFP16Math]>;
// setp on bf16 values held in Int16Regs; gated on hasBF16Math, PTX 78 and
// SM 90.
def SETP_bf16rr :
      NVPTXInst<(outs Int1Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.bf16 \t$dst, $a, $b;",
                []>, Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;

// setp on bf16x2 values held in Int32Regs; writes two predicates, $p and $q.
def SETP_bf16x2rr :
      NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
                (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
                "setp${cmp:base}${cmp:ftz}.bf16x2 \t$p|$q, $a, $b;",
                []>,
                Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;


// FIXME: This doesn't appear to be correct.  The "set" mnemonic has the form
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32.  Anyway these aren't used at the moment.

// `set` instructions writing an Int32Regs result.
//
//   TypeStr - type suffix appended to the mnemonic.
//   RC      - register class of the register operand(s).
//   ImmCls  - operand class of the immediate operand.
//
// rr/ri/ir name the kinds of $a and $b.  The pattern lists are empty.
let hasSideEffects = false in {
  multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr : NVPTXInst<(outs Int32Regs:$dst),
                       (ins RC:$a, RC:$b, CmpMode:$cmp),
                       !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
    def ri : NVPTXInst<(outs Int32Regs:$dst),
                       (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                       !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
    def ir : NVPTXInst<(outs Int32Regs:$dst),
                       (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                       !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
  }
}

// SET instantiations for the integer types plus f16, bf16, f32 and f64.
// f16 and bf16 values live in Int16Regs; the bf16 form additionally requires
// PTX 78 and SM 90.
defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------

// Addressing-mode complex patterns.  ADDRri / ADDRri64 produce two operands
// through the C++ selectors SelectADDRri / SelectADDRri64, are rooted at
// frameindex, and ask for the root node (SDNPWantRoot).  ADDRvar produces one
// operand through SelectDirectAddr.
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
                            [SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
                              [SDNPWantRoot]>;
def ADDRvar : ComplexPattern<iPTR, 1, "SelectDirectAddr", [], []>;

// Register + immediate memory operands (32- and 64-bit), printed by
// printMemOperand.
def MEMri : Operand<i32> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int32Regs, i32imm);
}
def MEMri64 : Operand<i64> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int64Regs, i64imm);
}

// Pointer-typed operands (iPTR and iPTRAny) printed by printOperand.
def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

// i32 code operands with dedicated printers (printLdStCode / printMmaCode).
def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}

def MmaCode : Operand<i32> {
  let PrintMethod = "printMmaCode";
}

// NVPTXISD::Wrapper: one result, one operand, both of the same pointer type.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Load a memory address into a u32 or u64 register.
// Selected from a Wrapper around a tglobaladdr.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
                         "mov.u32 \t$dst, $a;",
                         [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
                           "mov.u64 \t$dst, $a;",
                           [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack.
// $num is appended to the `__local_depot` symbol name in the asm string.
let hasSideEffects = false in {
  def MOV_DEPOT_ADDR :    NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
                                     "mov.u32 \t$d, __local_depot$num;", []>;
  def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
                                    "mov.u64 \t$d, __local_depot$num;", []>;
}


// Register-to-register moves.  None carries a selection pattern;
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp.
let IsSimpleMove=1, hasSideEffects=0 in {
  // Typed integer / predicate moves.
  def IMOV1rr :  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                           "mov.pred \t$dst, $sss;", []>;
  def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                           "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                           "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                           "mov.u64 \t$dst, $sss;", []>;
  def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
                           "mov.b128 \t$dst, $sss;", []>;

  // Untyped (.bNN) moves.
  def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                           "mov.b16 \t$dst, $sss;", []>;
  def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                           "mov.b32 \t$dst, $sss;", []>;
  def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                           "mov.b64 \t$dst, $sss;", []>;

  // Floating-point moves.
  def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                           // We have to use .b16 here as there's no mov.f16.
                           "mov.b16 \t$dst, $src;", []>;
  def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                           "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                           "mov.f64 \t$dst, $src;", []>;
}

// Integer immediate materialisation: each form selects from a bare `imm`
// of the destination's type.
def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
                        "mov.pred \t$dst, $src;",
                        [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                         "mov.u16 \t$dst, $src;",
                         [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                         "mov.u32 \t$dst, $src;",
                         [(set (i32 Int32Regs:$dst), imm:$src)]>;
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
                         "mov.u64 \t$dst, $src;",
                         [(set Int64Regs:$dst, imm:$src)]>;

// Untyped (.bNN) immediate moves; no selection patterns are attached.
def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                         "mov.b16 \t$dst, $src;", []>;
def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                         "mov.b32 \t$dst, $src;", []>;
def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
                        "mov.b64 \t$dst, $src;", []>;

// Floating-point immediate materialisation, selected from `fpimm`.
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
                         "mov.f32 \t$dst, $src;",
                         [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
                         "mov.f64 \t$dst, $src;",
                         [(set Float64Regs:$dst, fpimm:$src)]>;

// A wrapped external symbol is materialised with an immediate move of the
// matching width.
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;

//---- Copy Frame Index ----
// Selected from the ADDRri / ADDRri64 complex patterns; the memory operand is
// printed with the `add` modifier as the source of an add.u32 / add.u64.
def LEA_ADDRi :   NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
                            "add.u32 \t$dst, ${addr:add};",
                            [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
                            "add.u64 \t$dst, ${addr:add};",
                            [(set Int64Regs:$dst, ADDRri64:$addr)]>;

//-----------------------------------
// Comparison and Selection
//-----------------------------------

// Selection patterns for one integer comparison.
//
//   OpNode - comparison PatFrag to match.
//   Mode   - PatLeaf passed as the trailing comparison-mode operand of the
//            selected instruction.
//   setp_* - instructions used when the comparison yields an i1.
//   set_*  - instructions used when the comparison yields an i32.
//
// For each operand width (16, 32, 64) there are three patterns: register /
// register, register / immediate and immediate / register.
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr,
                       Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr,
                       Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr,
                       Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr,
                       Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr,
                       Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr,
                       Instruction set_64ri,
                       Instruction set_64ir> {
  // i16 -> pred
  def : Pat<(i1 (OpNode i16:$a, i16:$b)),
            (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
            (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
            (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> pred
  def : Pat<(i1 (OpNode i32:$a, i32:$b)),
            (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode (i32 Int32Regs:$a), imm:$b)),
            (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, (i32 Int32Regs:$b))),
            (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> pred
  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
            (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
            (setp_64ir imm:$a, Int64Regs:$b, Mode)>;

  // i16 -> i32
  def : Pat<(i32 (OpNode i16:$a, i16:$b)),
            (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
            (set_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
            (set_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> i32
  def : Pat<(i32 (OpNode i32:$a, i32:$b)),
            (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode (i32 Int32Regs:$a), imm:$b)),
            (set_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, (i32 Int32Regs:$b))),
            (set_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> i32
  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
            (set_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
            (set_64ir imm:$a, Int64Regs:$b, Mode)>;
}

// ISET_FORMAT bound to the signed (s16/s32/s64) SETP and SET instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_s16rr, SETP_s16ri, SETP_s16ir,
                SETP_s32rr, SETP_s32ri, SETP_s32ir,
                SETP_s64rr, SETP_s64ri, SETP_s64ir,
                SET_s16rr, SET_s16ri, SET_s16ir,
                SET_s32rr, SET_s32ri, SET_s32ir,
                SET_s64rr, SET_s64ri, SET_s64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// ISET_FORMAT bound to the unsigned (u16/u32/u64) SETP and SET instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_u16rr, SETP_u16ri, SETP_u16ir,
                SETP_u32rr, SETP_u32ri, SETP_u32ir,
                SETP_u64rr, SETP_u64ri, SETP_u64ir,
                SET_u16rr, SET_u16ri, SET_u16ir,
                SET_u32rr, SET_u32ri, SET_u32ir,
                SET_u64rr, SET_u64ri, SET_u64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// Integer comparison selection.  The set{gt,lt,ge,le,eq,ne} nodes use the
// signed instructions; the setu* nodes use the unsigned instructions with the
// same comparison mode.
defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;

// i1 compares: inequality of two predicates is their XOR, and equality is
// the negated XOR.
def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;

def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;

// Comparisons of i8 values extracted with BFE as i32.
// It's faster to do the comparison directly on the i32 extracted by BFE,
// instead of the long conversion and sign extending.
// Each signed comparison is given for register offsets (BFE_S32rri) and for
// immediate offsets (BFE_S32rii); both sides are re-extracted with bfe.s32
// and compared with SETP_s32rr.
def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
                (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
2314*0fca6ea1SDimitry Andricdef: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2315*0fca6ea1SDimitry Andric                (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2316*0fca6ea1SDimitry Andric         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
23175f757f3fSDimitry Andric
2318*0fca6ea1SDimitry Andricdef: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2319*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2320*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
2321*0fca6ea1SDimitry Andricdef: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2322*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2323*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
2324*0fca6ea1SDimitry Andricdef: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2325*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2326*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
2327*0fca6ea1SDimitry Andricdef: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2328*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2329*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
2330*0fca6ea1SDimitry Andricdef: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2331*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2332*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
2333*0fca6ea1SDimitry Andricdef: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2334*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2335*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
2336*0fca6ea1SDimitry Andricdef: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2337*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2338*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
2339*0fca6ea1SDimitry Andricdef: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2340*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2341*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
2342*0fca6ea1SDimitry Andricdef: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2343*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2344*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
2345*0fca6ea1SDimitry Andricdef: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2346*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2347*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
2348*0fca6ea1SDimitry Andricdef: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2349*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2350*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
2351*0fca6ea1SDimitry Andricdef: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2352*0fca6ea1SDimitry Andric                 (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2353*0fca6ea1SDimitry Andric         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
23545f757f3fSDimitry Andric
// i1 compare -> i32
// XOR of the two predicates is true exactly when they differ. The i32 result
// is produced by selecting between -1 and 0 on that XOR: (-1, 0) for setne
// and the swapped pair (0, -1) for seteq.
// NOTE(review): the second pattern previously also matched setne, which made
// it an unreachable duplicate of the first with the opposite result; its
// operand order shows it was meant for seteq.
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
23600b57cec5SDimitry Andric
23610b57cec5SDimitry Andric
23620b57cec5SDimitry Andric
// Selection patterns for one floating-point comparison fragment.
//
//   OpNode  - the setcc pattern fragment to match (e.g. setogt).
//   Mode    - compare-mode leaf passed to the SETP_*/SET_* instruction.
//   ModeFTZ - compare-mode leaf used instead of Mode when doF32FTZ holds.
//
// For each of f16, bf16, f32 and f64 the multiclass emits patterns producing
// an i1 predicate (SETP_*) and patterns producing an i32 (SET_*), covering
// reg/reg, reg/imm and imm/reg operand shapes.
//
// Notes:
//  * Where a ModeFTZ variant exists it is listed before its Mode twin
//    (presumably so it is preferred when both predicates hold).
//  * f16/bf16 constants are first materialised into a register with
//    LOAD_CONST_F16 / LOAD_CONST_BF16, so every f16/bf16 pattern uses the
//    register/register instruction form.
//  * f64 patterns have no FTZ variant.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  // f16 -> pred
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
        Requires<[useFP16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;

  // bf16 -> pred
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
        Requires<[hasBF16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math,doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;

  // f32 -> pred
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> pred
  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;

  // f16 -> i32
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
            (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
            (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
        Requires<[useFP16Math]>;
  // The constant is loaded into a register first, so the rr form is the one
  // whose operand kinds match (not the ir form).
  def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SET_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[useFP16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
            (SET_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[useFP16Math]>;

  // bf16 -> i32
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SET_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
            (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
            (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
        Requires<[hasBF16Math]>;
  // As for f16: the left operand is a register produced by LOAD_CONST_BF16,
  // so use the rr form.
  def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SET_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
        Requires<[hasBF16Math, doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
            (SET_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
        Requires<[hasBF16Math]>;

  // f32 -> i32
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> i32
  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
24940b57cec5SDimitry Andric
// FSET_FORMAT instantiations, one per floating-point condition-code fragment.
// Arguments: <setcc fragment, compare mode, compare mode used under doF32FTZ>.

// seto* fragments -> plain compare modes.
defm FSetOGT : FSET_FORMAT<setogt, CmpGT,  CmpGT_FTZ>;
defm FSetOLT : FSET_FORMAT<setolt, CmpLT,  CmpLT_FTZ>;
defm FSetOGE : FSET_FORMAT<setoge, CmpGE,  CmpGE_FTZ>;
defm FSetOLE : FSET_FORMAT<setole, CmpLE,  CmpLE_FTZ>;
defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ,  CmpEQ_FTZ>;
defm FSetONE : FSET_FORMAT<setone, CmpNE,  CmpNE_FTZ>;

// setu* fragments -> the "U"-suffixed compare modes.
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

// Unprefixed fragments use the same modes as the seto* group.
defm FSetGT  : FSET_FORMAT<setgt,  CmpGT,  CmpGT_FTZ>;
defm FSetLT  : FSET_FORMAT<setlt,  CmpLT,  CmpLT_FTZ>;
defm FSetGE  : FSET_FORMAT<setge,  CmpGE,  CmpGE_FTZ>;
defm FSetLE  : FSET_FORMAT<setle,  CmpLE,  CmpLE_FTZ>;
defm FSetEQ  : FSET_FORMAT<seteq,  CmpEQ,  CmpEQ_FTZ>;
defm FSetNE  : FSET_FORMAT<setne,  CmpNE,  CmpNE_FTZ>;

// seto / setuo map to the NUM / NAN compare modes.
defm FSetNUM : FSET_FORMAT<seto,   CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo,  CmpNAN, CmpNAN_FTZ>;
25180b57cec5SDimitry Andric
// Type profiles for the NVPTXISD nodes declared below.
// SDTypeProfile<results, operands, constraints>; constraint indices count
// results first, then operands.

// Parameter / return-slot declarations.
def SDTDeclareParamProfile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;

// Parameter loads (scalar, 2-element, 4-element).
def SDTLoadParamProfile
    : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile
    : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile
    : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;

// Call printing.
def SDTPrintCallProfile    : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Parameter stores (scalar, 2-element, 4-element, 32-bit).
def SDTStoreParamProfile   : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;

// Call argument list and call nodes.
def SDTCallArgProfile     : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile    : SDTypeProfile<0, 1, []>;
def SDTCallValProfile     : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile   : SDTypeProfile<1, 1, []>;

// Return-value stores (scalar, 2-element, 4-element).
def SDTStoreRetvalProfile   : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;

// Miscellaneous.
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
def SDTProxyRegProfile       : SDTypeProfile<1, 1, []>;
25420b57cec5SDimitry Andric
// SelectionDAG node declarations for the NVPTXISD opcodes.
// Each def binds the NVPTXISD enumerator name to its type profile and node
// properties.

// Parameter / return-slot declarations.
def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter loads; these carry SDNPMayLoad instead of SDNPSideEffect.
def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
    [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
    [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
    [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Call printing.
def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCall : SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCallUni : SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallUniProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter stores.
def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Call argument list and the call itself.
def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// MoveParam has no node properties.
def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;

// Return-value stores: chained with side effects, but no glue.
def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
    [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
    [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
    [SDNPHasChain, SDNPSideEffect]>;

def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
    [SDNPHasChain, SDNPSideEffect]>;
def ProxyReg : SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
26320b57cec5SDimitry Andric
// Instruction classes that read from "retval0" at byte offset $b using
// ld.param. `opstr` is the type suffix appended to the mnemonic. None of
// these carry a selection pattern.
let mayLoad = true in {
  // Scalar form: one destination register.
  class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs regclass:$dst),
                (ins i32imm:$b),
                "ld.param" # opstr # " \t$dst, [retval0+$b];",
                []>;

  // .v2 form: two destination registers.
  class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs regclass:$dst, regclass:$dst2),
                (ins i32imm:$b),
                "ld.param.v2" # opstr # " \t{{$dst, $dst2}}, [retval0+$b];",
                []>;

  // .v4 form: four destination registers.
  class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs regclass:$dst, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins i32imm:$b),
                "ld.param.v4" # opstr
                  # " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];",
                []>;
}
26520b57cec5SDimitry Andric
// Register form of a parameter load: emits "mov<opstr> $dst, retval$b" and is
// selected for (LoadParam 0, imm:$b).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins i32imm:$b),
              "mov" # opstr # " \t$dst, retval$b;",
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
26570b57cec5SDimitry Andric
// Instruction definitions that write with st.param, either to an outgoing
// parameter ("param$a" at offset $b) or to the function's return slot
// ("func_retval0" at offset $a). None of them carry a selection pattern.
let mayStore = true in {

  // Scalar parameter store. Generates "<name>_i" (value operand of IMMType)
  // and "<name>_r" (value operand of regclass); the immediate variant is
  // skipped when support_imm is false.
  multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
    foreach val = [IMMType, regclass] in
      if !or(support_imm, !isa<NVPTXRegClass>(val)) then
        def _ # !if(!isa<NVPTXRegClass>(val), "r", "i")
          : NVPTXInst<(outs),
                      (ins val:$val, i32imm:$a, i32imm:$b),
                      "st.param" # opstr # " \t[param$a+$b], $val;",
                      []>;
  }

  // Two-element parameter store. Generates all four imm/reg combinations,
  // suffixed "_ii", "_ir", "_ri", "_rr" in that order.
  multiclass StoreParamV2Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
    foreach v1 = [IMMType, regclass] in
      foreach v2 = [IMMType, regclass] in
        def _ # !if(!isa<NVPTXRegClass>(v1), "r", "i")
              # !if(!isa<NVPTXRegClass>(v2), "r", "i")
          : NVPTXInst<(outs),
                      (ins v1:$val1, v2:$val2, i32imm:$a, i32imm:$b),
                      "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
                      []>;
  }

  // Four-element parameter store. Generates all sixteen imm/reg combinations;
  // the suffix has one 'i' or 'r' per element.
  multiclass StoreParamV4Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
    foreach v1 = [IMMType, regclass] in
      foreach v2 = [IMMType, regclass] in
        foreach v3 = [IMMType, regclass] in
          foreach v4 = [IMMType, regclass] in
            def _ # !if(!isa<NVPTXRegClass>(v1), "r", "i")
                  # !if(!isa<NVPTXRegClass>(v2), "r", "i")
                  # !if(!isa<NVPTXRegClass>(v3), "r", "i")
                  # !if(!isa<NVPTXRegClass>(v4), "r", "i")
              : NVPTXInst<(outs),
                          (ins v1:$val1, v2:$val2, v3:$val3, v4:$val4,
                               i32imm:$a, i32imm:$b),
                          "st.param.v4" # opstr #
                          " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
                          []>;
  }

  // Scalar return-value store (register operand only).
  class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs),
                (ins regclass:$val, i32imm:$a),
                "st.param" # opstr # " \t[func_retval0+$a], $val;",
                []>;

  // Two-element return-value store.
  class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, i32imm:$a),
                "st.param.v2" # opstr
                  # " \t[func_retval0+$a], {{$val, $val2}};",
                []>;

  // Four-element return-value store.
  class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, regclass:$val3,
                     regclass:$val4, i32imm:$a),
                "st.param.v4" # opstr
                  # " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};",
                []>;
}
27190b57cec5SDimitry Andric
// Call-printing instructions. Everything defined inside this block has
// isCall set.
let isCall = 1 in {
  // OpcStr is the mnemonic text that starts the asm string; OpNode is the
  // SDNode matched by each def. The i32 constant in each pattern (0 through 8)
  // picks the def whose asm string lists that many retvalN names.
  multiclass CALL<string OpcStr, SDNode OpNode> {
    def PrintCallNoRetInst
      : NVPTXInst<(outs), (ins),
                  OpcStr # " ",
                  [(OpNode (i32 0))]>;
    def PrintCallRetInst1
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0), ",
                  [(OpNode (i32 1))]>;
    def PrintCallRetInst2
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1), ",
                  [(OpNode (i32 2))]>;
    def PrintCallRetInst3
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2), ",
                  [(OpNode (i32 3))]>;
    def PrintCallRetInst4
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2, retval3), ",
                  [(OpNode (i32 4))]>;
    def PrintCallRetInst5
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2, retval3, retval4), ",
                  [(OpNode (i32 5))]>;
    def PrintCallRetInst6
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2, retval3, retval4, "
                         # "retval5), ",
                  [(OpNode (i32 6))]>;
    def PrintCallRetInst7
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2, retval3, retval4, "
                         # "retval5, retval6), ",
                  [(OpNode (i32 7))]>;
    def PrintCallRetInst8
      : NVPTXInst<(outs), (ins),
                  OpcStr # " (retval0, retval1, retval2, retval3, retval4, "
                         # "retval5, retval6, retval7), ",
                  [(OpNode (i32 8))]>;
  }
}
27500b57cec5SDimitry Andric
// Regular call printers: "call" and "call.uni".
defm Call    : CALL<"call",     PrintCall>;
defm CallUni : CALL<"call.uni", PrintCallUni>;

// Convergent variants. They reuse the same mnemonics as the regular calls but
// match the PrintConvergentCall* nodes and additionally set isConvergent.
let isConvergent = 1 in
  defm ConvergentCall : CALL<"call", PrintConvergentCall>;
let isConvergent = 1 in
  defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
27600b57cec5SDimitry Andric
// Parameter loads, integer element types. The ".b8" variants use Int16Regs.
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8 : LoadParamMemInst<Int16Regs, ".b8">;
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
// Parameter loads, floating-point element types.
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
27770b57cec5SDimitry Andric
// Scalar integer parameter stores. The ".b8" variant takes its register
// operand from Int16Regs.
defm StoreParamI64 : StoreParamInst<Int64Regs, i64imm, ".b64">;
defm StoreParamI32 : StoreParamInst<Int32Regs, i32imm, ".b32">;
defm StoreParamI16 : StoreParamInst<Int16Regs, i16imm, ".b16">;
defm StoreParamI8 : StoreParamInst<Int16Regs, i8imm, ".b8">;

// ".b8" stores whose register operand is Int32Regs / Int64Regs; the fourth
// argument (support_imm) is passed as false for these.
defm StoreParamI8TruncI32
  : StoreParamInst<Int32Regs, i8imm, ".b8", /* support_imm */ false>;
defm StoreParamI8TruncI64
  : StoreParamInst<Int64Regs, i8imm, ".b8", /* support_imm */ false>;

// Two-element integer parameter stores.
defm StoreParamV2I64 : StoreParamV2Inst<Int64Regs, i64imm, ".b64">;
defm StoreParamV2I32 : StoreParamV2Inst<Int32Regs, i32imm, ".b32">;
defm StoreParamV2I16 : StoreParamV2Inst<Int16Regs, i16imm, ".b16">;
defm StoreParamV2I8 : StoreParamV2Inst<Int16Regs, i8imm, ".b8">;

// Four-element integer parameter stores.
defm StoreParamV4I32 : StoreParamV4Inst<Int32Regs, i32imm, ".b32">;
defm StoreParamV4I16 : StoreParamV4Inst<Int16Regs, i16imm, ".b16">;
defm StoreParamV4I8 : StoreParamV4Inst<Int16Regs, i8imm, ".b8">;

// Floating-point parameter stores: scalar, two-element, four-element.
defm StoreParamF32 : StoreParamInst<Float32Regs, f32imm, ".f32">;
defm StoreParamF64 : StoreParamInst<Float64Regs, f64imm, ".f64">;

defm StoreParamV2F32 : StoreParamV2Inst<Float32Regs, f32imm, ".f32">;
defm StoreParamV2F64 : StoreParamV2Inst<Float64Regs, f64imm, ".f64">;

defm StoreParamV4F32 : StoreParamV4Inst<Float32Regs, f32imm, ".f32">;
28020b57cec5SDimitry Andric
// Integer return-value stores. The ".b8" forms exist for Int16Regs as well as
// for Int32Regs / Int64Regs sources (the TruncI32 / TruncI64 defs).
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8 : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalI8TruncI32 : StoreRetvalInst<Int32Regs, ".b8">;
def StoreRetvalI8TruncI64 : StoreRetvalInst<Int64Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8 : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;

// Floating-point return-value stores.
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
28220b57cec5SDimitry Andric
// Punctuation around a call's argument list, plus the return instruction.
// CallArgEnd with constant 1 prints ");" while constant 0 prints ")".
def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
28270b57cec5SDimitry Andric
// Non-final call argument held in a register; prints "$a, ". The pattern
// names the operand by register class.
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;

// Same as CallArgInst, except the pattern names the operand by value type vt.
class CallArgInstVT<NVPTXRegClass regclass, ValueType vt>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), vt:$a)]>;
283506c3fb27SDimitry Andric
// Final call argument held in a register; prints "$a" with no trailing
// separator. The pattern names the operand by register class.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;

// Same as LastCallArgInst, except the pattern names the operand by value
// type vt.
class LastCallArgInstVT<NVPTXRegClass regclass, ValueType vt>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), vt:$a)]>;
28420b57cec5SDimitry Andric
// Register call arguments. The i32 and i16 defs use the value-type-based
// classes; the others use the register-class-based ones.
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInstVT<Int32Regs, i32>;
def CallArgI16 : CallArgInstVT<Int16Regs, i16>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

// Final-position counterparts of the defs above.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInstVT<Int32Regs, i32>;
def LastCallArgI16 : LastCallArgInstVT<Int16Regs, i16>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
28540b57cec5SDimitry Andric
// Immediate i32 call arguments (first pattern operand is constant 0).
def CallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a, ",
              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a",
              [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Call arguments printed as "param<N>" (first pattern operand is constant 1).
def CallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a, ",
              [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a",
              [(LastCallArg (i32 1), (i32 imm:$a))]>;
28640b57cec5SDimitry Andric
// Call target printers: a wrapped global address, a 32-bit register, or a
// 64-bit register. Each prints "$addr, ".
def CallVoidInst
  : NVPTXInst<(outs), (ins imem:$addr),
              "$addr, ",
              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg
  : NVPTXInst<(outs), (ins Int32Regs:$addr),
              "$addr, ",
              [(CallVoid i32:$addr)]>;
def CallVoidInstReg64
  : NVPTXInst<(outs), (ins Int64Regs:$addr),
              "$addr, ",
              [(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_<val>;" for the Prototype node.
def PrototypeInst
  : NVPTXInst<(outs), (ins i32imm:$val),
              ", prototype_$val;",
              [(Prototype (i32 imm:$val))]>;
28730b57cec5SDimitry Andric
// Return-value declarations printed as retval<num>.
// Aligned .b8 array form:
def DeclareRetMemInst
  : NVPTXInst<(outs),
              (ins i32imm:$align, i32imm:$size, i32imm:$num),
              ".param .align $align .b8 retval$num[$size];",
              [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                (i32 imm:$num))]>;
// ".param" scalar form, selected when DeclareRet's first operand is 1:
def DeclareRetScalarInst
  : NVPTXInst<(outs),
              (ins i32imm:$size, i32imm:$num),
              ".param .b$size retval$num;",
              [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
// ".reg" form, selected when DeclareRet's first operand is 2:
def DeclareRetRegInst
  : NVPTXInst<(outs),
              (ins i32imm:$size, i32imm:$num),
              ".reg .b$size retval$num;",
              [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
28860b57cec5SDimitry Andric
// Call-parameter declarations printed as param<a>.
// Aligned .b8 array form:
def DeclareParamInst
  : NVPTXInst<(outs),
              (ins i32imm:$align, i32imm:$a, i32imm:$size),
              ".param .align $align .b8 param$a[$size];",
              [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                             (i32 imm:$size))]>;
// ".param" scalar form, selected when DeclareScalarParam's last operand is 0:
def DeclareScalarParamInst
  : NVPTXInst<(outs),
              (ins i32imm:$a, i32imm:$size),
              ".param .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
// ".reg" form, selected when DeclareScalarParam's last operand is 1:
def DeclareScalarRegInst
  : NVPTXInst<(outs),
              (ins i32imm:$a, i32imm:$size),
              ".reg .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
28990b57cec5SDimitry Andric
// Register-to-register "mov<asmstr>" selected for a MoveParam node of value
// type T.
class MoveParamInst<ValueType T, NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              "mov" # asmstr # " \t$dst, $src;",
              [(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>;
29040b57cec5SDimitry Andric
// "mov<asmstr>" selected for a MoveParam whose source is a texternalsym; the
// source is carried in an operand of type srcty.
class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty, ValueType vt,
                          string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins srcty:$src),
              "mov" # asmstr # " \t$dst, $src;",
              [(set vt:$dst, (MoveParam texternalsym:$src))]>;
2910349cc55cSDimitry Andric
// MoveParam from a register, integer types.
def MoveParamI64 : MoveParamInst<i64, Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<i32, Int32Regs, ".b32">;

// MoveParam from an external symbol.
def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, i64, ".b64">;
def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, i32, ".b32">;

// The i16 form is spelled out by hand because it prints a cvt rather than a
// mov.
def MoveParamI16
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$src),
              "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ?
              [(set i16:$dst, (MoveParam i16:$src))]>;

// MoveParam from a register, floating-point types.
def MoveParamF64 : MoveParamInst<f64, Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<f32, Float32Regs, ".f32">;
29230b57cec5SDimitry Andric
// Matches a PseudoUseParam node of type vt. Its asm string is only a "//"
// comment line that names $src.
class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt>
  : NVPTXInst<(outs),
              (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam vt:$src)]>;
29280b57cec5SDimitry Andric
// One PseudoUseParam instruction per scalar register class / value type pair.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs, i64>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs, i32>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs, i16>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs, f64>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs, f32>;
29340b57cec5SDimitry Andric
// "mov.<SzStr>" register copy selected for a ProxyReg node of value type T.
class ProxyRegInst<string SzStr, ValueType T, NVPTXRegClass regclass>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              "mov." # SzStr # " \t$dst, $src;",
              [(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>;
29390b57cec5SDimitry Andric
// ProxyReg instructions for each scalar type; i1 uses the "pred" suffix.
def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
294606c3fb27SDimitry Andric
// ProxyReg of f16 / bf16 values is selected to the 16-bit integer instruction.
foreach vt = [f16, bf16] in
  def : Pat<(vt (ProxyReg vt:$src)),
            (ProxyRegI16 Int16Regs:$src)>;

// ProxyReg of these vector types is selected to the 32-bit integer
// instruction.
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in
  def : Pat<(vt (ProxyReg vt:$src)),
            (ProxyRegI32 Int32Regs:$src)>;
29540b57cec5SDimitry Andric
//
// Load / Store Handling
//
// Scalar loads into `regclass`, one def per addressing form. The LdStCode
// operands are printed through the volatile / addsp / vec / sign modifiers
// named in the asm string, followed by $fromWidth. No selection patterns are
// attached to these defs.
multiclass LD<NVPTXRegClass regclass> {
  // [$addr] with an imem address.
  def _avar
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr];",
                []>;
  // [$addr] with an Int32Regs address.
  def _areg
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr];",
                []>;
  // [$addr] with an Int64Regs address.
  def _areg_64
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr];",
                []>;
  // [$addr+$offset] with an Int32Regs base.
  def _ari
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an Int64Regs base.
  def _ari_64
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an imem base.
  def _asi
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t$dst, [$addr+$offset];",
                []>;
}
29960b57cec5SDimitry Andric
// Scalar load instructions, all marked mayLoad with hasSideEffects cleared.
// The i8 and i16 families both use Int16Regs.
let mayLoad = 1, hasSideEffects = 0 in {
  defm LD_i8 : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
30050b57cec5SDimitry Andric
// Scalar stores of a `regclass` value, one def per addressing form. The
// LdStCode operands are printed through the volatile / addsp / vec / sign
// modifiers named in the asm string, followed by $toWidth. No selection
// patterns are attached to these defs.
multiclass ST<NVPTXRegClass regclass> {
  // [$addr] with an imem address.
  def _avar
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     imem:$addr),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr], $src;",
                []>;
  // [$addr] with an Int32Regs address.
  def _areg
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int32Regs:$addr),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr], $src;",
                []>;
  // [$addr] with an Int64Regs address.
  def _areg_64
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int64Regs:$addr),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr], $src;",
                []>;
  // [$addr+$offset] with an Int32Regs base.
  def _ari
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int32Regs:$addr, i32imm:$offset),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr+$offset], $src;",
                []>;
  // [$addr+$offset] with an Int64Regs base.
  def _ari_64
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int64Regs:$addr, i32imm:$offset),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr+$offset], $src;",
                []>;
  // [$addr+$offset] with an imem base.
  def _asi
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     imem:$addr, i32imm:$offset),
                "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  # " \t[$addr+$offset], $src;",
                []>;
}
30440b57cec5SDimitry Andric
// Scalar store instructions, all marked mayStore with hasSideEffects cleared.
// The i8 and i16 families both use Int16Regs.
let mayStore = 1, hasSideEffects = 0 in {
  defm ST_i8 : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
30530b57cec5SDimitry Andric
// The following is used only in and after vector elementizations.  Vector
// elementization happens at the machine instruction level, so the following
// instructions never appear in the DAG.
//
// Vector loads producing two (_v2_*) or four (_v4_*) `regclass` results, one
// def per addressing form. Operand printing follows the same asm-string
// modifiers as the scalar loads.
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two-element forms ----
  // [$addr] with an imem address.
  def _v2_avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr];",
                []>;
  // [$addr] with an Int32Regs address.
  def _v2_areg
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr];",
                []>;
  // [$addr] with an Int64Regs address.
  def _v2_areg_64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr];",
                []>;
  // [$addr+$offset] with an Int32Regs base.
  def _v2_ari
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an Int64Regs base.
  def _v2_ari_64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an imem base.
  def _v2_asi
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2}}, [$addr+$offset];",
                []>;

  // ---- four-element forms ----
  // [$addr] with an imem address.
  def _v4_avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                []>;
  // [$addr] with an Int32Regs address.
  def _v4_areg
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                []>;
  // [$addr] with an Int64Regs address.
  def _v4_areg_64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                []>;
  // [$addr+$offset] with an Int32Regs base.
  def _v4_ari
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an Int64Regs base.
  def _v4_ari_64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                []>;
  // [$addr+$offset] with an imem base.
  def _v4_asi
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                     i32imm:$offset),
                "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  # "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                []>;
}
// Instantiate the LD_VEC variants once per element type. Note that both the
// i8 and the i16 instantiation are built on Int16Regs.
let mayLoad = true, hasSideEffects = false in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
31390b57cec5SDimitry Andric
// Vector store instruction variants for one element register class.
//
// Every def lists the values to store first (two for _v2_*, four for _v4_*),
// then the LdStCode modifier operands ($isVol, $addsp, $Vec, $Sign), the
// $fromWidth immediate, and finally the address operand(s). The name suffix
// tells which address operands the def takes:
//   _avar              imem:$addr
//   _areg  / _areg_64  Int32Regs:$addr / Int64Regs:$addr
//   _ari   / _ari_64   Int32Regs:$addr / Int64Regs:$addr, plus i32imm:$offset
//   _asi               imem:$addr, plus i32imm:$offset
// None of the defs carries a selection pattern.
multiclass ST_VEC<NVPTXRegClass regclass> {
  // Pieces of the assembly string shared by the variants below. The opcode
  // fragment deliberately ends in a space.
  defvar StAsmHead =
    "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth ";
  defvar AddrDirect  = "\t[$addr], ";
  defvar AddrPlusOff = "\t[$addr+$offset], ";
  defvar Vals2 = "{{$src1, $src2}};";
  defvar Vals4 = "{{$src1, $src2, $src3, $src4}};";

  // ---- two stored values ----
  def _v2_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals2), []>;
  def _v2_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals2), []>;
  def _v2_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals2), []>;
  def _v2_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals2), []>;
  def _v2_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals2), []>;
  def _v2_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals2), []>;

  // ---- four stored values ----
  def _v4_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals4), []>;
  def _v4_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals4), []>;
  def _v4_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(StAsmHead, AddrDirect, Vals4), []>;
  def _v4_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals4), []>;
  def _v4_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals4), []>;
  def _v4_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(StAsmHead, AddrPlusOff, Vals4), []>;
}
32230b57cec5SDimitry Andric
// Instantiate the ST_VEC variants once per element type. As with the vector
// loads, the i8 and i16 instantiations both use Int16Regs.
let mayStore = true, hasSideEffects = false in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}
32320b57cec5SDimitry Andric
32330b57cec5SDimitry Andric//---- Conversion ----
32340b57cec5SDimitry Andric
// Register-to-register bit reinterpretation from value type TIn to value type
// TOut, printed as "mov.b<SzStr>". Unless overridden, the input and output
// register classes are the ones ValueToRegClass reports for TIn and TOut.
// The attached pattern selects the instruction for (bitconvert TIn) -> TOut.
class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
                   NVPTXRegClass regclassIn = ValueToRegClass<TIn>.ret,
                   NVPTXRegClass regclassOut = ValueToRegClass<TOut>.ret>
  : NVPTXInst<(outs regclassOut:$d),
              (ins regclassIn:$a),
              "mov.b" # SzStr # " \t$d, $a;",
              [(set (TOut regclassOut:$d),
                    (bitconvert (TIn regclassIn:$a)))]>;
32410b57cec5SDimitry Andric
// Scalar bitcasts between integer and floating-point values of equal width.
// Integer -> floating point:
def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
// Floating point -> integer:
def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
324606c3fb27SDimitry Andric
// Bitcasts between f32 and each of the listed vector types reuse the scalar
// i32 <-> f32 move instructions; the vector operand is matched in Int32Regs.
foreach VecTy = [v2f16, v2bf16, v2i16, v4i8] in {
  def : Pat<(VecTy (bitconvert (f32 Float32Regs:$a))),
            (BITCONVERT_32_F2I Float32Regs:$a)>;
  def : Pat<(f32 (bitconvert (VecTy Int32Regs:$a))),
            (BITCONVERT_32_I2F Int32Regs:$a)>;
}
// Bitcasts between i16 and the 16-bit floating-point types. Both sides are
// matched in Int16Regs: a constant source becomes an immediate move, and a
// register source (in either direction) goes through ProxyRegI16.
foreach HalfTy = [f16, bf16] in {
  def : Pat<(HalfTy (bitconvert (i16 UInt16Const:$a))),
            (IMOVB16ri UInt16Const:$a)>;
  def : Pat<(HalfTy (bitconvert (i16 Int16Regs:$a))),
            (ProxyRegI16 Int16Regs:$a)>;
  def : Pat<(i16 (bitconvert (HalfTy Int16Regs:$a))),
            (ProxyRegI16 Int16Regs:$a)>;
}
32610b57cec5SDimitry Andric
// Bitcasts among the types matched in Int32Regs below.
//  * From an i32 constant: materialise the immediate with IMOVB32ri.
//  * Between two different types of the list: pass the register through
//    ProxyRegI32. The same-type combination is skipped by the !ne guard.
foreach DstTy = [v2f16, v2bf16, v2i16, v4i8, i32] in {
  def : Pat<(DstTy (bitconvert (i32 UInt32Const:$a))),
            (IMOVB32ri UInt32Const:$a)>;
  foreach SrcTy = [v2f16, v2bf16, v2i16, v4i8, i32] in {
    if !ne(DstTy, SrcTy) then {
      def : Pat<(DstTy (bitconvert (SrcTy Int32Regs:$a))),
                (ProxyRegI32 Int32Regs:$a)>;
    }
  }
}
32725f757f3fSDimitry Andric
32730b57cec5SDimitry Andric// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
32740b57cec5SDimitry Andric// we cannot specify floating-point literals in isel patterns.  Therefore, we
32750b57cec5SDimitry Andric// use an integer selp to select either 1 or 0 and then cvt to floating-point.
32760b57cec5SDimitry Andric
// Integer -> f16. Every conversion is emitted with the CvtRN mode operand.

// i1 source: SELP_u32ii first turns the predicate into the 32-bit integer
// 1 or 0, which is then converted.
def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
          (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f16 (uint_to_fp Int1Regs:$a)),
          (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

// i16 / i32 / i64 source: use the signed or unsigned cvt of the same width.
foreach Bits = ["16", "32", "64"] in {
  defvar SrcRC = !cast<NVPTXRegClass>("Int" # Bits # "Regs");
  def : Pat<(f16 (sint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f16_s" # Bits) SrcRC:$a, CvtRN)>;
  def : Pat<(f16 (uint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f16_u" # Bits) SrcRC:$a, CvtRN)>;
}

32960b57cec5SDimitry Andric
// Integer -> bf16. Every conversion is emitted with the CvtRN mode operand,
// and every pattern in this group is predicated on hasPTX<78> and hasSM<90>.
let Predicates = [hasPTX<78>, hasSM<90>] in {
  // i1 source: SELP_u32ii first turns the predicate into the 32-bit integer
  // 1 or 0, which is then converted.
  def : Pat<(bf16 (sint_to_fp Int1Regs:$a)),
            (CVT_bf16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
  def : Pat<(bf16 (uint_to_fp Int1Regs:$a)),
            (CVT_bf16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

  // i16 / i32 / i64 source: use the signed or unsigned cvt of the same width.
  foreach Bits = ["16", "32", "64"] in {
    defvar SrcRC = !cast<NVPTXRegClass>("Int" # Bits # "Regs");
    def : Pat<(bf16 (sint_to_fp SrcRC:$a)),
              (!cast<NVPTXInst>("CVT_bf16_s" # Bits) SrcRC:$a, CvtRN)>;
    def : Pat<(bf16 (uint_to_fp SrcRC:$a)),
              (!cast<NVPTXInst>("CVT_bf16_u" # Bits) SrcRC:$a, CvtRN)>;
  }
}

331606c3fb27SDimitry Andric
// Integer -> f32. Every conversion is emitted with the CvtRN mode operand.

// i1 source: SELP_u32ii first turns the predicate into the 32-bit integer
// 1 or 0, which is then converted.
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

// i16 / i32 / i64 source: use the signed or unsigned cvt of the same width.
foreach Bits = ["16", "32", "64"] in {
  defvar SrcRC = !cast<NVPTXRegClass>("Int" # Bits # "Regs");
  def : Pat<(f32 (sint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f32_s" # Bits) SrcRC:$a, CvtRN)>;
  def : Pat<(f32 (uint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f32_u" # Bits) SrcRC:$a, CvtRN)>;
}

33360b57cec5SDimitry Andric
// Integer -> f64. Every conversion is emitted with the CvtRN mode operand.

// i1 source: SELP_u32ii first turns the predicate into the 32-bit integer
// 1 or 0, which is then converted.
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

// i16 / i32 / i64 source: use the signed or unsigned cvt of the same width.
foreach Bits = ["16", "32", "64"] in {
  defvar SrcRC = !cast<NVPTXRegClass>("Int" # Bits # "Regs");
  def : Pat<(f64 (sint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f64_s" # Bits) SrcRC:$a, CvtRN)>;
  def : Pat<(f64 (uint_to_fp SrcRC:$a)),
            (!cast<NVPTXInst>("CVT_f64_u" # Bits) SrcRC:$a, CvtRN)>;
}

33560b57cec5SDimitry Andric
33570b57cec5SDimitry Andric
// f16 -> integer. The f16 value is matched in Int16Regs and every cvt is
// emitted with the CvtRZI mode operand.
//
// The i1 results do not use cvt: they are the predicate produced by comparing
// the 16-bit register with 0 using CmpEQ.
// NOTE(review): that predicate is set when the bits are equal to zero --
// confirm this is the intended sense for fp -> i1.

// Signed results.
def : Pat<(i1  (fp_to_sint (f16 Int16Regs:$a))),
          (SETP_b16ri  Int16Regs:$a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))),
          (CVT_s64_f16 Int16Regs:$a, CvtRZI)>;

// Unsigned results.
def : Pat<(i1  (fp_to_uint (f16 Int16Regs:$a))),
          (SETP_b16ri  Int16Regs:$a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u16_f16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u32_f16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))),
          (CVT_u64_f16 Int16Regs:$a, CvtRZI)>;

33770b57cec5SDimitry Andric
// bf16 -> integer. The bf16 value is matched in Int16Regs and every cvt is
// emitted with the CvtRZI mode operand.
//
// The i1 results do not use cvt: they are the predicate produced by comparing
// the 16-bit register with 0 using CmpEQ.
// NOTE(review): that predicate is set when the bits are equal to zero --
// confirm this is the intended sense for fp -> i1.

// Signed results.
def : Pat<(i1  (fp_to_sint (bf16 Int16Regs:$a))),
          (SETP_b16ri   Int16Regs:$a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s16_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i32 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s32_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
def : Pat<(i64 (fp_to_sint (bf16 Int16Regs:$a))),
          (CVT_s64_bf16 Int16Regs:$a, CvtRZI)>;

// Unsigned results.
def : Pat<(i1  (fp_to_uint (bf16 Int16Regs:$a))),
          (SETP_b16ri   Int16Regs:$a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u16_bf16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u32_bf16 Int16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint (bf16 Int16Regs:$a))),
          (CVT_u64_bf16 Int16Regs:$a, CvtRZI)>;

// f32 -> integer.
//
// For i16 / i32 / i64 results there are two patterns per conversion: one that
// requires doF32FTZ and passes CvtRZI_FTZ, followed by an unconditional one
// that passes CvtRZI. The predicated pattern is deliberately listed first.
//
// The i1 results do not use cvt: the f32 bits are moved to an integer
// register and compared with 0 using CmpEQ.
// NOTE(review): that predicate is set when the bits are equal to zero --
// confirm this is the intended sense for fp -> i1.

// Signed results.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
foreach Bits = ["16", "32", "64"] in {
  defvar DstTy = !cast<ValueType>("i" # Bits);
  defvar Cvt   = !cast<NVPTXInst>("CVT_s" # Bits # "_f32");
  def : Pat<(DstTy (fp_to_sint Float32Regs:$a)),
            (Cvt Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
  def : Pat<(DstTy (fp_to_sint Float32Regs:$a)),
            (Cvt Float32Regs:$a, CvtRZI)>;
}

// Unsigned results.
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
foreach Bits = ["16", "32", "64"] in {
  defvar DstTy = !cast<ValueType>("i" # Bits);
  defvar Cvt   = !cast<NVPTXInst>("CVT_u" # Bits # "_f32");
  def : Pat<(DstTy (fp_to_uint Float32Regs:$a)),
            (Cvt Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
  def : Pat<(DstTy (fp_to_uint Float32Regs:$a)),
            (Cvt Float32Regs:$a, CvtRZI)>;
}

34280b57cec5SDimitry Andric
// f64 -> integer. Every cvt is emitted with the CvtRZI mode operand.
//
// The i1 results do not use cvt: the f64 bits are moved to an integer
// register and compared with 0 using CmpEQ.
// NOTE(review): that predicate is set when the bits are equal to zero --
// confirm this is the intended sense for fp -> i1.

// Signed results.
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
foreach Bits = ["16", "32", "64"] in
  def : Pat<(!cast<ValueType>("i" # Bits) (fp_to_sint Float64Regs:$a)),
            (!cast<NVPTXInst>("CVT_s" # Bits # "_f64") Float64Regs:$a, CvtRZI)>;

// Unsigned results.
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
foreach Bits = ["16", "32", "64"] in
  def : Pat<(!cast<ValueType>("i" # Bits) (fp_to_uint Float64Regs:$a)),
            (!cast<NVPTXInst>("CVT_u" # Bits # "_f64") Float64Regs:$a, CvtRZI)>;

34480b57cec5SDimitry Andric
// Extending an i1 predicate to i16 / i32 / i64 is a select between two
// immediates, with the predicate as the selector:
//   sext   -> -1 or 0  (signed selp)
//   zext   ->  1 or 0  (unsigned selp)
//   anyext -> -1 or 0  (unsigned selp)
foreach Bits = ["16", "32", "64"] in {
  defvar DstTy = !cast<ValueType>("i" # Bits);
  def : Pat<(DstTy (sext Int1Regs:$a)),
            (!cast<NVPTXInst>("SELP_s" # Bits # "ii") -1, 0, Int1Regs:$a)>;
  def : Pat<(DstTy (zext Int1Regs:$a)),
            (!cast<NVPTXInst>("SELP_u" # Bits # "ii") 1, 0, Int1Regs:$a)>;
  def : Pat<(DstTy (anyext Int1Regs:$a)),
            (!cast<NVPTXInst>("SELP_u" # Bits # "ii") -1, 0, Int1Regs:$a)>;
}

34720b57cec5SDimitry Andric
// Widening i16 and i32 values. All of these are plain cvt instructions with
// the CvtNONE mode operand: sext picks the signed cvt, while zext and anyext
// both pick the unsigned cvt.

// i16 -> i32
def : Pat<(i32 (sext   Int16Regs:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i32 (zext   Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i32 (anyext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;

// i16 -> i64
def : Pat<(i64 (sext   Int16Regs:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext   Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// i32 -> i64
def : Pat<(i64 (sext   Int32Regs:$a)), (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext   Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

35020b57cec5SDimitry Andric
35030b57cec5SDimitry Andric
// Integer truncation.

// To i32 / i16: an unsigned cvt with the CvtNONE mode operand.
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;

// To i1: mask the source with 1 and test whether the result equals 1.
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;

35210b57cec5SDimitry Andric
// sext_inreg: each (register width, extended-from type) pair is handled by
// the CVT_INREG instruction named after that pair.
// 16-bit register
def : Pat<(sext_inreg Int16Regs:$a, i8),
          (CVT_INREG_s16_s8 Int16Regs:$a)>;
// 32-bit register
def : Pat<(sext_inreg Int32Regs:$a, i8),
          (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16),
          (CVT_INREG_s32_s16 Int32Regs:$a)>;
// 64-bit register
def : Pat<(sext_inreg Int64Regs:$a, i8),
          (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16),
          (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32),
          (CVT_INREG_s64_s32 Int64Regs:$a)>;

35290b57cec5SDimitry Andric
35300b57cec5SDimitry Andric
// select whose condition is an i32 value rather than an i1 predicate.
// In every pattern the condition is first reduced to a predicate by masking
// $pred with 1 and comparing the result against 1 (CmpEQ); that predicate
// then drives the selp instruction for the operand type.

// Integer operands.
def : Pat<(select (i32 Int32Regs:$pred), i16:$a, i16:$b),
          (SELP_b16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), i32:$a, i32:$b),
          (SELP_b32rr
            Int32Regs:$a, Int32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), Int64Regs:$a, Int64Regs:$b),
          (SELP_b64rr
            Int64Regs:$a, Int64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;

// 16-bit floating-point operands, matched in Int16Regs.
def : Pat<(select (i32 Int32Regs:$pred),
                  (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
          (SELP_f16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred),
                  (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
          (SELP_bf16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;

// f32 / f64 operands.
def : Pat<(select (i32 Int32Regs:$pred), Float32Regs:$a, Float32Regs:$b),
          (SELP_f32rr
            Float32Regs:$a, Float32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select (i32 Int32Regs:$pred), Float64Regs:$a, Float64Regs:$b),
          (SELP_f64rr
            Float64Regs:$a, Float64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;

35530b57cec5SDimitry Andric
35540b57cec5SDimitry Andric
3555e8d8bef9SDimitry Andriclet hasSideEffects = false in {
35560b57cec5SDimitry Andric  // pack a set of smaller int registers to a larger int register
35570b57cec5SDimitry Andric  def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
35580b57cec5SDimitry Andric                             (ins Int16Regs:$s1, Int16Regs:$s2,
35590b57cec5SDimitry Andric                                  Int16Regs:$s3, Int16Regs:$s4),
35600b57cec5SDimitry Andric                             "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>;
35610b57cec5SDimitry Andric  def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
35620b57cec5SDimitry Andric                             (ins Int16Regs:$s1, Int16Regs:$s2),
35630b57cec5SDimitry Andric                             "mov.b32 \t$d, {{$s1, $s2}};", []>;
35640b57cec5SDimitry Andric  def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
35650b57cec5SDimitry Andric                             (ins Int32Regs:$s1, Int32Regs:$s2),
35660b57cec5SDimitry Andric                             "mov.b64 \t$d, {{$s1, $s2}};", []>;
3567*0fca6ea1SDimitry Andric  def V2I64toI128 : NVPTXInst<(outs Int128Regs:$d),
3568*0fca6ea1SDimitry Andric                              (ins Int64Regs:$s1, Int64Regs:$s2),
3569*0fca6ea1SDimitry Andric                              "mov.b128 \t$d, {{$s1, $s2}};", []>;
35700b57cec5SDimitry Andric  def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
35710b57cec5SDimitry Andric                             (ins Float32Regs:$s1, Float32Regs:$s2),
35720b57cec5SDimitry Andric                             "mov.b64 \t$d, {{$s1, $s2}};", []>;
35730b57cec5SDimitry Andric
35740b57cec5SDimitry Andric  // unpack a larger int register to a set of smaller int registers
  // Each def is a mov whose destination is a brace-enclosed register list and
  // whose source is the single wide register $s. All pattern lists are empty
  // ([]), so these defs are only reached through explicit references.
35750b57cec5SDimitry Andric  def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
35760b57cec5SDimitry Andric                                   Int16Regs:$d3, Int16Regs:$d4),
35770b57cec5SDimitry Andric                             (ins Int64Regs:$s),
35780b57cec5SDimitry Andric                             "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>;
35790b57cec5SDimitry Andric  def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
35800b57cec5SDimitry Andric                             (ins Int32Regs:$s),
35810b57cec5SDimitry Andric                             "mov.b32 \t{{$d1, $d2}}, $s;", []>;
35820b57cec5SDimitry Andric  def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
35830b57cec5SDimitry Andric                             (ins Int64Regs:$s),
35840b57cec5SDimitry Andric                             "mov.b64 \t{{$d1, $d2}}, $s;", []>;
3585*0fca6ea1SDimitry Andric  def I128toV2I64: NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2),
3586*0fca6ea1SDimitry Andric                              (ins Int128Regs:$s),
3587*0fca6ea1SDimitry Andric                              "mov.b128 \t{{$d1, $d2}}, $s;", []>;
  // Float counterpart of I64toV2I32: same mov.b64 form over float classes.
35880b57cec5SDimitry Andric  def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
35890b57cec5SDimitry Andric                             (ins Float64Regs:$s),
35900b57cec5SDimitry Andric                             "mov.b64 \t{{$d1, $d2}}, $s;", []>;
35910b57cec5SDimitry Andric
// Extract a single half of a 32-bit or 64-bit register. The asm string
// declares a scratch register `tmp` (.b16 or .b32) and issues a mov with a
// two-element destination list: the wanted half goes to the named output
// ($high or $low) and the other half goes to `tmp`. Going by the operand
// names, the first list element receives the low half and the second the high
// half. Pattern lists are empty; Pat<> records elsewhere in this file select
// these defs.
359206c3fb27SDimitry Andric  def I32toI16H  : NVPTXInst<(outs Int16Regs:$high),
359306c3fb27SDimitry Andric                             (ins Int32Regs:$s),
359406c3fb27SDimitry Andric                             "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
35950b57cec5SDimitry Andric                             []>;
359606c3fb27SDimitry Andric  def I32toI16L  : NVPTXInst<(outs Int16Regs:$low),
359706c3fb27SDimitry Andric                             (ins Int32Regs:$s),
359806c3fb27SDimitry Andric                             "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
35990b57cec5SDimitry Andric                             []>;
360006c3fb27SDimitry Andric  def I64toI32H  : NVPTXInst<(outs Int32Regs:$high),
360106c3fb27SDimitry Andric                             (ins Int64Regs:$s),
360206c3fb27SDimitry Andric                             "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
36030b57cec5SDimitry Andric                             []>;
3604*0fca6ea1SDimitry Andric  def I64toI32L  : NVPTXInst<(outs Int32Regs:$low),
3605*0fca6ea1SDimitry Andric                             (ins Int64Regs:$s),
3606*0fca6ea1SDimitry Andric                             "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
3607*0fca6ea1SDimitry Andric                             []>;
3608*0fca6ea1SDimitry Andric
36090b57cec5SDimitry Andric}
36100b57cec5SDimitry Andric
361106c3fb27SDimitry Andric// Using partial vectorized move produces better SASS code for extraction of
361206c3fb27SDimitry Andric// upper/lower parts of an integer.
// Both the logical (srl) and the arithmetic (sra) right shift by half the
// width are matched. After truncation to the half-width type only the
// original upper half survives, so both forms select the same *H instruction.
361306c3fb27SDimitry Andricdef : Pat<(i16 (trunc (srl Int32Regs:$s, (i32 16)))),
361406c3fb27SDimitry Andric          (I32toI16H Int32Regs:$s)>;
361506c3fb27SDimitry Andricdef : Pat<(i16 (trunc (sra Int32Regs:$s, (i32 16)))),
361606c3fb27SDimitry Andric          (I32toI16H Int32Regs:$s)>;
// 64-bit source: the shift amount operand is still typed i32.
361706c3fb27SDimitry Andricdef : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
361806c3fb27SDimitry Andric          (I64toI32H Int64Regs:$s)>;
361906c3fb27SDimitry Andricdef : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
362006c3fb27SDimitry Andric          (I64toI32H Int64Regs:$s)>;
362106c3fb27SDimitry Andric
// Patterns for 2 x 16-bit vectors, which are held in Int32Regs.
//
// Sign-extending element 0 of a v2i16 is selected as CVT_INREG_s32_s16 applied
// to the whole 32-bit register.
36225f757f3fSDimitry Andricdef: Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))),
36235f757f3fSDimitry Andric         (CVT_INREG_s32_s16 Int32Regs:$src)>;
36245f757f3fSDimitry Andric
// For every 2 x 16-bit vector type: element 0 is read with I32toI16L and
// element 1 with I32toI16H.
36255f757f3fSDimitry Andricforeach vt = [v2f16, v2bf16, v2i16] in {
36265f757f3fSDimitry Andricdef : Pat<(extractelt (vt Int32Regs:$src), 0),
362706c3fb27SDimitry Andric          (I32toI16L Int32Regs:$src)>;
36285f757f3fSDimitry Andricdef : Pat<(extractelt (vt Int32Regs:$src), 1),
362906c3fb27SDimitry Andric          (I32toI16H Int32Regs:$src)>;
36305f757f3fSDimitry Andric}
// build_vector of two 16-bit scalars is selected as V2I16toI32 for all three
// element types (f16, bf16, i16); the scalars live in Int16Regs in each case.
363106c3fb27SDimitry Andricdef : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
363206c3fb27SDimitry Andric          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
363306c3fb27SDimitry Andricdef : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
363406c3fb27SDimitry Andric          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
36355f757f3fSDimitry Andricdef : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))),
36365f757f3fSDimitry Andric          (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
36375f757f3fSDimitry Andric
// scalar_to_vector of an i16 is selected as CVT_u32_u16 with CvtNONE.
36385f757f3fSDimitry Andricdef: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))),
36395f757f3fSDimitry Andric         (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
364006c3fb27SDimitry Andric
36410b57cec5SDimitry Andric// Count leading zeros
// Both forms write an Int32Regs destination, including CLZr64 whose input is
// an Int64Regs register. No patterns are attached here; the Pat<> records
// that follow select them.
3642e8d8bef9SDimitry Andriclet hasSideEffects = false in {
36430b57cec5SDimitry Andric  def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
36440b57cec5SDimitry Andric                         "clz.b32 \t$d, $a;", []>;
36450b57cec5SDimitry Andric  def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
36460b57cec5SDimitry Andric                         "clz.b64 \t$d, $a;", []>;
36470b57cec5SDimitry Andric}
36480b57cec5SDimitry Andric
36490b57cec5SDimitry Andric// 32-bit has a direct PTX instruction
36505f757f3fSDimitry Andricdef : Pat<(i32 (ctlz (i32 Int32Regs:$a))), (CLZr32 Int32Regs:$a)>;
36510b57cec5SDimitry Andric
36520b57cec5SDimitry Andric// The return type of the ctlz ISD node is the same as its input, but the PTX
36530b57cec5SDimitry Andric// clz instruction always returns a 32-bit value.  For ctlz.i64, convert the
36540b57cec5SDimitry Andric// ptx value to 64 bits to match the ISD node's semantics, unless we know we're
36550b57cec5SDimitry Andric// truncating back down to 32 bits.
36568bcb0991SDimitry Andricdef : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
// trunc(ctlz.i64) to i32: use the 32-bit CLZr64 result directly, no widening.
3657*0fca6ea1SDimitry Andricdef : Pat<(i32 (trunc (i64 (ctlz Int64Regs:$a)))), (CLZr64 Int64Regs:$a)>;
36580b57cec5SDimitry Andric
36590b57cec5SDimitry Andric// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the
36600b57cec5SDimitry Andric// result back to 16-bits if necessary.  We also need to subtract 16 because
36610b57cec5SDimitry Andric// the high-order 16 zeros were counted.
36620b57cec5SDimitry Andric//
36630b57cec5SDimitry Andric// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could
36640b57cec5SDimitry Andric// use to save one SASS instruction (on sm_35 anyway):
36650b57cec5SDimitry Andric//
36660b57cec5SDimitry Andric//   mov.b32 $tmp, {0xffff, $a}
36670b57cec5SDimitry Andric//   clz.b32 $result, $tmp
36680b57cec5SDimitry Andric//
36690b57cec5SDimitry Andric// That is, instead of zero-extending the input to 32 bits, we'd "one-extend"
36700b57cec5SDimitry Andric// and then ctlz that value.  This way we don't have to subtract 16 from the
36710b57cec5SDimitry Andric// result.  Unfortunately today we don't have a way to generate
36720b57cec5SDimitry Andric// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
36738bcb0991SDimitry Andricdef : Pat<(i16 (ctlz Int16Regs:$a)),
36740b57cec5SDimitry Andric          (SUBi16ri (CVT_u16_u32
36750b57cec5SDimitry Andric           (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
// When the i16 result is immediately zero-extended to i32, subtract in 32 bits
// and skip the narrowing CVT_u16_u32.
36768bcb0991SDimitry Andricdef : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
36770b57cec5SDimitry Andric          (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
36780b57cec5SDimitry Andric
36790b57cec5SDimitry Andric// Population count
// Both forms write an Int32Regs destination, including POPCr64 whose input is
// an Int64Regs register. No patterns are attached here; the Pat<> records
// that follow select them.
3680e8d8bef9SDimitry Andriclet hasSideEffects = false in {
36810b57cec5SDimitry Andric  def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
36820b57cec5SDimitry Andric                          "popc.b32 \t$d, $a;", []>;
36830b57cec5SDimitry Andric  def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
36840b57cec5SDimitry Andric                          "popc.b64 \t$d, $a;", []>;
36850b57cec5SDimitry Andric}
36860b57cec5SDimitry Andric
36870b57cec5SDimitry Andric// 32-bit has a direct PTX instruction
36885f757f3fSDimitry Andricdef : Pat<(i32 (ctpop (i32 Int32Regs:$a))), (POPCr32 Int32Regs:$a)>;
36890b57cec5SDimitry Andric
36900b57cec5SDimitry Andric// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
36910b57cec5SDimitry Andric// to match the LLVM semantics.  Just as with ctlz.i64, we provide a second
36920b57cec5SDimitry Andric// pattern that avoids the type conversion if we're truncating the result to
36930b57cec5SDimitry Andric// i32 anyway.
36940b57cec5SDimitry Andricdef : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
3695*0fca6ea1SDimitry Andricdef : Pat<(i32 (trunc (i64 (ctpop Int64Regs:$a)))), (POPCr64 Int64Regs:$a)>;
36960b57cec5SDimitry Andric
36970b57cec5SDimitry Andric// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits.
36980b57cec5SDimitry Andric// If we know that we're storing into an i32, we can avoid the final trunc.
// Unlike the 16-bit ctlz patterns, no constant is subtracted here.
36990b57cec5SDimitry Andricdef : Pat<(ctpop Int16Regs:$a),
37000b57cec5SDimitry Andric          (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
37018bcb0991SDimitry Andricdef : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
37020b57cec5SDimitry Andric          (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
37030b57cec5SDimitry Andric
// fpround patterns. All of them use a round-to-nearest conversion mode
// (CvtRN or CvtRN_FTZ). The bf16 destinations are gated on PTX/SM versions
// through Requires<>; the f16 destinations carry no such gate.
37040b57cec5SDimitry Andric// fpround f32 -> f16
37050b57cec5SDimitry Andricdef : Pat<(f16 (fpround Float32Regs:$a)),
37060b57cec5SDimitry Andric          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
37070b57cec5SDimitry Andric
370806c3fb27SDimitry Andric// fpround f32 -> bf16
370906c3fb27SDimitry Andricdef : Pat<(bf16 (fpround Float32Regs:$a)),
3710*0fca6ea1SDimitry Andric          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>]>;
371106c3fb27SDimitry Andric
37120b57cec5SDimitry Andric// fpround f64 -> f16
37130b57cec5SDimitry Andricdef : Pat<(f16 (fpround Float64Regs:$a)),
37140b57cec5SDimitry Andric          (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
37150b57cec5SDimitry Andric
371606c3fb27SDimitry Andric// fpround f64 -> bf16
371706c3fb27SDimitry Andricdef : Pat<(bf16 (fpround Float64Regs:$a)),
3718*0fca6ea1SDimitry Andric          (CVT_bf16_f64 Float64Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
37190b57cec5SDimitry Andric// fpround f64 -> f32
// The FTZ variant (guarded by doF32FTZ) is listed before the unguarded one.
37200b57cec5SDimitry Andricdef : Pat<(f32 (fpround Float64Regs:$a)),
37210b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
37220b57cec5SDimitry Andricdef : Pat<(f32 (fpround Float64Regs:$a)),
37230b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
37240b57cec5SDimitry Andric
37250b57cec5SDimitry Andric// fpextend f16 -> f32
// The f16 source lives in Int16Regs. The FTZ variant (guarded by doF32FTZ) is
// listed before the unguarded CvtNONE variant.
372606c3fb27SDimitry Andricdef : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
372706c3fb27SDimitry Andric          (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
372806c3fb27SDimitry Andricdef : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
372906c3fb27SDimitry Andric          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
373006c3fb27SDimitry Andric// fpextend bf16 -> f32
// Both variants select CVT_f32_bf16, so both carry the same PTX/SM gate. The
// FTZ variant additionally requires doF32FTZ and is listed first. Without the
// gate, the FTZ variant could match on a subtarget for which the non-FTZ
// variant is deliberately disabled.
373106c3fb27SDimitry Andricdef : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
373206c3fb27SDimitry Andric          (CVT_f32_bf16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ, hasPTX<71>, hasSM<80>]>;
373306c3fb27SDimitry Andricdef : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
3734*0fca6ea1SDimitry Andric          (CVT_f32_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<71>, hasSM<80>]>;
37350b57cec5SDimitry Andric
37360b57cec5SDimitry Andric// fpextend f16 -> f64
373706c3fb27SDimitry Andricdef : Pat<(f64 (fpextend (f16 Int16Regs:$a))),
373806c3fb27SDimitry Andric          (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
373906c3fb27SDimitry Andric
374006c3fb27SDimitry Andric// fpextend bf16 -> f64
// Gated on PTX/SM versions via Requires<>, unlike the f16 -> f64 pattern.
374106c3fb27SDimitry Andricdef : Pat<(f64 (fpextend (bf16 Int16Regs:$a))),
3742*0fca6ea1SDimitry Andric          (CVT_f64_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<78>, hasSM<90>]>;
37430b57cec5SDimitry Andric
37440b57cec5SDimitry Andric// fpextend f32 -> f64
// The FTZ variant (guarded by doF32FTZ) is listed before the unguarded one.
37450b57cec5SDimitry Andricdef : Pat<(f64 (fpextend Float32Regs:$a)),
37460b57cec5SDimitry Andric          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
37470b57cec5SDimitry Andricdef : Pat<(f64 (fpextend Float32Regs:$a)),
37480b57cec5SDimitry Andric          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
37490b57cec5SDimitry Andric
// SelectionDAG node for NVPTXISD::RET_GLUE. It uses the SDTNone type profile,
// carries a chain, and accepts an optional incoming glue. The Return
// instruction in the control-flow section uses it as its selection pattern.
375006c3fb27SDimitry Andricdef retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
37510b57cec5SDimitry Andric                     [SDNPHasChain, SDNPOptInGlue]>;
37520b57cec5SDimitry Andric
3753bdd1243dSDimitry Andric// fceil, ffloor, froundeven, ftrunc.
//
// CVT_ROUND emits the patterns that map one rounding node onto a same-type
// CVT instruction for f16, bf16, f32 and f64:
//   OpNode  - the SelectionDAG node to match.
//   Mode    - conversion mode used for f16, bf16, f64, and f32 when
//             doNoF32FTZ holds.
//   ModeFTZ - conversion mode used for f32 when doF32FTZ holds.
// The instantiations after the multiclass also cover fnearbyint and frint.
37540b57cec5SDimitry Andric
3755bdd1243dSDimitry Andricmulticlass CVT_ROUND<SDNode OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  // f16 and bf16 operands live in Int16Regs; neither pattern has a Requires<>.
375606c3fb27SDimitry Andric  def : Pat<(OpNode (f16 Int16Regs:$a)),
375706c3fb27SDimitry Andric            (CVT_f16_f16 Int16Regs:$a, Mode)>;
375806c3fb27SDimitry Andric  def : Pat<(OpNode (bf16 Int16Regs:$a)),
375906c3fb27SDimitry Andric            (CVT_bf16_bf16 Int16Regs:$a, Mode)>;
  // f32: the two patterns are guarded by the complementary predicates
  // doF32FTZ and doNoF32FTZ.
3760bdd1243dSDimitry Andric  def : Pat<(OpNode Float32Regs:$a),
3761bdd1243dSDimitry Andric            (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>;
3762bdd1243dSDimitry Andric  def : Pat<(OpNode Float32Regs:$a),
3763bdd1243dSDimitry Andric            (CVT_f32_f32 Float32Regs:$a, Mode)>, Requires<[doNoF32FTZ]>;
3764bdd1243dSDimitry Andric  def : Pat<(OpNode Float64Regs:$a),
3765bdd1243dSDimitry Andric            (CVT_f64_f64 Float64Regs:$a, Mode)>;
3766bdd1243dSDimitry Andric}
37670b57cec5SDimitry Andric
// Instantiate CVT_ROUND once per rounding node, pairing each node with a
// conversion mode and the corresponding _FTZ mode.
3768bdd1243dSDimitry Andricdefm : CVT_ROUND<fceil, CvtRPI, CvtRPI_FTZ>;
3769bdd1243dSDimitry Andricdefm : CVT_ROUND<ffloor, CvtRMI, CvtRMI_FTZ>;
3770bdd1243dSDimitry Andricdefm : CVT_ROUND<froundeven, CvtRNI, CvtRNI_FTZ>;
3771bdd1243dSDimitry Andricdefm : CVT_ROUND<ftrunc, CvtRZI, CvtRZI_FTZ>;
37720b57cec5SDimitry Andric
37730b57cec5SDimitry Andric// nearbyint and rint are implemented as rounding to nearest even.  This isn't
37740b57cec5SDimitry Andric// strictly correct, because it causes us to ignore the rounding mode.  But it
37750b57cec5SDimitry Andric// matches what CUDA's "libm" does.
// Both therefore use the same modes as froundeven (CvtRNI / CvtRNI_FTZ).
37760b57cec5SDimitry Andric
3777bdd1243dSDimitry Andricdefm : CVT_ROUND<fnearbyint, CvtRNI, CvtRNI_FTZ>;
3778bdd1243dSDimitry Andricdefm : CVT_ROUND<frint, CvtRNI, CvtRNI_FTZ>;
37790b57cec5SDimitry Andric
37800b57cec5SDimitry Andric//-----------------------------------
37810b57cec5SDimitry Andric// Control-flow
37820b57cec5SDimitry Andric//-----------------------------------
37830b57cec5SDimitry Andric
// All four instructions are terminators.
37840b57cec5SDimitry Andriclet isTerminator=1 in {
   // "ret;" - selected for the retglue node; marked as return and barrier.
37850b57cec5SDimitry Andric   let isReturn=1, isBarrier=1 in
378606c3fb27SDimitry Andric      def Return : NVPTXInst<(outs), (ins), "ret;", [(retglue)]>;
37870b57cec5SDimitry Andric
   // Branch taken when predicate $a is set; selected for brcond on Int1Regs.
37880b57cec5SDimitry Andric   let isBranch=1 in
37890b57cec5SDimitry Andric      def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
37900b57cec5SDimitry Andric                              "@$a bra \t$target;",
37910b57cec5SDimitry Andric                              [(brcond Int1Regs:$a, bb:$target)]>;
   // Same branch with the predicate negated ("@!$a"). It has no pattern of
   // its own; a Pat<> record later in this file selects it.
37920b57cec5SDimitry Andric   let isBranch=1 in
37930b57cec5SDimitry Andric      def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
37940b57cec5SDimitry Andric                                   "@!$a bra \t$target;", []>;
37950b57cec5SDimitry Andric
   // Unconditional branch, printed as "bra.uni"; selected for the br node.
37960b57cec5SDimitry Andric   let isBranch=1, isBarrier=1 in
37970b57cec5SDimitry Andric      def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
37980b57cec5SDimitry Andric                           "bra.uni \t$target;", [(br bb:$target)]>;
37990b57cec5SDimitry Andric}
38000b57cec5SDimitry Andric
// brcond on an i32 condition: materialize a predicate with SETP_u32ri
// ($a != 0, via CmpNE) and branch on it with CBranch.
38015f757f3fSDimitry Andricdef : Pat<(brcond (i32 Int32Regs:$a), bb:$target),
38020b57cec5SDimitry Andric          (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
38030b57cec5SDimitry Andric
38040b57cec5SDimitry Andric// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
38050b57cec5SDimitry Andric// conditional branch if the target block is the next block so that the code
38060b57cec5SDimitry Andric// can fall through to the target block.  The inversion is done by 'xor
38070b57cec5SDimitry Andric// condition, 1', which will be translated to (setne condition, -1).  Since ptx
38080b57cec5SDimitry Andric// supports '@!pred bra target', we should use it.
38090b57cec5SDimitry Andricdef : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
38100b57cec5SDimitry Andric          (CBranchOther Int1Regs:$a, bb:$target)>;
38110b57cec5SDimitry Andric
38120b57cec5SDimitry Andric// Call
// Type profiles for the call-sequence markers: both operands are i32.
38130b57cec5SDimitry Andricdef SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
38140b57cec5SDimitry Andric                                            SDTCisVT<1, i32>]>;
38150b57cec5SDimitry Andricdef SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
38160b57cec5SDimitry Andric
// Target-specific bindings of the generic ISD::CALLSEQ_START / CALLSEQ_END
// nodes. Both are chained, produce glue, and are marked as having side
// effects; callseq_end additionally accepts optional incoming glue.
38170b57cec5SDimitry Andricdef callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
38180b57cec5SDimitry Andric                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
38190b57cec5SDimitry Andricdef callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
38200b57cec5SDimitry Andric                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
38210b57cec5SDimitry Andric                            SDNPSideEffect]>;
38220b57cec5SDimitry Andric
// NVPTXISD::CALL: no results and a single i32 operand (the callee).
38230b57cec5SDimitry Andricdef SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
38240b57cec5SDimitry Andricdef call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
38250b57cec5SDimitry Andric                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
38260b57cec5SDimitry Andricdef calltarget : Operand<i32>;
// CALL carries no pattern of its own; the two Pat<> records below select it
// for global-address and external-symbol callees. The "(1)" in the asm string
// is a literal part of the printed text.
38270b57cec5SDimitry Andriclet isCall=1 in {
38280b57cec5SDimitry Andric   def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
38290b57cec5SDimitry Andric}
38300b57cec5SDimitry Andric
38310b57cec5SDimitry Andricdef : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
38320b57cec5SDimitry Andricdef : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
38330b57cec5SDimitry Andric
38340b57cec5SDimitry Andric// Pseudo instructions.
// Pseudo forwards its four arguments unchanged to NVPTXInst and adds no
// fields of its own here. The Callseq_Start / Callseq_End definitions that
// follow in this file derive from NVPTXInst directly rather than from Pseudo.
38350b57cec5SDimitry Andricclass Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
38360b57cec5SDimitry Andric   : NVPTXInst<outs, ins, asmstr, pattern>;
38370b57cec5SDimitry Andric
// Instructions selected for the callseq_start / callseq_end nodes. Each asm
// string is an escaped brace followed by a "// callseq" comment: Callseq_Start
// prints the opening brace with both amounts, Callseq_End prints the closing
// brace with $amt1 only (its $amt2 operand is matched but not printed).
38380b57cec5SDimitry Andricdef Callseq_Start :
38390b57cec5SDimitry Andric  NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
3840*0fca6ea1SDimitry Andric            "\\{ // callseq $amt1, $amt2",
38410b57cec5SDimitry Andric            [(callseq_start timm:$amt1, timm:$amt2)]>;
38420b57cec5SDimitry Andricdef Callseq_End :
38430b57cec5SDimitry Andric  NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
38440b57cec5SDimitry Andric            "\\} // callseq $amt1",
38450b57cec5SDimitry Andric            [(callseq_end timm:$amt1, timm:$amt2)]>;
38460b57cec5SDimitry Andric
38470b57cec5SDimitry Andric// trap instruction
38485f757f3fSDimitry Andric// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
38495f757f3fSDimitry Andric// This won't be necessary in a future version of ptxas.
// Selected for the `trap` node; both PTX statements are printed from the one
// asm string. The instruction has no operands.
38505f757f3fSDimitry Andricdef trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
38510b57cec5SDimitry Andric
38520b57cec5SDimitry Andric// Call prototype wrapper
// NVPTXISD::CallPrototype has no results and one integer operand. It is
// chained, consumes and produces glue, and is marked as having side effects.
38530b57cec5SDimitry Andricdef SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
38540b57cec5SDimitry Andricdef CallPrototype :
38550b57cec5SDimitry Andric  SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
38560b57cec5SDimitry Andric         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// i32 operand printed through the custom "printProtoIdent" print method.
38570b57cec5SDimitry Andricdef ProtoIdent : Operand<i32> {
38580b57cec5SDimitry Andric  let PrintMethod = "printProtoIdent";
38590b57cec5SDimitry Andric}
// The asm string is just "$ident", so the printed text is whatever
// printProtoIdent produces for the external-symbol operand.
38600b57cec5SDimitry Andricdef CALL_PROTOTYPE :
38610b57cec5SDimitry Andric  NVPTXInst<(outs), (ins ProtoIdent:$ident),
38620b57cec5SDimitry Andric            "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
38630b57cec5SDimitry Andric
// Type profile for dynamic stack allocation: one result and two operands.
// The result has the same type as operand 1, and operands 1 and 2 are both
// integers. The instruction patterns in this file bind operand 1 to $size and
// operand 2 to $align.
3864*0fca6ea1SDimitry Andricdef SDTDynAllocaOp :
3865*0fca6ea1SDimitry Andric  SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisInt<2>]>;
3866*0fca6ea1SDimitry Andric
// NVPTXISD::DYNAMIC_STACKALLOC node; chained and marked as having side effects.
3867*0fca6ea1SDimitry Andricdef dyn_alloca :
3868*0fca6ea1SDimitry Andric  SDNode<"NVPTXISD::DYNAMIC_STACKALLOC", SDTDynAllocaOp,
3869*0fca6ea1SDimitry Andric         [SDNPHasChain, SDNPSideEffect]>;
3870*0fca6ea1SDimitry Andric
// Dynamic stack allocation, 32-bit and 64-bit pointer variants. Each prints
// two PTX instructions from one asm string: an `alloca` that writes $ptr,
// then a `cvta.local` that rewrites $ptr in place. The size is a register of
// the pointer width; the alignment is an i32 target immediate in both
// variants. Both require hasPTX<73> and hasSM<52>.
3871*0fca6ea1SDimitry Andricdef DYNAMIC_STACKALLOC32 :
3872*0fca6ea1SDimitry Andric  NVPTXInst<(outs Int32Regs:$ptr),
3873*0fca6ea1SDimitry Andric            (ins Int32Regs:$size, i32imm:$align),
3874*0fca6ea1SDimitry Andric            "alloca.u32 \t$ptr, $size, $align;\n\t"
3875*0fca6ea1SDimitry Andric            "cvta.local.u32 \t$ptr, $ptr;",
3876*0fca6ea1SDimitry Andric            [(set (i32 Int32Regs:$ptr), (dyn_alloca Int32Regs:$size, (i32 timm:$align)))]>,
3877*0fca6ea1SDimitry Andric            Requires<[hasPTX<73>, hasSM<52>]>;
3878*0fca6ea1SDimitry Andric
3879*0fca6ea1SDimitry Andricdef DYNAMIC_STACKALLOC64 :
3880*0fca6ea1SDimitry Andric  NVPTXInst<(outs Int64Regs:$ptr),
3881*0fca6ea1SDimitry Andric            (ins Int64Regs:$size, i32imm:$align),
3882*0fca6ea1SDimitry Andric            "alloca.u64 \t$ptr, $size, $align;\n\t"
3883*0fca6ea1SDimitry Andric            "cvta.local.u64 \t$ptr, $ptr;",
3884*0fca6ea1SDimitry Andric            [(set Int64Regs:$ptr, (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>,
3885*0fca6ea1SDimitry Andric            Requires<[hasPTX<73>, hasSM<52>]>;
38860b57cec5SDimitry Andric
38870b57cec5SDimitry Andricinclude "NVPTXIntrinsics.td"
38880b57cec5SDimitry Andric
38890b57cec5SDimitry Andric//-----------------------------------
38900b57cec5SDimitry Andric// Notes
38910b57cec5SDimitry Andric//-----------------------------------
38920b57cec5SDimitry Andric// BSWAP was expanded when this note was written; the patterns below now use prmt.
38930b57cec5SDimitry Andric// The following is more efficient than expansion:
38940b57cec5SDimitry Andric// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
38950b57cec5SDimitry Andric// - for sm_20, use prmt (use vector scalar mov to pack and unpack). sm_20
38960b57cec5SDimitry Andric//   supports native 32-bit registers, but not native 16-bit registers.
3897*0fca6ea1SDimitry Andric
// bswap selection through INT_NVVM_PRMT. In every use the second data operand
// is the constant 0 and the third operand is the byte selector.
// NOTE(review): by the PTX prmt selector encoding, 0x0123 should reverse all
// four bytes of $a and 0x2301 should swap the two bytes inside each 16-bit
// half - confirm against the PTX ISA.
3898*0fca6ea1SDimitry Andricdef : Pat <
3899*0fca6ea1SDimitry Andric  (i32 (bswap i32:$a)),
3900*0fca6ea1SDimitry Andric  (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x0123))>;
3901*0fca6ea1SDimitry Andric
3902*0fca6ea1SDimitry Andricdef : Pat <
3903*0fca6ea1SDimitry Andric  (v2i16 (bswap v2i16:$a)),
3904*0fca6ea1SDimitry Andric  (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x2301))>;
3905*0fca6ea1SDimitry Andric
// i64: split into 32-bit halves, permute each half with the i32 selector, and
// repack with the halves exchanged. The permuted high half (I64toI32H) is the
// first V2I32toI64 operand and the permuted low half (I64toI32L) the second.
3906*0fca6ea1SDimitry Andricdef : Pat <
3907*0fca6ea1SDimitry Andric  (i64 (bswap i64:$a)),
3908*0fca6ea1SDimitry Andric  (V2I32toI64
3909*0fca6ea1SDimitry Andric    (INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
3910*0fca6ea1SDimitry Andric    (INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
3911