xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (revision 19261079b74319502c6ffa1249920079f0f69a72)
1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains instruction defs that are common to all hw codegen
10// targets.
11//
12//===----------------------------------------------------------------------===//
13
// Integer identifiers for the address spaces referenced by the memory
// pattern fragments in this file.
// NOTE(review): presumably these mirror the backend's C++ address-space
// numbering -- confirm before changing any value.
class AddressSpacesImpl {
  int Flat     = 0;
  int Global   = 1;
  int Region   = 2;
  int Local    = 3;
  int Constant = 4;
  int Private  = 5;
}

// Single record through which the values are read, e.g. AddrSpaces.Global.
def AddrSpaces : AddressSpacesImpl;
24
25
// Common base for AMDGPU instruction records.
//   outs / ins - output and input operand lists.
//   asm        - assembly string (defaults to empty).
//   pattern    - selection patterns (defaults to none).
class AMDGPUInst<dag outs, dag ins, string asm = "", list<dag> pattern = []>
    : Instruction {
  // Per-instruction flags; both are copied into TSFlags below.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // The disassembler can use SoftFail to let an instruction fail to match
  // without aborting the whole decode. It is mostly an ARM feature, but
  // TableGen refuses to build the decode table unless the field exists.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  // Export the flags in the two topmost TSFlags bits.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
49
// AMDGPUInst extended with a 32-bit Inst encoding field that defaults to
// all ones.
class AMDGPUShaderInst<dag outs, dag ins, string asm = "",
                       list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}
55
56//===---------------------------------------------------------------------===//
57// Return instruction
58//===---------------------------------------------------------------------===//
59
// Pseudo, codegen-only instruction format. The assembly string has a
// newline appended, and the instruction is marked as neither loading,
// storing, nor having side effects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";

  // Operands, pattern and assembly text.
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");

  let isPseudo = 1;
  let Itinerary = NullALU;

  // Extra flags declared by this format; both default to off.
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}
77
// Predicate with an empty condition string; used as the default value of the
// predicate fields in PredicateControl.
def TruePredicate : Predicate<"">;

// FIXME: TableGen should support this specially.
def FalsePredicate : Predicate<"false">;
82
// Add a predicate to a list without duplicating it: every entry of lst whose
// record name equals pred's is dropped, then pred is placed at the front.
// The result is available as the `ret` field.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(
      [pred],
      !filter(item, lst, !ne(!cast<string>(item), !cast<string>(pred))));
}
89
// Mix-in that assembles the final Predicates list from separately
// overridable pieces. The pieces are folded in through PredConcat in the
// order SubtargetPredicate, AssemblerPredicate, WaveSizePredicate; since
// PredConcat prepends and de-duplicates, the last one folded in ends up
// first and repeated predicates (e.g. several TruePredicate defaults)
// collapse to a single entry.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates =
      PredConcat<
          PredConcat<
              PredConcat<OtherPredicates, SubtargetPredicate>.ret,
              AssemblerPredicate>.ret,
          WaveSizePredicate>.ret;
}
101
// A Pat that also inherits the predicate fields of PredicateControl.
class AMDGPUPat<dag pattern, dag result>
    : Pat<pattern, result>, PredicateControl;
104
// Floating-point mode predicates. All of them are marked
// RecomputePerFunction; their conditions read either the current
// MachineFunction's mode (via MF) or TM.Options.
// Note: the FP16 and FP64 variants issue the same allFP64FP16Denormals()
// query.
let RecomputePerFunction = 1 in {

// Denormals enabled.
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;

// Negations of the three predicates above.
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;

def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

} // End RecomputePerFunction = 1
114
// True when the subtarget reports hasFMA().
def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand whose default value is the constant 0.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;
118
// Assembly-parser operand classes for the 16-bit immediates; both render
// through addImmOperands.
def u16ImmTarget : AsmOperandClass {
  let Name         = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name         = "S16Imm";
  let RenderMethod = "addImmOperands";
}
128
// Immediate operand definitions. Every def inside the group receives
// OperandType = "OPERAND_IMMEDIATE".
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod      = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

// Shares u16imm's print method; only the parser match class differs.
def s16imm : Operand<i16> {
  let PrintMethod      = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"
150
151//===--------------------------------------------------------------------===//
152// Custom Operands
153//===--------------------------------------------------------------------===//
// Operand of type OtherVT.
// NOTE(review): presumably a branch destination, going by the name -- confirm.
def brtarget : Operand<OtherVT>;
155
156//===----------------------------------------------------------------------===//
157// Misc. PatFrags
158//===----------------------------------------------------------------------===//
159
// Wraps a unary operator so that it only matches when the node has exactly
// one use. For GlobalISel the equivalent test is that the register defined
// by operand 0 has a single non-debug use.
class HasOneUseUnaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0),
              (op $src0),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
169
// Wraps a binary operator so that it only matches when the node has exactly
// one use. For GlobalISel the equivalent test is that the register defined
// by operand 0 has a single non-debug use.
class HasOneUseBinOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
178
// Wraps a ternary operator so that it only matches when the node has exactly
// one use. For GlobalISel the equivalent test is that the register defined
// by operand 0 has a single non-debug use.
class HasOneUseTernaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op $src0, $src1, $src2),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
187
// Single-use wrappers for operators tagged as commutative and associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {

// Integer min/max.
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

// Floating-point min/max, plain and IEEE flavours.
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

// Bitwise logic.
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse  : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;

} // Properties = [SDNPCommutative, SDNPAssociative]
205
// Single-use wrappers defined without any extra Properties.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

// 24-bit multiply nodes.
def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
218
// Single-use logical shift right by the constant 16.
def srl_16 : PatFrag<(ops node:$src0),
                     (srl_oneuse node:$src0, (i32 16))>;

// i16 obtained by truncating the i32 result of srl_16, i.e. the upper
// 16 bits of a 32-bit value.
def hi_i16_elt : PatFrag<(ops node:$src0),
                         (i16 (trunc (i32 (srl_16 node:$src0))))>;
227
228
// Matches a BITCAST whose operand is an SRL by the constant 16; any other
// node shape, or a non-constant shift amount, is rejected.
//
// The embedded predicate previously had an unbalanced parenthesis on the
// dyn_cast line, which would not compile once the leaf was used by a pattern.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  // Only a constant shift amount of exactly 16 qualifies.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
241
242//===----------------------------------------------------------------------===//
243// PatLeafs for floating-point comparisons
244//===----------------------------------------------------------------------===//
245
// Each ordered comparison also accepts the condition code that carries no
// ordered/unordered marker (e.g. SETOEQ or SETEQ).
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;

// Pure ordered / unordered tests.
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;
254
255//===----------------------------------------------------------------------===//
256// PatLeafs for unsigned / unordered comparisons
257//===----------------------------------------------------------------------===//
258
// One fragment per SETU* condition code.
def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;
269
270//===----------------------------------------------------------------------===//
271// PatLeafs for signed comparisons
272//===----------------------------------------------------------------------===//
273
// Signed comparisons map onto the unprefixed SETGT/SETGE/SETLT/SETLE codes.
def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;
278
279//===----------------------------------------------------------------------===//
280// PatLeafs for integer equality
281//===----------------------------------------------------------------------===//
282
// Integer (in)equality accepts both the plain and the SETU* condition code.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// Condition leaf whose predicate always returns false, so it never matches.
// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf<(cond),
                        [{(void)N; return false;}]>;
292
293//===----------------------------------------------------------------------===//
294// PatLeafs for Texture Constants
295//===----------------------------------------------------------------------===//
296
// Immediate leaves that classify a texture-type constant. Each predicate
// narrows the immediate to 32 bits and compares it against fixed type ids.
// NOTE(review): the meaning of the individual ids is not visible in this
// file -- confirm against the code that produces them.

// Type ids 9, 10 and 16.
def TEX_ARRAY : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]>;

// Type id 5.
def TEX_RECT : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]>;

// Type ids 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]>;

// Type ids 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]>;
324
325//===----------------------------------------------------------------------===//
326// Load/Store Pattern Fragments
327//===----------------------------------------------------------------------===//
328
// ISD::ATOMIC_CMP_SWAP node that additionally takes an input glue operand
// (SDNPInGlue).
def atomic_cmp_swap_glue
    : SDNode<"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
             [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand,
              SDNPInGlue]>;
332
// Holds a list of address-space numbers in its AddrSpaces field.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in that sets MinAlignment to the given number of bytes.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}
340
// Store fragment that applies `op` to the value shifted right by 16,
// i.e. stores the upper half of a 32-bit value.
class StoreHi16<SDPatternOperator op>
    : PatFrag<(ops node:$value, node:$ptr),
              (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
}
345
// Address-space lists used by the load/store fragments, looked up by name as
// "LoadAddress_<as>" / "StoreAddress_<as>". There is no StoreAddress_constant.

// constant: loads only.
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;

// global: loads additionally accept the constant address space.
def LoadAddress_global  : AddressSpaceList<[ AddrSpaces.Global,
                                             AddrSpaces.Constant ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

// flat: accepts flat and global; loads additionally accept constant.
def LoadAddress_flat  : AddressSpaceList<[ AddrSpaces.Flat,
                                           AddrSpaces.Global,
                                           AddrSpaces.Constant ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                           AddrSpaces.Global ]>;

// private, local and region: exactly one address space each.
def LoadAddress_private  : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local  : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region  : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
363
364
365
// Instantiate the load fragments once per address space. The AddressSpaces
// list of every fragment comes from the matching LoadAddress_<as> record.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// Plain (non-extending) unindexed load.
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Extending loads, distinguished by extension kind and memory type.
let IsLoad = 1 in {

def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let MemoryVT = i16;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let MemoryVT = i16;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let MemoryVT = i16;
}

} // End let IsLoad = 1

// Atomic loads of 32 and 64 bits.
let IsAtomic = 1 in {

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let MemoryVT = i64;
}

} // End let IsAtomic = 1

} // End let AddressSpaces
} // End foreach as
415
416
// Instantiate the store fragments once per address space ("constant" is not
// in the list). The AddressSpaces list of every fragment comes from the
// matching StoreAddress_<as> record.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {

// Non-truncating unindexed store.
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
let IsStore = 1 in {

def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let MemoryVT = i16;
}

} // End let IsStore = 1

// Stores of the upper 16 bits of the value (see StoreHi16).
def store_hi16_#as         : StoreHi16<truncstorei16>;
def truncstorei8_hi16_#as  : StoreHi16<truncstorei8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces
} // End foreach as
456
457
// For every address space, expands a binary atomic into three groups of
// fragments:
//   _<as>        - no extra predicate,
//   _<as>_noret  - matches only when result value 0 has no uses,
//   _<as>_ret    - matches only when result value 0 has at least one use.
// The address-space lists are taken from the LoadAddress_<as> records.
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in
      defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in
      defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
    }
  }
}
473
// Integer read-modify-write atomics (IsInt left at its default of 1).
defm atomic_swap      : ret_noret_binary_atomic_op<atomic_swap>;
defm atomic_load_add  : ret_noret_binary_atomic_op<atomic_load_add>;
defm atomic_load_and  : ret_noret_binary_atomic_op<atomic_load_and>;
defm atomic_load_max  : ret_noret_binary_atomic_op<atomic_load_max>;
defm atomic_load_min  : ret_noret_binary_atomic_op<atomic_load_min>;
defm atomic_load_or   : ret_noret_binary_atomic_op<atomic_load_or>;
defm atomic_load_sub  : ret_noret_binary_atomic_op<atomic_load_sub>;
defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor  : ret_noret_binary_atomic_op<atomic_load_xor>;

// Floating-point add (IsInt = 0), plus a variant pinned to a v2f16 memory
// type.
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;

defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
488
// Local-address-space loads and stores with a minimum alignment of 8 or 16
// bytes (MinAlignment is supplied by the Aligned mix-in).
def load_align8_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align8_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
512
// Compare-and-swap fragments for the local and region address spaces. The
// _m0 variants are built on the glued node (atomic_cmp_swap_glue).
let AddressSpaces = StoreAddress_local.AddrSpaces in {
  defm atomic_cmp_swap_local    : ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
  defm atomic_cmp_swap_region    : ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
522
523//===----------------------------------------------------------------------===//
524// Misc Pattern Fragments
525//===----------------------------------------------------------------------===//
526
// Floating-point constants stored as their raw IEEE-754 bit patterns.
class Constants {
  // 32-bit patterns.
  int TWO_PI        = 0x40c90fdb; // 2*pi
  int PI            = 0x40490fdb; // pi
  int TWO_PI_INV    = 0x3e22f983; // 1/(2*pi)
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9

  // +1.0 and -1.0 at each width.
  int FP16_ONE     = 0x3C00;
  int FP16_NEG_ONE = 0xBC00;
  int FP32_ONE     = 0x3f800000;
  int FP32_NEG_ONE = 0xbf800000;
  int FP64_ONE     = 0x3ff0000000000000;
  int FP64_NEG_ONE = 0xbff0000000000000;
}

// Single record through which the values are read, e.g. CONST.FP32_ONE.
def CONST : Constants;
540
// Floating-point immediate leaves.

// Matches when the value's isZero() is true.
def FP_ZERO : PatLeaf<(fpimm),
                      [{return N->getValueAPF().isZero();}]>;

// Matches exactly 1.0.
def FP_ONE : PatLeaf<(fpimm),
                     [{return N->isExactlyValue(1.0);}]>;

// Matches exactly 0.5.
def FP_HALF : PatLeaf<(fpimm),
                      [{return N->isExactlyValue(0.5);}]>;
555
556/* Generic helper patterns for intrinsics */
557/* -------------------------------------- */
558
// Selects an f32 fpow as exp(src1 * log(src0)), using the supplied log, exp
// and multiply instructions.
class POW_Common<AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
    : AMDGPUPat<(fpow f32:$src0, f32:$src1),
                (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))>;
564
565/* Other helper patterns */
566/* --------------------- */
567
// Extract element pattern: reading element sub_idx of a vec_type vector
// becomes an EXTRACT_SUBREG of sub_reg.
class Extract_Element<ValueType sub_type, ValueType vec_type, int sub_idx,
                      SubRegIndex sub_reg>
    : AMDGPUPat<(sub_type (extractelt vec_type:$src, sub_idx)),
                (EXTRACT_SUBREG $src, sub_reg)>;

// Insert element pattern: writing element sub_idx of a vec_type vector
// becomes an INSERT_SUBREG into sub_reg.
class Insert_Element<ValueType elem_type, ValueType vec_type,
                     int sub_idx, SubRegIndex sub_reg>
    : AMDGPUPat<(insertelt vec_type:$vec, elem_type:$elem, sub_idx),
                (INSERT_SUBREG $vec, $elem, sub_reg)>;
583
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern: a bitconvert between st and dt in register class rc
// selects to the source register itself.
class BitConvert<ValueType dt, ValueType st, RegisterClass rc>
    : AMDGPUPat<(dt (bitconvert (st rc:$src0))),
                (dt rc:$src0)>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// AMDGPUdwordaddr of a register selects to the register itself.
class DwordAddrPat<ValueType vt, RegisterClass rc>
    : AMDGPUPat<(vt (AMDGPUdwordaddr (vt rc:$addr))),
                (vt rc:$addr)>;
598
// fshr pattern: a 32-bit funnel shift right maps directly onto BIT_ALIGN.
class FSHRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<(fshr i32:$src0, i32:$src1, i32:$src2),
                (BIT_ALIGN $src0, $src1, $src2)>;

// rotr pattern: a 32-bit rotate right is BIT_ALIGN with the rotated value
// supplied as both of the first two operands.
class ROTRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<(rotr i32:$src0, i32:$src1),
                (BIT_ALIGN $src0, $src0, $src1)>;
610
611// Special conversion patterns
612
// Both fragments match only when TM.Options.NoNaNsFPMath is set.

// fp_to_sint(floor(src + 0.5)).
def cvt_rpi_i32_f32 : PatFrag<(ops node:$src),
                              (fp_to_sint (ffloor (fadd $src, FP_HALF))),
                              [{ (void) N; return TM.Options.NoNaNsFPMath; }]>;

// fp_to_sint(floor(src)).
def cvt_flr_i32_f32 : PatFrag<(ops node:$src),
                              (fp_to_sint (ffloor $src)),
                              [{ (void)N; return TM.Options.NoNaNsFPMath; }]>;
624
// 24-bit multiply-add patterns: add(mul24(src0, src1), src2) selects to a
// single instruction. When HasClamp is set the instruction takes one extra
// trailing operand, which is passed as (i1 0).
let AddedComplexity = 2 in {

// Signed variant (AMDGPUmul_i24).
class IMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
                !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                              (Inst $src0, $src1, $src2))>;

// Unsigned variant (AMDGPUmul_u24).
class UMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<(add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
                !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                              (Inst $src0, $src1, $src2))>;

} // AddedComplexity.
638
// 1.0 / src selects to the reciprocal instruction.
class RcpPat<Instruction RcpInst, ValueType vt>
    : AMDGPUPat<(fdiv FP_ONE, vt:$src),
                (RcpInst $src)>;

// rcp(sqrt(src)) selects to the reciprocal-square-root instruction.
class RsqPat<Instruction RsqInst, ValueType vt>
    : AMDGPUPat<(AMDGPUrcp (fsqrt vt:$src)),
                (RsqInst $src)>;
648
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
                            [(fminnum_ieee node:$src0, node:$src1),
                             (fminnum node:$src0, node:$src1)]>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
                            [(fmaxnum_ieee node:$src0, node:$src1),
                             (fmaxnum node:$src0, node:$src1)]>;

// Single-use forms of the two fragments above.
def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
                                   [(fminnum_ieee_oneuse node:$src0, node:$src1),
                                    (fminnum_oneuse node:$src0, node:$src1)]>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
                                   [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
                                    (fmaxnum_oneuse node:$src0, node:$src1)]>;

// Either the generic fmad node or AMDGPUfmad_ftz.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
                        [(fmad node:$src0, node:$src1, node:$src2),
                         (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
                               [(fsqrt node:$src0),
                                (int_amdgcn_sqrt node:$src0)]>;
680