xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (revision 79085fd3b922771b329bc3d218b6d52daab8a9a4)
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
13
// Symbolic names for the target address-space numbers that the pattern
// fragments below refer to (via the AddrSpaces singleton).
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
}

def AddrSpaces : AddressSpacesImpl;
24
25
// Common base for all AMDGPU instruction definitions: establishes the
// namespace, operand lists, asm string, itinerary, and the TSFlags bits
// shared by every hardware codegen target.
class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  // Expose the register-load/store markers through the target-specific flags.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// Shader instruction base: carries a 32-bit encoding, defaulted to all ones.
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}
55
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

// Code-gen-only pseudo-instruction format (appends a newline to the asm
// string and marks the instruction side-effect free).
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}
77
def TruePredicate : Predicate<"true">;

// Mix-in that collects the different predicate categories and concatenates
// them into the final Predicates list TableGen consumes.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate,
                                            WaveSizePredicate],
                                            AssemblerPredicates,
                                            OtherPredicates);
}

// Pattern with predicate control attached.
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;

// Subtarget / target-machine feature predicates.
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def FMA : Predicate<"Subtarget->hasFMA()">;
103
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

// Assembler operand classes for 16-bit immediates.
def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

// Immediate operand definitions.
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;
142
//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

// Wrappers restricting a node to a single use, so that folding it into
// another instruction does not duplicate work.
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

// Single-use logical shift right by 16.
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

// High 16-bit half of a 32-bit value, truncated to i16.
def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;
204
205
// Matches the high 16-bit half of a 32-bit value viewed as f16,
// i.e. (bitcast (srl x, (i32 16))).
// NOTE: the original predicate was missing the closing ')' on the dyn_cast
// condition, which is ill-formed C++ if this PatLeaf is ever emitted into a
// generated matcher; fixed here.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
218
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each ordered FP condition also accepts the corresponding plain (integer
// style) condition code.
def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

// A condition leaf that never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;
301
//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

// The immediate is a texture-type code; the specific numeric values are the
// R600/AMDGPU texture-type encodings.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;
333
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// Carrier for the list of address spaces a memory fragment may match in.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Alignment predicates on the memory operand.
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() >= 16;
}]>;

class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;

class StoreFrag<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

// Store of the high 16 bits of a 32-bit value.
class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;

// Per-kind address-space lists. Loads from global may also come from the
// constant address space; flat loads may access flat, global, or constant.
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;

// C++ address-space predicates on the MemSDNode.
class GlobalLoadAddress : CodePatPred<[{
  auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

class FlatLoadAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::GLOBAL_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

class GlobalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

class PrivateAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class LocalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

class RegionAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

class FlatStoreAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// TODO: Remove these when stores to new PatFrag format.
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
420
421
// Instantiate the standard load/store/atomic fragments once per address
// space, e.g. load_global, zextloadi8_private, truncstorei16_flat, ...
// NOTE(review): the stores below also inherit the *Load* address list
// (LoadAddress_#as) set by this `let` — confirm this is intentional.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// Plain (non-extending) load.
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Any-extending loads from i8 / i16 memory.
def extloadi8_#as  : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

// Sign-extending loads from i8 / i16 memory.
def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

// Zero-extending loads from i8 / i16 memory.
def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

// 32- and 64-bit atomic loads.
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

// Plain (non-truncating) store.
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                    (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces = ...
} // End foreach AddrSpace
503
504
// High-half (hi16) and atomic store fragments for the private, global,
// local, and flat address spaces.
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;

def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;

def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;

// Alignment-qualified DS (local) accesses.
def load_align8_local : Aligned8Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def load_align16_local : Aligned16Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def store_align8_local : Aligned8Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def store_align16_local : Aligned16Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def atomic_store_flat  : FlatStore <atomic_store>;
def truncstorei8_hi16_flat  : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
535
536
// Binary atomic operation restricted to the local (LDS) address space.
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

// Binary atomic operation restricted to the region (GDS) address space.
class region_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

// Masked-OR store used by R600; global address space only.
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Compare-and-swap restricted to local / region address spaces.
class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
582
// Binary atomic operation restricted to the global address space.
class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
    (ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value),
    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;

// Expands to three variants: the unrestricted form, a _noret form whose
// result is unused, and a _ret form whose result is used.
multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : global_binary_atomic_op_frag<atomic_op>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}

defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

// Legacy.
def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
632
633//===----------------------------------------------------------------------===//
634// Misc Pattern Fragments
635//===----------------------------------------------------------------------===//
636
637class Constants {
638int TWO_PI = 0x40c90fdb;
639int PI = 0x40490fdb;
640int TWO_PI_INV = 0x3e22f983;
641int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
642int FP16_ONE = 0x3C00;
643int FP16_NEG_ONE = 0xBC00;
644int FP32_ONE = 0x3f800000;
645int FP32_NEG_ONE = 0xbf800000;
646int FP64_ONE = 0x3ff0000000000000;
647int FP64_NEG_ONE = 0xbff0000000000000;
648}
649def CONST : Constants;
650
651def FP_ZERO : PatLeaf <
652  (fpimm),
653  [{return N->getValueAPF().isZero();}]
654>;
655
656def FP_ONE : PatLeaf <
657  (fpimm),
658  [{return N->isExactlyValue(1.0);}]
659>;
660
661def FP_HALF : PatLeaf <
662  (fpimm),
663  [{return N->isExactlyValue(0.5);}]
664>;
665
666/* Generic helper patterns for intrinsics */
667/* -------------------------------------- */
668
669class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
670  : AMDGPUPat <
671  (fpow f32:$src0, f32:$src1),
672  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
673>;
674
675/* Other helper patterns */
676/* --------------------- */
677
678/* Extract element pattern */
679class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
680                       SubRegIndex sub_reg>
681  : AMDGPUPat<
682  (sub_type (extractelt vec_type:$src, sub_idx)),
683  (EXTRACT_SUBREG $src, sub_reg)
684>;
685
686/* Insert element pattern */
687class Insert_Element <ValueType elem_type, ValueType vec_type,
688                      int sub_idx, SubRegIndex sub_reg>
689  : AMDGPUPat <
690  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
691  (INSERT_SUBREG $vec, $elem, sub_reg)
692>;
693
694// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
695// can handle COPY instructions.
696// bitconvert pattern
697class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
698  (dt (bitconvert (st rc:$src0))),
699  (dt rc:$src0)
700>;
701
702// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
703// can handle COPY instructions.
704class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
705  (vt (AMDGPUdwordaddr (vt rc:$addr))),
706  (vt rc:$addr)
707>;
708
// BFI_INT patterns

// Bitfield-insert (BFI) selection patterns, in 32- and 64-bit forms.
// 64-bit cases decompose into per-half BFI via REG_SEQUENCE.
multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : AMDGPUPat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : AMDGPUPat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // fcopysign: select the sign bit (mask 0x7fffffff keeps magnitude of src0).
  def : AMDGPUPat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : AMDGPUPat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}
781
// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
  def : AMDGPUPat <
    (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
    (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
  >;

  // 64-bit version, split into the two 32-bit halves.
  def : AMDGPUPat <
    (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
                    (i32 (EXTRACT_SUBREG $y, sub0))),
               (i32 (EXTRACT_SUBREG $z, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
                    (i32 (EXTRACT_SUBREG $y, sub1))),
               (i32 (EXTRACT_SUBREG $z, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
  >;
}

// Bitfield extract patterns

// Immediate that is a contiguous mask starting at bit 0 (e.g. 0x00ff).
def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

// Transform an immediate into its population count (mask -> field width).
def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;
815
// Unsigned/signed bitfield-extract selection patterns.
multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  // (x >> rshift) & zero-based-mask  ->  ubfe x, rshift, popcount(mask)
  def : AMDGPUPat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  // x & ((1 << y) - 1)
  def : AMDGPUPat <
    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & ~(-1 << y)
  def : AMDGPUPat <
    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & (-1 >> (bitwidth - y))
  def : AMDGPUPat <
    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x << (bitwidth - y) >> (bitwidth - y)
  def : AMDGPUPat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // Arithmetic-shift variant selects the signed extract.
  def : AMDGPUPat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (MOV (i32 0)), $width)
  >;
}
851
852// rotr pattern
853class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
854  (rotr i32:$src0, i32:$src1),
855  (BIT_ALIGN $src0, $src0, $src1)
856>;
857
858multiclass IntMed3Pat<Instruction med3Inst,
859                 SDPatternOperator min,
860                 SDPatternOperator max,
861                 SDPatternOperator min_oneuse,
862                 SDPatternOperator max_oneuse,
863                 ValueType vt = i32> {
864
865  // This matches 16 permutations of
866  // min(max(a, b), max(min(a, b), c))
867  def : AMDGPUPat <
868  (min (max_oneuse vt:$src0, vt:$src1),
869       (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
870  (med3Inst vt:$src0, vt:$src1, vt:$src2)
871>;
872
873  // This matches 16 permutations of
874  // max(min(x, y), min(max(x, y), z))
875  def : AMDGPUPat <
876  (max (min_oneuse vt:$src0, vt:$src1),
877       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
878  (med3Inst $src0, $src1, $src2)
879>;
880}
881
// Special conversion patterns

// Round-to-plus-infinity int conversion: floor(x + 0.5); only valid
// under no-NaNs FP math.
def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

// Floor int conversion; only valid under no-NaNs FP math.
def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// mad24 patterns: a*b+c on 24-bit operands. HasClamp appends a cleared
// clamp operand for targets whose mad takes one.
let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

// 1.0 / x  ->  rcp x
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// rcp(sqrt(x))  ->  rsq x
class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
919
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// Single-use variants of the above.
def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;
941